diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt
index 03756c208acdc5..2b324674c81f26 100644
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -14,8 +14,11 @@ if (NOT DEFINED CLANGD_BUILD_XPC)
   unset(CLANGD_BUILD_XPC_DEFAULT)
 endif ()
 
-llvm_canonicalize_cmake_booleans(CLANGD_BUILD_XPC)
-llvm_canonicalize_cmake_booleans(CLANGD_ENABLE_REMOTE)
+llvm_canonicalize_cmake_booleans(
+  CLANGD_BUILD_XPC
+  CLANGD_ENABLE_REMOTE
+  LLVM_ENABLE_ZLIB
+)
 
 configure_file(
   ${CMAKE_CURRENT_SOURCE_DIR}/Features.inc.in
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index f35a49b681ccc6..78ce77043b6f58 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -738,6 +738,8 @@ MMA_BUILTIN(pmxvbf16ger2pp, "vW512*VVi15i15i3", true)
 MMA_BUILTIN(pmxvbf16ger2pn, "vW512*VVi15i15i3", true)
 MMA_BUILTIN(pmxvbf16ger2np, "vW512*VVi15i15i3", true)
 MMA_BUILTIN(pmxvbf16ger2nn, "vW512*VVi15i15i3", true)
+MMA_BUILTIN(lxvp, "W256SLLiW256C*", false)
+MMA_BUILTIN(stxvp, "vW256SLLiW256C*", false)
 
 // FIXME: Obviously incomplete.
 
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 29c4f15e57b095..94491e45b55b74 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -1450,8 +1450,6 @@ class ASTReader
   void Error(StringRef Msg) const;
   void Error(unsigned DiagID, StringRef Arg1 = StringRef(),
              StringRef Arg2 = StringRef(), StringRef Arg3 = StringRef()) const;
-  void Error(unsigned DiagID, StringRef Arg1, StringRef Arg2,
-             unsigned Select) const;
   void Error(llvm::Error &&Err) const;
 
 public:
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0b8259221d8f53..0ea149e0cbde77 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14776,6 +14776,19 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
       break;
   #include "clang/Basic/BuiltinsPPC.def"
     }
+    if (BuiltinID == PPC::BI__builtin_mma_lxvp ||
+        BuiltinID == PPC::BI__builtin_mma_stxvp) {
+      if (BuiltinID == PPC::BI__builtin_mma_lxvp) {
+        Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
+        Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
+      } else {
+        Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
+        Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
+      }
+      Ops.pop_back();
+      llvm::Function *F = CGM.getIntrinsic(ID);
+      return Builder.CreateCall(F, Ops, "");
+    }
     SmallVector<Value*, 4> CallOps;
     if (Accumulate) {
       Address Addr = EmitPointerWithAlignment(E->getArg(0));
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 68ee29fd988bb9..0a19c0bc243d69 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -1593,7 +1593,8 @@ void Parser::ProhibitCXX11Attributes(ParsedAttributesWithRange &Attrs,
     if (!AL.isCXX11Attribute() && !AL.isC2xAttribute())
       continue;
     if (AL.getKind() == ParsedAttr::UnknownAttribute)
-      Diag(AL.getLoc(), diag::warn_unknown_attribute_ignored) << AL;
+      Diag(AL.getLoc(), diag::warn_unknown_attribute_ignored)
+          << AL << AL.getRange();
     else {
       Diag(AL.getLoc(), DiagID) << AL;
       AL.setInvalid();
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index a2df339151fb43..a2b7e8dbf57c27 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -2055,7 +2055,8 @@ bool Sema::CheckAttrNoArgs(const ParsedAttr &Attrs) {
 bool Sema::CheckAttrTarget(const ParsedAttr &AL) {
   // Check whether the attribute is valid on the current target.
   if (!AL.existsInTarget(Context.getTargetInfo())) {
-    Diag(AL.getLoc(), diag::warn_unknown_attribute_ignored) << AL;
+    Diag(AL.getLoc(), diag::warn_unknown_attribute_ignored)
+        << AL << AL.getRange();
     AL.setInvalid();
     return true;
   }
@@ -7362,7 +7363,7 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
            AL.isDeclspecAttribute()
                ? (unsigned)diag::warn_unhandled_ms_attribute_ignored
                : (unsigned)diag::warn_unknown_attribute_ignored)
-        << AL;
+        << AL << AL.getRange();
     return;
   }
 
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index c49e9cab6d63c8..c90a71626ea7be 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -2612,7 +2612,7 @@ Sema::ActOnBaseSpecifier(Decl *classdecl, SourceRange SpecifierRange,
     Diag(AL.getLoc(), AL.getKind() == ParsedAttr::UnknownAttribute
                           ? (unsigned)diag::warn_unknown_attribute_ignored
                           : (unsigned)diag::err_base_specifier_attribute)
-        << AL;
+        << AL << AL.getRange();
   }
 
   TypeSourceInfo *TInfo = nullptr;
diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp
index 214952e914ace0..58f4d0dc1944fb 100644
--- a/clang/lib/Sema/SemaStmtAttr.cpp
+++ b/clang/lib/Sema/SemaStmtAttr.cpp
@@ -409,7 +409,7 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A,
     S.Diag(A.getLoc(), A.isDeclspecAttribute()
                            ? (unsigned)diag::warn_unhandled_ms_attribute_ignored
                            : (unsigned)diag::warn_unknown_attribute_ignored)
-        << A;
+        << A << A.getRange();
     return nullptr;
   case ParsedAttr::AT_FallThrough:
     return handleFallThroughAttr(S, St, A, Range);
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 3203e1365ee1c9..7f18dc77762fd0 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -8091,7 +8091,7 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
       if (attr.isCXX11Attribute() && TAL == TAL_DeclChunk)
         state.getSema().Diag(attr.getLoc(),
                              diag::warn_unknown_attribute_ignored)
-            << attr;
+            << attr << attr.getRange();
       break;
 
     case ParsedAttr::IgnoredAttribute:
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index f3ecb1e51368bf..6e09fa464940a1 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1246,12 +1246,6 @@ void ASTReader::Error(unsigned DiagID, StringRef Arg1, StringRef Arg2,
     Diag(DiagID) << Arg1 << Arg2 << Arg3;
 }
 
-void ASTReader::Error(unsigned DiagID, StringRef Arg1, StringRef Arg2,
-                      unsigned Select) const {
-  if (!Diags.isDiagnosticInFlight())
-    Diag(DiagID) << Arg1 << Arg2 << Select;
-}
-
 void ASTReader::Error(llvm::Error &&Err) const {
   Error(toString(std::move(Err)));
 }
@@ -2395,7 +2389,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
   auto FileChange = HasInputFileChanged();
   // For an overridden file, there is nothing to validate.
   if (!Overridden && FileChange != ModificationType::None) {
-    if (Complain) {
+    if (Complain && !Diags.isDiagnosticInFlight()) {
       // Build a list of the PCH imports that got us here (in reverse).
       SmallVector<ModuleFile *, 4> ImportStack(1, &F);
       while (!ImportStack.back()->ImportedBy.empty())
@@ -2406,17 +2400,17 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
       unsigned DiagnosticKind =
           moduleKindForDiagnostic(ImportStack.back()->Kind);
       if (DiagnosticKind == 0)
-        Error(diag::err_fe_pch_file_modified, Filename, TopLevelPCHName,
-              (unsigned)FileChange);
+        Diag(diag::err_fe_pch_file_modified)
+            << Filename << TopLevelPCHName << FileChange;
       else if (DiagnosticKind == 1)
-        Error(diag::err_fe_module_file_modified, Filename, TopLevelPCHName,
-              (unsigned)FileChange);
+        Diag(diag::err_fe_module_file_modified)
+            << Filename << TopLevelPCHName << FileChange;
       else
-        Error(diag::err_fe_ast_file_modified, Filename, TopLevelPCHName,
-              (unsigned)FileChange);
+        Diag(diag::err_fe_ast_file_modified)
+            << Filename << TopLevelPCHName << FileChange;
 
       // Print the import stack.
-      if (ImportStack.size() > 1 && !Diags.isDiagnosticInFlight()) {
+      if (ImportStack.size() > 1) {
         Diag(diag::note_pch_required_by)
           << Filename << ImportStack[0]->FileName;
         for (unsigned I = 1; I < ImportStack.size(); ++I)
@@ -2424,8 +2418,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
             << ImportStack[I-1]->FileName << ImportStack[I]->FileName;
       }
 
-      if (!Diags.isDiagnosticInFlight())
-        Diag(diag::note_pch_rebuild_required) << TopLevelPCHName;
+      Diag(diag::note_pch_rebuild_required) << TopLevelPCHName;
     }
 
     IsOutOfDate = true;
diff --git a/clang/test/CodeGen/builtins-ppc-mma.c b/clang/test/CodeGen/builtins-ppc-mma.c
index 820f72653876b7..88ca36aa67141d 100644
--- a/clang/test/CodeGen/builtins-ppc-mma.c
+++ b/clang/test/CodeGen/builtins-ppc-mma.c
@@ -1036,3 +1036,162 @@ void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   __builtin_mma_pmxvbf16ger2nn(&vq, vc, vc, 0, 0, 0);
   *((__vector_quad *)resp) = vq;
 }
+
+// CHECK-LABEL: @test66(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT:    tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP1]], i8* [[TMP2]])
+// CHECK-NEXT:    ret void
+//
+void test66(const __vector_pair *vpp, const __vector_pair *vp2) {
+  __vector_pair vp = __builtin_mma_lxvp(0LL, vpp);
+  __builtin_mma_stxvp(vp, 0LL, vp2);
+}
+
+// CHECK-LABEL: @test67(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[OFFSET:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[OFFSET]]
+// CHECK-NEXT:    tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT:    ret void
+//
+void test67(const __vector_pair *vpp, signed long long offset, const __vector_pair *vp2) {
+  __vector_pair vp = __builtin_mma_lxvp(offset, vpp);
+  __builtin_mma_stxvp(vp, offset, vp2);
+}
+
+// CHECK-LABEL: @test68(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 18
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 18
+// CHECK-NEXT:    tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT:    ret void
+//
+void test68(const __vector_pair *vpp, const __vector_pair *vp2) {
+  __vector_pair vp = __builtin_mma_lxvp(18LL, vpp);
+  __builtin_mma_stxvp(vp, 18LL, vp2);
+}
+
+// CHECK-LABEL: @test69(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 1
+// CHECK-NEXT:    tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT:    ret void
+//
+void test69(const __vector_pair *vpp, const __vector_pair *vp2) {
+  __vector_pair vp = __builtin_mma_lxvp(1LL, vpp);
+  __builtin_mma_stxvp(vp, 1LL, vp2);
+}
+
+// CHECK-LABEL: @test70(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 42
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 42
+// CHECK-NEXT:    tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT:    ret void
+//
+void test70(const __vector_pair *vpp, const __vector_pair *vp2) {
+  __vector_pair vp = __builtin_mma_lxvp(42LL, vpp);
+  __builtin_mma_stxvp(vp, 42LL, vp2);
+}
+
+// CHECK-LABEL: @test71(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VPP:%.*]], i64 128
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <256 x i1>* [[TMP0]] to i8*
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VP2:%.*]], i64 128
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <256 x i1>* [[TMP3]] to i8*
+// CHECK-NEXT:    tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT:    ret void
+//
+void test71(const __vector_pair *vpp, const __vector_pair *vp2) {
+  __vector_pair vp = __builtin_mma_lxvp(32768LL, vpp);
+  __builtin_mma_stxvp(vp, 32768LL, vp2);
+}
+
+// CHECK-LABEL: @test72(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 32799
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 32799
+// CHECK-NEXT:    tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT:    ret void
+//
+void test72(const __vector_pair *vpp, const __vector_pair *vp2) {
+  __vector_pair vp = __builtin_mma_lxvp(32799LL, vpp);
+  __builtin_mma_stxvp(vp, 32799LL, vp2);
+}
+
+// CHECK-LABEL: @test73(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
+// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2:!tbaa !.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 8
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP3]])
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
+// CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
+// CHECK-NEXT:    store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = __builtin_mma_lxvp(8LL, vpp);
+  __builtin_mma_pmxvf64gernn(&vq, vp, vc, 0, 0);
+  *((__vector_quad *)resp) = vq;
+}
+
+// CHECK-LABEL: @test74(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
+// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
+// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = __builtin_mma_lxvp(0LL, vpp);
+  __builtin_mma_xvf64gernp(&vq, vp, vc);
+  *((__vector_quad *)resp) = vq;
+}
+
+// CHECK-LABEL: @test75(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
+// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2:!tbaa !.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[OFFS:%.*]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP3]])
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
+// CHECK-NEXT:    store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+void test75(unsigned char *vqp, signed long long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
+  __vector_quad vq = *((__vector_quad *)vqp);
+  __vector_pair vp = __builtin_mma_lxvp(offs, vpp);
+  __builtin_mma_xvf64gernp(&vq, vp, vc);
+  *((__vector_quad *)resp) = vq;
+}
diff --git a/clang/test/Sema/ppc-mma-types.c b/clang/test/Sema/ppc-mma-types.c
index 96644a4d9bbd68..840e34845f58b1 100644
--- a/clang/test/Sema/ppc-mma-types.c
+++ b/clang/test/Sema/ppc-mma-types.c
@@ -319,3 +319,17 @@ void testVPOperators4(int v, void *ptr) {
   __vector_pair vp2 = (__vector_pair)vpp; // expected-error {{used type '__vector_pair' where arithmetic or pointer type is required}}
 }
 
+void testBuiltinTypes1(const __vector_pair *vpp, const __vector_pair *vp2, float f) {
+  __vector_pair vp = __builtin_mma_lxvp(f, vpp); // expected-error {{passing 'float' to parameter of incompatible type 'long long'}}
+  __builtin_mma_stxvp(vp, 32799, vp2);           // expected-error {{passing 'int' to parameter of incompatible type 'long long'}}
+}
+
+void testBuiltinTypes2(__vector_pair *vpp, const __vector_pair *vp2, unsigned char c) {
+  __vector_pair vp = __builtin_mma_lxvp(6LL, vpp); // expected-error {{passing '__vector_pair *' to parameter of incompatible type 'const __vector_pair *'}}
+  __builtin_mma_stxvp(vp, c, vp2);                 // expected-error {{passing 'unsigned char' to parameter of incompatible type 'long long'}}
+}
+
+void testBuiltinTypes3(vector int v, __vector_pair *vp2, signed long long ll, unsigned short s) {
+  __vector_pair vp = __builtin_mma_lxvp(ll, v); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type 'const __vector_pair *'}}
+  __builtin_mma_stxvp(vp, ll, s);               // expected-error {{passing 'unsigned short' to parameter of incompatible type 'const __vector_pair *'}}
+}
diff --git a/libcxx/utils/ci/Dockerfile b/libcxx/utils/ci/Dockerfile
index 9085573086471e..d9cd5d9b206a05 100644
--- a/libcxx/utils/ci/Dockerfile
+++ b/libcxx/utils/ci/Dockerfile
@@ -45,7 +45,7 @@ ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update && apt-get install -y bash curl
 
 # Install various tools used by the build or the test suite
-RUN apt-get update && apt-get install -y ninja-build python3 python3-sphinx git
+RUN apt-get update && apt-get install -y ninja-build python3 python3-sphinx git gdb
 
 # Install the most recently released LLVM
 RUN apt-get update && apt-get install -y lsb-release wget software-properties-common
diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp
index f33178f29b3176..06a734b69a9706 100644
--- a/lld/wasm/InputChunks.cpp
+++ b/lld/wasm/InputChunks.cpp
@@ -97,6 +97,7 @@ void InputChunk::verifyRelocTargets() const {
       break;
     case R_WASM_TABLE_INDEX_I64:
     case R_WASM_MEMORY_ADDR_I64:
+    case R_WASM_FUNCTION_OFFSET_I64:
       existingValue = read64le(loc);
       break;
     default:
@@ -176,6 +177,7 @@ void InputChunk::writeTo(uint8_t *buf) const {
       break;
     case R_WASM_TABLE_INDEX_I64:
     case R_WASM_MEMORY_ADDR_I64:
+    case R_WASM_FUNCTION_OFFSET_I64:
       write64le(loc, value);
       break;
     default:
diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index 3cf177532d84c0..57db39dd76fd96 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -124,6 +124,7 @@ uint64_t ObjFile::calcNewAddend(const WasmRelocation &reloc) const {
   case R_WASM_MEMORY_ADDR_I32:
   case R_WASM_MEMORY_ADDR_I64:
   case R_WASM_FUNCTION_OFFSET_I32:
+  case R_WASM_FUNCTION_OFFSET_I64:
     return reloc.Addend;
   case R_WASM_SECTION_OFFSET_I32:
     return getSectionSymbol(reloc.Index)->section->outputOffset + reloc.Addend;
@@ -171,7 +172,8 @@ uint64_t ObjFile::calcExpectedValue(const WasmRelocation &reloc) const {
     else
       llvm_unreachable("unknown init expr opcode");
   }
-  case R_WASM_FUNCTION_OFFSET_I32: {
+  case R_WASM_FUNCTION_OFFSET_I32:
+  case R_WASM_FUNCTION_OFFSET_I64: {
     const WasmSymbol &sym = wasmObj->syms()[reloc.Index];
     InputFunction *f =
         functions[sym.Info.ElementIndex - wasmObj->getNumImportedFunctions()];
@@ -258,7 +260,8 @@ uint64_t ObjFile::calcNewValue(const WasmRelocation &reloc) const {
     return sym->getGOTIndex();
   case R_WASM_EVENT_INDEX_LEB:
     return getEventSymbol(reloc.Index)->getEventIndex();
-  case R_WASM_FUNCTION_OFFSET_I32: {
+  case R_WASM_FUNCTION_OFFSET_I32:
+  case R_WASM_FUNCTION_OFFSET_I64: {
     auto *f = cast<DefinedFunction>(sym);
     return f->function->outputOffset +
            (f->function->getFunctionCodeOffset() + reloc.Addend);
diff --git a/lldb/test/Shell/SymbolFile/PDB/udt-layout.test b/lldb/test/Shell/SymbolFile/PDB/udt-layout.test
index 726f633efe5b3c..0ee9dcf6771bd5 100644
--- a/lldb/test/Shell/SymbolFile/PDB/udt-layout.test
+++ b/lldb/test/Shell/SymbolFile/PDB/udt-layout.test
@@ -5,8 +5,8 @@ RUN: %lldb -b -s %S/Inputs/UdtLayoutTest.script -- %t.exe | FileCheck %s
 CHECK:(int) int C::abc = 123
 CHECK:(List [16]) ls = {
 CHECK:  [15] = {
-CHECK:    Prev = 0x00000000
-CHECK:    Next = 0x00000000
+CHECK:    Prev = nullptr
+CHECK:    Next = nullptr
 CHECK:    Value = {
 CHECK:      B<0> = {
 CHECK:        A = {
diff --git a/llvm/include/llvm/Analysis/Utils/Local.h b/llvm/include/llvm/Analysis/Utils/Local.h
index 9da0c6586dac81..bd82b34165d665 100644
--- a/llvm/include/llvm/Analysis/Utils/Local.h
+++ b/llvm/include/llvm/Analysis/Utils/Local.h
@@ -30,7 +30,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
                      bool NoAssumptions = false) {
   GEPOperator *GEPOp = cast<GEPOperator>(GEP);
   Type *IntIdxTy = DL.getIndexType(GEP->getType());
-  Value *Result = Constant::getNullValue(IntIdxTy);
+  Value *Result = nullptr;
 
   // If the GEP is inbounds, we know that none of the addressing operations will
   // overflow in a signed sense.
@@ -46,6 +46,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
        ++i, ++GTI) {
     Value *Op = *i;
     uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
+    Value *Offset;
     if (Constant *OpC = dyn_cast<Constant>(Op)) {
       if (OpC->isZeroValue())
         continue;
@@ -54,46 +55,47 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
       if (StructType *STy = GTI.getStructTypeOrNull()) {
         uint64_t OpValue = OpC->getUniqueInteger().getZExtValue();
         Size = DL.getStructLayout(STy)->getElementOffset(OpValue);
-
-        if (Size)
-          Result = Builder->CreateAdd(Result, ConstantInt::get(IntIdxTy, Size),
-                                      GEP->getName().str()+".offs");
-        continue;
+        if (!Size)
+          continue;
+
+        Offset = ConstantInt::get(IntIdxTy, Size);
+      } else {
+        // Splat the constant if needed.
+        if (IntIdxTy->isVectorTy() && !OpC->getType()->isVectorTy())
+          OpC = ConstantVector::getSplat(
+              cast<VectorType>(IntIdxTy)->getElementCount(), OpC);
+
+        Constant *Scale = ConstantInt::get(IntIdxTy, Size);
+        Constant *OC =
+            ConstantExpr::getIntegerCast(OpC, IntIdxTy, true /*SExt*/);
+        Offset =
+            ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/);
       }
-
-      // Splat the constant if needed.
-      if (IntIdxTy->isVectorTy() && !OpC->getType()->isVectorTy())
-        OpC = ConstantVector::getSplat(
-            cast<VectorType>(IntIdxTy)->getElementCount(), OpC);
-
-      Constant *Scale = ConstantInt::get(IntIdxTy, Size);
-      Constant *OC = ConstantExpr::getIntegerCast(OpC, IntIdxTy, true /*SExt*/);
-      Scale =
-          ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/);
-      // Emit an add instruction.
-      Result = Builder->CreateAdd(Result, Scale, GEP->getName().str()+".offs");
-      continue;
-    }
-
-    // Splat the index if needed.
-    if (IntIdxTy->isVectorTy() && !Op->getType()->isVectorTy())
-      Op = Builder->CreateVectorSplat(
-          cast<FixedVectorType>(IntIdxTy)->getNumElements(), Op);
-
-    // Convert to correct type.
-    if (Op->getType() != IntIdxTy)
-      Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName().str()+".c");
-    if (Size != 1) {
-      // We'll let instcombine(mul) convert this to a shl if possible.
-      Op = Builder->CreateMul(Op, ConstantInt::get(IntIdxTy, Size),
-                              GEP->getName().str() + ".idx", false /*NUW*/,
-                              isInBounds /*NSW*/);
+    } else {
+      // Splat the index if needed.
+      if (IntIdxTy->isVectorTy() && !Op->getType()->isVectorTy())
+        Op = Builder->CreateVectorSplat(
+            cast<FixedVectorType>(IntIdxTy)->getNumElements(), Op);
+
+      // Convert to correct type.
+      if (Op->getType() != IntIdxTy)
+        Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName().str()+".c");
+      if (Size != 1) {
+        // We'll let instcombine(mul) convert this to a shl if possible.
+        Op = Builder->CreateMul(Op, ConstantInt::get(IntIdxTy, Size),
+                                GEP->getName().str() + ".idx", false /*NUW*/,
+                                isInBounds /*NSW*/);
+      }
+      Offset = Op;
     }
 
-    // Emit an add instruction.
-    Result = Builder->CreateAdd(Op, Result, GEP->getName().str()+".offs");
+    if (Result)
+      Result = Builder->CreateAdd(Result, Offset, GEP->getName().str()+".offs",
+                                  false /*NUW*/, isInBounds /*NSW*/);
+    else
+      Result = Offset;
   }
-  return Result;
+  return Result ? Result : Constant::getNullValue(IntIdxTy);
 }
 
 }
diff --git a/llvm/include/llvm/BinaryFormat/WasmRelocs.def b/llvm/include/llvm/BinaryFormat/WasmRelocs.def
index 4eb12684eaa6bb..dca63eca945530 100644
--- a/llvm/include/llvm/BinaryFormat/WasmRelocs.def
+++ b/llvm/include/llvm/BinaryFormat/WasmRelocs.def
@@ -24,3 +24,4 @@ WASM_RELOC(R_WASM_TABLE_INDEX_SLEB64,     18)
 WASM_RELOC(R_WASM_TABLE_INDEX_I64,        19)
 WASM_RELOC(R_WASM_TABLE_NUMBER_LEB,       20)
 WASM_RELOC(R_WASM_MEMORY_ADDR_TLS_SLEB,   21)
+WASM_RELOC(R_WASM_FUNCTION_OFFSET_I64,    22)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index e337a835b4fab3..8f1cd1a1408a89 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -53,6 +53,24 @@ struct ConstantMatch {
 
 inline ConstantMatch m_ICst(int64_t &Cst) { return ConstantMatch(Cst); }
 
+/// Matcher for a specific constant value.
+struct SpecificConstantMatch {
+  int64_t RequestedVal;
+  SpecificConstantMatch(int64_t RequestedVal) : RequestedVal(RequestedVal) {}
+  bool match(const MachineRegisterInfo &MRI, Register Reg) {
+    int64_t MatchedVal;
+    return mi_match(Reg, MRI, m_ICst(MatchedVal)) && MatchedVal == RequestedVal;
+  }
+};
+
+/// Matches a constant equal to \p RequestedValue.
+inline SpecificConstantMatch m_SpecificICst(int64_t RequestedValue) {
+  return SpecificConstantMatch(RequestedValue);
+}
+
+/// Matches an integer 0.
+inline SpecificConstantMatch m_ZeroInt() { return SpecificConstantMatch(0); }
+
 // TODO: Rework this for different kinds of MachineOperand.
 // Currently assumes the Src for a match is a register.
 // We might want to support taking in some MachineOperands and call getReg on
@@ -425,6 +443,14 @@ m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2) {
                          TargetOpcode::G_INSERT_VECTOR_ELT>(Src0, Src1, Src2);
 }
 
+/// Matches a register negated by a G_SUB.
+/// G_SUB 0, %negated_reg
+template <typename SrcTy>
+inline BinaryOp_match<SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB>
+m_Neg(const SrcTy &&Src) {
+  return m_GSub(m_ZeroInt(), Src);
+}
+
 } // namespace GMIPatternMatch
 } // namespace llvm
 
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index ac994548c506c9..fa5000d4248284 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1422,6 +1422,14 @@ let TargetPrefix = "ppc" in {
   def int_ppc_mma_xxsetaccz :
         Intrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>;
 
+  def int_ppc_mma_lxvp :
+        Intrinsic<[llvm_v256i1_ty], [llvm_ptr_ty],
+                  [IntrReadMem, IntrArgMemOnly]>;
+
+  def int_ppc_mma_stxvp :
+        Intrinsic<[], [llvm_v256i1_ty, llvm_ptr_ty],
+                  [IntrWriteMem, IntrArgMemOnly]>;
+
   // MMA Reduced-Precision: Outer Product Intrinsic Definitions.
   defm int_ppc_mma_xvi4ger8 :
         PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 6f2b1f71faee4e..7a8f54bd0c6e9c 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3437,12 +3437,12 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
   // flow and the no-overflow bits may not be valid for the expression in any
   // context. This can be fixed similarly to how these flags are handled for
   // adds.
-  SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW
-                                             : SCEV::FlagAnyWrap;
+  SCEV::NoWrapFlags OffsetWrap =
+      GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
 
   Type *CurTy = GEP->getType();
   bool FirstIter = true;
-  SmallVector<const SCEV *, 4> AddOps{BaseExpr};
+  SmallVector<const SCEV *, 4> Offsets;
   for (const SCEV *IndexExpr : IndexExprs) {
     // Compute the (potentially symbolic) offset in bytes for this index.
     if (StructType *STy = dyn_cast<StructType>(CurTy)) {
@@ -3450,7 +3450,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
       ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
       unsigned FieldNo = Index->getZExtValue();
       const SCEV *FieldOffset = getOffsetOfExpr(IntIdxTy, STy, FieldNo);
-      AddOps.push_back(FieldOffset);
+      Offsets.push_back(FieldOffset);
 
       // Update CurTy to the type of the field at Index.
       CurTy = STy->getTypeAtIndex(Index);
@@ -3470,13 +3470,23 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
       IndexExpr = getTruncateOrSignExtend(IndexExpr, IntIdxTy);
 
       // Multiply the index by the element size to compute the element offset.
-      const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap);
-      AddOps.push_back(LocalOffset);
+      const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, OffsetWrap);
+      Offsets.push_back(LocalOffset);
     }
   }
 
-  // Add the base and all the offsets together.
-  return getAddExpr(AddOps, Wrap);
+  // Handle degenerate case of GEP without offsets.
+  if (Offsets.empty())
+    return BaseExpr;
+
+  // Add the offsets together, assuming nsw if inbounds.
+  const SCEV *Offset = getAddExpr(Offsets, OffsetWrap);
+  // Add the base address and the offset. We cannot use the nsw flag, as the
+  // base address is unsigned. However, if we know that the offset is
+  // non-negative, we can use nuw.
+  SCEV::NoWrapFlags BaseWrap = GEP->isInBounds() && isKnownNonNegative(Offset)
+                                   ? SCEV::FlagNUW : SCEV::FlagAnyWrap;
+  return getAddExpr(BaseExpr, Offset, BaseWrap);
 }
 
 std::tuple<SCEV *, FoldingSetNodeID, void *>
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 0a18718fd9b54d..e6d3727a239081 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1289,9 +1289,6 @@ static void computeKnownBitsFromOperator(const Operator *I,
     APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);
 
     gep_type_iterator GTI = gep_type_begin(I);
-    // If the inbounds keyword is not present, the offsets are added to the
-    // base address with silently-wrapping two’s complement arithmetic.
-    bool IsInBounds = cast<GEPOperator>(I)->isInBounds();
     for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
       // TrailZ can only become smaller, short-circuit if we hit zero.
       if (Known.isUnknown())
@@ -1356,17 +1353,17 @@ static void computeKnownBitsFromOperator(const Operator *I,
       // to the width of the pointer.
       IndexBits = IndexBits.sextOrTrunc(BitWidth);
 
+      // Note that inbounds does *not* guarantee nsw for the addition, as only
+      // the offset is signed, while the base address is unsigned.
       Known = KnownBits::computeForAddSub(
-          /*Add=*/true,
-          /*NSW=*/IsInBounds, Known, IndexBits);
+          /*Add=*/true, /*NSW=*/false, Known, IndexBits);
     }
     if (!Known.isUnknown() && !AccConstIndices.isNullValue()) {
       KnownBits Index(BitWidth);
       Index.Zero = ~AccConstIndices;
       Index.One = AccConstIndices;
       Known = KnownBits::computeForAddSub(
-          /*Add=*/true,
-          /*NSW=*/IsInBounds, Known, Index);
+          /*Add=*/true, /*NSW=*/false, Known, Index);
     }
     break;
   }
diff --git a/llvm/lib/BinaryFormat/Wasm.cpp b/llvm/lib/BinaryFormat/Wasm.cpp
index a22ab5890922f7..126680ac41c241 100644
--- a/llvm/lib/BinaryFormat/Wasm.cpp
+++ b/llvm/lib/BinaryFormat/Wasm.cpp
@@ -50,6 +50,7 @@ bool llvm::wasm::relocTypeHasAddend(uint32_t Type) {
   case R_WASM_MEMORY_ADDR_I64:
   case R_WASM_MEMORY_ADDR_TLS_SLEB:
   case R_WASM_FUNCTION_OFFSET_I32:
+  case R_WASM_FUNCTION_OFFSET_I64:
   case R_WASM_SECTION_OFFSET_I32:
     return true;
   default:
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 32bdba442b56ea..79f74a47d83c84 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2614,9 +2614,7 @@ bool CombinerHelper::matchSimplifyAddToSub(
   // ((0-A) + B) -> B - A
   // (A + (0-B)) -> A - B
   auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
-    int64_t Cst;
-    if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) ||
-        Cst != 0)
+    if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
       return false;
     NewLHS = MaybeNewLHS;
     return true;
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index a72f9f66560cbe..4d9580de3b7bb6 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -682,9 +682,7 @@ static bool isBuildVectorConstantSplat(const MachineInstr &MI,
   const unsigned NumOps = MI.getNumOperands();
   for (unsigned I = 1; I != NumOps; ++I) {
     Register Element = MI.getOperand(I).getReg();
-    int64_t ElementValue;
-    if (!mi_match(Element, MRI, m_ICst(ElementValue)) ||
-        ElementValue != SplatValue)
+    if (!mi_match(Element, MRI, m_SpecificICst(SplatValue)))
       return false;
   }
 
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index f69b96a295bfc7..6f9268fbb4ecc3 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -345,7 +345,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
     if (!MDO)
       return nullptr;
     auto V = cast<ValueAsMetadata>(MDO);
-    const Function *F = cast<Function>(V->getValue());
+    const Function *F = cast<Function>(V->getValue()->stripPointerCasts());
     return TM->getSymbol(F);
   };
 
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 907993c400d9c0..6c57369f8d8007 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -1578,8 +1578,8 @@ void Verifier::visitModuleFlagCGProfileEntry(const MDOperand &MDO) {
     if (!FuncMDO)
       return;
     auto F = dyn_cast<ValueAsMetadata>(FuncMDO);
-    Assert(F && isa<Function>(F->getValue()), "expected a Function or null",
-           FuncMDO);
+    Assert(F && isa<Function>(F->getValue()->stripPointerCasts()),
+           "expected a Function or null", FuncMDO);
   };
   auto Node = dyn_cast_or_null<MDNode>(MDO);
   Assert(Node && Node->getNumOperands() == 3, "expected a MDNode triple", MDO);
diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index 6bcec5f4c37c6b..0cf6e310ae3224 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -478,6 +478,7 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
   // Currently only supported for for metadata sections.
   // See: test/MC/WebAssembly/blockaddress.ll
   if (Type == wasm::R_WASM_FUNCTION_OFFSET_I32 ||
+      Type == wasm::R_WASM_FUNCTION_OFFSET_I64 ||
       Type == wasm::R_WASM_SECTION_OFFSET_I32) {
     if (!FixupSection.getKind().isMetadata())
       report_fatal_error("relocations for function or section offsets are "
@@ -564,6 +565,7 @@ WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry,
     assert(WasmIndices.count(RelEntry.Symbol) > 0 && "symbol not found in wasm index space");
     return WasmIndices[RelEntry.Symbol];
   case wasm::R_WASM_FUNCTION_OFFSET_I32:
+  case wasm::R_WASM_FUNCTION_OFFSET_I64:
   case wasm::R_WASM_SECTION_OFFSET_I32: {
     const auto &Section =
         static_cast<const MCSectionWasm &>(RelEntry.Symbol->getSection());
@@ -680,6 +682,7 @@ void WasmObjectWriter::applyRelocations(
       break;
     case wasm::R_WASM_TABLE_INDEX_I64:
     case wasm::R_WASM_MEMORY_ADDR_I64:
+    case wasm::R_WASM_FUNCTION_OFFSET_I64:
       patchI64(Stream, Value, Offset);
       break;
     case wasm::R_WASM_TABLE_INDEX_SLEB:
diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp
index d56e7be7043cde..f39262bc906102 100644
--- a/llvm/lib/Object/RelocationResolver.cpp
+++ b/llvm/lib/Object/RelocationResolver.cpp
@@ -566,6 +566,7 @@ static bool supportsWasm64(uint64_t Type) {
   case wasm::R_WASM_MEMORY_ADDR_I64:
   case wasm::R_WASM_TABLE_INDEX_SLEB64:
   case wasm::R_WASM_TABLE_INDEX_I64:
+  case wasm::R_WASM_FUNCTION_OFFSET_I64:
     return true;
   default:
     return supportsWasm32(Type);
@@ -601,6 +602,7 @@ static uint64_t resolveWasm64(RelocationRef R, uint64_t S, uint64_t A) {
   case wasm::R_WASM_MEMORY_ADDR_I64:
   case wasm::R_WASM_TABLE_INDEX_SLEB64:
   case wasm::R_WASM_TABLE_INDEX_I64:
+  case wasm::R_WASM_FUNCTION_OFFSET_I64:
     // For wasm section, its offset at 0 -- ignoring Value
     return A;
   default:
diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp
index 63f2742e9215ff..82aa1f527fb873 100644
--- a/llvm/lib/Object/WasmObjectFile.cpp
+++ b/llvm/lib/Object/WasmObjectFile.cpp
@@ -876,6 +876,12 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
                                               object_error::parse_failed);
       Reloc.Addend = readVarint32(Ctx);
       break;
+    case wasm::R_WASM_FUNCTION_OFFSET_I64:
+      if (!isValidFunctionSymbol(Reloc.Index))
+        return make_error<GenericBinaryError>("Bad relocation function index",
+                                              object_error::parse_failed);
+      Reloc.Addend = readVarint64(Ctx);
+      break;
     case wasm::R_WASM_SECTION_OFFSET_I32:
       if (!isValidSectionSymbol(Reloc.Index))
         return make_error<GenericBinaryError>("Bad relocation section index",
@@ -903,7 +909,8 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
         Reloc.Type == wasm::R_WASM_GLOBAL_INDEX_I32)
       Size = 4;
     if (Reloc.Type == wasm::R_WASM_TABLE_INDEX_I64 ||
-        Reloc.Type == wasm::R_WASM_MEMORY_ADDR_I64)
+        Reloc.Type == wasm::R_WASM_MEMORY_ADDR_I64 ||
+        Reloc.Type == wasm::R_WASM_FUNCTION_OFFSET_I64)
       Size = 8;
     if (Reloc.Offset + Size > EndOffset)
       return make_error<GenericBinaryError>("Bad relocation offset",
diff --git a/llvm/lib/ObjectYAML/WasmEmitter.cpp b/llvm/lib/ObjectYAML/WasmEmitter.cpp
index 81e24bfdaf8454..7b21d56be5030b 100644
--- a/llvm/lib/ObjectYAML/WasmEmitter.cpp
+++ b/llvm/lib/ObjectYAML/WasmEmitter.cpp
@@ -547,6 +547,7 @@ void WasmWriter::writeRelocSection(raw_ostream &OS, WasmYAML::Section &Sec,
     case wasm::R_WASM_MEMORY_ADDR_I32:
     case wasm::R_WASM_MEMORY_ADDR_I64:
     case wasm::R_WASM_FUNCTION_OFFSET_I32:
+    case wasm::R_WASM_FUNCTION_OFFSET_I64:
     case wasm::R_WASM_SECTION_OFFSET_I32:
       encodeULEB128(Reloc.Addend, OS);
     }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 218aa19d548eb6..1d5791399014c7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1010,12 +1010,29 @@ AArch64InstructionSelector::emitSelect(Register Dst, Register True,
 
   // By default, we'll try and emit a CSEL.
   unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
+  bool Optimized = false;
+  auto TryOptNegIntoSelect = [&Opc, &False, Is32Bit, &MRI]() {
+    // Attempt to fold:
+    //
+    // sub = G_SUB 0, x
+    // select = G_SELECT cc, true, sub
+    //
+    // Into:
+    // select = CSNEG true, x, cc
+    if (!mi_match(False, MRI, m_Neg(m_Reg(False))))
+      return false;
+    Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
+    return true;
+  };
 
   // Helper lambda which tries to use CSINC/CSINV for the instruction when its
   // true/false values are constants.
   // FIXME: All of these patterns already exist in tablegen. We should be
   // able to import these.
-  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI]() {
+  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
+                          &Optimized]() {
+    if (Optimized)
+      return false;
     auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
     auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
     if (!TrueCst && !FalseCst)
@@ -1083,25 +1100,13 @@ AArch64InstructionSelector::emitSelect(Register Dst, Register True,
     return false;
   };
 
-  TryOptSelectCst();
+  Optimized |= TryOptNegIntoSelect();
+  Optimized |= TryOptSelectCst();
   auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
   constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
   return &*SelectInst;
 }
 
-/// Returns true if \p P is an unsigned integer comparison predicate.
-static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
-  switch (P) {
-  default:
-    return false;
-  case CmpInst::ICMP_UGT:
-  case CmpInst::ICMP_UGE:
-  case CmpInst::ICMP_ULT:
-  case CmpInst::ICMP_ULE:
-    return true;
-  }
-}
-
 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
   switch (P) {
   default:
@@ -1837,10 +1842,7 @@ bool AArch64InstructionSelector::convertPtrAddToAdd(
   // Also take the opportunity here to try to do some optimization.
   // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
   Register NegatedReg;
-  int64_t Cst;
-  if (!mi_match(I.getOperand(2).getReg(), MRI,
-                m_GSub(m_ICst(Cst), m_Reg(NegatedReg))) ||
-      Cst != 0)
+  if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
     return true;
   I.getOperand(2).setReg(NegatedReg);
   I.setDesc(TII.get(TargetOpcode::G_SUB));
@@ -4382,7 +4384,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
   // Produce this if the compare is signed:
   //
   // tst x, y
-  if (!isUnsignedICMPPred(P) && LHSDef &&
+  if (!CmpInst::isUnsigned(P) && LHSDef &&
       LHSDef->getOpcode() == TargetOpcode::G_AND) {
     // Make sure that the RHS is 0.
     auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 645e85388490e0..43f28729baa1fc 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -291,8 +291,7 @@ static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
     return false;
 
   // Match the index constant 0.
-  int64_t Index = 0;
-  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
+  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
     return false;
 
   MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 638ec0e7ce0607..e5cbcb3ccdb76e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -633,7 +633,6 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
 
   Register ShiftSrc0;
   Register ShiftSrc1;
-  int64_t ShiftAmt;
 
   // With multiple uses of the shift, this will duplicate the shift and
   // increase register pressure.
@@ -645,14 +644,11 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
   // (build_vector_trunc $src0, $src1)
   //  => (S_PACK_LL_B32_B16 $src0, $src1)
 
-  // FIXME: This is an inconvenient way to check a specific value
   bool Shift0 = mi_match(
-    Src0, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc0), m_ICst(ShiftAmt)))) &&
-    ShiftAmt == 16;
+      Src0, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc0), m_SpecificICst(16))));
 
   bool Shift1 = mi_match(
-    Src1, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc1), m_ICst(ShiftAmt)))) &&
-    ShiftAmt == 16;
+      Src1, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc1), m_SpecificICst(16))));
 
   unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
   if (Shift0 && Shift1) {
@@ -3474,9 +3470,7 @@ static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) {
   if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
     return false;
 
-  int64_t MergeRHS;
-  if (mi_match(Def->getOperand(2).getReg(), MRI, m_ICst(MergeRHS)) &&
-      MergeRHS == 0) {
+  if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) {
     return Def->getOperand(1).getReg();
   }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index c67cf897c397c2..a66a015ac2efe2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -293,6 +293,13 @@ namespace {
                                               Align(16));
     }
 
+    /// SelectAddrImmX34 - Returns true if the address N can be represented by
+    /// a base register plus a signed 34-bit displacement. Suitable for use by
+    /// PSTXVP and friends.
+    bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
+      return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
+    }
+
     // Select an address into a single register.
     bool SelectAddr(SDValue N, SDValue &Base) {
       Base = N;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5b5504a458ed93..2a77d53a78177d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2399,6 +2399,20 @@ bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
   return false;
 }
 
+/// isIntS34Immediate - This method tests if value of node given can be
+/// accurately represented as a sign extension from a 34-bit value.  If so,
+/// this returns true and the immediate.
+bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
+  if (!isa<ConstantSDNode>(N))
+    return false;
+
+  Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+  return isInt<34>(Imm);
+}
+bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
+  return isIntS34Immediate(Op.getNode(), Imm);
+}
+
 /// SelectAddressRegReg - Given the specified addressed, check to see if it
 /// can be represented as an indexed [r+r] operation.  Returns false if it
 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
@@ -2599,6 +2613,55 @@ bool PPCTargetLowering::SelectAddressRegImm(
   return true;      // [r+0]
 }
 
+/// Similar to the 16-bit case but for instructions that take a 34-bit
+/// displacement field (prefixed loads/stores).
+bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
+                                              SDValue &Base,
+                                              SelectionDAG &DAG) const {
+  // Only on 64-bit targets.
+  if (N.getValueType() != MVT::i64)
+    return false;
+
+  SDLoc dl(N);
+  int64_t Imm = 0;
+
+  if (N.getOpcode() == ISD::ADD) {
+    if (!isIntS34Immediate(N.getOperand(1), Imm))
+      return false;
+    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
+    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
+      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+    else
+      Base = N.getOperand(0);
+    return true;
+  }
+
+  if (N.getOpcode() == ISD::OR) {
+    if (!isIntS34Immediate(N.getOperand(1), Imm))
+      return false;
+    // If this is an or of disjoint bitfields, we can codegen this as an add
+    // (for better address arithmetic) if the LHS and RHS of the OR are
+    // provably disjoint.
+    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
+    if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
+      return false;
+    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
+      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+    else
+      Base = N.getOperand(0);
+    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
+    return true;
+  }
+
+  if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
+    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
+    Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
+    return true;
+  }
+
+  return false;
+}
+
 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
 /// represented as an indexed [r+r] operation.
 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 70dcef4658ff91..ca7c68624c68aa 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -770,6 +770,8 @@ namespace llvm {
     bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
                              SelectionDAG &DAG,
                              MaybeAlign EncodingAlignment) const;
+    bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base,
+                               SelectionDAG &DAG) const;
 
     /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
     /// represented as an indexed [r+r] operation.
@@ -1325,6 +1327,8 @@ namespace llvm {
 
   bool isIntS16Immediate(SDNode *N, int16_t &Imm);
   bool isIntS16Immediate(SDValue Op, int16_t &Imm);
+  bool isIntS34Immediate(SDNode *N, int64_t &Imm);
+  bool isIntS34Immediate(SDValue Op, int64_t &Imm);
 
   bool convertToNonDenormSingle(APInt &ArgAPInt);
   bool convertToNonDenormSingle(APFloat &ArgAPFloat);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 1a128f1ddf0d83..2e77d04d4a79e2 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1031,11 +1031,13 @@ def pred : Operand<OtherVT> {
 // Define PowerPC specific addressing mode.
 
 // d-form
-def iaddr    : ComplexPattern<iPTR, 2, "SelectAddrImm",     [], []>;  // "stb"
+def iaddr    : ComplexPattern<iPTR, 2, "SelectAddrImm",     [], []>; // "stb"
 // ds-form
-def iaddrX4  : ComplexPattern<iPTR, 2, "SelectAddrImmX4",   [], []>;  // "std"
+def iaddrX4  : ComplexPattern<iPTR, 2, "SelectAddrImmX4",   [], []>; // "std"
 // dq-form
-def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16",  [], []>;  // "stxv"
+def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16",  [], []>; // "stxv"
+// 8LS:d-form
+def iaddrX34 : ComplexPattern<iPTR, 2, "SelectAddrImmX34",  [], []>; // "pstxvp"
 
 // Below forms are all x-form addressing mode, use three different ones so we
 // can make a accurate check for x-form instructions in ISEL.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 9f20bfcebe3c90..e1b76bb3bd0016 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1654,6 +1654,24 @@ let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs] i
                                 "pstxvp $XTp, $D_RA", IIC_LdStLFD>;
 }
 
+let Predicates = [PairedVectorMemops] in {
+  // Intrinsics for Paired Vector Loads.
+  def : Pat<(v256i1 (int_ppc_mma_lxvp iaddrX16:$src)), (LXVP memrix16:$src)>;
+  def : Pat<(v256i1 (int_ppc_mma_lxvp xaddrX16:$src)), (LXVPX xaddrX16:$src)>;
+  let Predicates = [PairedVectorMemops, PrefixInstrs] in {
+    def : Pat<(v256i1 (int_ppc_mma_lxvp iaddrX34:$src)), (PLXVP memri34:$src)>;
+  }
+  // Intrinsics for Paired Vector Stores.
+  def : Pat<(int_ppc_mma_stxvp v256i1:$XSp, iaddrX16:$dst),
+            (STXVP $XSp, memrix16:$dst)>;
+  def : Pat<(int_ppc_mma_stxvp v256i1:$XSp, xaddrX16:$dst),
+            (STXVPX $XSp, xaddrX16:$dst)>;
+  let Predicates = [PairedVectorMemops, PrefixInstrs] in {
+    def : Pat<(int_ppc_mma_stxvp v256i1:$XSp, iaddrX34:$dst),
+              (PSTXVP $XSp, memri34:$dst)>;
+  }
+}
+
 // TODO: We have an added complexity of 500 here. This is only a temporary
 // solution to have tablegen consider these patterns first. The way we do
 // addressing for PowerPC is complex depending on available D form, X form, or
diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index 4f1825bfc1c50f..ccbaea88d2f1d3 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -60,6 +60,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
@@ -277,8 +278,11 @@ static Value *GetPointerOperand(Value *MemI) {
   } else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {
     return SMemI->getPointerOperand();
   } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
-    if (IMemI->getIntrinsicID() == Intrinsic::prefetch)
+    if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
+        IMemI->getIntrinsicID() == Intrinsic::ppc_mma_lxvp)
       return IMemI->getArgOperand(0);
+    if (IMemI->getIntrinsicID() == Intrinsic::ppc_mma_stxvp)
+      return IMemI->getArgOperand(1);
   }
 
   return nullptr;
@@ -345,9 +349,13 @@ SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
         MemI = SMemI;
         PtrValue = SMemI->getPointerOperand();
       } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(&J)) {
-        if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
+        if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
+            IMemI->getIntrinsicID() == Intrinsic::ppc_mma_lxvp) {
           MemI = IMemI;
           PtrValue = IMemI->getArgOperand(0);
+        } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_mma_stxvp) {
+          MemI = IMemI;
+          PtrValue = IMemI->getArgOperand(1);
         } else continue;
       } else continue;
 
@@ -827,6 +835,11 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
     if (ST && ST->hasAltivec() &&
         PtrValue->getType()->getPointerElementType()->isVectorTy())
       return false;
+    // There are no update forms for P10 lxvp/stxvp intrinsic.
+    auto *II = dyn_cast<IntrinsicInst>(I);
+    if (II && ((II->getIntrinsicID() == Intrinsic::ppc_mma_lxvp) ||
+               II->getIntrinsicID() == Intrinsic::ppc_mma_stxvp))
+      return false;
     // See getPreIndexedAddressParts, the displacement for LDU/STDU has to
     // be 4's multiple (DS-form). For i64 loads/stores when the displacement
     // fits in a 16-bit signed field but isn't a multiple of 4, it will be
@@ -864,7 +877,13 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
   // Check if a load/store has DQ form.
   auto isDQFormCandidate = [&] (const Instruction *I, const Value *PtrValue) {
     assert((PtrValue && I) && "Invalid parameter!");
-    return !isa<IntrinsicInst>(I) && ST && ST->hasP9Vector() &&
+    // Check if it is a P10 lxvp/stxvp intrinsic.
+    auto *II = dyn_cast<IntrinsicInst>(I);
+    if (II)
+      return II->getIntrinsicID() == Intrinsic::ppc_mma_lxvp ||
+             II->getIntrinsicID() == Intrinsic::ppc_mma_stxvp;
+    // Check if it is a P9 vector load/store.
+    return ST && ST->hasP9Vector() &&
            (PtrValue->getType()->getPointerElementType()->isVectorTy());
   };
 
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index f3134460514cb4..be4f3354ede4b9 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1223,7 +1223,8 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
   case Intrinsic::ppc_vsx_lxvd2x_be:
   case Intrinsic::ppc_vsx_lxvw4x_be:
   case Intrinsic::ppc_vsx_lxvl:
-  case Intrinsic::ppc_vsx_lxvll: {
+  case Intrinsic::ppc_vsx_lxvll:
+  case Intrinsic::ppc_mma_lxvp: {
     Info.PtrVal = Inst->getArgOperand(0);
     Info.ReadMem = true;
     Info.WriteMem = false;
@@ -1239,7 +1240,8 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
   case Intrinsic::ppc_vsx_stxvd2x_be:
   case Intrinsic::ppc_vsx_stxvw4x_be:
   case Intrinsic::ppc_vsx_stxvl:
-  case Intrinsic::ppc_vsx_stxvll: {
+  case Intrinsic::ppc_vsx_stxvll:
+  case Intrinsic::ppc_mma_stxvp: {
     Info.PtrVal = Inst->getArgOperand(1);
     Info.ReadMem = false;
     Info.WriteMem = true;
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
index 7cbe9e38fb47fb..c3d259e6ff2055 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -23,9 +23,7 @@ WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() = default; // anchor.
 
 WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T,
                                            const MCTargetOptions &Options) {
-  CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
-  // So far this is used for DWARF DW_AT_low_pc which is always 32-bit in Wasm.
-  CodePointerSize = 4;
+  CodePointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
 
   // TODO: What should MaxInstLength be?
 
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index f0f5d9834771ee..aa7e2311d24086 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -131,7 +131,7 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
     if (auto Section = static_cast<const MCSectionWasm *>(
             getFixupSection(Fixup.getValue()))) {
       if (Section->getKind().isText())
-        llvm_unreachable("unimplemented R_WASM_FUNCTION_OFFSET_I64");
+        return wasm::R_WASM_FUNCTION_OFFSET_I64;
       else if (!Section->isWasmData())
         llvm_unreachable("unimplemented R_WASM_SECTION_OFFSET_I64");
     }
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index abeddf45f22c69..b8431a5a453212 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1672,13 +1672,11 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
   Value *Result = EmitGEPOffset(GEP1);
 
   // If this is a single inbounds GEP and the original sub was nuw,
-  // then the final multiplication is also nuw. We match an extra add zero
-  // here, because that's what EmitGEPOffset() generates.
-  Instruction *I;
-  if (IsNUW && !GEP2 && !Swapped && GEP1->isInBounds() &&
-      match(Result, m_Add(m_Instruction(I), m_Zero())) &&
-      I->getOpcode() == Instruction::Mul)
-    I->setHasNoUnsignedWrap();
+  // then the final multiplication is also nuw.
+  if (auto *I = dyn_cast<Instruction>(Result))
+    if (IsNUW && !GEP2 && !Swapped && GEP1->isInBounds() &&
+        I->getOpcode() == Instruction::Mul)
+      I->setHasNoUnsignedWrap();
 
   // If we had a constant expression GEP on the other side offsetting the
   // pointer, subtract it from the offset we have.
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fix.ll b/llvm/test/Analysis/CostModel/X86/arith-fix.ll
index 57648fead67ea0..6c89c14eed97fb 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fix.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fix.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512DQ
 ;
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2
 
 declare i64        @llvm.smul.fix.i64(i64, i64, i32)
 declare <2 x i64>  @llvm.smul.fix.v2i64(<2 x i64>, <2 x i64>, i32)
@@ -31,8 +31,6 @@ declare <16 x i8>  @llvm.smul.fix.v16i8(<16 x i8>, <16 x i8>, i32)
 declare <32 x i8>  @llvm.smul.fix.v32i8(<32 x i8>, <32 x i8>, i32)
 declare <64 x i8>  @llvm.smul.fix.v64i8(<64 x i8>, <64 x i8>, i32)
 
-; CHECK: {{^}}
-
 define i32 @smul(i32 %arg) {
 ; SSSE3-LABEL: 'smul'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fma.ll b/llvm/test/Analysis/CostModel/X86/arith-fma.ll
index 2fb72c1bc0a919..99864faa037b78 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fma.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fma.ll
@@ -1,11 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+xop | FileCheck %s --check-prefixes=CHECK,XOP
-; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+fma,+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 --allow-unused-prefixes
-; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+fma,+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 --allow-unused-prefixes
-; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+fma,+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F --allow-unused-prefixes
-; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+fma,+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW --allow-unused-prefixes
-
-; CHECK: {{^}}
+; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+xop | FileCheck %s --check-prefixes=XOP
+; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+fma,+avx | FileCheck %s --check-prefixes=AVX
+; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+fma,+avx2 | FileCheck %s --check-prefixes=AVX
+; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+fma,+avx512f | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+fma,+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512
 
 define i32 @fma(i32 %arg) {
 ; XOP-LABEL: 'fma'
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll
index fe1e48a993c133..d5d47a99e91996 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll
@@ -1,17 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,SSE1
-; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
-; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
-; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F --allow-unused-prefixes
-; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW --allow-unused-prefixes
-;
-; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM
-; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM
-; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,BTVER2
+; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=-sse2 | FileCheck %s --check-prefixes=SSE1
+; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512
+;
+; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM
+; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM
+; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1
 
-; CHECK: {{^}}
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
@@ -103,17 +102,6 @@ define i32 @fadd(i32 %arg) {
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fadd <8 x double> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; BTVER2-LABEL: 'fadd'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <8 x float> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fadd <16 x float> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fadd <8 x double> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %F32 = fadd float undef, undef
   %V4F32 = fadd <4 x float> undef, undef
@@ -216,17 +204,6 @@ define i32 @fsub(i32 %arg) {
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; BTVER2-LABEL: 'fsub'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %F32 = fsub float undef, undef
   %V4F32 = fsub <4 x float> undef, undef
@@ -329,17 +306,6 @@ define i32 @fneg_idiom(i32 %arg) {
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; BTVER2-LABEL: 'fneg_idiom'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %F32 = fsub float -0.0, undef
   %V4F32 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, undef
@@ -533,17 +499,6 @@ define i32 @fmul(i32 %arg) {
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; BTVER2-LABEL: 'fmul'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %F32 = fmul float undef, undef
   %V4F32 = fmul <4 x float> undef, undef
@@ -646,17 +601,6 @@ define i32 @fdiv(i32 %arg) {
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V4F64 = fdiv <4 x double> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 260 for instruction: %V8F64 = fdiv <8 x double> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; BTVER2-LABEL: 'fdiv'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %F32 = fdiv float undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = fdiv <4 x float> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = fdiv <8 x float> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = fdiv <16 x float> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %F64 = fdiv double undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = fdiv <2 x double> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = fdiv <4 x double> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = fdiv <8 x double> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %F32 = fdiv float undef, undef
   %V4F32 = fdiv <4 x float> undef, undef
@@ -850,17 +794,6 @@ define i32 @fsqrt(i32 %arg) {
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 268 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; BTVER2-LABEL: 'fsqrt'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %F32 = call float @llvm.sqrt.f32(float undef)
   %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
index cfb52d3c4c9ffa..fac508f592bb27 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -1,17 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ
-;
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2
-
-; CHECK: {{^}}
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512DQ
+;
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2
 
 ;
 ; sadd.with.overflow
diff --git a/llvm/test/Analysis/CostModel/X86/arith-sminmax.ll b/llvm/test/Analysis/CostModel/X86/arith-sminmax.ll
index 0eca3160c726aa..8e9ef5bba0bb52 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-sminmax.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-sminmax.ll
@@ -1,14 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+ssse3 | FileCheck %s -check-prefixes=CHECK,SSSE3
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=CHECK,SSE42
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=CHECK,AVX1
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=CHECK,AVX512F
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512dq | FileCheck %s -check-prefixes=CHECK,AVX512DQ
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512bw | FileCheck %s -check-prefixes=CHECK,AVX512BW
-
-; CHECK: {{^}}
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+ssse3 | FileCheck %s -check-prefixes=SSSE3
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512dq | FileCheck %s -check-prefixes=AVX512DQ
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512bw | FileCheck %s -check-prefixes=AVX512BW
 
 declare i64        @llvm.smax.i64(i64, i64)
 declare <2 x i64>  @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
diff --git a/llvm/test/Analysis/CostModel/X86/arith-ssat.ll b/llvm/test/Analysis/CostModel/X86/arith-ssat.ll
index 15b48e4ad62bda..783add32f6c136 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-ssat.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-ssat.ll
@@ -1,17 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512DQ
 ;
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2
-
-; CHECK: {{^}}
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/llvm/test/Analysis/CostModel/X86/arith-uminmax.ll b/llvm/test/Analysis/CostModel/X86/arith-uminmax.ll
index 909b624457bb34..b8e748d3fda014 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-uminmax.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-uminmax.ll
@@ -1,14 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+ssse3 | FileCheck %s -check-prefixes=CHECK,SSSE3
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=CHECK,SSE42
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=CHECK,AVX1
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=CHECK,AVX512F
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512dq | FileCheck %s -check-prefixes=CHECK,AVX512DQ
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512bw | FileCheck %s -check-prefixes=CHECK,AVX512BW
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+ssse3 | FileCheck %s -check-prefixes=SSSE3
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512dq | FileCheck %s -check-prefixes=AVX512DQ
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512bw | FileCheck %s -check-prefixes=AVX512BW
 
-; CHECK: {{^}}
 declare i64        @llvm.umax.i64(i64, i64)
 declare <2 x i64>  @llvm.umax.v2i64(<2 x i64>, <2 x i64>)
 declare <4 x i64>  @llvm.umax.v4i64(<4 x i64>, <4 x i64>)
diff --git a/llvm/test/Analysis/CostModel/X86/arith-usat.ll b/llvm/test/Analysis/CostModel/X86/arith-usat.ll
index e7b2b413aa1ced..8c0e818c20c233 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-usat.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-usat.ll
@@ -1,17 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512DQ
 ;
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2
 
-; CHECK: {{^}}
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/llvm/test/Analysis/CostModel/X86/arith.ll b/llvm/test/Analysis/CostModel/X86/arith.ll
index fdc3a519e1d6fb..f0bf501f605e71 100644
--- a/llvm/test/Analysis/CostModel/X86/arith.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith.ll
@@ -1,17 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512DQ
 ;
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SLM
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,GLM
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,BTVER2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,GLM
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,BTVER2
 
-; CHECK: {{^}}
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
diff --git a/llvm/test/Analysis/CostModel/X86/bitreverse.ll b/llvm/test/Analysis/CostModel/X86/bitreverse.ll
index ddbcadb19fdbe6..2497c4bc4fa14c 100644
--- a/llvm/test/Analysis/CostModel/X86/bitreverse.ll
+++ b/llvm/test/Analysis/CostModel/X86/bitreverse.ll
@@ -1,20 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=CHECK,X86,SSE2
-; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=CHECK,X86,SSE42
-; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=CHECK,X86,AVX,AVX1
-; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,X86,AVX,AVX2
-; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=CHECK,X86,AVX512,AVX512F
-; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,avx512bw,avx512dq | FileCheck %s -check-prefixes=CHECK,X86,AVX512,AVX512BW
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=CHECK,X64,SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=CHECK,X64,SSE42
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=CHECK,X64,AVX,AVX1
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,X64,AVX,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=CHECK,X64,AVX512,AVX512F
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=CHECK,X64,AVX512,AVX512BW
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop | FileCheck %s -check-prefixes=CHECK,XOP
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx2 | FileCheck %s -check-prefixes=CHECK,XOP
+; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=X86,SSE2
+; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=X86,SSE42
+; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=X86,AVX,AVX1
+; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=X86,AVX,AVX2
+; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=X86,AVX512,AVX512F
+; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,avx512bw,avx512dq | FileCheck %s -check-prefixes=X86,AVX512,AVX512BW
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=X64,SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=X64,SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=X64,AVX,AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=X64,AVX,AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=X64,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=X64,AVX512,AVX512BW
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop | FileCheck %s -check-prefixes=XOP
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx2 | FileCheck %s -check-prefixes=XOP
 
-; CHECK: {{^}}
 ; Verify the cost of scalar bitreverse instructions.
 
 declare i64 @llvm.bitreverse.i64(i64)
diff --git a/llvm/test/Analysis/CostModel/X86/bswap.ll b/llvm/test/Analysis/CostModel/X86/bswap.ll
index 69dd328a521934..c8d1ddf656a795 100644
--- a/llvm/test/Analysis/CostModel/X86/bswap.ll
+++ b/llvm/test/Analysis/CostModel/X86/bswap.ll
@@ -1,12 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=CHECK,SSE42
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=CHECK,AVX,AVX1
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,AVX,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx | FileCheck %s -check-prefixes=CHECK,AVX,AVX1
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx2 | FileCheck %s -check-prefixes=CHECK,AVX,AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx | FileCheck %s -check-prefixes=AVX,AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx2 | FileCheck %s -check-prefixes=AVX,AVX2
 
-; CHECK: {{^}}
 ; Verify the cost of vector bswap instructions.
 
 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
diff --git a/llvm/test/Analysis/CostModel/X86/cast.ll b/llvm/test/Analysis/CostModel/X86/cast.ll
index abc0ea19cb8ce4..f388eecf26d6b6 100644
--- a/llvm/test/Analysis/CostModel/X86/cast.ll
+++ b/llvm/test/Analysis/CostModel/X86/cast.ll
@@ -1,13 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE41
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
 
-; CHECK: {{^}}
 define i32 @add(i32 %arg) {
 ; SSE-LABEL: 'add'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %A = zext <4 x i1> undef to <4 x i32>
diff --git a/llvm/test/Analysis/CostModel/X86/ctlz.ll b/llvm/test/Analysis/CostModel/X86/ctlz.ll
index 5bf2fc2f80a9dd..a897e3783eb530 100644
--- a/llvm/test/Analysis/CostModel/X86/ctlz.ll
+++ b/llvm/test/Analysis/CostModel/X86/ctlz.ll
@@ -1,15 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=-lzcnt,+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2,NOLZCNT
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2,LZCNT
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+sse4.2 | FileCheck %s -check-prefixes=CHECK,LZCNT,SSE42
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx | FileCheck %s -check-prefixes=CHECK,LZCNT,AVX1
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx2 | FileCheck %s -check-prefixes=CHECK,LZCNT,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx512f | FileCheck %s -check-prefixes=CHECK,LZCNT,AVX512,AVX512F
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=CHECK,LZCNT,AVX512,AVX512BW
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq,+avx512cd | FileCheck %s -check-prefixes=CHECK,LZCNT,AVX512CD
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=-lzcnt,+sse2 | FileCheck %s -check-prefixes=SSE2,NOLZCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+sse2 | FileCheck %s -check-prefixes=SSE2,LZCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+sse4.2 | FileCheck %s -check-prefixes=LZCNT,SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx | FileCheck %s -check-prefixes=LZCNT,AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx2 | FileCheck %s -check-prefixes=LZCNT,AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx512f | FileCheck %s -check-prefixes=LZCNT,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=LZCNT,AVX512,AVX512BW
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq,+avx512cd | FileCheck %s -check-prefixes=LZCNT,AVX512CD
 
 ; Verify the cost of scalar leading zero count instructions.
-; CHECK: {{^}}
 
 declare i64 @llvm.ctlz.i64(i64, i1)
 declare i32 @llvm.ctlz.i32(i32, i1)
diff --git a/llvm/test/Analysis/CostModel/X86/ctpop.ll b/llvm/test/Analysis/CostModel/X86/ctpop.ll
index 84d4fe143cc486..d15255a1697d2f 100644
--- a/llvm/test/Analysis/CostModel/X86/ctpop.ll
+++ b/llvm/test/Analysis/CostModel/X86/ctpop.ll
@@ -1,14 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=-popcnt,+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2,NOPOPCNT
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2,POPCNT
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+sse4.2 | FileCheck %s -check-prefixes=CHECK,POPCNT,SSE42
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+avx | FileCheck %s -check-prefixes=CHECK,POPCNT,AVX1
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+avx2 | FileCheck %s -check-prefixes=CHECK,POPCNT,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+avx512f | FileCheck %s -check-prefixes=CHECK,POPCNT,AVX512,AVX512F
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=CHECK,POPCNT,AVX512,AVX512BW
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=-popcnt,+sse2 | FileCheck %s -check-prefixes=SSE2,NOPOPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+sse2 | FileCheck %s -check-prefixes=SSE2,POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+sse4.2 | FileCheck %s -check-prefixes=POPCNT,SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+avx | FileCheck %s -check-prefixes=POPCNT,AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+avx2 | FileCheck %s -check-prefixes=POPCNT,AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+avx512f | FileCheck %s -check-prefixes=POPCNT,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=POPCNT,AVX512,AVX512BW
 
 ; Verify the cost of scalar population count instructions.
-; CHECK: {{^}}
 
 declare i64 @llvm.ctpop.i64(i64)
 declare i32 @llvm.ctpop.i32(i32)
diff --git a/llvm/test/Analysis/CostModel/X86/cttz.ll b/llvm/test/Analysis/CostModel/X86/cttz.ll
index 3091c2ed141135..24266fdfa2ad44 100644
--- a/llvm/test/Analysis/CostModel/X86/cttz.ll
+++ b/llvm/test/Analysis/CostModel/X86/cttz.ll
@@ -1,13 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=-bmi,+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2,NOBMI
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2,BMI
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+sse4.2 | FileCheck %s -check-prefixes=CHECK,BMI,SSE42
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx | FileCheck %s -check-prefixes=CHECK,BMI,AVX,AVX1
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx2 | FileCheck %s -check-prefixes=CHECK,BMI,AVX,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx512f | FileCheck %s -check-prefixes=CHECK,BMI,AVX512,AVX512F
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=CHECK,BMI,AVX512,AVX512BW
-
-; CHECK: {{^}}
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=-bmi,+sse2 | FileCheck %s -check-prefixes=SSE2,NOBMI
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+sse2 | FileCheck %s -check-prefixes=SSE2,BMI
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+sse4.2 | FileCheck %s -check-prefixes=BMI,SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx | FileCheck %s -check-prefixes=BMI,AVX,AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx2 | FileCheck %s -check-prefixes=BMI,AVX,AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx512f | FileCheck %s -check-prefixes=BMI,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=BMI,AVX512,AVX512BW
+
 ; Verify the cost of scalar trailing zero count instructions.
 
 declare i64 @llvm.cttz.i64(i64, i1)
diff --git a/llvm/test/Analysis/CostModel/X86/div.ll b/llvm/test/Analysis/CostModel/X86/div.ll
index 608e15018e9a79..98a1d2380df9b0 100644
--- a/llvm/test/Analysis/CostModel/X86/div.ll
+++ b/llvm/test/Analysis/CostModel/X86/div.ll
@@ -11,7 +11,6 @@
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42
 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2
 
-; CHECK: {{^}}
 define i32 @sdiv() {
 ; CHECK-LABEL: 'sdiv'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, undef
diff --git a/llvm/test/Analysis/CostModel/X86/extend.ll b/llvm/test/Analysis/CostModel/X86/extend.ll
index 346073b00e1023..44325c1d6aa8e8 100644
--- a/llvm/test/Analysis/CostModel/X86/extend.ll
+++ b/llvm/test/Analysis/CostModel/X86/extend.ll
@@ -1,17 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
-;
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
+;
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2
 
-; CHECK: {{^}}
 define i32 @zext_vXi32() {
 ; SSE2-LABEL: 'zext_vXi32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %I64 = zext i32 undef to i64
diff --git a/llvm/test/Analysis/CostModel/X86/fcmp.ll b/llvm/test/Analysis/CostModel/X86/fcmp.ll
index ceac3606f6223c..00929d0d328ae1 100644
--- a/llvm/test/Analysis/CostModel/X86/fcmp.ll
+++ b/llvm/test/Analysis/CostModel/X86/fcmp.ll
@@ -1,19 +1,18 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,SSE3
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE41
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
-;
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512
+;
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=AVX
 
-; CHECK: {{^}}
 define i32 @cmp_float_oeq(i32 %arg) {
 ; SSE2-LABEL: 'cmp_float_oeq'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp oeq float undef, undef
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll b/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll
index 5495280d4acdd2..d918eaa9e652b3 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll
@@ -9,7 +9,6 @@
 ; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512
 ; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512
 
-; CHECK: {{^}}
 define void @reduce_f64(double %arg) {
 ; SSE2-LABEL: 'reduce_f64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double %arg, <1 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-fmul.ll b/llvm/test/Analysis/CostModel/X86/reduce-fmul.ll
index a3926eaec82d1d..e74f7ff02b2ba9 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-fmul.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-fmul.ll
@@ -9,7 +9,6 @@
 ; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512
 ; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512
 
-; CHECK: {{^}}
 define void @reduce_f64(double %arg) {
 ; SSE2-LABEL: 'reduce_f64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.vector.reduce.fmul.v1f64(double %arg, <1 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-smax.ll b/llvm/test/Analysis/CostModel/X86/reduce-smax.ll
index 1313c65a1e51ed..ee3cffa69896ef 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-smax.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-smax.ll
@@ -1,15 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE4,SSE41
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE4,SSE42
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4,SSE41
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE4,SSE42
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
 
-; CHECK: {{^}}
 define i32 @reduce_i64(i32 %arg) {
 ; SSE2-LABEL: 'reduce_i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-smin.ll b/llvm/test/Analysis/CostModel/X86/reduce-smin.ll
index 457d327de4f630..2a48aaf97c24cb 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-smin.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-smin.ll
@@ -1,15 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE4,SSE41
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE4,SSE42
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4,SSE41
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE4,SSE42
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
 
-; CHECK: {{^}}
 define i32 @reduce_i64(i32 %arg) {
 ; SSE2-LABEL: 'reduce_i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-umax.ll b/llvm/test/Analysis/CostModel/X86/reduce-umax.ll
index 7a4ae17f45cb91..7be97c184cd864 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-umax.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-umax.ll
@@ -1,15 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE4,SSE41
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE4,SSE42
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4,SSE41
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE4,SSE42
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
 
-; CHECK: {{^}}
 define i32 @reduce_i64(i32 %arg) {
 ; SSE2-LABEL: 'reduce_i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-umin.ll b/llvm/test/Analysis/CostModel/X86/reduce-umin.ll
index d665c07c403a56..4916a98ad962d3 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-umin.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-umin.ll
@@ -1,15 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE4,SSE41
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE4,SSE42
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
-; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4,SSE41
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE4,SSE42
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
+; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
 
-; CHECK: {{^}}
 define i32 @reduce_i64(i32 %arg) {
 ; SSE2-LABEL: 'reduce_i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll
index 94034bfd6fbc09..3d459a90b839cd 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll
@@ -46,10 +46,10 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 ; CHECK-NEXT:    {0,+,1}<%for.body> Added Flags: <nusw>
 ; CHECK:         Expressions re-written:
 ; CHECK-NEXT:    [PSE]  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom:
-; CHECK-NEXT:      ((4 * (zext i32 {1,+,1}<%for.body> to i64))<nuw><nsw> + %a)<nsw>
+; CHECK-NEXT:      ((4 * (zext i32 {1,+,1}<%for.body> to i64))<nuw><nsw> + %a)<nuw>
 ; CHECK-NEXT:      --> {(4 + %a),+,4}<%for.body>
 ; CHECK-NEXT:    [PSE]  %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %conv11:
-; CHECK-NEXT:      ((4 * (zext i32 {0,+,1}<%for.body> to i64))<nuw><nsw> + %b)<nsw>
+; CHECK-NEXT:      ((4 * (zext i32 {0,+,1}<%for.body> to i64))<nuw><nsw> + %b)<nuw>
 ; CHECK-NEXT:      --> {%b,+,4}<%for.body>
 define void @test1(i64 %x, i32* %a, i32* %b) {
 entry:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll b/llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
index 34dddbe5cc1b33..e6416cfb491184 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
@@ -97,11 +97,11 @@ for.end:                                          ; preds = %for.body
 ; CHECK-NEXT:   Grouped accesses:
 ; CHECK-NEXT:    Group {{.*}}[[ZERO]]:
 ; CHECK-NEXT:       (Low: %c High: (80 + %c))
-; CHECK-NEXT:         Member: {(2 + %c)<nsw>,+,4}
+; CHECK-NEXT:         Member: {(2 + %c)<nuw>,+,4}
 ; CHECK-NEXT:         Member: {%c,+,4}
 ; CHECK-NEXT:     Group {{.*}}[[ONE]]:
 ; CHECK-NEXT:       (Low: %a High: (42 + %a))
-; CHECK-NEXT:         Member: {(2 + %a)<nsw>,+,2}
+; CHECK-NEXT:         Member: {(2 + %a)<nuw>,+,2}
 ; CHECK-NEXT:         Member: {%a,+,2}
 ; CHECK-NEXT:     Group {{.*}}[[TWO]]:
 ; CHECK-NEXT:       (Low: %b High: (40 + %b))
@@ -169,7 +169,7 @@ for.end:                                          ; preds = %for.body
 ; CHECK-NEXT:   Grouped accesses:
 ; CHECK-NEXT:     Group {{.*}}[[ZERO]]:
 ; CHECK-NEXT:       (Low: %c High: (80 + %c))
-; CHECK-NEXT:         Member: {(2 + %c)<nsw>,+,4}
+; CHECK-NEXT:         Member: {(2 + %c)<nuw>,+,4}
 ; CHECK-NEXT:         Member: {%c,+,4}
 ; CHECK-NEXT:     Group {{.*}}[[ONE]]:
 ; CHECK-NEXT:       (Low: %a High: (42 + %a))
@@ -247,8 +247,8 @@ for.end:                                          ; preds = %for.body
 ; CHECK-NEXT:       %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2
 ; CHECK-NEXT:   Grouped accesses:
 ; CHECK-NEXT:     Group {{.*}}[[ZERO]]:
-; CHECK-NEXT:       (Low: ((2 * %offset) + %a)<nsw> High: (10000 + (2 * %offset) + %a))
-; CHECK-NEXT:         Member: {((2 * %offset) + %a)<nsw>,+,2}<nsw><%for.body>
+; CHECK-NEXT:       (Low: ((2 * %offset) + %a) High: (10000 + (2 * %offset) + %a))
+; CHECK-NEXT:         Member: {((2 * %offset) + %a),+,2}<nw><%for.body>
 ; CHECK-NEXT:     Group {{.*}}[[ONE]]:
 ; CHECK-NEXT:       (Low: %a High: (10000 + %a))
 ; CHECK-NEXT:         Member: {%a,+,2}<nw><%for.body>
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll b/llvm/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
index 8113c8d7106b2f..be51167dab8a9a 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
@@ -58,8 +58,8 @@ for.end:                                          ; preds = %for.body
 
 ; Here it is not obvious what the limits are, since 'step' could be negative.
 
-; CHECK: Low: ((60000 + %a)<nsw> umin (60000 + (-40000 * %step) + %a)) 
-; CHECK: High: (4 + ((60000 + %a)<nsw> umax (60000 + (-40000 * %step) + %a)))
+; CHECK: Low: ((60000 + %a) umin (60000 + (-40000 * %step) + %a)) 
+; CHECK: High: (4 + ((60000 + %a) umax (60000 + (-40000 * %step) + %a)))
 
 define void @g(i64 %step) {
 entry:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll b/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
index 5b5c821e998307..80ada5292d0979 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
@@ -365,7 +365,7 @@ for.end:                                          ; preds = %for.body
 ; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>
 
 ; LAA: [PSE]  %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul:
-; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)<nsw>
+; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)
 ; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body>
 
 ; LV-LABEL: f5
diff --git a/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll b/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll
index c60eafd413d54c..a8575c4f66fb32 100644
--- a/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll
+++ b/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll
@@ -29,7 +29,7 @@ define i32 @d(i32 %base) {
 ; CHECK-NEXT:    %idxprom = sext i32 %f.0 to i64
 ; CHECK-NEXT:    --> {(sext i32 %base to i64),+,1}<nsw><%for.cond> U: [-2147483648,-9223372036854775808) S: [-2147483648,-9223372036854775808) Exits: <<Unknown>> LoopDispositions: { %for.cond: Computable }
 ; CHECK-NEXT:    %arrayidx = getelementptr inbounds [1 x [1 x i8]], [1 x [1 x i8]]* %e, i64 0, i64 %idxprom
-; CHECK-NEXT:    --> {((sext i32 %base to i64) + %e)<nsw>,+,1}<nsw><%for.cond> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Computable }
+; CHECK-NEXT:    --> {((sext i32 %base to i64) + %e),+,1}<nw><%for.cond> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Computable }
 ; CHECK-NEXT:    %1 = load i32*, i32** @c, align 8
 ; CHECK-NEXT:    --> %1 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant }
 ; CHECK-NEXT:    %sub.ptr.lhs.cast = ptrtoint i32* %1 to i64
@@ -39,7 +39,7 @@ define i32 @d(i32 %base) {
 ; CHECK-NEXT:    %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 4
 ; CHECK-NEXT:    --> %sub.ptr.div U: full-set S: [-2305843009213693952,2305843009213693952) Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant }
 ; CHECK-NEXT:    %arrayidx1 = getelementptr inbounds [1 x i8], [1 x i8]* %arrayidx, i64 0, i64 %sub.ptr.div
-; CHECK-NEXT:    --> ({((sext i32 %base to i64) + %e)<nsw>,+,1}<nsw><%for.cond> + %sub.ptr.div)<nsw> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant }
+; CHECK-NEXT:    --> ({((sext i32 %base to i64) + %e),+,1}<nw><%for.cond> + %sub.ptr.div) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant }
 ; CHECK-NEXT:    %2 = load i8, i8* %arrayidx1, align 1
 ; CHECK-NEXT:    --> %2 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant }
 ; CHECK-NEXT:    %conv = sext i8 %2 to i32
diff --git a/llvm/test/Analysis/ScalarEvolution/load.ll b/llvm/test/Analysis/ScalarEvolution/load.ll
index 707908ff78cdf5..f41d20cc95769d 100644
--- a/llvm/test/Analysis/ScalarEvolution/load.ll
+++ b/llvm/test/Analysis/ScalarEvolution/load.ll
@@ -17,11 +17,11 @@ define i32 @test1() nounwind readnone {
 ; CHECK-NEXT:    %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
 ; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%for.body> U: [0,50) S: [0,50) Exits: 49 LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %arrayidx = getelementptr inbounds [50 x i32], [50 x i32]* @arr1, i32 0, i32 %i.03
-; CHECK-NEXT:    --> {@arr1,+,4}<nsw><%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (196 + @arr1) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {@arr1,+,4}<nuw><%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (196 + @arr1) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %0 = load i32, i32* %arrayidx, align 4
 ; CHECK-NEXT:    --> %0 U: full-set S: full-set Exits: 50 LoopDispositions: { %for.body: Variant }
 ; CHECK-NEXT:    %arrayidx1 = getelementptr inbounds [50 x i32], [50 x i32]* @arr2, i32 0, i32 %i.03
-; CHECK-NEXT:    --> {@arr2,+,4}<nsw><%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (196 + @arr2) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {@arr2,+,4}<nuw><%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (196 + @arr2) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %1 = load i32, i32* %arrayidx1, align 4
 ; CHECK-NEXT:    --> %1 U: full-set S: full-set Exits: 0 LoopDispositions: { %for.body: Variant }
 ; CHECK-NEXT:    %add = add i32 %0, %sum.04
@@ -74,7 +74,7 @@ define i32 @test2() nounwind uwtable readonly {
 ; CHECK-NEXT:    %n.01 = phi %struct.ListNode* [ bitcast ({ %struct.ListNode*, i32, [4 x i8] }* @node5 to %struct.ListNode*), %entry ], [ %1, %for.body ]
 ; CHECK-NEXT:    --> %n.01 U: full-set S: full-set Exits: @node1 LoopDispositions: { %for.body: Variant }
 ; CHECK-NEXT:    %i = getelementptr inbounds %struct.ListNode, %struct.ListNode* %n.01, i64 0, i32 1
-; CHECK-NEXT:    --> (4 + %n.01)<nsw> U: [-2147483644,-2147483648) S: [-2147483644,-2147483648) Exits: (4 + @node1)<nuw><nsw> LoopDispositions: { %for.body: Variant }
+; CHECK-NEXT:    --> (4 + %n.01)<nuw> U: [4,0) S: [4,0) Exits: (4 + @node1)<nuw><nsw> LoopDispositions: { %for.body: Variant }
 ; CHECK-NEXT:    %0 = load i32, i32* %i, align 4
 ; CHECK-NEXT:    --> %0 U: full-set S: full-set Exits: 0 LoopDispositions: { %for.body: Variant }
 ; CHECK-NEXT:    %add = add nsw i32 %0, %sum.02
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
index 12094ac14e7177..48dc484635a6ff 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
@@ -10,7 +10,7 @@ define void @test_guard_less_than_16(i32* nocapture %a, i64 %i) {
 ; CHECK-NEXT:    %iv = phi i64 [ %iv.next, %loop ], [ %i, %entry ]
 ; CHECK-NEXT:    --> {%i,+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 15 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %idx = getelementptr inbounds i32, i32* %a, i64 %iv
-; CHECK-NEXT:    --> {((4 * %i) + %a)<nsw>,+,4}<nsw><%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {((4 * %i) + %a),+,4}<nw><%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw nsw i64 %iv, 1
 ; CHECK-NEXT:    --> {(1 + %i),+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 16 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_guard_less_than_16
@@ -42,7 +42,7 @@ define void @test_guard_less_than_16_operands_swapped(i32* nocapture %a, i64 %i)
 ; CHECK-NEXT:    %iv = phi i64 [ %iv.next, %loop ], [ %i, %entry ]
 ; CHECK-NEXT:    --> {%i,+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 15 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %idx = getelementptr inbounds i32, i32* %a, i64 %iv
-; CHECK-NEXT:    --> {((4 * %i) + %a)<nsw>,+,4}<nsw><%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {((4 * %i) + %a),+,4}<nw><%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw nsw i64 %iv, 1
 ; CHECK-NEXT:    --> {(1 + %i),+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 16 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_guard_less_than_16_operands_swapped
@@ -74,7 +74,7 @@ define void @test_guard_less_than_16_branches_flipped(i32* nocapture %a, i64 %i)
 ; CHECK-NEXT:    %iv = phi i64 [ %iv.next, %loop ], [ %i, %entry ]
 ; CHECK-NEXT:    --> {%i,+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 15 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %idx = getelementptr inbounds i32, i32* %a, i64 %iv
-; CHECK-NEXT:    --> {((4 * %i) + %a)<nsw>,+,4}<nsw><%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {((4 * %i) + %a),+,4}<nw><%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw nsw i64 %iv, 1
 ; CHECK-NEXT:    --> {(1 + %i),+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 16 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_guard_less_than_16_branches_flipped
@@ -106,7 +106,7 @@ define void @test_guard_uge_16_branches_flipped(i32* nocapture %a, i64 %i) {
 ; CHECK-NEXT:    %iv = phi i64 [ %iv.next, %loop ], [ %i, %entry ]
 ; CHECK-NEXT:    --> {%i,+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 15 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %idx = getelementptr inbounds i32, i32* %a, i64 %iv
-; CHECK-NEXT:    --> {((4 * %i) + %a)<nsw>,+,4}<nsw><%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {((4 * %i) + %a),+,4}<nw><%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw nsw i64 %iv, 1
 ; CHECK-NEXT:    --> {(1 + %i),+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 16 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_guard_uge_16_branches_flipped
@@ -138,7 +138,7 @@ define void @test_guard_eq_12(i32* nocapture %a, i64 %N) {
 ; CHECK-NEXT:    %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
 ; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: %N LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %idx = getelementptr inbounds i32, i32* %a, i64 %iv
-; CHECK-NEXT:    --> {%a,+,4}<nsw><%loop> U: full-set S: full-set Exits: ((4 * %N) + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %N) + %a) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw nsw i64 %iv, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%loop> U: [1,14) S: [1,14) Exits: (1 + %N) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_guard_eq_12
@@ -170,7 +170,7 @@ define void @test_guard_ule_12(i32* nocapture %a, i64 %N) {
 ; CHECK-NEXT:    %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
 ; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: %N LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %idx = getelementptr inbounds i32, i32* %a, i64 %iv
-; CHECK-NEXT:    --> {%a,+,4}<nsw><%loop> U: full-set S: full-set Exits: ((4 * %N) + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %N) + %a) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw nsw i64 %iv, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%loop> U: [1,14) S: [1,14) Exits: (1 + %N) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_guard_ule_12
@@ -202,7 +202,7 @@ define void @test_guard_ule_12_step2(i32* nocapture %a, i64 %N) {
 ; CHECK-NEXT:    %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
 ; CHECK-NEXT:    --> {0,+,2}<nuw><nsw><%loop> U: [0,-9223372036854775808) S: [0,9223372036854775807) Exits: (2 * (%N /u 2))<nuw> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %idx = getelementptr inbounds i32, i32* %a, i64 %iv
-; CHECK-NEXT:    --> {%a,+,8}<nsw><%loop> U: full-set S: full-set Exits: ((8 * (%N /u 2)) + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%a,+,8}<nuw><%loop> U: full-set S: full-set Exits: ((8 * (%N /u 2)) + %a) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw nsw i64 %iv, 2
 ; CHECK-NEXT:    --> {2,+,2}<nuw><%loop> U: [2,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 + (2 * (%N /u 2))<nuw>) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_guard_ule_12_step2
@@ -234,7 +234,7 @@ define void @test_multiple_const_guards_order1(i32* nocapture %a, i64 %i) {
 ; CHECK-NEXT:    %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
 ; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%loop> U: [0,10) S: [0,10) Exits: %i LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %idx = getelementptr inbounds i32, i32* %a, i64 %iv
-; CHECK-NEXT:    --> {%a,+,4}<nsw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw nsw i64 %iv, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%loop> U: [1,11) S: [1,11) Exits: (1 + %i) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_multiple_const_guards_order1
@@ -270,7 +270,7 @@ define void @test_multiple_const_guards_order2(i32* nocapture %a, i64 %i) {
 ; CHECK-NEXT:    %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
 ; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%loop> U: [0,10) S: [0,10) Exits: %i LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %idx = getelementptr inbounds i32, i32* %a, i64 %iv
-; CHECK-NEXT:    --> {%a,+,4}<nsw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw nsw i64 %iv, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%loop> U: [1,11) S: [1,11) Exits: (1 + %i) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_multiple_const_guards_order2
@@ -307,7 +307,7 @@ define void @test_multiple_var_guards_order1(i32* nocapture %a, i64 %i, i64 %N)
 ; CHECK-NEXT:    %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
 ; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: %i LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %idx = getelementptr inbounds i32, i32* %a, i64 %iv
-; CHECK-NEXT:    --> {%a,+,4}<nsw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw nsw i64 %iv, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><%loop> U: [1,0) S: [1,0) Exits: (1 + %i) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_multiple_var_guards_order1
@@ -344,7 +344,7 @@ define void @test_multiple_var_guards_order2(i32* nocapture %a, i64 %i, i64 %N)
 ; CHECK-NEXT:    %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
 ; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: %i LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %idx = getelementptr inbounds i32, i32* %a, i64 %iv
-; CHECK-NEXT:    --> {%a,+,4}<nsw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw nsw i64 %iv, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><%loop> U: [1,0) S: [1,0) Exits: (1 + %i) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_multiple_var_guards_order2
@@ -381,7 +381,7 @@ define void @test_multiple_var_guards_cycle(i32* nocapture %a, i64 %i, i64 %N) {
 ; CHECK-NEXT:    %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
 ; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: %N LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %idx = getelementptr inbounds i32, i32* %a, i64 %iv
-; CHECK-NEXT:    --> {%a,+,4}<nsw><%loop> U: full-set S: full-set Exits: ((4 * %N) + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %N) + %a) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw nsw i64 %iv, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><%loop> U: [1,0) S: [1,0) Exits: (1 + %N) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_multiple_var_guards_cycle
@@ -417,7 +417,7 @@ define void @test_guard_ult_ne(i32* nocapture readonly %data, i64 %count) {
 ; CHECK-NEXT:    %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
 ; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: [0,4) S: [0,4) Exits: (-1 + %count) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %idx = getelementptr inbounds i32, i32* %data, i64 %iv
-; CHECK-NEXT:    --> {%data,+,4}<nw><%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%data,+,4}<nuw><%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw i64 %iv, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><%loop> U: [1,5) S: [1,5) Exits: %count LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_guard_ult_ne
@@ -456,7 +456,7 @@ define void @test_guard_and_assume(i32* nocapture readonly %data, i64 %count) {
 ; CHECK-NEXT:    %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
 ; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: [0,4) S: [0,4) Exits: (-1 + %count) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %idx = getelementptr inbounds i32, i32* %data, i64 %iv
-; CHECK-NEXT:    --> {%data,+,4}<nw><%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%data,+,4}<nuw><%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw i64 %iv, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><%loop> U: [1,5) S: [1,5) Exits: %count LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_guard_and_assume
@@ -537,7 +537,7 @@ define void @crash(i8* %ptr) {
 ; CHECK-NEXT:    %lastout.2271 = phi i8* [ %incdec.ptr126, %while.body125 ], [ %ptr, %while.end117 ]
 ; CHECK-NEXT:    --> {%ptr,+,1}<nuw><%while.body125> U: full-set S: full-set Exits: {-2,+,-1}<nw><%while.cond111> LoopDispositions: { %while.body125: Computable }
 ; CHECK-NEXT:    %incdec.ptr126 = getelementptr inbounds i8, i8* %lastout.2271, i64 1
-; CHECK-NEXT:    --> {(1 + %ptr)<nsw>,+,1}<nuw><%while.body125> U: full-set S: full-set Exits: {-1,+,-1}<nw><%while.cond111> LoopDispositions: { %while.body125: Computable }
+; CHECK-NEXT:    --> {(1 + %ptr)<nuw>,+,1}<nuw><%while.body125> U: [1,0) S: [1,0) Exits: {-1,+,-1}<nw><%while.cond111> LoopDispositions: { %while.body125: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @crash
 ; CHECK-NEXT:  Loop %while.body125: backedge-taken count is {(-2 + (-1 * %ptr)),+,-1}<nw><%while.cond111>
 ; CHECK-NEXT:  Loop %while.body125: max backedge-taken count is -1
diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll
index e2fc84c8d71c31..fd34306861ea0d 100644
--- a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll
+++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll
@@ -210,7 +210,7 @@ define void @f3(i8* %x_addr, i8* %y_addr, i32* %tmp_addr) {
 ; CHECK-NEXT:    %sunkaddr3 = mul i64 %add4.zext, 4
 ; CHECK-NEXT:    --> (4 * (zext i32 (4 + (4 * (%tmp /u 4))<nuw>) to i64))<nuw><nsw> U: [0,17179869169) S: [0,17179869181)
 ; CHECK-NEXT:    %sunkaddr4 = getelementptr inbounds i8, i8* bitcast ({ %union, [2000 x i8] }* @tmp_addr to i8*), i64 %sunkaddr3
-; CHECK-NEXT:    --> ((4 * (zext i32 (4 + (4 * (%tmp /u 4))<nuw>) to i64))<nuw><nsw> + @tmp_addr)<nsw> U: [0,-3) S: [-9223372036854775808,9223372036854775805)
+; CHECK-NEXT:    --> ((4 * (zext i32 (4 + (4 * (%tmp /u 4))<nuw>) to i64))<nuw><nsw> + @tmp_addr)<nuw> U: [0,-3) S: [-9223372036854775808,9223372036854775805)
 ; CHECK-NEXT:    %sunkaddr5 = getelementptr inbounds i8, i8* %sunkaddr4, i64 4096
 ; CHECK-NEXT:    --> (4096 + (4 * (zext i32 (4 + (4 * (%tmp /u 4))<nuw>) to i64))<nuw><nsw> + @tmp_addr) U: [0,-3) S: [-9223372036854775808,9223372036854775805)
 ; CHECK-NEXT:    %addr4.cast = bitcast i8* %sunkaddr5 to i32*
diff --git a/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll b/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll
index b6867ca471f7a8..0647d328fe5149 100644
--- a/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll
+++ b/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll
@@ -26,7 +26,7 @@ bb:                                               ; preds = %bb.nph, %bb1
   %1 = sext i32 %i.01 to i64                      ; <i64> [#uses=1]
 
 ; CHECK: %2 = getelementptr inbounds double, double* %d, i64 %1
-; CHECK: -->  {%d,+,16}<nsw><%bb>
+; CHECK: -->  {%d,+,16}<nuw><%bb>
   %2 = getelementptr inbounds double, double* %d, i64 %1  ; <double*> [#uses=1]
 
   %3 = load double, double* %2, align 8                   ; <double> [#uses=1]
@@ -40,7 +40,7 @@ bb:                                               ; preds = %bb.nph, %bb1
   %8 = sext i32 %7 to i64                         ; <i64> [#uses=1]
 
 ; CHECK: %9 = getelementptr inbounds double, double* %q, i64 %8
-; CHECK: {(8 + %q)<nsw>,+,16}<nsw><%bb>
+; CHECK: {(8 + %q)<nuw>,+,16}<nuw><%bb>
   %9 = getelementptr inbounds double, double* %q, i64 %8  ; <double*> [#uses=1]
 
 ; Artificially repeat the above three instructions, this time using
@@ -52,7 +52,7 @@ bb:                                               ; preds = %bb.nph, %bb1
   %t8 = sext i32 %t7 to i64                         ; <i64> [#uses=1]
 
 ; CHECK: %t9 = getelementptr inbounds double, double* %q, i64 %t8
-; CHECK: {(8 + %q)<nsw>,+,16}<nsw><%bb>
+; CHECK: {(8 + %q)<nuw>,+,16}<nuw><%bb>
   %t9 = getelementptr inbounds double, double* %q, i64 %t8  ; <double*> [#uses=1]
 
   %10 = load double, double* %9, align 8                  ; <double> [#uses=1]
diff --git a/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll b/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll
index 0310ff341516b9..6a4e76a32adb09 100644
--- a/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll
+++ b/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll
@@ -24,7 +24,7 @@ bb:                                               ; preds = %bb.nph, %bb1
   %1 = sext i32 %i.01 to i64                      ; <i64> [#uses=1]
 
 ; CHECK: %2 = getelementptr inbounds double, double* %d, i64 %1
-; CHECK: -->  {%d,+,16}<nsw><%bb>
+; CHECK: -->  {%d,+,16}<nuw><%bb>
   %2 = getelementptr inbounds double, double* %d, i64 %1  ; <double*> [#uses=1]
 
   %3 = load double, double* %2, align 8                   ; <double> [#uses=1]
@@ -38,7 +38,7 @@ bb:                                               ; preds = %bb.nph, %bb1
   %8 = sext i32 %7 to i64                         ; <i64> [#uses=1]
 
 ; CHECK: %9 = getelementptr inbounds double, double* %q, i64 %8
-; CHECK: {(8 + %q)<nsw>,+,16}<nsw><%bb>
+; CHECK: {(8 + %q)<nuw>,+,16}<nuw><%bb>
   %9 = getelementptr inbounds double, double* %q, i64 %8  ; <double*> [#uses=1]
 
 ; Artificially repeat the above three instructions, this time using
@@ -50,7 +50,7 @@ bb:                                               ; preds = %bb.nph, %bb1
   %t8 = sext i32 %t7 to i64                         ; <i64> [#uses=1]
 
 ; CHECK: %t9 = getelementptr inbounds double, double* %q, i64 %t8
-; CHECK: {(8 + %q)<nsw>,+,16}<nsw><%bb>
+; CHECK: {(8 + %q)<nuw>,+,16}<nuw><%bb>
   %t9 = getelementptr inbounds double, double* %q, i64 %t8  ; <double*> [#uses=1]
 
   %10 = load double, double* %9, align 8                  ; <double> [#uses=1]
diff --git a/llvm/test/Analysis/ScalarEvolution/nsw.ll b/llvm/test/Analysis/ScalarEvolution/nsw.ll
index 39f199868eaa5e..b80b3ada315ee5 100644
--- a/llvm/test/Analysis/ScalarEvolution/nsw.ll
+++ b/llvm/test/Analysis/ScalarEvolution/nsw.ll
@@ -41,7 +41,7 @@ bb1:		; preds = %bb
 ; CHECK-NEXT: -->  {1,+,1}<nuw><nsw><%bb>
 	%tmp9 = getelementptr inbounds double, double* %p, i64 %phitmp		; <double*> [#uses=1]
 ; CHECK: %tmp9
-; CHECK-NEXT:  -->  {(8 + %p)<nsw>,+,8}<nsw><%bb>
+; CHECK-NEXT:  -->  {(8 + %p)<nuw>,+,8}<nuw><%bb>
 	%tmp10 = load double, double* %tmp9, align 8		; <double> [#uses=1]
 	%tmp11 = fcmp ogt double %tmp10, 2.000000e+00		; <i1> [#uses=1]
 	br i1 %tmp11, label %bb, label %bb1.return_crit_edge
@@ -69,7 +69,7 @@ for.body.i.i:                                     ; preds = %for.body.i.i, %for.
   store i32 0, i32* %__first.addr.02.i.i, align 4
   %ptrincdec.i.i = getelementptr inbounds i32, i32* %__first.addr.02.i.i, i64 1
 ; CHECK: %ptrincdec.i.i
-; CHECK-NEXT: -->  {(4 + %begin)<nsw>,+,4}<nuw><%for.body.i.i>
+; CHECK-NEXT: -->  {(4 + %begin)<nuw>,+,4}<nuw><%for.body.i.i>
   %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end
   br i1 %cmp.i.i, label %for.cond.for.end_crit_edge.i.i, label %for.body.i.i
 
@@ -95,10 +95,10 @@ for.body.i.i:                                     ; preds = %entry, %for.body.i.
 ; CHECK: {1,+,1}<nuw><nsw><%for.body.i.i>
   %ptrincdec.i.i = getelementptr inbounds i32, i32* %begin, i64 %tmp
 ; CHECK: %ptrincdec.i.i =
-; CHECK: {(4 + %begin)<nsw>,+,4}<nsw><%for.body.i.i>
+; CHECK: {(4 + %begin)<nuw>,+,4}<nuw><%for.body.i.i>
   %__first.addr.08.i.i = getelementptr inbounds i32, i32* %begin, i64 %indvar.i.i
 ; CHECK: %__first.addr.08.i.i
-; CHECK: {%begin,+,4}<nsw><%for.body.i.i>
+; CHECK: {%begin,+,4}<nuw><%for.body.i.i>
   store i32 0, i32* %__first.addr.08.i.i, align 4
   %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end
   br i1 %cmp.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i
@@ -127,7 +127,8 @@ exit:
 }
 
 ; CHECK-LABEL: PR12375
-; CHECK: -->  {(4 + %arg)<nsw>,+,4}<nuw><%bb1>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (4 + (4 * ((-1 + (-1 * %arg) + ((4 + %arg)<nsw> umax (8 + %arg)<nsw>)) /u 4))<nuw> + %arg)
+; CHECK: -->  {(4 + %arg)<nuw>,+,4}<nuw><%bb1>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (8 + %arg)<nuw>
+
 define i32 @PR12375(i32* readnone %arg) {
 bb:
   %tmp = getelementptr inbounds i32, i32* %arg, i64 2
@@ -146,7 +147,7 @@ bb7:                                              ; preds = %bb1
 }
 
 ; CHECK-LABEL: PR12376
-; CHECK: -->  {(4 + %arg)<nsw>,+,4}<nuw><%bb2>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (4 + (4 * ((-1 + (-1 * %arg) + ((4 + %arg)<nsw> umax %arg1)) /u 4))<nuw> + %arg)
+; CHECK: -->  {(4 + %arg)<nuw>,+,4}<nuw><%bb2>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (4 + (4 * ((-1 + (-1 * %arg) + ((4 + %arg)<nuw> umax %arg1)) /u 4))<nuw> + %arg)
 define void @PR12376(i32* nocapture %arg, i32* nocapture %arg1)  {
 bb:
   br label %bb2
diff --git a/llvm/test/Analysis/ScalarEvolution/pr46786.ll b/llvm/test/Analysis/ScalarEvolution/pr46786.ll
index 9255807f02c59d..f19a33cf355dcb 100644
--- a/llvm/test/Analysis/ScalarEvolution/pr46786.ll
+++ b/llvm/test/Analysis/ScalarEvolution/pr46786.ll
@@ -10,7 +10,7 @@ define i8* @FSE_decompress_usingDTable(i8* %arg, i32 %arg1, i32 %arg2, i32 %arg3
 ; CHECK-LABEL: 'FSE_decompress_usingDTable'
 ; CHECK-NEXT:  Classifying expressions for: @FSE_decompress_usingDTable
 ; CHECK-NEXT:    %i = getelementptr inbounds i8, i8* %arg, i32 %arg2
-; CHECK-NEXT:    --> (%arg2 + %arg)<nsw> U: full-set S: full-set
+; CHECK-NEXT:    --> (%arg2 + %arg) U: full-set S: full-set
 ; CHECK-NEXT:    %i4 = sub nsw i32 0, %arg1
 ; CHECK-NEXT:    --> (-1 * %arg1) U: full-set S: full-set
 ; CHECK-NEXT:    %i5 = getelementptr inbounds i8, i8* %i, i32 %i4
diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
index 70fb37984d8a7c..788a268e0f2272 100644
--- a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
+++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
@@ -194,17 +194,17 @@ define void @ptrtoint_of_gep(i8* %in, i64* %out0) {
 ; X64-LABEL: 'ptrtoint_of_gep'
 ; X64-NEXT:  Classifying expressions for: @ptrtoint_of_gep
 ; X64-NEXT:    %in_adj = getelementptr inbounds i8, i8* %in, i64 42
-; X64-NEXT:    --> (42 + %in)<nsw> U: [-9223372036854775766,-9223372036854775808) S: [-9223372036854775766,-9223372036854775808)
+; X64-NEXT:    --> (42 + %in)<nuw> U: [42,0) S: [42,0)
 ; X64-NEXT:    %p0 = ptrtoint i8* %in_adj to i64
-; X64-NEXT:    --> (42 + (ptrtoint i8* %in to i64))<nsw> U: [-9223372036854775766,-9223372036854775808) S: [-9223372036854775766,-9223372036854775808)
+; X64-NEXT:    --> (42 + (ptrtoint i8* %in to i64))<nuw> U: [42,0) S: [42,0)
 ; X64-NEXT:  Determining loop execution counts for: @ptrtoint_of_gep
 ;
 ; X32-LABEL: 'ptrtoint_of_gep'
 ; X32-NEXT:  Classifying expressions for: @ptrtoint_of_gep
 ; X32-NEXT:    %in_adj = getelementptr inbounds i8, i8* %in, i64 42
-; X32-NEXT:    --> (42 + %in)<nsw> U: [-2147483606,-2147483648) S: [-2147483606,-2147483648)
+; X32-NEXT:    --> (42 + %in)<nuw> U: [42,0) S: [42,0)
 ; X32-NEXT:    %p0 = ptrtoint i8* %in_adj to i64
-; X32-NEXT:    --> (zext i32 (42 + (ptrtoint i8* %in to i32))<nsw> to i64) U: [0,4294967296) S: [0,4294967296)
+; X32-NEXT:    --> (42 + (zext i32 (ptrtoint i8* %in to i32) to i64))<nuw><nsw> U: [42,4294967338) S: [42,4294967338)
 ; X32-NEXT:  Determining loop execution counts for: @ptrtoint_of_gep
 ;
   %in_adj = getelementptr inbounds i8, i8* %in, i64 42
@@ -224,9 +224,9 @@ define void @ptrtoint_of_addrec(i32* %in, i32 %count) {
 ; X64-NEXT:    %i6 = phi i64 [ 0, %entry ], [ %i9, %loop ]
 ; X64-NEXT:    --> {0,+,1}<nuw><nsw><%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: (-1 + (zext i32 %count to i64))<nsw> LoopDispositions: { %loop: Computable }
 ; X64-NEXT:    %i7 = getelementptr inbounds i32, i32* %in, i64 %i6
-; X64-NEXT:    --> {%in,+,4}<nsw><%loop> U: full-set S: full-set Exits: (-4 + (4 * (zext i32 %count to i64))<nuw><nsw> + %in) LoopDispositions: { %loop: Computable }
+; X64-NEXT:    --> {%in,+,4}<nuw><%loop> U: full-set S: full-set Exits: (-4 + (4 * (zext i32 %count to i64))<nuw><nsw> + %in) LoopDispositions: { %loop: Computable }
 ; X64-NEXT:    %i8 = ptrtoint i32* %i7 to i64
-; X64-NEXT:    --> {(ptrtoint i32* %in to i64),+,4}<nsw><%loop> U: full-set S: full-set Exits: (-4 + (4 * (zext i32 %count to i64))<nuw><nsw> + (ptrtoint i32* %in to i64)) LoopDispositions: { %loop: Computable }
+; X64-NEXT:    --> {(ptrtoint i32* %in to i64),+,4}<nuw><%loop> U: full-set S: full-set Exits: (-4 + (4 * (zext i32 %count to i64))<nuw><nsw> + (ptrtoint i32* %in to i64)) LoopDispositions: { %loop: Computable }
 ; X64-NEXT:    %i9 = add nuw nsw i64 %i6, 1
 ; X64-NEXT:    --> {1,+,1}<nuw><%loop> U: [1,0) S: [1,0) Exits: (zext i32 %count to i64) LoopDispositions: { %loop: Computable }
 ; X64-NEXT:  Determining loop execution counts for: @ptrtoint_of_addrec
@@ -394,7 +394,7 @@ define void @pr46786_c26_char(i8* %arg, i8* %arg1, i8* %arg2) {
 ; X64-NEXT:    %i13 = add i8 %i12, %i8
 ; X64-NEXT:    --> (%i12 + %i8) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
 ; X64-NEXT:    %i14 = getelementptr inbounds i8, i8* %i7, i64 1
-; X64-NEXT:    --> {(1 + %arg)<nsw>,+,1}<nuw><%bb6> U: full-set S: full-set Exits: %arg1 LoopDispositions: { %bb6: Computable }
+; X64-NEXT:    --> {(1 + %arg)<nuw>,+,1}<nuw><%bb6> U: [1,0) S: [1,0) Exits: %arg1 LoopDispositions: { %bb6: Computable }
 ; X64-NEXT:  Determining loop execution counts for: @pr46786_c26_char
 ; X64-NEXT:  Loop %bb6: backedge-taken count is (-1 + (-1 * %arg) + %arg1)
 ; X64-NEXT:  Loop %bb6: max backedge-taken count is -2
@@ -421,7 +421,7 @@ define void @pr46786_c26_char(i8* %arg, i8* %arg1, i8* %arg2) {
 ; X32-NEXT:    %i13 = add i8 %i12, %i8
 ; X32-NEXT:    --> (%i12 + %i8) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
 ; X32-NEXT:    %i14 = getelementptr inbounds i8, i8* %i7, i64 1
-; X32-NEXT:    --> {(1 + %arg)<nsw>,+,1}<nuw><%bb6> U: full-set S: full-set Exits: %arg1 LoopDispositions: { %bb6: Computable }
+; X32-NEXT:    --> {(1 + %arg)<nuw>,+,1}<nuw><%bb6> U: [1,0) S: [1,0) Exits: %arg1 LoopDispositions: { %bb6: Computable }
 ; X32-NEXT:  Determining loop execution counts for: @pr46786_c26_char
 ; X32-NEXT:  Loop %bb6: backedge-taken count is (-1 + (-1 * %arg) + %arg1)
 ; X32-NEXT:  Loop %bb6: max backedge-taken count is -2
@@ -475,13 +475,13 @@ define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) {
 ; X64-NEXT:    %i11 = ashr exact i64 %i10, 2
 ; X64-NEXT:    --> ((({0,+,4}<nw><%bb6> smax {0,+,-4}<nw><%bb6>) /u 4) * (1 smin (-1 smax {0,+,4}<nw><%bb6>)))<nsw> U: [-4611686018427387903,4611686018427387904) S: [-4611686018427387903,4611686018427387904) Exits: ((((4 * ((-4 + (-1 * %arg) + %arg1) /u 4))<nuw> smax (-4 * ((-4 + (-1 * %arg) + %arg1) /u 4))) /u 4) * (1 smin (-1 smax (4 * ((-4 + (-1 * %arg) + %arg1) /u 4))<nuw>)))<nsw> LoopDispositions: { %bb6: Computable }
 ; X64-NEXT:    %i12 = getelementptr inbounds i32, i32* %arg2, i64 %i11
-; X64-NEXT:    --> ((4 * (({0,+,4}<nw><%bb6> smax {0,+,-4}<nw><%bb6>) /u 4) * (1 smin (-1 smax {0,+,4}<nw><%bb6>))) + %arg2)<nsw> U: full-set S: full-set Exits: ((4 * (((4 * ((-4 + (-1 * %arg) + %arg1) /u 4))<nuw> smax (-4 * ((-4 + (-1 * %arg) + %arg1) /u 4))) /u 4) * (1 smin (-1 smax (4 * ((-4 + (-1 * %arg) + %arg1) /u 4))<nuw>))) + %arg2)<nsw> LoopDispositions: { %bb6: Computable }
+; X64-NEXT:    --> ((4 * (({0,+,4}<nw><%bb6> smax {0,+,-4}<nw><%bb6>) /u 4) * (1 smin (-1 smax {0,+,4}<nw><%bb6>))) + %arg2) U: full-set S: full-set Exits: ((4 * (((4 * ((-4 + (-1 * %arg) + %arg1) /u 4))<nuw> smax (-4 * ((-4 + (-1 * %arg) + %arg1) /u 4))) /u 4) * (1 smin (-1 smax (4 * ((-4 + (-1 * %arg) + %arg1) /u 4))<nuw>))) + %arg2) LoopDispositions: { %bb6: Computable }
 ; X64-NEXT:    %i13 = load i32, i32* %i12, align 4
 ; X64-NEXT:    --> %i13 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
 ; X64-NEXT:    %i14 = add nsw i32 %i13, %i8
 ; X64-NEXT:    --> (%i13 + %i8) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
 ; X64-NEXT:    %i15 = getelementptr inbounds i32, i32* %i7, i64 1
-; X64-NEXT:    --> {(4 + %arg)<nsw>,+,4}<nuw><%bb6> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * %arg) + %arg1) /u 4))<nuw> + %arg) LoopDispositions: { %bb6: Computable }
+; X64-NEXT:    --> {(4 + %arg)<nuw>,+,4}<nuw><%bb6> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-4 + (-1 * %arg) + %arg1) /u 4))<nuw> + %arg) LoopDispositions: { %bb6: Computable }
 ; X64-NEXT:  Determining loop execution counts for: @pr46786_c26_int
 ; X64-NEXT:  Loop %bb6: backedge-taken count is ((-4 + (-1 * %arg) + %arg1) /u 4)
 ; X64-NEXT:  Loop %bb6: max backedge-taken count is 4611686018427387903
@@ -504,13 +504,13 @@ define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) {
 ; X32-NEXT:    %i11 = ashr exact i64 %i10, 2
 ; X32-NEXT:    --> ({0,+,1}<nw><%bb6> * (1 smin {0,+,4}<nuw><nsw><%bb6>))<nuw><nsw> U: [0,1073741824) S: [0,1073741824) Exits: (((zext i32* (-4 + (-1 * %arg) + %arg1) to i64) /u 4) * (1 smin (4 * ((zext i32* (-4 + (-1 * %arg) + %arg1) to i64) /u 4))<nuw><nsw>))<nuw><nsw> LoopDispositions: { %bb6: Computable }
 ; X32-NEXT:    %i12 = getelementptr inbounds i32, i32* %arg2, i64 %i11
-; X32-NEXT:    --> (((trunc i64 (1 smin {0,+,4}<nuw><nsw><%bb6>) to i32) * {0,+,4}<%bb6>) + %arg2)<nsw> U: full-set S: full-set Exits: ((4 * (trunc i64 (1 smin (4 * ((zext i32* (-4 + (-1 * %arg) + %arg1) to i64) /u 4))<nuw><nsw>) to i32) * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg2)<nsw> LoopDispositions: { %bb6: Computable }
+; X32-NEXT:    --> (((trunc i64 (1 smin {0,+,4}<nuw><nsw><%bb6>) to i32) * {0,+,4}<%bb6>) + %arg2) U: full-set S: full-set Exits: ((4 * (trunc i64 (1 smin (4 * ((zext i32* (-4 + (-1 * %arg) + %arg1) to i64) /u 4))<nuw><nsw>) to i32) * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg2) LoopDispositions: { %bb6: Computable }
 ; X32-NEXT:    %i13 = load i32, i32* %i12, align 4
 ; X32-NEXT:    --> %i13 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
 ; X32-NEXT:    %i14 = add nsw i32 %i13, %i8
 ; X32-NEXT:    --> (%i13 + %i8) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
 ; X32-NEXT:    %i15 = getelementptr inbounds i32, i32* %i7, i64 1
-; X32-NEXT:    --> {(4 + %arg)<nsw>,+,4}<nuw><%bb6> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * %arg) + %arg1) /u 4))<nuw> + %arg) LoopDispositions: { %bb6: Computable }
+; X32-NEXT:    --> {(4 + %arg)<nuw>,+,4}<nuw><%bb6> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-4 + (-1 * %arg) + %arg1) /u 4))<nuw> + %arg) LoopDispositions: { %bb6: Computable }
 ; X32-NEXT:  Determining loop execution counts for: @pr46786_c26_int
 ; X32-NEXT:  Loop %bb6: backedge-taken count is ((-4 + (-1 * %arg) + %arg1) /u 4)
 ; X32-NEXT:  Loop %bb6: max backedge-taken count is 1073741823
diff --git a/llvm/test/Analysis/ScalarEvolution/sdiv.ll b/llvm/test/Analysis/ScalarEvolution/sdiv.ll
index 89a3e77564ae54..bc919288a566a8 100644
--- a/llvm/test/Analysis/ScalarEvolution/sdiv.ll
+++ b/llvm/test/Analysis/ScalarEvolution/sdiv.ll
@@ -19,7 +19,7 @@ define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 {
 ; CHECK-NEXT:    %idxprom = sext i32 %rem to i64
 ; CHECK-NEXT:    --> ({0,+,1}<nuw><nsw><%for.cond> /u 2) U: [0,2147483648) S: [0,2147483648) Exits: ((zext i32 %width to i64) /u 2) LoopDispositions: { %for.cond: Computable }
 ; CHECK-NEXT:    %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %storage, i64 0, i64 %idxprom
-; CHECK-NEXT:    --> ((4 * ({0,+,1}<nuw><nsw><%for.cond> /u 2))<nuw><nsw> + %storage)<nsw> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((4 * ((zext i32 %width to i64) /u 2))<nuw><nsw> + %storage)<nsw> LoopDispositions: { %for.cond: Computable }
+; CHECK-NEXT:    --> ((4 * ({0,+,1}<nuw><nsw><%for.cond> /u 2))<nuw><nsw> + %storage)<nuw> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((4 * ((zext i32 %width to i64) /u 2))<nuw><nsw> + %storage)<nuw> LoopDispositions: { %for.cond: Computable }
 ; CHECK-NEXT:    %1 = load i32, i32* %arrayidx, align 4
 ; CHECK-NEXT:    --> %1 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant }
 ; CHECK-NEXT:    %call = call i32 @_Z3adji(i32 %1)
diff --git a/llvm/test/Analysis/ScalarEvolution/srem.ll b/llvm/test/Analysis/ScalarEvolution/srem.ll
index 197437b51ca120..089dc2408a30d4 100644
--- a/llvm/test/Analysis/ScalarEvolution/srem.ll
+++ b/llvm/test/Analysis/ScalarEvolution/srem.ll
@@ -19,7 +19,7 @@ define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 {
 ; CHECK-NEXT:    %idxprom = sext i32 %rem to i64
 ; CHECK-NEXT:    --> (zext i1 {false,+,true}<%for.cond> to i64) U: [0,2) S: [0,2) Exits: (zext i1 (trunc i32 %width to i1) to i64) LoopDispositions: { %for.cond: Computable }
 ; CHECK-NEXT:    %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %storage, i64 0, i64 %idxprom
-; CHECK-NEXT:    --> ((4 * (zext i1 {false,+,true}<%for.cond> to i64))<nuw><nsw> + %storage)<nsw> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((4 * (zext i1 (trunc i32 %width to i1) to i64))<nuw><nsw> + %storage)<nsw> LoopDispositions: { %for.cond: Computable }
+; CHECK-NEXT:    --> ((4 * (zext i1 {false,+,true}<%for.cond> to i64))<nuw><nsw> + %storage)<nuw> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((4 * (zext i1 (trunc i32 %width to i1) to i64))<nuw><nsw> + %storage)<nuw> LoopDispositions: { %for.cond: Computable }
 ; CHECK-NEXT:    %1 = load i32, i32* %arrayidx, align 4
 ; CHECK-NEXT:    --> %1 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant }
 ; CHECK-NEXT:    %call = call i32 @_Z3adji(i32 %1)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
index fd7620e317cedb..621229d9a8cafe 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
@@ -345,3 +345,95 @@ body:             |
     %select:gpr(s64) = G_SELECT %cond(s1), %t, %f
     $x0 = COPY %select(s64)
     RET_ReallyLR implicit $x0
+
+...
+---
+name:            csneg_s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; G_SELECT cc, true, (G_SUB 0, x) -> CSNEG true, x, cc
+
+    ; CHECK-LABEL: name: csneg_s32
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %reg1:gpr32 = COPY $w1
+    ; CHECK: %t:gpr32 = COPY $w2
+    ; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri %reg0, 0, implicit-def $nzcv
+    ; CHECK: %select:gpr32 = CSNEGWr %t, %reg1, 1, implicit $nzcv
+    ; CHECK: $w0 = COPY %select
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %cond:gpr(s1) = G_TRUNC %reg0(s32)
+    %reg1:gpr(s32) = COPY $w1
+    %t:gpr(s32) = COPY $w2
+    %zero:gpr(s32) = G_CONSTANT i32 0
+    %sub:gpr(s32) = G_SUB %zero(s32), %reg1
+    %select:gpr(s32) = G_SELECT %cond(s1), %t, %sub
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            csneg_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; G_SELECT cc, true, (G_SUB 0, x) -> CSNEG true, x, cc
+
+    ; CHECK-LABEL: name: csneg_s64
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %reg0:gpr64 = COPY $x0
+    ; CHECK: %cond:gpr32 = COPY %reg0.sub_32
+    ; CHECK: %reg1:gpr64 = COPY $x1
+    ; CHECK: %t:gpr64 = COPY $x2
+    ; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri %cond, 0, implicit-def $nzcv
+    ; CHECK: %select:gpr64 = CSNEGXr %t, %reg1, 1, implicit $nzcv
+    ; CHECK: $x0 = COPY %select
+    ; CHECK: RET_ReallyLR implicit $x0
+    %reg0:gpr(s64) = COPY $x0
+    %cond:gpr(s1) = G_TRUNC %reg0(s64)
+    %reg1:gpr(s64) = COPY $x1
+    %t:gpr(s64) = COPY $x2
+    %zero:gpr(s64) = G_CONSTANT i64 0
+    %sub:gpr(s64) = G_SUB %zero(s64), %reg1
+    %select:gpr(s64) = G_SELECT %cond(s1), %t, %sub
+    $x0 = COPY %select(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            csneg_with_true_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; We should prefer eliminating the G_SUB over eliminating the constant true
+    ; value.
+
+    ; CHECK-LABEL: name: csneg_with_true_cst
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %t:gpr32 = MOVi32imm 1
+    ; CHECK: %reg2:gpr32 = COPY $w2
+    ; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri %reg0, 0, implicit-def $nzcv
+    ; CHECK: %select:gpr32 = CSNEGWr %t, %reg2, 1, implicit $nzcv
+    ; CHECK: $w0 = COPY %select
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %cond:gpr(s1) = G_TRUNC %reg0(s32)
+    %reg1:gpr(s32) = COPY $w1
+    %t:gpr(s32) = G_CONSTANT i32 1
+    %zero:gpr(s32) = G_CONSTANT i32 0
+    %reg2:gpr(s32) = COPY $w2
+    %sub:gpr(s32) = G_SUB %zero(s32), %reg2
+    %select:gpr(s32) = G_SELECT %cond(s1), %t, %sub
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll b/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll
new file mode 100644
index 00000000000000..a9041d8d978287
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:   --check-prefix=CHECK-BE
+
+; This test checks that LSR properly recognizes lxvp/stxvp as load/store
+; intrinsics to avoid generating x-form instructions instead of d-forms.
+
+declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
+declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)
+define void @foo(i32 zeroext %n, <256 x i1>* %ptr, <256 x i1>* %ptr2) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmplwi r3, 0
+; CHECK-NEXT:    beqlr cr0
+; CHECK-NEXT:  # %bb.1: # %for.body.lr.ph
+; CHECK-NEXT:    clrldi r6, r3, 32
+; CHECK-NEXT:    addi r3, r4, 64
+; CHECK-NEXT:    addi r4, r5, 64
+; CHECK-NEXT:    mtctr r6
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_2: # %for.body
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lxvp vsp0, -64(r3)
+; CHECK-NEXT:    lxvp vsp2, -32(r3)
+; CHECK-NEXT:    lxvp vsp4, 0(r3)
+; CHECK-NEXT:    lxvp vsp6, 32(r3)
+; CHECK-NEXT:    addi r3, r3, 1
+; CHECK-NEXT:    stxvp vsp0, -64(r4)
+; CHECK-NEXT:    stxvp vsp2, -32(r4)
+; CHECK-NEXT:    stxvp vsp4, 0(r4)
+; CHECK-NEXT:    stxvp vsp6, 32(r4)
+; CHECK-NEXT:    addi r4, r4, 1
+; CHECK-NEXT:    bdnz .LBB0_2
+; CHECK-NEXT:  # %bb.3: # %for.cond.cleanup
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: foo:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    cmplwi r3, 0
+; CHECK-BE-NEXT:    beqlr cr0
+; CHECK-BE-NEXT:  # %bb.1: # %for.body.lr.ph
+; CHECK-BE-NEXT:    clrldi r6, r3, 32
+; CHECK-BE-NEXT:    addi r3, r4, 64
+; CHECK-BE-NEXT:    addi r4, r5, 64
+; CHECK-BE-NEXT:    mtctr r6
+; CHECK-BE-NEXT:    .p2align 4
+; CHECK-BE-NEXT:  .LBB0_2: # %for.body
+; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    lxvp vsp0, -64(r3)
+; CHECK-BE-NEXT:    lxvp vsp2, -32(r3)
+; CHECK-BE-NEXT:    lxvp vsp4, 0(r3)
+; CHECK-BE-NEXT:    lxvp vsp6, 32(r3)
+; CHECK-BE-NEXT:    addi r3, r3, 1
+; CHECK-BE-NEXT:    stxvp vsp0, -64(r4)
+; CHECK-BE-NEXT:    stxvp vsp2, -32(r4)
+; CHECK-BE-NEXT:    stxvp vsp4, 0(r4)
+; CHECK-BE-NEXT:    stxvp vsp6, 32(r4)
+; CHECK-BE-NEXT:    addi r4, r4, 1
+; CHECK-BE-NEXT:    bdnz .LBB0_2
+; CHECK-BE-NEXT:  # %bb.3: # %for.cond.cleanup
+; CHECK-BE-NEXT:    blr
+entry:
+  %cmp35.not = icmp eq i32 %n, 0
+  br i1 %cmp35.not, label %for.cond.cleanup, label %for.body.lr.ph
+
+for.body.lr.ph:
+  %0 = bitcast <256 x i1>* %ptr to i8*
+  %1 = bitcast <256 x i1>* %ptr2 to i8*
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %2 = getelementptr i8, i8* %0, i64 %indvars.iv
+  %3 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %2)
+  %add2 = add nuw nsw i64 %indvars.iv, 32
+  %4 = getelementptr i8, i8* %0, i64 %add2
+  %5 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %4)
+  %add4 = add nuw nsw i64 %indvars.iv, 64
+  %6 = getelementptr i8, i8* %0, i64 %add4
+  %7 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %6)
+  %add6 = add nuw nsw i64 %indvars.iv, 96
+  %8 = getelementptr i8, i8* %0, i64 %add6
+  %9 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %8)
+  %10 = getelementptr i8, i8* %1, i64 %indvars.iv
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %3, i8* %10)
+  %11 = getelementptr i8, i8* %1, i64 %add2
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %5, i8* %11)
+  %12 = getelementptr i8, i8* %1, i64 %add4
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %7, i8* %12)
+  %13 = getelementptr i8, i8* %1, i64 %add6
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %9, i8* %13)
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
diff --git a/llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll
new file mode 100644
index 00000000000000..816a28a61241b9
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll
@@ -0,0 +1,114 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs -disable-lsr \
+; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 < %s | FileCheck %s
+; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs -disable-lsr \
+; RUN:   -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr10 < %s | FileCheck %s \
+; RUN:   --check-prefix=CHECK-BE
+
+; This test checks the PPCLoopInstrFormPrep pass supports the lxvp and stxvp
+; intrinsics so we generate more dq-form instructions instead of x-forms.
+
+%_elem_type_of_x = type <{ double }>
+%_elem_type_of_y = type <{ double }>
+
+define void @foo(i64* %.n, [0 x %_elem_type_of_x]* %.x, [0 x %_elem_type_of_y]* %.y, <2 x double>* %.sum) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld r5, 0(r3)
+; CHECK-NEXT:    cmpdi r5, 1
+; CHECK-NEXT:    bltlr cr0
+; CHECK-NEXT:  # %bb.1: # %_loop_1_do_.lr.ph
+; CHECK-NEXT:    addi r3, r4, 1
+; CHECK-NEXT:    addi r4, r5, -1
+; CHECK-NEXT:    lxv vs0, 0(r6)
+; CHECK-NEXT:    rldicl r4, r4, 60, 4
+; CHECK-NEXT:    addi r4, r4, 1
+; CHECK-NEXT:    mtctr r4
+; CHECK-NEXT:    .p2align 5
+; CHECK-NEXT:  .LBB0_2: # %_loop_1_do_
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lxvp vsp2, 0(r3)
+; CHECK-NEXT:    lxvp vsp4, 32(r3)
+; CHECK-NEXT:    addi r3, r3, 128
+; CHECK-NEXT:    xvadddp vs0, vs0, vs3
+; CHECK-NEXT:    xvadddp vs0, vs0, vs2
+; CHECK-NEXT:    xvadddp vs0, vs0, vs5
+; CHECK-NEXT:    xvadddp vs0, vs0, vs4
+; CHECK-NEXT:    bdnz .LBB0_2
+; CHECK-NEXT:  # %bb.3: # %_loop_1_loopHeader_._return_bb_crit_edge
+; CHECK-NEXT:    stxv vs0, 0(r6)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: foo:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    ld r5, 0(r3)
+; CHECK-BE-NEXT:    cmpdi r5, 1
+; CHECK-BE-NEXT:    bltlr cr0
+; CHECK-BE-NEXT:  # %bb.1: # %_loop_1_do_.lr.ph
+; CHECK-BE-NEXT:    addi r3, r4, 1
+; CHECK-BE-NEXT:    addi r4, r5, -1
+; CHECK-BE-NEXT:    lxv vs0, 0(r6)
+; CHECK-BE-NEXT:    rldicl r4, r4, 60, 4
+; CHECK-BE-NEXT:    addi r4, r4, 1
+; CHECK-BE-NEXT:    mtctr r4
+; CHECK-BE-NEXT:    .p2align 5
+; CHECK-BE-NEXT:  .LBB0_2: # %_loop_1_do_
+; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    lxvp vsp2, 0(r3)
+; CHECK-BE-NEXT:    lxvp vsp4, 32(r3)
+; CHECK-BE-NEXT:    addi r3, r3, 128
+; CHECK-BE-NEXT:    xvadddp vs0, vs0, vs2
+; CHECK-BE-NEXT:    xvadddp vs0, vs0, vs3
+; CHECK-BE-NEXT:    xvadddp vs0, vs0, vs4
+; CHECK-BE-NEXT:    xvadddp vs0, vs0, vs5
+; CHECK-BE-NEXT:    bdnz .LBB0_2
+; CHECK-BE-NEXT:  # %bb.3: # %_loop_1_loopHeader_._return_bb_crit_edge
+; CHECK-BE-NEXT:    stxv vs0, 0(r6)
+; CHECK-BE-NEXT:    blr
+entry:
+  %_val_n_2 = load i64, i64* %.n, align 8
+  %_grt_tmp7 = icmp slt i64 %_val_n_2, 1
+  br i1 %_grt_tmp7, label %_return_bb, label %_loop_1_do_.lr.ph
+
+_loop_1_do_.lr.ph:                                ; preds = %entry
+  %x_rvo_based_addr_5 = getelementptr inbounds [0 x %_elem_type_of_x], [0 x %_elem_type_of_x]* %.x, i64 0, i64 -1
+  %.sum.promoted = load <2 x double>, <2 x double>* %.sum, align 16
+  br label %_loop_1_do_
+
+_loop_1_do_:                                      ; preds = %_loop_1_do_.lr.ph, %_loop_1_do_
+  %_val_sum_9 = phi <2 x double> [ %.sum.promoted, %_loop_1_do_.lr.ph ], [ %_add_tmp49, %_loop_1_do_ ]
+  %i.08 = phi i64 [ 1, %_loop_1_do_.lr.ph ], [ %_loop_1_update_loop_ix, %_loop_1_do_ ]
+  %x_ix_dim_0_6 = getelementptr %_elem_type_of_x, %_elem_type_of_x* %x_rvo_based_addr_5, i64 %i.08
+  %x_ix_dim_0_ = bitcast %_elem_type_of_x* %x_ix_dim_0_6 to i8*
+  %0 = getelementptr i8, i8* %x_ix_dim_0_, i64 1
+  %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
+  %2 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %1)
+  %.fca.0.extract1 = extractvalue { <16 x i8>, <16 x i8> } %2, 0
+  %.fca.1.extract2 = extractvalue { <16 x i8>, <16 x i8> } %2, 1
+  %3 = getelementptr i8, i8* %x_ix_dim_0_, i64 33
+  %4 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %3)
+  %5 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %4)
+  %.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %5, 0
+  %.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %5, 1
+  %6 = bitcast <16 x i8> %.fca.0.extract1 to <2 x double>
+  %_add_tmp23 = fadd contract <2 x double> %_val_sum_9, %6
+  %7 = bitcast <16 x i8> %.fca.1.extract2 to <2 x double>
+  %_add_tmp32 = fadd contract <2 x double> %_add_tmp23, %7
+  %8 = bitcast <16 x i8> %.fca.0.extract to <2 x double>
+  %_add_tmp40 = fadd contract <2 x double> %_add_tmp32, %8
+  %9 = bitcast <16 x i8> %.fca.1.extract to <2 x double>
+  %_add_tmp49 = fadd contract <2 x double> %_add_tmp40, %9
+  %_loop_1_update_loop_ix = add nuw nsw i64 %i.08, 16
+  %_grt_tmp = icmp sgt i64 %_loop_1_update_loop_ix, %_val_n_2
+  br i1 %_grt_tmp, label %_loop_1_loopHeader_._return_bb_crit_edge, label %_loop_1_do_
+
+_loop_1_loopHeader_._return_bb_crit_edge:         ; preds = %_loop_1_do_
+  store <2 x double> %_add_tmp49, <2 x double>* %.sum, align 16
+  br label %_return_bb
+
+_return_bb:                                       ; preds = %_loop_1_loopHeader_._return_bb_crit_edge, %entry
+  ret void
+}
+
+declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
+declare { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1>)
diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
index a0f855200b682e..0eb633ab3f2c9c 100644
--- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -698,3 +698,315 @@ entry:
 
 declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
 declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)
+
+; Function Attrs: nounwind
+define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) {
+; CHECK-LABEL: test_ldst_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvp vsp0, 0(r3)
+; CHECK-NEXT:    stxvp vsp0, 0(r4)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_1:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvp vsp0, 0(r3)
+; CHECK-BE-NEXT:    stxvp vsp0, 0(r4)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
+  %2 = bitcast <256 x i1>* %vp2 to i8*
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind readonly
+declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
+
+; Function Attrs: argmemonly nounwind writeonly
+declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)
+
+; Function Attrs: nounwind
+define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvpx vsp0, r3, r4
+; CHECK-NEXT:    stxvpx vsp0, r5, r4
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_2:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvpx vsp0, r3, r4
+; CHECK-BE-NEXT:    stxvpx vsp0, r5, r4
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 %offset
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 %offset
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li r5, 18
+; CHECK-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_3:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r5, 18
+; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 18
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 18
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li r5, 1
+; CHECK-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_4:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r5, 1
+; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 1
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 1
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li r5, 42
+; CHECK-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_5:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r5, 42
+; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 42
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 42
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_6(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvp vsp0, 4096(r3)
+; CHECK-NEXT:    stxvp vsp0, 4096(r4)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_6:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvp vsp0, 4096(r3)
+; CHECK-BE-NEXT:    stxvp vsp0, 4096(r4)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = getelementptr <256 x i1>, <256 x i1>* %vpp, i64 128
+  %1 = bitcast <256 x i1>* %0 to i8*
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = getelementptr <256 x i1>, <256 x i1>* %vp2, i64 128
+  %4 = bitcast <256 x i1>* %3 to i8*
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_7(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; FIXME: A prefixed load (plxvp) is expected here as the offset in this
+; test case is a constant that fits within 34-bits.
+; CHECK-LABEL: test_ldst_7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li r5, 0
+; CHECK-NEXT:    ori r5, r5, 32799
+; CHECK-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_7:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r5, 0
+; CHECK-BE-NEXT:    ori r5, r5, 32799
+; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 32799
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 32799
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nofree nounwind
+define void @test_ldst_8(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp)  {
+; CHECK-LABEL: test_ldst_8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxv vs1, 32(r3)
+; CHECK-NEXT:    lxv vs0, 48(r3)
+; CHECK-NEXT:    lxv vs3, 0(r3)
+; CHECK-NEXT:    lxv vs2, 16(r3)
+; CHECK-NEXT:    li r3, 8
+; CHECK-NEXT:    lxvpx vsp4, r4, r3
+; CHECK-NEXT:    xxmtacc acc0
+; CHECK-NEXT:    pmxvf64gernn acc0, vsp4, v2, 0, 0
+; CHECK-NEXT:    xxmfacc acc0
+; CHECK-NEXT:    stxv vs0, 48(r7)
+; CHECK-NEXT:    stxv vs1, 32(r7)
+; CHECK-NEXT:    stxv vs2, 16(r7)
+; CHECK-NEXT:    stxv vs3, 0(r7)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_8:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs3, 48(r3)
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    li r3, 8
+; CHECK-BE-NEXT:    lxvpx vsp4, r4, r3
+; CHECK-BE-NEXT:    xxmtacc acc0
+; CHECK-BE-NEXT:    pmxvf64gernn acc0, vsp4, v2, 0, 0
+; CHECK-BE-NEXT:    xxmfacc acc0
+; CHECK-BE-NEXT:    stxv vs1, 16(r7)
+; CHECK-BE-NEXT:    stxv vs0, 0(r7)
+; CHECK-BE-NEXT:    stxv vs3, 48(r7)
+; CHECK-BE-NEXT:    stxv vs2, 32(r7)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i8* %vqp to <512 x i1>*
+  %1 = load <512 x i1>, <512 x i1>* %0, align 64
+  %2 = bitcast <256 x i1>* %vpp to i8*
+  %3 = getelementptr i8, i8* %2, i64 8
+  %4 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %3)
+  %5 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %1, <256 x i1> %4, <16 x i8> %vc, i32 0, i32 0)
+  %6 = bitcast i8* %resp to <512 x i1>*
+  store <512 x i1> %5, <512 x i1>* %6, align 64
+  ret void
+}
+
+; Function Attrs: nofree nounwind
+define void @test_ldst_9(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp)  {
+; CHECK-LABEL: test_ldst_9:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxv vs1, 32(r3)
+; CHECK-NEXT:    lxv vs0, 48(r3)
+; CHECK-NEXT:    lxv vs3, 0(r3)
+; CHECK-NEXT:    lxv vs2, 16(r3)
+; CHECK-NEXT:    lxvp vsp4, 0(r4)
+; CHECK-NEXT:    xxmtacc acc0
+; CHECK-NEXT:    xvf64gernp acc0, vsp4, v2
+; CHECK-NEXT:    xxmfacc acc0
+; CHECK-NEXT:    stxv vs0, 48(r7)
+; CHECK-NEXT:    stxv vs1, 32(r7)
+; CHECK-NEXT:    stxv vs2, 16(r7)
+; CHECK-NEXT:    stxv vs3, 0(r7)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_9:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs3, 48(r3)
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    lxvp vsp4, 0(r4)
+; CHECK-BE-NEXT:    xxmtacc acc0
+; CHECK-BE-NEXT:    xvf64gernp acc0, vsp4, v2
+; CHECK-BE-NEXT:    xxmfacc acc0
+; CHECK-BE-NEXT:    stxv vs1, 16(r7)
+; CHECK-BE-NEXT:    stxv vs0, 0(r7)
+; CHECK-BE-NEXT:    stxv vs3, 48(r7)
+; CHECK-BE-NEXT:    stxv vs2, 32(r7)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i8* %vqp to <512 x i1>*
+  %1 = load <512 x i1>, <512 x i1>* %0, align 64
+  %2 = bitcast <256 x i1>* %vpp to i8*
+  %3 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %2)
+  %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc)
+  %5 = bitcast i8* %resp to <512 x i1>*
+  store <512 x i1> %4, <512 x i1>* %5, align 64
+  ret void
+}
+
+; Function Attrs: nofree nounwind
+define void @test_ldst_10(i8* nocapture readonly %vqp, i64 %offs, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp)  {
+; CHECK-LABEL: test_ldst_10:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxv vs1, 32(r3)
+; CHECK-NEXT:    lxv vs0, 48(r3)
+; CHECK-NEXT:    lxv vs3, 0(r3)
+; CHECK-NEXT:    lxv vs2, 16(r3)
+; CHECK-NEXT:    lxvp vsp4, 0(r5)
+; CHECK-NEXT:    xxmtacc acc0
+; CHECK-NEXT:    xvf64gernp acc0, vsp4, v2
+; CHECK-NEXT:    xxmfacc acc0
+; CHECK-NEXT:    stxv vs0, 48(r9)
+; CHECK-NEXT:    stxv vs1, 32(r9)
+; CHECK-NEXT:    stxv vs2, 16(r9)
+; CHECK-NEXT:    stxv vs3, 0(r9)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_10:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs3, 48(r3)
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    lxvp vsp4, 0(r5)
+; CHECK-BE-NEXT:    xxmtacc acc0
+; CHECK-BE-NEXT:    xvf64gernp acc0, vsp4, v2
+; CHECK-BE-NEXT:    xxmfacc acc0
+; CHECK-BE-NEXT:    stxv vs1, 16(r9)
+; CHECK-BE-NEXT:    stxv vs0, 0(r9)
+; CHECK-BE-NEXT:    stxv vs3, 48(r9)
+; CHECK-BE-NEXT:    stxv vs2, 32(r9)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i8* %vqp to <512 x i1>*
+  %1 = load <512 x i1>, <512 x i1>* %0, align 64
+  %2 = bitcast <256 x i1>* %vpp to i8*
+  %3 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %2)
+  %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc)
+  %5 = bitcast i8* %resp to <512 x i1>*
+  store <512 x i1> %4, <512 x i1>* %5, align 64
+  ret void
+}
diff --git a/llvm/test/MC/WebAssembly/debug-info64.ll b/llvm/test/MC/WebAssembly/debug-info64.ll
new file mode 100644
index 00000000000000..48f46ee10694f5
--- /dev/null
+++ b/llvm/test/MC/WebAssembly/debug-info64.ll
@@ -0,0 +1,289 @@
+; RUN: llc -filetype=obj %s -o - | llvm-readobj -r -S --symbols - | FileCheck %s
+
+; CHECK:      Format: WASM
+; CHECK-NEXT: Arch: wasm64
+; CHECK-NEXT: AddressSize: 64bit
+; CHECK-NEXT: Sections [
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: TYPE (0x1)
+; CHECK-NEXT:     Size: 4
+; CHECK-NEXT:     Offset: 8
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: IMPORT (0x2)
+; CHECK-NEXT:     Size: 81
+; CHECK-NEXT:     Offset: 18
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: FUNCTION (0x3)
+; CHECK-NEXT:     Size: 2
+; CHECK-NEXT:     Offset: 105
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: ELEM (0x9)
+; CHECK-NEXT:     Size: 7
+; CHECK-NEXT:     Offset: 113
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: DATACOUNT (0xC)
+; CHECK-NEXT:     Size: 1
+; CHECK-NEXT:     Offset: 126
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: CODE (0xA)
+; CHECK-NEXT:     Size: 4
+; CHECK-NEXT:     Offset: 133
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: DATA (0xB)
+; CHECK-NEXT:     Size: 27
+; CHECK-NEXT:     Offset: 143
+; CHECK-NEXT:     Segments [
+; CHECK-NEXT:       Segment {
+; CHECK-NEXT:         Name: .data.foo
+; CHECK-NEXT:         Size: 8
+; CHECK-NEXT:         Offset: 0
+; CHECK-NEXT:       }
+; CHECK-NEXT:       Segment {
+; CHECK-NEXT:         Name: .data.ptr2
+; CHECK-NEXT:         Size: 8
+; CHECK-NEXT:         Offset: 8
+; CHECK-NEXT:       }
+; CHECK-NEXT:     ]
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: CUSTOM (0x0)
+; CHECK-NEXT:     Size: 86
+; CHECK-NEXT:     Offset: 176
+; CHECK-NEXT:     Name: .debug_abbrev
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: CUSTOM (0x0)
+; CHECK-NEXT:     Size: 130
+; CHECK-NEXT:     Offset: 282
+; CHECK-NEXT:     Name: .debug_info
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: CUSTOM (0x0)
+; CHECK-NEXT:     Size: 121
+; CHECK-NEXT:     Offset: 430
+; CHECK-NEXT:     Name: .debug_str
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: CUSTOM (0x0)
+; CHECK-NEXT:     Size: 42
+; CHECK-NEXT:     Offset: 568
+; CHECK-NEXT:     Name: .debug_pubnames
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: CUSTOM (0x0)
+; CHECK-NEXT:     Size: 26
+; CHECK-NEXT:     Offset: 632
+; CHECK-NEXT:     Name: .debug_pubtypes
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: CUSTOM (0x0)
+; CHECK-NEXT:     Size: 61
+; CHECK-NEXT:     Offset: 680
+; CHECK-NEXT:     Name: .debug_line
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: CUSTOM (0x0)
+; CHECK-NEXT:     Size: 91
+; CHECK-NEXT:     Offset: 759
+; CHECK-NEXT:     Name: linking
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: CUSTOM (0x0)
+; CHECK-NEXT:     Size: 9
+; CHECK-NEXT:     Offset: 864
+; CHECK-NEXT:     Name: reloc.DATA
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: CUSTOM (0x0)
+; CHECK-NEXT:     Size: 61
+; CHECK-NEXT:     Offset: 890
+; CHECK-NEXT:     Name: reloc..debug_info
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: CUSTOM (0x0)
+; CHECK-NEXT:     Size: 6
+; CHECK-NEXT:     Offset: 975
+; CHECK-NEXT:     Name: reloc..debug_pubnames
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: CUSTOM (0x0)
+; CHECK-NEXT:     Size: 6
+; CHECK-NEXT:     Offset: 1009
+; CHECK-NEXT:     Name: reloc..debug_pubtypes
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: CUSTOM (0x0)
+; CHECK-NEXT:     Size: 6
+; CHECK-NEXT:     Offset: 1043
+; CHECK-NEXT:     Name: reloc..debug_line
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section {
+; CHECK-NEXT:     Type: CUSTOM (0x0)
+; CHECK-NEXT:     Size: 77
+; CHECK-NEXT:     Offset: 1073
+; CHECK-NEXT:     Name: producers
+; CHECK-NEXT:   }
+; CHECK-NEXT: ]
+; CHECK-NEXT: Relocations [
+; CHECK-NEXT:   Section (7) DATA {
+; CHECK-NEXT:     0x6 R_WASM_MEMORY_ADDR_I64 myextern 0
+; CHECK-NEXT:     0x13 R_WASM_TABLE_INDEX_I64 f2
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section (9) .debug_info {
+; CHECK-NEXT:     0x6 R_WASM_SECTION_OFFSET_I32 .debug_abbrev 0
+; CHECK-NEXT:     0xC R_WASM_SECTION_OFFSET_I32 .debug_str 0
+; CHECK-NEXT:     0x12 R_WASM_SECTION_OFFSET_I32 .debug_str 55
+; CHECK-NEXT:     0x16 R_WASM_SECTION_OFFSET_I32 .debug_line 0
+; CHECK-NEXT:     0x1A R_WASM_SECTION_OFFSET_I32 .debug_str 62
+; CHECK-NEXT:     0x1E R_WASM_FUNCTION_OFFSET_I64 f2 0
+; CHECK-NEXT:     0x2B R_WASM_SECTION_OFFSET_I32 .debug_str 105
+; CHECK-NEXT:     0x37 R_WASM_MEMORY_ADDR_I64 foo 0
+; CHECK-NEXT:     0x45 R_WASM_SECTION_OFFSET_I32 .debug_str 109
+; CHECK-NEXT:     0x4C R_WASM_SECTION_OFFSET_I32 .debug_str 113
+; CHECK-NEXT:     0x58 R_WASM_MEMORY_ADDR_I64 ptr2 0
+; CHECK-NEXT:     0x67 R_WASM_FUNCTION_OFFSET_I64 f2 0
+; CHECK-NEXT:     0x76 R_WASM_GLOBAL_INDEX_I32 __stack_pointer
+; CHECK-NEXT:     0x7B R_WASM_SECTION_OFFSET_I32 .debug_str 118
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section (11) .debug_pubnames {
+; CHECK-NEXT:     0x6 R_WASM_SECTION_OFFSET_I32 .debug_info 0
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section (12) .debug_pubtypes {
+; CHECK-NEXT:     0x6 R_WASM_SECTION_OFFSET_I32 .debug_info 0
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section (13) .debug_line {
+; CHECK-NEXT:     0x2B R_WASM_FUNCTION_OFFSET_I64 f2 0
+; CHECK-NEXT:   }
+; CHECK-NEXT: ]
+; CHECK-NEXT: Symbols [
+; CHECK-NEXT:   Symbol {
+; CHECK-NEXT:     Name: f2
+; CHECK-NEXT:     Type: FUNCTION (0x0)
+; CHECK-NEXT:     Flags [ (0x4)
+; CHECK-NEXT:       VISIBILITY_HIDDEN (0x4)
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     ElementIndex: 0x0
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Symbol {
+; CHECK-NEXT:     Name: foo
+; CHECK-NEXT:     Type: DATA (0x1)
+; CHECK-NEXT:     Flags [ (0x4)
+; CHECK-NEXT:       VISIBILITY_HIDDEN (0x4)
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     Offset: 0x0
+; CHECK-NEXT:     Segment: 0x0
+; CHECK-NEXT:     Size: 0x8
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Symbol {
+; CHECK-NEXT:     Name: myextern
+; CHECK-NEXT:     Type: DATA (0x1)
+; CHECK-NEXT:     Flags [ (0x10)
+; CHECK-NEXT:       UNDEFINED (0x10)
+; CHECK-NEXT:     ]
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Symbol {
+; CHECK-NEXT:     Name: ptr2
+; CHECK-NEXT:     Type: DATA (0x1)
+; CHECK-NEXT:     Flags [ (0x4)
+; CHECK-NEXT:       VISIBILITY_HIDDEN (0x4)
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     Offset: 0x0
+; CHECK-NEXT:     Segment: 0x1
+; CHECK-NEXT:     Size: 0x8
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Symbol {
+; CHECK-NEXT:     Name: .debug_abbrev
+; CHECK-NEXT:     Type: SECTION (0x3)
+; CHECK-NEXT:     Flags [ (0x2)
+; CHECK-NEXT:       BINDING_LOCAL (0x2)
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     ElementIndex: 0x7
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Symbol {
+; CHECK-NEXT:     Name: .debug_info
+; CHECK-NEXT:     Type: SECTION (0x3)
+; CHECK-NEXT:     Flags [ (0x2)
+; CHECK-NEXT:       BINDING_LOCAL (0x2)
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     ElementIndex: 0x8
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Symbol {
+; CHECK-NEXT:     Name: __stack_pointer
+; CHECK-NEXT:     Type: GLOBAL (0x2)
+; CHECK-NEXT:     Flags [ (0x10)
+; CHECK-NEXT:       UNDEFINED (0x10)
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     ImportName: __stack_pointer
+; CHECK-NEXT:     ImportModule: env
+; CHECK-NEXT:     ElementIndex: 0x0
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Symbol {
+; CHECK-NEXT:     Name: .debug_str
+; CHECK-NEXT:     Type: SECTION (0x3)
+; CHECK-NEXT:     Flags [ (0x2)
+; CHECK-NEXT:       BINDING_LOCAL (0x2)
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     ElementIndex: 0x9
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Symbol {
+; CHECK-NEXT:     Name: .debug_line
+; CHECK-NEXT:     Type: SECTION (0x3)
+; CHECK-NEXT:     Flags [ (0x2)
+; CHECK-NEXT:       BINDING_LOCAL (0x2)
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     ElementIndex: 0xC
+; CHECK-NEXT:   }
+; CHECK-NEXT: ]
+
+; generated from the following C code using: clang --target=wasm64 -g -O0 -S -emit-llvm test.c
+; extern int myextern;
+; void f2(void) { return; }
+;
+; int* foo = &myextern;
+; void (*ptr2)(void) = f2;
+
+target triple = "wasm64-unknown-unknown"
+
+source_filename = "test.c"
+
+@myextern = external global i32, align 4
+@foo = hidden global i32* @myextern, align 4, !dbg !0
+@ptr2 = hidden global void ()* @f2, align 4, !dbg !6
+
+; Function Attrs: noinline nounwind optnone
+define hidden void @f2() #0 !dbg !17 {
+entry:
+  ret void, !dbg !18
+}
+
+attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!13, !14, !15}
+!llvm.ident = !{!16}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "foo", scope: !2, file: !3, line: 4, type: !11, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 7.0.0 (trunk 332303) (llvm/trunk 332406)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5)
+!3 = !DIFile(filename: "test.c", directory: "/usr/local/google/home/sbc/dev/wasm/simple")
+!4 = !{}
+!5 = !{!0, !6}
+!6 = !DIGlobalVariableExpression(var: !7, expr: !DIExpression())
+!7 = distinct !DIGlobalVariable(name: "ptr2", scope: !2, file: !3, line: 5, type: !8, isLocal: false, isDefinition: true)
+!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 32)
+!9 = !DISubroutineType(types: !10)
+!10 = !{null}
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 32)
+!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{i32 1, !"wchar_size", i32 4}
+!16 = !{!"clang version 7.0.0 (trunk 332303) (llvm/trunk 332406)"}
+!17 = distinct !DISubprogram(name: "f2", scope: !3, file: !3, line: 2, type: !9, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !2, retainedNodes: !4)
+!18 = !DILocation(line: 2, column: 17, scope: !17)
diff --git a/llvm/test/MC/WebAssembly/dwarfdump64.ll b/llvm/test/MC/WebAssembly/dwarfdump64.ll
index 0166858c913425..ff711ed5398822 100644
--- a/llvm/test/MC/WebAssembly/dwarfdump64.ll
+++ b/llvm/test/MC/WebAssembly/dwarfdump64.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -filetype=obj %s -o - | llvm-dwarfdump --show-form - | FileCheck %s
 
 ; CHECK: .debug_info contents:
-; CHECK-NEXT: 0x00000000: Compile Unit: length = 0x0000006e, format = DWARF32, version = 0x0004, abbr_offset = 0x0000, addr_size = 0x04 (next unit at 0x00000072)
+; CHECK-NEXT: 0x00000000: Compile Unit: length = 0x0000007e, format = DWARF32, version = 0x0004, abbr_offset = 0x0000, addr_size = 0x08 (next unit at 0x00000082)
 
 ; CHECK: 0x0000000b: DW_TAG_compile_unit
 ; CHECK-NEXT:              DW_AT_producer [DW_FORM_strp]     ("clang version 6.0.0 (trunk 315924) (llvm/trunk 315960)")
@@ -11,46 +11,41 @@
 ; CHECK-NEXT:              DW_AT_stmt_list [DW_FORM_sec_offset]      (0x00000000)
 ; CHECK-NEXT:              DW_AT_comp_dir [DW_FORM_strp]     ("/usr/local/google/home/sbc/dev/wasm/simple")
 ; CHECK-NEXT:              DW_AT_GNU_pubnames [DW_FORM_flag_present] (true)
-; CHECK-NEXT:              DW_AT_low_pc [DW_FORM_addr]       (0x00000002)
+; CHECK-NEXT:              DW_AT_low_pc [DW_FORM_addr]       (0x0000000000000002)
 ; CHECK-NEXT:              DW_AT_high_pc [DW_FORM_data4]     (0x00000002)
 
-; CHECK: 0x00000026:   DW_TAG_variable
+; CHECK: 0x0000002a:   DW_TAG_variable
 ; CHECK-NEXT:                DW_AT_name [DW_FORM_strp]       ("foo")
-; CHECK-NEXT:                DW_AT_type [DW_FORM_ref4]       (0x00000037 "int*")
+; CHECK-NEXT:                DW_AT_type [DW_FORM_ref4]       (0x0000003f "int*")
 ; CHECK-NEXT:                DW_AT_external [DW_FORM_flag_present]   (true)
 ; CHECK-NEXT:                DW_AT_decl_file [DW_FORM_data1] ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c")
 ; CHECK-NEXT:                DW_AT_decl_line [DW_FORM_data1] (4)
 ; CHECK-NEXT:                DW_AT_location [DW_FORM_exprloc]        (DW_OP_addr 0x0)
 
-; CHECK: 0x00000037:   DW_TAG_pointer_type
-; CHECK-NEXT:                DW_AT_type [DW_FORM_ref4]	(0x0000003c "int")
+; CHECK: 0x0000003f:   DW_TAG_pointer_type
+; CHECK-NEXT:                DW_AT_type [DW_FORM_ref4]       (0x00000044 "int")
 
-; CHECK: 0x0000003c:   DW_TAG_base_type
+; CHECK: 0x00000044:   DW_TAG_base_type
 ; CHECK-NEXT:                DW_AT_name [DW_FORM_strp]       ("int")
 ; CHECK-NEXT:                DW_AT_encoding [DW_FORM_data1]  (DW_ATE_signed)
 ; CHECK-NEXT:                DW_AT_byte_size [DW_FORM_data1] (0x04)
 
-; CHECK: 0x00000043:   DW_TAG_variable
+; CHECK: 0x0000004b:   DW_TAG_variable
 ; CHECK-NEXT:                DW_AT_name [DW_FORM_strp]       ("ptr2")
-; CHECK-NEXT:                DW_AT_type [DW_FORM_ref4]       (0x00000054 "void()*")
+; CHECK-NEXT:                DW_AT_type [DW_FORM_ref4]       (0x00000060 "void()*")
 ; CHECK-NEXT:                DW_AT_external [DW_FORM_flag_present]   (true)
 ; CHECK-NEXT:                DW_AT_decl_file [DW_FORM_data1] ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c")
 ; CHECK-NEXT:                DW_AT_decl_line [DW_FORM_data1] (5)
-
-
-
-; TODO: is this correct?
-
 ; CHECK-NEXT:                DW_AT_location [DW_FORM_exprloc]        (DW_OP_addr 0x8)
 
-; CHECK: 0x00000054:   DW_TAG_pointer_type
-; CHECK-NEXT:                DW_AT_type [DW_FORM_ref4]	(0x00000059 "void()")
+; CHECK: 0x00000060:   DW_TAG_pointer_type
+; CHECK-NEXT:                DW_AT_type [DW_FORM_ref4]       (0x00000065 "void()")
 
-; CHECK: 0x00000059:   DW_TAG_subroutine_type
-; CHECK-NEXT:                DW_AT_prototyped [DW_FORM_flag_present]	(true)
+; CHECK: 0x00000065:   DW_TAG_subroutine_type
+; CHECK-NEXT:                DW_AT_prototyped [DW_FORM_flag_present] (true)
 
-; CHECK: 0x0000005a:   DW_TAG_subprogram
-; CHECK-NEXT:                DW_AT_low_pc [DW_FORM_addr]     (0x00000002)
+; CHECK: 0x00000066:   DW_TAG_subprogram
+; CHECK-NEXT:                DW_AT_low_pc [DW_FORM_addr]     (0x0000000000000002)
 ; CHECK-NEXT:                DW_AT_high_pc [DW_FORM_data4]   (0x00000002)
 ; CHECK-NEXT:                DW_AT_frame_base [DW_FORM_exprloc]      (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value)
 ; CHECK-NEXT:                DW_AT_name [DW_FORM_strp]       ("f2")
@@ -59,7 +54,7 @@
 ; CHECK-NEXT:                DW_AT_prototyped [DW_FORM_flag_present] (true)
 ; CHECK-NEXT:                DW_AT_external [DW_FORM_flag_present]   (true)
 
-; CHECK: 0x00000071:   NULL
+; CHECK: 0x00000081:   NULL
 
 target triple = "wasm64-unknown-unknown"
 
diff --git a/llvm/test/Transforms/FunctionImport/Inputs/cg_profile.ll b/llvm/test/Transforms/FunctionImport/Inputs/cg_profile.ll
new file mode 100644
index 00000000000000..5143575e1712b7
--- /dev/null
+++ b/llvm/test/Transforms/FunctionImport/Inputs/cg_profile.ll
@@ -0,0 +1,12 @@
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%class.A = type { [16 x i8] }
+
+define void @bar(%class.A*) {
+  ret void
+}
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"EnableSplitLTOUnit", i32 0}
diff --git a/llvm/test/Transforms/FunctionImport/cg_profile.ll b/llvm/test/Transforms/FunctionImport/cg_profile.ll
new file mode 100644
index 00000000000000..2cf920c2a5ddcc
--- /dev/null
+++ b/llvm/test/Transforms/FunctionImport/cg_profile.ll
@@ -0,0 +1,32 @@
+; Check that bitcast in "CG Profile" related metadata nodes (in this test case,
+; generated during function importing in IRMover's RAUW operations) are accepted
+; by verifier.
+; RUN: opt -cg-profile -module-summary %s -o %t.bc
+; RUN: opt -module-summary %p/Inputs/cg_profile.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+; RUN: opt -function-import -print-imports -summary-file %t3.thinlto.bc %t.bc \
+; RUN:   -S 2>&1 | FileCheck %s
+
+; CHECK:      !0 = !{i32 1, !"EnableSplitLTOUnit", i32 0}
+; CHECK-NEXT: !1 = !{i32 5, !"CG Profile", !2}
+; CHECK-NEXT: !2 = !{!3}
+; CHECK-NEXT: !3 = !{void ()* @foo, void (%class.A*)* bitcast (void (%class.A.0*)* @bar to void (%class.A*)*), i64 2753}
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; %class.A is defined differently in %p/Inputs/cg_profile.ll. This is to trigger
+; bitcast.
+%class.A = type { i8 }
+
+define void @foo() !prof !2 {
+  call void @bar(%class.A* null)
+  ret void
+}
+
+declare void @bar(%class.A*)
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"EnableSplitLTOUnit", i32 0}
+!2 = !{!"function_entry_count", i64 2753}
diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll
index ce9657433bb785..9868ed1cdf5758 100644
--- a/llvm/test/Transforms/InstCombine/sub-gep.ll
+++ b/llvm/test/Transforms/InstCombine/sub-gep.ll
@@ -143,8 +143,8 @@ define i64 @test_inbounds_nuw_multi_index([0 x [2 x i32]]* %base, i64 %idx, i64
 ; CHECK-LABEL: @test_inbounds_nuw_multi_index(
 ; CHECK-NEXT:    [[P2_IDX:%.*]] = shl nsw i64 [[IDX:%.*]], 3
 ; CHECK-NEXT:    [[P2_IDX1:%.*]] = shl nsw i64 [[IDX2:%.*]], 2
-; CHECK-NEXT:    [[P2_OFFS2:%.*]] = add i64 [[P2_IDX1]], [[P2_IDX]]
-; CHECK-NEXT:    ret i64 [[P2_OFFS2]]
+; CHECK-NEXT:    [[P2_OFFS:%.*]] = add nsw i64 [[P2_IDX]], [[P2_IDX1]]
+; CHECK-NEXT:    ret i64 [[P2_OFFS]]
 ;
   %p1 = getelementptr inbounds [0 x [2 x i32]], [0 x [2 x i32]]* %base, i64 0, i64 0, i64 0
   %p2 = getelementptr inbounds [0 x [2 x i32]], [0 x [2 x i32]]* %base, i64 0, i64 %idx, i64 %idx2
diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
index 3e9c066dfbf3ac..d9c67d0568f74a 100644
--- a/llvm/test/Transforms/InstCombine/sub.ll
+++ b/llvm/test/Transforms/InstCombine/sub.ll
@@ -1075,8 +1075,8 @@ define i64 @test58([100 x [100 x i8]]* %foo, i64 %i, i64 %j) {
 ; "%sub = i64 %i, %j, ret i64 %sub"
 ; gep1 and gep2 have only one use
 ; CHECK-LABEL: @test58(
-; CHECK-NEXT:    [[GEP1_OFFS:%.*]] = add i64 [[I:%.*]], 4200
-; CHECK-NEXT:    [[GEP2_OFFS:%.*]] = add i64 [[J:%.*]], 4200
+; CHECK-NEXT:    [[GEP1_OFFS:%.*]] = add nsw i64 [[I:%.*]], 4200
+; CHECK-NEXT:    [[GEP2_OFFS:%.*]] = add nsw i64 [[J:%.*]], 4200
 ; CHECK-NEXT:    [[GEPDIFF:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]]
 ; CHECK-NEXT:    ret i64 [[GEPDIFF]]
 ;
diff --git a/llvm/test/Transforms/LoopFusion/simple.ll b/llvm/test/Transforms/LoopFusion/simple.ll
index 97591ddf5d9975..bb4cf17d84697c 100644
--- a/llvm/test/Transforms/LoopFusion/simple.ll
+++ b/llvm/test/Transforms/LoopFusion/simple.ll
@@ -303,9 +303,8 @@ define void @forward_dep(i32* noalias %arg) {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    br label [[BB7:%.*]]
 ; CHECK:       bb7:
-; CHECK-NEXT:    [[DOT013:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB25:%.*]] ]
-; CHECK-NEXT:    [[INDVARS_IV22:%.*]] = phi i64 [ 0, [[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[BB25]] ]
-; CHECK-NEXT:    [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB25]] ], [ 0, [[BB]] ]
+; CHECK-NEXT:    [[DOT013:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB14:%.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV22:%.*]] = phi i64 [ 0, [[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[BB14]] ]
 ; CHECK-NEXT:    [[TMP:%.*]] = add nsw i32 [[DOT013]], -3
 ; CHECK-NEXT:    [[TMP8:%.*]] = add nuw nsw i64 [[INDVARS_IV22]], 3
 ; CHECK-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
@@ -314,8 +313,16 @@ define void @forward_dep(i32* noalias %arg) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = srem i32 [[TMP10]], [[TMP11]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[ARG:%.*]], i64 [[INDVARS_IV22]]
 ; CHECK-NEXT:    store i32 [[TMP12]], i32* [[TMP13]], align 4
-; CHECK-NEXT:    br label [[BB14:%.*]]
+; CHECK-NEXT:    br label [[BB14]]
 ; CHECK:       bb14:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1
+; CHECK-NEXT:    [[TMP15]] = add nuw nsw i32 [[DOT013]], 1
+; CHECK-NEXT:    [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100
+; CHECK-NEXT:    br i1 [[EXITCOND4]], label [[BB7]], label [[BB19_PREHEADER:%.*]]
+; CHECK:       bb19.preheader:
+; CHECK-NEXT:    br label [[BB19:%.*]]
+; CHECK:       bb19:
+; CHECK-NEXT:    [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB25:%.*]] ], [ 0, [[BB19_PREHEADER]] ]
 ; CHECK-NEXT:    [[TMP20:%.*]] = add nsw i64 [[INDVARS_IV1]], -3
 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[ARG]], i64 [[TMP20]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4
@@ -324,12 +331,9 @@ define void @forward_dep(i32* noalias %arg) {
 ; CHECK-NEXT:    store i32 [[TMP23]], i32* [[TMP24]], align 4
 ; CHECK-NEXT:    br label [[BB25]]
 ; CHECK:       bb25:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1
-; CHECK-NEXT:    [[TMP15]] = add nuw nsw i32 [[DOT013]], 1
-; CHECK-NEXT:    [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV1]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[BB7]], label [[BB26:%.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[BB19]], label [[BB26:%.*]]
 ; CHECK:       bb26:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp b/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp
index 6a54084cacafbc..c6a7f8e87bb01e 100644
--- a/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp
@@ -392,6 +392,71 @@ TEST_F(AArch64GISelMITest, MatchMiscellaneous) {
   match = mi_match(MIBAdd.getReg(0), *MRI, m_OneUse(m_GAdd(m_Reg(), m_Reg())));
   EXPECT_FALSE(match);
 }
+
+TEST_F(AArch64GISelMITest, MatchSpecificConstant) {
+  setUp();
+  if (!TM)
+    return;
+
+  // Basic case: Can we match a G_CONSTANT with a specific value?
+  auto FortyTwo = B.buildConstant(LLT::scalar(64), 42);
+  EXPECT_TRUE(mi_match(FortyTwo.getReg(0), *MRI, m_SpecificICst(42)));
+  EXPECT_FALSE(mi_match(FortyTwo.getReg(0), *MRI, m_SpecificICst(123)));
+
+  // Test that this works inside of a more complex pattern.
+  LLT s64 = LLT::scalar(64);
+  auto MIBAdd = B.buildAdd(s64, Copies[0], FortyTwo);
+  EXPECT_TRUE(mi_match(MIBAdd.getReg(2), *MRI, m_SpecificICst(42)));
+
+  // Wrong constant.
+  EXPECT_FALSE(mi_match(MIBAdd.getReg(2), *MRI, m_SpecificICst(123)));
+
+  // No constant on the LHS.
+  EXPECT_FALSE(mi_match(MIBAdd.getReg(1), *MRI, m_SpecificICst(42)));
+}
+
+TEST_F(AArch64GISelMITest, MatchZeroInt) {
+  setUp();
+  if (!TM)
+    return;
+  auto Zero = B.buildConstant(LLT::scalar(64), 0);
+  EXPECT_TRUE(mi_match(Zero.getReg(0), *MRI, m_ZeroInt()));
+
+  auto FortyTwo = B.buildConstant(LLT::scalar(64), 42);
+  EXPECT_FALSE(mi_match(FortyTwo.getReg(0), *MRI, m_ZeroInt()));
+}
+
+TEST_F(AArch64GISelMITest, MatchNeg) {
+  setUp();
+  if (!TM)
+    return;
+
+  LLT s64 = LLT::scalar(64);
+  auto Zero = B.buildConstant(LLT::scalar(64), 0);
+  auto NegInst = B.buildSub(s64, Zero, Copies[0]);
+  Register NegatedReg;
+
+  // Match: G_SUB = 0, %Reg
+  EXPECT_TRUE(mi_match(NegInst.getReg(0), *MRI, m_Neg(m_Reg(NegatedReg))));
+  EXPECT_EQ(NegatedReg, Copies[0]);
+
+  // Don't match: G_SUB = %Reg, 0
+  auto NotNegInst1 = B.buildSub(s64, Copies[0], Zero);
+  EXPECT_FALSE(mi_match(NotNegInst1.getReg(0), *MRI, m_Neg(m_Reg(NegatedReg))));
+
+  // Don't match: G_SUB = 42, %Reg
+  auto FortyTwo = B.buildConstant(LLT::scalar(64), 42);
+  auto NotNegInst2 = B.buildSub(s64, FortyTwo, Copies[0]);
+  EXPECT_FALSE(mi_match(NotNegInst2.getReg(0), *MRI, m_Neg(m_Reg(NegatedReg))));
+
+  // Complex testcase.
+  // %sub = G_SUB = 0, %negated_reg
+  // %add = G_ADD = %x, %sub
+  auto AddInst = B.buildAdd(s64, Copies[1], NegInst);
+  NegatedReg = Register();
+  EXPECT_TRUE(mi_match(AddInst.getReg(2), *MRI, m_Neg(m_Reg(NegatedReg))));
+  EXPECT_EQ(NegatedReg, Copies[0]);
+}
 } // namespace
 
 int main(int argc, char **argv) {
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn
index c929f88dec73d1..e6ea3b5c56c676 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn
@@ -1,5 +1,6 @@
 import("//clang-tools-extra/clangd/xpc/enable.gni")
 import("//llvm/triples.gni")
+import("//llvm/utils/gn/build/libs/zlib/enable.gni")
 import("//llvm/utils/gn/build/write_cmake_config.gni")
 import("clangd_lit_site_cfg_files.gni")
 
@@ -42,6 +43,12 @@ write_lit_config("lit_site_cfg") {
   } else {
     extra_values += [ "CLANGD_BUILD_XPC=0" ]
   }
+
+  if (llvm_enable_zlib) {
+    extra_values += [ "LLVM_ENABLE_ZLIB=1" ]
+  } else {
+    extra_values += [ "LLVM_ENABLE_ZLIB=0" ]
+  }
 }
 
 write_lit_config("lit_unit_site_cfg") {