From 5c4fc581d5fe8427f03ec90b0d745453398aa3ad Mon Sep 17 00:00:00 2001
From: Amy Huang <akhuang@google.com>
Date: Tue, 29 Sep 2020 16:19:08 -0700
Subject: [PATCH 01/12] [DebugInfo] Add types from constructor homing to the
 retained types list.

Add class types to the retained types list to make sure they
don't get dropped if the constructor is optimized out later.

Differential Revision: https://reviews.llvm.org/D88522
---
 clang/lib/CodeGen/CGDebugInfo.cpp                 | 2 +-
 clang/test/CodeGenCXX/debug-info-limited-ctor.cpp | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 27c584ff0795a9..88aace8b85dd18 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -1726,7 +1726,7 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
   // info is emitted.
   if (DebugKind == codegenoptions::DebugInfoConstructor)
     if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(Method))
-      completeClass(CD->getParent());
+      completeUnusedClass(*CD->getParent());
 
   llvm::DINodeArray TParamsArray = CollectFunctionTemplateParams(Method, Unit);
   llvm::DISubprogram *SP = DBuilder.createMethod(
diff --git a/clang/test/CodeGenCXX/debug-info-limited-ctor.cpp b/clang/test/CodeGenCXX/debug-info-limited-ctor.cpp
index cf2e89e35522f7..cf7adad6b44929 100644
--- a/clang/test/CodeGenCXX/debug-info-limited-ctor.cpp
+++ b/clang/test/CodeGenCXX/debug-info-limited-ctor.cpp
@@ -9,7 +9,7 @@ struct B {
   B();
 } TestB;
 
-// CHECK-DAG: !DICompositeType(tag: DW_TAG_structure_type, name: "C"{{.*}}DIFlagTypePassByValue
+// CHECK-DAG: ![[C:[0-9]+]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "C"{{.*}}DIFlagTypePassByValue
 struct C {
   C() {}
 } TestC;
@@ -73,3 +73,7 @@ void f(K k) {}
 void L() {
   auto func = [&]() {};
 }
+
+// Check that types are being added to retained types list.
+// CHECK-DAG: !DICompileUnit{{.*}}retainedTypes: ![[RETAINED:[0-9]+]]
+// CHECK-DAG: ![[RETAINED]] = {{.*}}![[C]]

From f71849c74ed58e5d9ed3681cc6294128098012dc Mon Sep 17 00:00:00 2001
From: Vedant Kumar <vsk@apple.com>
Date: Tue, 29 Sep 2020 17:07:06 -0700
Subject: [PATCH 02/12] [docs] Recommend dropLocation() over
 setDebugLoc(DebugLoc())

---
 llvm/docs/HowToUpdateDebugInfo.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/HowToUpdateDebugInfo.rst b/llvm/docs/HowToUpdateDebugInfo.rst
index 3283bfd893393b..7df2a8a2582759 100644
--- a/llvm/docs/HowToUpdateDebugInfo.rst
+++ b/llvm/docs/HowToUpdateDebugInfo.rst
@@ -117,7 +117,7 @@ When to drop an instruction location
 A transformation should drop debug locations if the rules for
 :ref:`preserving<WhenToPreserveLocation>` and
 :ref:`merging<WhenToMergeLocation>` debug locations do not apply. The API to
-use is ``Instruction::setDebugLoc()``.
+use is ``Instruction::dropLocation()``.
 
 The purpose of this rule is to prevent erratic or misleading single-stepping
 behavior in situations in which an instruction has no clear, unambiguous

From 616c68aab75016d5d7ebc0b79bb3c38405b18ae6 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka@google.com>
Date: Tue, 29 Sep 2020 14:38:56 -0700
Subject: [PATCH 03/12] [NFC][MSAN] Remove an attribute in test

---
 llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll
index a8ce0561c3b87a..54493c9cdc8c5f 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll
@@ -7,7 +7,7 @@
 target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-define <4 x i64> @test_mm256_abs_epi8(<4 x i64> noundef %a) local_unnamed_addr #0 {
+define <4 x i64> @test_mm256_abs_epi8(<4 x i64> %a) local_unnamed_addr #0 {
 ; CHECK-LABEL: @test_mm256_abs_epi8(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8

From 795d94fdb9d2377452f86952dcf0921a6c68d2b5 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka@google.com>
Date: Tue, 29 Sep 2020 15:31:25 -0700
Subject: [PATCH 04/12] [NFC][Msan] Add llvm.fabs test

llvm.fabs does not need a special handler as llvm.abs as its
single argument type match the return type.
---
 .../MemorySanitizer/abs-vector.ll             | 25 ++++++++++++++++---
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll
index 54493c9cdc8c5f..d3b29d65f2f224 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -S -msan-check-access-address=0 -passes=msan 2>&1 | FileCheck %s
-; RUN: opt < %s -S -msan-check-access-address=0 -msan | FileCheck %s
-; RUN: opt < %s -S -msan-check-access-address=0 -msan-track-origins=2 -passes=msan 2>&1 | FileCheck %s --check-prefixes=CHECK,ORIGIN
-; RUN: opt < %s -S -msan-check-access-address=0 -msan-track-origins=2 -msan | FileCheck %s --check-prefixes=CHECK,ORIGIN
+; RUN: opt %s -S -msan-check-access-address=0 -passes=msan 2>&1 | FileCheck %s
+; RUN: opt %s -S -msan-check-access-address=0 -msan | FileCheck %s
+; RUN: opt %s -S -msan-check-access-address=0 -msan-track-origins=2 -passes=msan 2>&1 | FileCheck %s --check-prefixes=CHECK,ORIGIN
+; RUN: opt %s -S -msan-check-access-address=0 -msan-track-origins=2 -msan | FileCheck %s --check-prefixes=CHECK,ORIGIN
 
 target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -73,9 +73,26 @@ entry:
   ret <4 x i64> %2
 }
 
+define <4 x double> @test_fabs(<4 x double> %a) local_unnamed_addr #0 {
+; CHECK-LABEL: @test_fabs(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8
+; ORIGIN-NEXT:   [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__msan_param_origin_tls, i32 0, i32 0), align 4
+; CHECK:         call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> [[A:%.*]])
+; CHECK-NEXT:    store <4 x i64> [[TMP0]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8
+; ORIGIN-NEXT:   store i32 [[TMP1]], i32* @__msan_retval_origin_tls, align 4
+; CHECK:         ret <4 x double> [[TMP2]]
+;
+entry:
+  %0 = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %a)
+  ret <4 x double> %0
+}
+
 declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1 immarg) #1
 declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1 immarg) #1
 declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1 immarg) #1
+declare <4 x double> @llvm.fabs.v4f64(<4 x double>) #1
 
 attributes #0 = { nounwind readnone sanitize_memory }
 attributes #1 = { nounwind readnone speculatable willreturn }

From afcf9c47c5e74a0b567531547b677ff1d383ae50 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard@metafoo.co.uk>
Date: Tue, 29 Sep 2020 17:08:42 -0700
Subject: [PATCH 05/12] Fix test failures with trunk clang

- Make the consteval constructor for the zero type be noexcept
- Don't expect three-way comparison of 0 against a comparison category
  to fail
---
 libcxx/include/compare                                      | 2 +-
 .../cmp/cmp.categories.pre/zero_type.verify.cpp             | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/libcxx/include/compare b/libcxx/include/compare
index c1cd81bb6fc1ac..596505f8860d4b 100644
--- a/libcxx/include/compare
+++ b/libcxx/include/compare
@@ -156,7 +156,7 @@ enum class _LIBCPP_ENUM_VIS _NCmpResult : signed char {
 
 struct _CmpUnspecifiedParam {
   _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEVAL
-  _CmpUnspecifiedParam(int _CmpUnspecifiedParam::*) {}
+  _CmpUnspecifiedParam(int _CmpUnspecifiedParam::*) noexcept {}
 
   template<typename _Tp, typename = _VSTD::enable_if_t<!_VSTD::is_same_v<_Tp, int>>>
   _CmpUnspecifiedParam(_Tp) = delete;
diff --git a/libcxx/test/std/language.support/cmp/cmp.categories.pre/zero_type.verify.cpp b/libcxx/test/std/language.support/cmp/cmp.categories.pre/zero_type.verify.cpp
index 40f6677d43c9be..fc21c03a3ddf07 100644
--- a/libcxx/test/std/language.support/cmp/cmp.categories.pre/zero_type.verify.cpp
+++ b/libcxx/test/std/language.support/cmp/cmp.categories.pre/zero_type.verify.cpp
@@ -46,9 +46,9 @@ void test_category(T v) {
   void(0 > v);
   void(v >= 0);
   void(0 >= v);
-#ifndef _LIBCPP_HAS_NO_THREE_WAY_COMPARISON
-  void(v <=> 0); // expected-error 3 {{}}
-  void(0 <=> v); // expected-error 3 {{}}
+#ifndef _LIBCPP_HAS_NO_SPACESHIP_OPERATOR
+  void(v <=> 0);
+  void(0 <=> v);
 #endif
 }
 

From bd14d6ea1517c93ceecaec29dad016d9a122fa1b Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Tue, 29 Sep 2020 17:22:16 -0700
Subject: [PATCH 06/12] [lldb] Hoist -s (trace directory) argument out of
 LLDB_TEST_COMMON_ARGS (NFC)

Give the trace directory argument its own variable
(LLDB_TEST_TRACE_DIRECTORY) so that we can configure it in
lit.site.cfg.py if we so desire.
---
 lldb/test/API/CMakeLists.txt          |  7 +++++--
 lldb/test/API/lit.cfg.py              |  3 +++
 lldb/test/API/lit.site.cfg.py.in      |  1 +
 lldb/utils/lldb-dotest/CMakeLists.txt |  5 +++++
 lldb/utils/lldb-dotest/lldb-dotest.in | 10 ++++++----
 5 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/lldb/test/API/CMakeLists.txt b/lldb/test/API/CMakeLists.txt
index fe92012e37678b..f4802e2f5ca2cb 100644
--- a/lldb/test/API/CMakeLists.txt
+++ b/lldb/test/API/CMakeLists.txt
@@ -36,13 +36,14 @@ set(LLDB_TEST_USER_ARGS
 # hash of filename and .text section, there *will* be conflicts inside
 # the build directory.
 set(LLDB_TEST_COMMON_ARGS
-  -s
-  ${CMAKE_BINARY_DIR}/lldb-test-traces
   -S nm
   -u CXXFLAGS
   -u CFLAGS
   )
 
+# Configure the traces directory.
+set(LLDB_TEST_TRACE_DIRECTORY "${PROJECT_BINARY_DIR}/lldb-test-traces" CACHE PATH "The test traces directory.")
+
 # Set the path to the default lldb test executable.
 set(LLDB_DEFAULT_TEST_EXECUTABLE "${LLVM_RUNTIME_OUTPUT_INTDIR}/lldb${CMAKE_EXECUTABLE_SUFFIX}")
 
@@ -141,6 +142,7 @@ if(LLDB_BUILT_STANDALONE)
   string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_SOURCE_DIR "${LLDB_SOURCE_DIR}")
   string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_FRAMEWORK_DIR "${LLDB_FRAMEWORK_DIR}")
   string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_BUILD_DIRECTORY "${LLDB_TEST_BUILD_DIRECTORY}")
+  string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_TRACE_DIRECTORY "${LLDB_TEST_TRACE_DIRECTORY}")
   string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_EXECUTABLE "${LLDB_TEST_EXECUTABLE}")
   string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_COMPILER "${LLDB_TEST_COMPILER}")
   string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_DSYMUTIL "${LLDB_TEST_DSYMUTIL}")
@@ -170,6 +172,7 @@ endif()
 string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_DOTEST_ARGS "${LLDB_DOTEST_ARGS}")
 string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_SOURCE_DIR "${LLDB_SOURCE_DIR}")
 string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_BUILD_DIRECTORY "${LLDB_TEST_BUILD_DIRECTORY}")
+string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_TRACE_DIRECTORY "${LLDB_TEST_TRACE_DIRECTORY}")
 string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_EXECUTABLE "${LLDB_TEST_EXECUTABLE}")
 string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_COMPILER "${LLDB_TEST_COMPILER}")
 string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_DSYMUTIL "${LLDB_TEST_DSYMUTIL}")
diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py
index d78a1aae546752..a4d4d83fd366df 100644
--- a/lldb/test/API/lit.cfg.py
+++ b/lldb/test/API/lit.cfg.py
@@ -177,6 +177,9 @@ def delete_module_cache(path):
 if is_configured('lldb_build_directory'):
   dotest_cmd += ['--build-dir', config.lldb_build_directory]
 
+if is_configured('lldb_trace_directory'):
+  dotest_cmd += ['-s', config.lldb_trace_directory]
+
 if is_configured('lldb_module_cache'):
   delete_module_cache(config.lldb_module_cache)
   dotest_cmd += ['--lldb-module-cache-dir', config.lldb_module_cache]
diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in
index 271faf371f9d1d..0481e8fecc73a2 100644
--- a/lldb/test/API/lit.site.cfg.py.in
+++ b/lldb/test/API/lit.site.cfg.py.in
@@ -19,6 +19,7 @@ config.shared_libs = @LLVM_ENABLE_SHARED_LIBS@
 config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
 config.target_triple = "@TARGET_TRIPLE@"
 config.lldb_build_directory = "@LLDB_TEST_BUILD_DIRECTORY@"
+config.lldb_trace_directory = "@LLDB_TEST_TRACE_DIRECTORY@"
 config.lldb_reproducer_directory = os.path.join("@LLDB_TEST_BUILD_DIRECTORY@", "reproducers")
 config.python_executable = "@Python3_EXECUTABLE@"
 config.dotest_args_str = "@LLDB_DOTEST_ARGS@"
diff --git a/lldb/utils/lldb-dotest/CMakeLists.txt b/lldb/utils/lldb-dotest/CMakeLists.txt
index 1001fbf04ebe79..cba04f3499b957 100644
--- a/lldb/utils/lldb-dotest/CMakeLists.txt
+++ b/lldb/utils/lldb-dotest/CMakeLists.txt
@@ -23,6 +23,7 @@ if(LLDB_BUILT_STANDALONE)
     string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}")
     string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}")
     string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}")
+    string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_TRACE_DIRECTORY_CONFIGURED "${LLDB_TEST_TRACE_DIRECTORY}")
     string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}")
     string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}")
     string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}")
@@ -37,6 +38,7 @@ if(LLDB_BUILT_STANDALONE)
       string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}")
       string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}")
       string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}")
+      string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_TRACE_DIRECTORY_CONFIGURED "${LLDB_TEST_TRACE_DIRECTORY}")
       string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}")
       string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}")
       string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}")
@@ -50,6 +52,7 @@ if(LLDB_BUILT_STANDALONE)
       string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}")
       string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}")
       string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}")
+      string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_TRACE_DIRECTORY_CONFIGURED "${LLDB_TEST_TRACE_DIRECTORY}")
       string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}")
       string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}")
       string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}")
@@ -71,6 +74,7 @@ elseif(NOT "${CMAKE_CFG_INTDIR}" STREQUAL ".")
     string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}")
     string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}")
     string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}")
+    string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_TRACE_DIRECTORY_CONFIGURED "${LLDB_TEST_TRACE_DIRECTORY}")
     string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}")
     string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}")
     string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}")
@@ -89,6 +93,7 @@ else()
   set(LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}")
   set(LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}")
   set(LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}")
+  set(LLDB_TEST_TRACE_DIRECTORY_CONFIGURED "${LLDB_TEST_TRACE_DIRECTORY}")
   set(LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}")
   set(LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}")
   set(LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}")
diff --git a/lldb/utils/lldb-dotest/lldb-dotest.in b/lldb/utils/lldb-dotest/lldb-dotest.in
index cfd73f5b32a6e4..d66968955a7409 100755
--- a/lldb/utils/lldb-dotest/lldb-dotest.in
+++ b/lldb/utils/lldb-dotest/lldb-dotest.in
@@ -3,7 +3,6 @@ import subprocess
 import sys
 
 dotest_path = '@LLDB_SOURCE_DIR_CONFIGURED@/test/API/dotest.py'
-build_dir = '@LLDB_TEST_BUILD_DIRECTORY_CONFIGURED@'
 dotest_args_str = '@LLDB_DOTEST_ARGS_CONFIGURED@'
 arch = '@LLDB_TEST_ARCH@'
 executable = '@LLDB_TEST_EXECUTABLE_CONFIGURED@'
@@ -12,9 +11,11 @@ dsymutil = '@LLDB_TEST_DSYMUTIL_CONFIGURED@'
 filecheck = '@LLDB_TEST_FILECHECK_CONFIGURED@'
 yaml2obj = '@LLDB_TEST_YAML2OBJ_CONFIGURED@'
 server = '@LLDB_TEST_SERVER_CONFIGURED@'
-lldb_libs_dir = "@LLDB_LIBS_DIR_CONFIGURED@"
-lldb_framework_dir = "@LLDB_FRAMEWORK_DIR_CONFIGURED@"
+lldb_build_dir = '@LLDB_TEST_BUILD_DIRECTORY_CONFIGURED@'
 lldb_build_intel_pt = "@LLDB_BUILD_INTEL_PT@"
+lldb_framework_dir = "@LLDB_FRAMEWORK_DIR_CONFIGURED@"
+lldb_libs_dir = "@LLDB_LIBS_DIR_CONFIGURED@"
+lldb_trace_dir = '@LLDB_TEST_TRACE_DIRECTORY_CONFIGURED@'
 
 if __name__ == '__main__':
     wrapper_args = sys.argv[1:]
@@ -23,7 +24,8 @@ if __name__ == '__main__':
     cmd = [sys.executable, dotest_path]
     cmd.extend(['--arch', arch])
     cmd.extend(dotest_args)
-    cmd.extend(['--build-dir', build_dir])
+    cmd.extend(['-s', lldb_trace_dir])
+    cmd.extend(['--build-dir', lldb_build_dir])
     cmd.extend(['--executable', executable])
     cmd.extend(['--compiler', compiler])
     cmd.extend(['--dsymutil', dsymutil])

From 674f57870f4c8a7fd7b629bffc85b149cbefd3e0 Mon Sep 17 00:00:00 2001
From: Vedant Kumar <vsk@apple.com>
Date: Tue, 29 Sep 2020 17:37:36 -0700
Subject: [PATCH 07/12] [gardening] Replace some uses of
 setDebugLoc(DebugLoc()) with dropLocation(), NFC

---
 llvm/lib/Transforms/Scalar/LICM.cpp       | 2 +-
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 631fa2f27c5b3f..bc581e7ad40f32 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2159,7 +2159,7 @@ bool llvm::promoteLoopAccessesToScalars(
   if (SawUnorderedAtomic)
     PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
   PreheaderLoad->setAlignment(Alignment);
-  PreheaderLoad->setDebugLoc(DebugLoc());
+  PreheaderLoad->dropLocation();
   if (AATags)
     PreheaderLoad->setAAMetadata(AATags);
   SSA.AddAvailableValue(Preheader, PreheaderLoad);
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 124a7c423e72cd..1672293380d7b1 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2218,7 +2218,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
   // be misleading while debugging.
   for (auto &I : *ThenBB) {
     if (!SpeculatedStoreValue || &I != SpeculatedStore)
-      I.setDebugLoc(DebugLoc());
+      I.dropLocation();
     I.dropUnknownNonDebugMetadata();
   }
 
@@ -2878,7 +2878,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
       // When we fold the bonus instructions we want to make sure we
       // reset their debug locations in order to avoid stepping on dead
       // code caused by folding dead branches.
-      NewBonusInst->setDebugLoc(DebugLoc());
+      NewBonusInst->dropLocation();
 
       RemapInstruction(NewBonusInst, VMap,
                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
@@ -2902,7 +2902,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
 
     // Reset the condition debug location to avoid jumping on dead code
     // as the result of folding dead branches.
-    CondInPred->setDebugLoc(DebugLoc());
+    CondInPred->dropLocation();
 
     RemapInstruction(CondInPred, VMap,
                      RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);

From d04775e16bba456f0be0aaa7478959c5bfa22c41 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu@amd.com>
Date: Tue, 29 Sep 2020 08:51:26 -0400
Subject: [PATCH 08/12] Add remquo, frexp and modf overload functions to HIP
 header

---
 clang/lib/Headers/__clang_hip_math.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h
index 0c27ef60a06488..b72bb40ccdb674 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -1221,6 +1221,27 @@ __DEVICE__
 inline _Float16 pow(_Float16 __base, int __iexp) {
   return __ocml_pown_f16(__base, __iexp);
 }
+
+__DEVICE__
+inline float remquo(float __x, float __y, int *__quo) {
+  return remquof(__x, __y, __quo);
+}
+
+template <typename __T1, typename __T2>
+__DEVICE__
+    typename __hip_enable_if<std::numeric_limits<__T1>::is_specialized &&
+                                 std::numeric_limits<__T2>::is_specialized,
+                             double>::type
+    remquo(__T1 __x, __T2 __y, int *__quo) {
+  return remquo((double)__x, (double)__y, __quo);
+}
+
+__DEVICE__
+inline float frexp(float __x, int *__nptr) { return frexpf(__x, __nptr); }
+
+__DEVICE__
+inline float modf(float __x, float *__iptr) { return modff(__x, __iptr); }
+
 #endif
 
 #pragma pop_macro("__DEF_FUN1")

From c6b18cf9672bca4f61bb3ef401173742068e46ea Mon Sep 17 00:00:00 2001
From: Evandro Menezes <ebahapo@users.noreply.github.com>
Date: Tue, 29 Sep 2020 17:11:12 -0500
Subject: [PATCH 09/12] [RISCV] Use the extensions in the canonical order (NFC)

Use the ISA extensions for specific processors in the conventional canonical order.
---
 llvm/lib/Target/RISCV/RISCV.td | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 578b393dc879ae..66eda3ba360cfb 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -231,16 +231,16 @@ def : ProcessorModel<"rocket-rv64", RocketModel, [Feature64Bit]>;
 def : ProcessorModel<"bullet-rv32", BulletModel, []>;
 def : ProcessorModel<"bullet-rv64", BulletModel, [Feature64Bit]>;
 
-def : ProcessorModel<"sifive-e31", RocketModel, [FeatureStdExtA,
-                                                 FeatureStdExtC,
-                                                 FeatureStdExtM]>;
+def : ProcessorModel<"sifive-e31", RocketModel, [FeatureStdExtM,
+                                                 FeatureStdExtA,
+                                                 FeatureStdExtC]>;
 
 def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit,
+                                                 FeatureStdExtM,
+                                                 FeatureStdExtF,
                                                  FeatureStdExtA,
-                                                 FeatureStdExtC,
                                                  FeatureStdExtD,
-                                                 FeatureStdExtF,
-                                                 FeatureStdExtM]>;
+                                                 FeatureStdExtC]>;
 
 //===----------------------------------------------------------------------===//
 // Define the RISC-V target.

From 0a146a9d0bdd54411f0b0712e27481a4c280ae03 Mon Sep 17 00:00:00 2001
From: Hubert Tong <hubert.reinterpretcast@gmail.com>
Date: Tue, 29 Sep 2020 21:11:16 -0400
Subject: [PATCH 10/12] [AIX] asm output: use character literals in byte lists
 for strings

This patch improves the assembly output produced for string literals by
using character literals in byte lists. This provides the benefits of
having printable characters appear as such in the assembly output and of
having strings kept as logical units on the same line.

Reviewed By: daltenty

Differential Revision: https://reviews.llvm.org/D80953
---
 llvm/include/llvm/MC/MCAsmInfo.h              | 23 +++++
 llvm/lib/MC/MCAsmInfoXCOFF.cpp                |  2 +
 llvm/lib/MC/MCAsmStreamer.cpp                 | 94 ++++++++++++++-----
 llvm/test/CodeGen/PowerPC/aix-bytestring.ll   |  7 ++
 llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll   |  5 +-
 .../PowerPC/aix-xcoff-mergeable-str.ll        | 25 +----
 llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll |  5 +-
 7 files changed, 108 insertions(+), 53 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-bytestring.ll

diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h
index 0f9d503045d88a..2b889d0ed5fa9d 100644
--- a/llvm/include/llvm/MC/MCAsmInfo.h
+++ b/llvm/include/llvm/MC/MCAsmInfo.h
@@ -54,6 +54,15 @@ enum LCOMMType { NoAlignment, ByteAlignment, Log2Alignment };
 /// This class is intended to be used as a base class for asm
 /// properties and features specific to the target.
 class MCAsmInfo {
+public:
+  /// Assembly character literal syntax types.
+  enum AsmCharLiteralSyntax {
+    ACLS_Unknown, /// Unknown; character literals not used by LLVM for this
+                  /// target.
+    ACLS_SingleQuotePrefix, /// The desired character is prefixed by a single
+                            /// quote, e.g., `'A`.
+  };
+
 protected:
   //===------------------------------------------------------------------===//
   // Properties to be set by the target writer, used to configure asm printer.
@@ -200,6 +209,16 @@ class MCAsmInfo {
   /// doesn't support this, it can be set to null.  Defaults to "\t.asciz\t"
   const char *AscizDirective;
 
+  /// This directive accepts a comma-separated list of bytes for emission as a
+  /// string of bytes.  For targets that do not support this, it shall be set to
+  /// null.  Defaults to null.
+  const char *ByteListDirective = nullptr;
+
+  /// Form used for character literals in the assembly syntax.  Useful for
+  /// producing strings as byte lists.  If a target does not use or support
+  /// this, it shall be set to ACLS_Unknown.  Defaults to ACLS_Unknown.
+  AsmCharLiteralSyntax CharacterLiteralSyntax = ACLS_Unknown;
+
   /// These directives are used to output some unit of integer data to the
   /// current section.  If a data directive is set to null, smaller data
   /// directives will be used to emit the large sizes.  Defaults to "\t.byte\t",
@@ -562,6 +581,10 @@ class MCAsmInfo {
   }
   const char *getAsciiDirective() const { return AsciiDirective; }
   const char *getAscizDirective() const { return AscizDirective; }
+  const char *getByteListDirective() const { return ByteListDirective; }
+  AsmCharLiteralSyntax characterLiteralSyntax() const {
+    return CharacterLiteralSyntax;
+  }
   bool getAlignmentIsInBytes() const { return AlignmentIsInBytes; }
   unsigned getTextAlignFillValue() const { return TextAlignFillValue; }
   const char *getGlobalDirective() const { return GlobalDirective; }
diff --git a/llvm/lib/MC/MCAsmInfoXCOFF.cpp b/llvm/lib/MC/MCAsmInfoXCOFF.cpp
index b5c5bb3ace8edf..04982af4af31be 100644
--- a/llvm/lib/MC/MCAsmInfoXCOFF.cpp
+++ b/llvm/lib/MC/MCAsmInfoXCOFF.cpp
@@ -24,6 +24,8 @@ MCAsmInfoXCOFF::MCAsmInfoXCOFF() {
   ZeroDirectiveSupportsNonZeroValue = false;
   AsciiDirective = nullptr; // not supported
   AscizDirective = nullptr; // not supported
+  ByteListDirective = "\t.byte\t";
+  CharacterLiteralSyntax = ACLS_SingleQuotePrefix;
 
   // Use .vbyte for data definition to avoid directives that apply an implicit
   // alignment.
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 647197d8de4d11..8d96935b220599 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -971,6 +971,47 @@ void MCAsmStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
 
 static inline char toOctal(int X) { return (X&7)+'0'; }
 
+static void PrintByteList(StringRef Data, raw_ostream &OS,
+                          MCAsmInfo::AsmCharLiteralSyntax ACLS) {
+  assert(!Data.empty() && "Cannot generate an empty list.");
+  const auto printCharacterInOctal = [&OS](unsigned char C) {
+    OS << '0';
+    OS << toOctal(C >> 6);
+    OS << toOctal(C >> 3);
+    OS << toOctal(C >> 0);
+  };
+  const auto printOneCharacterFor = [printCharacterInOctal](
+                                        auto printOnePrintingCharacter) {
+    return [printCharacterInOctal, printOnePrintingCharacter](unsigned char C) {
+      if (isPrint(C)) {
+        printOnePrintingCharacter(static_cast<char>(C));
+        return;
+      }
+      printCharacterInOctal(C);
+    };
+  };
+  const auto printCharacterList = [Data, &OS](const auto &printOneCharacter) {
+    const auto BeginPtr = Data.begin(), EndPtr = Data.end();
+    for (const unsigned char C : make_range(BeginPtr, EndPtr - 1)) {
+      printOneCharacter(C);
+      OS << ',';
+    }
+    printOneCharacter(*(EndPtr - 1));
+  };
+  switch (ACLS) {
+  case MCAsmInfo::ACLS_Unknown:
+    printCharacterList(printCharacterInOctal);
+    return;
+  case MCAsmInfo::ACLS_SingleQuotePrefix:
+    printCharacterList(printOneCharacterFor([&OS](char C) {
+      const char AsmCharLitBuf[2] = {'\'', C};
+      OS << StringRef(AsmCharLitBuf, sizeof(AsmCharLitBuf));
+    }));
+    return;
+  }
+  llvm_unreachable("Invalid AsmCharLiteralSyntax value!");
+}
+
 static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
   OS << '"';
 
@@ -1009,33 +1050,42 @@ void MCAsmStreamer::emitBytes(StringRef Data) {
          "Cannot emit contents before setting section!");
   if (Data.empty()) return;
 
-  // If only single byte is provided or no ascii or asciz directives is
-  // supported, emit as vector of 8bits data.
-  if (Data.size() == 1 ||
-      !(MAI->getAscizDirective() || MAI->getAsciiDirective())) {
-    if (MCTargetStreamer *TS = getTargetStreamer()) {
-      TS->emitRawBytes(Data);
+  const auto emitAsString = [this](StringRef Data) {
+    // If the data ends with 0 and the target supports .asciz, use it, otherwise
+    // use .ascii or a byte-list directive
+    if (MAI->getAscizDirective() && Data.back() == 0) {
+      OS << MAI->getAscizDirective();
+      Data = Data.substr(0, Data.size() - 1);
+    } else if (LLVM_LIKELY(MAI->getAsciiDirective())) {
+      OS << MAI->getAsciiDirective();
+    } else if (MAI->getByteListDirective()) {
+      OS << MAI->getByteListDirective();
+      PrintByteList(Data, OS, MAI->characterLiteralSyntax());
+      EmitEOL();
+      return true;
     } else {
-      const char *Directive = MAI->getData8bitsDirective();
-      for (const unsigned char C : Data.bytes()) {
-        OS << Directive << (unsigned)C;
-        EmitEOL();
-      }
+      return false;
     }
+
+    PrintQuotedString(Data, OS);
+    EmitEOL();
+    return true;
+  };
+
+  if (Data.size() != 1 && emitAsString(Data))
     return;
-  }
 
-  // If the data ends with 0 and the target supports .asciz, use it, otherwise
-  // use .ascii
-  if (MAI->getAscizDirective() && Data.back() == 0) {
-    OS << MAI->getAscizDirective();
-    Data = Data.substr(0, Data.size()-1);
-  } else {
-    OS << MAI->getAsciiDirective();
+  // Only single byte is provided or no ascii, asciz, or byte-list directives
+  // are applicable. Emit as vector of individual 8bits data elements.
+  if (MCTargetStreamer *TS = getTargetStreamer()) {
+    TS->emitRawBytes(Data);
+    return;
+  }
+  const char *Directive = MAI->getData8bitsDirective();
+  for (const unsigned char C : Data.bytes()) {
+    OS << Directive << (unsigned)C;
+    EmitEOL();
   }
-
-  PrintQuotedString(Data, OS);
-  EmitEOL();
 }
 
 void MCAsmStreamer::emitBinaryData(StringRef Data) {
diff --git a/llvm/test/CodeGen/PowerPC/aix-bytestring.ll b/llvm/test/CodeGen/PowerPC/aix-bytestring.ll
new file mode 100644
index 00000000000000..443c019c9e30cf
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-bytestring.ll
@@ -0,0 +1,7 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s
+
+@str = constant [256 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10\11\12\13\14\15\16\17\18\19\1A\1B\1C\1D\1E\1F !\22#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\7F\80\81\82\83\84\85\86\87\88\89\8A\8B\8C\8D\8E\8F\90\91\92\93\94\95\96\97\98\99\9A\9B\9C\9D\9E\9F\A0\A1\A2\A3\A4\A5\A6\A7\A8\A9\AA\AB\AC\AD\AE\AF\B0\B1\B2\B3\B4\B5\B6\B7\B8\B9\BA\BB\BC\BD\BE\BF\C0\C1\C2\C3\C4\C5\C6\C7\C8\C9\CA\CB\CC\CD\CE\CF\D0\D1\D2\D3\D4\D5\D6\D7\D8\D9\DA\DB\DC\DD\DE\DF\E0\E1\E2\E3\E4\E5\E6\E7\E8\E9\EA\EB\EC\ED\EE\EF\F0\F1\F2\F3\F4\F5\F6\F7\F8\F9\FA\FB\FC\FD\FE\FF\00", align 1
+
+; CHECK-LABEL:str:
+;  CHECK-NEXT:        .byte   0001,0002,0003,0004,0005,0006,0007,0010,0011,0012,0013,0014,0015,0016,0017,0020,0021,0022,0023,0024,0025,0026,0027,0030,0031,0032,0033,0034,0035,0036,0037,' ,'!,'",'#,'$,'%,'&,'','(,'),'*,'+,',,'-,'.,'/,'0,'1,'2,'3,'4,'5,'6,'7,'8,'9,':,';,'<,'=,'>,'?,'@,'A,'B,'C,'D,'E,'F,'G,'H,'I,'J,'K,'L,'M,'N,'O,'P,'Q,'R,'S,'T,'U,'V,'W,'X,'Y,'Z,'[,'\,'],'^,'_,'`,'a,'b,'c,'d,'e,'f,'g,'h,'i,'j,'k,'l,'m,'n,'o,'p,'q,'r,'s,'t,'u,'v,'w,'x,'y,'z,'{,'|,'},'~,0177,0200,0201,0202,0203,0204,0205,0206,0207,0210,0211,0212,0213,0214,0215,0216,0217,0220,0221,0222,0223,0224,0225,0226,0227,0230,0231,0232,0233,0234,0235,0236,0237,0240,0241,0242,0243,0244,0245,0246,0247,0250,0251,0252,0253,0254,0255,0256,0257,0260,0261,0262,0263,0264,0265,0266,0267,0270,0271,0272,0273,0274,0275,0276,0277,0300,0301,0302,0303,0304,0305,0306,0307,0310,0311,0312,0313,0314,0315,0316,0317,0320,0321,0322,0323,0324,0325,0326,0327,0330,0331,0332,0333,0334,0335,0336,0337,0340,0341,0342,0343,0344,0345,0346,0347,0350,0351,0352,0353,0354,0355,0356,0357,0360,0361,0362,0363,0364,0365,0366,0367,0370,0371,0372,0373,0374,0375,0376,0377,0000
diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll
index 4083bd58fe98b0..88c8b08bdb59f1 100644
--- a/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll
@@ -86,10 +86,7 @@
 
 ; CHECK:      .globl  chrarray
 ; CHECK-NEXT: chrarray:
-; CHECK-NEXT: .byte   97
-; CHECK-NEXT: .byte   98
-; CHECK-NEXT: .byte   99
-; CHECK-NEXT: .byte   100
+; CHECK-NEXT: .byte   'a,'b,'c,'d
 
 ; CHECK:      .globl  dblarr
 ; CHECK-NEXT: .align  3
diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll
index 42ead4b9b4de7b..0d29857fd1556e 100644
--- a/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll
@@ -41,30 +41,9 @@ entry:
 ; CHECK-NEXT:   .vbyte	4, 0                       # 0x0
 ; CHECK-NEXT:   .csect .rodata.str1.1[RO],2
 ; CHECK-NEXT: L..strA:
-; CHECK-NEXT: .byte   104
-; CHECK-NEXT: .byte   101
-; CHECK-NEXT: .byte   108
-; CHECK-NEXT: .byte   108
-; CHECK-NEXT: .byte   111
-; CHECK-NEXT: .byte   32
-; CHECK-NEXT: .byte   119
-; CHECK-NEXT: .byte   111
-; CHECK-NEXT: .byte   114
-; CHECK-NEXT: .byte   108
-; CHECK-NEXT: .byte   100
-; CHECK-NEXT: .byte   33
-; CHECK-NEXT: .byte   10
-; CHECK-NEXT: .byte   0
+; CHECK-NEXT: .byte   'h,'e,'l,'l,'o,' ,'w,'o,'r,'l,'d,'!,0012,0000
 ; CHECK-NEXT: L...str:
-; CHECK-NEXT: .byte   97
-; CHECK-NEXT: .byte   98
-; CHECK-NEXT: .byte   99
-; CHECK-NEXT: .byte   100
-; CHECK-NEXT: .byte   101
-; CHECK-NEXT: .byte   102
-; CHECK-NEXT: .byte   103
-; CHECK-NEXT: .byte   104
-; CHECK-NEXT: .byte   0
+; CHECK-NEXT: .byte   'a,'b,'c,'d,'e,'f,'g,'h,0000
 
 ; CHECKOBJ:     00000010 <.rodata.str2.2>:
 ; CHECKOBJ-NEXT:       10: 01 08 01 10
diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll
index dddbe2ba089e8d..a7bb018966429f 100644
--- a/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll
@@ -53,10 +53,7 @@
 ; CHECK64-NEXT:        .vbyte	8, 0x408c200000000000
 ; CHECK-NEXT:          .globl  const_chrarray
 ; CHECK-NEXT:  const_chrarray:
-; CHECK-NEXT:          .byte   97
-; CHECK-NEXT:          .byte   98
-; CHECK-NEXT:          .byte   99
-; CHECK-NEXT:          .byte   100
+; CHECK-NEXT:          .byte   'a,'b,'c,'d
 ; CHECK-NEXT:          .globl  const_dblarr
 ; CHECK-NEXT:          .align  3
 ; CHECK-NEXT:  const_dblarr:

From 618a890b72f874cbc41168737d03f724f58805fc Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Tue, 29 Sep 2020 10:51:49 -0700
Subject: [PATCH 11/12] [X86] Increase the depth threshold required to form
 VPERMI2W/VPERMI2B in shuffle combining

These instructions are implemented with two port 5 uops and one port 015 uop so they are more complicated that most shuffles.

This patch increases the depth threshold for when we form them during shuffle combining to try to limit increasing the number of uops especially on port 5.

Differential Revision: https://reviews.llvm.org/D88503
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  19 +--
 .../CodeGen/X86/min-legal-vector-width.ll     |  36 +++---
 .../CodeGen/X86/vector-shuffle-128-v16.ll     |  45 ++-----
 .../test/CodeGen/X86/vector-shuffle-128-v8.ll | 112 ++++--------------
 .../CodeGen/X86/vector-shuffle-256-v32.ll     |  28 +----
 llvm/test/CodeGen/X86/vector-zext.ll          |  19 +--
 6 files changed, 71 insertions(+), 188 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2a7f028d378966..4b3adc7dcfbc9b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35351,6 +35351,9 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
   // Depth threshold above which we can efficiently use variable mask shuffles.
   int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 1 : 2;
   AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask;
+  // VPERMI2W/VPERMI2B are 3 uops on Skylake and Icelake so we require a
+  // higher depth before combining them.
+  bool AllowBWIVPERMV3 = (Depth >= 2 || HasVariableMask);
 
   bool MaskContainsZeros = isAnyZero(Mask);
 
@@ -35387,9 +35390,9 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
            MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
            MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32 ||
            MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
-         (Subtarget.hasBWI() &&
+         (Subtarget.hasBWI() && AllowBWIVPERMV3 &&
           (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) ||
-         (Subtarget.hasVBMI() &&
+         (Subtarget.hasVBMI() && AllowBWIVPERMV3 &&
           (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) {
       // Adjust shuffle mask - replace SM_SentinelZero with second source index.
       for (unsigned i = 0; i != NumMaskElts; ++i)
@@ -35416,9 +35419,9 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
            MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
            MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32 ||
            MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
-         (Subtarget.hasBWI() &&
+         (Subtarget.hasBWI() && AllowBWIVPERMV3 &&
           (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) ||
-         (Subtarget.hasVBMI() &&
+         (Subtarget.hasVBMI() && AllowBWIVPERMV3 &&
           (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) {
       V1 = DAG.getBitcast(MaskVT, V1);
       V2 = DAG.getBitcast(MaskVT, V2);
@@ -35588,10 +35591,10 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
          MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 || MaskVT == MVT::v8f32 ||
          MaskVT == MVT::v8i32 || MaskVT == MVT::v16f32 ||
          MaskVT == MVT::v16i32)) ||
-       (Subtarget.hasBWI() && (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16 ||
-                               MaskVT == MVT::v32i16)) ||
-       (Subtarget.hasVBMI() && (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8 ||
-                                MaskVT == MVT::v64i8)))) {
+       (Subtarget.hasBWI() && AllowBWIVPERMV3 &&
+        (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) ||
+       (Subtarget.hasVBMI() && AllowBWIVPERMV3 &&
+        (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) {
     V1 = DAG.getBitcast(MaskVT, V1);
     V2 = DAG.getBitcast(MaskVT, V2);
     Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG);
diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
index e5240d5e246a6c..a39fbf878fd9f7 100644
--- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -857,10 +857,10 @@ define <8 x i16> @trunc_v8i64_v8i16(<8 x i64>* %x) nounwind "min-legal-vector-wi
 define <8 x i32> @trunc_v8i64_v8i32_zeroes(<8 x i64>* %x) nounwind "min-legal-vector-width"="256" {
 ; CHECK-LABEL: trunc_v8i64_v8i32_zeroes:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpsrlq $48, 32(%rdi), %ymm1
-; CHECK-NEXT:    vpsrlq $48, (%rdi), %ymm2
-; CHECK-NEXT:    vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30]
-; CHECK-NEXT:    vpermi2w %ymm1, %ymm2, %ymm0
+; CHECK-NEXT:    vpsrlq $48, 32(%rdi), %ymm0
+; CHECK-NEXT:    vpsrlq $48, (%rdi), %ymm1
+; CHECK-NEXT:    vpackusdw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; CHECK-NEXT:    retq
   %a = load <8 x i64>, <8 x i64>* %x
   %b = lshr <8 x i64> %a, <i64 48, i64 48, i64 48, i64 48, i64 48, i64 48, i64 48, i64 48>
@@ -920,9 +920,10 @@ define <8 x i32> @trunc_v8i64_v8i32_sign(<8 x i64>* %x) nounwind "min-legal-vect
 define <16 x i16> @trunc_v16i32_v16i16_sign(<16 x i32>* %x) nounwind "min-legal-vector-width"="256" {
 ; CHECK-LABEL: trunc_v16i32_v16i16_sign:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovdqa (%rdi), %ymm1
-; CHECK-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31]
-; CHECK-NEXT:    vpermi2w 32(%rdi), %ymm1, %ymm0
+; CHECK-NEXT:    vpsrad $16, 32(%rdi), %ymm0
+; CHECK-NEXT:    vpsrad $16, (%rdi), %ymm1
+; CHECK-NEXT:    vpackssdw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; CHECK-NEXT:    retq
   %a = load <16 x i32>, <16 x i32>* %x
   %b = ashr <16 x i32> %a, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@@ -931,20 +932,13 @@ define <16 x i16> @trunc_v16i32_v16i16_sign(<16 x i32>* %x) nounwind "min-legal-
 }
 
 define <32 x i8> @trunc_v32i16_v32i8_sign(<32 x i16>* %x) nounwind "min-legal-vector-width"="256" {
-; CHECK-AVX512-LABEL: trunc_v32i16_v32i8_sign:
-; CHECK-AVX512:       # %bb.0:
-; CHECK-AVX512-NEXT:    vpsraw $8, 32(%rdi), %ymm0
-; CHECK-AVX512-NEXT:    vpsraw $8, (%rdi), %ymm1
-; CHECK-AVX512-NEXT:    vpacksswb %ymm0, %ymm1, %ymm0
-; CHECK-AVX512-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; CHECK-AVX512-NEXT:    retq
-;
-; CHECK-VBMI-LABEL: trunc_v32i16_v32i8_sign:
-; CHECK-VBMI:       # %bb.0:
-; CHECK-VBMI-NEXT:    vmovdqa (%rdi), %ymm1
-; CHECK-VBMI-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63]
-; CHECK-VBMI-NEXT:    vpermi2b 32(%rdi), %ymm1, %ymm0
-; CHECK-VBMI-NEXT:    retq
+; CHECK-LABEL: trunc_v32i16_v32i8_sign:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpsraw $8, 32(%rdi), %ymm0
+; CHECK-NEXT:    vpsraw $8, (%rdi), %ymm1
+; CHECK-NEXT:    vpacksswb %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; CHECK-NEXT:    retq
   %a = load <32 x i16>, <32 x i16>* %x
   %b = ashr <32 x i16> %a, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   %c = trunc <32 x i16> %b to <32 x i8>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
index fb300a88b4120b..ee3cf43e8f2f7f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -304,24 +304,11 @@ define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(
 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
-; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX2-NEXT:    retq
-;
-; AVX512VLBW-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
-; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpbroadcastb %xmm1, %xmm1
-; AVX512VLBW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX512VLBW-NEXT:    retq
-;
-; AVX512VLVBMI-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
-; AVX512VLVBMI:       # %bb.0:
-; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,16,0,17,0,18,0,19,0,20,0,21,0,22,0,23]
-; AVX512VLVBMI-NEXT:    vpermi2b %xmm0, %xmm1, %xmm2
-; AVX512VLVBMI-NEXT:    vmovdqa %xmm2, %xmm0
-; AVX512VLVBMI-NEXT:    retq
+; AVX2OR512VL-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
+; AVX2OR512VL:       # %bb.0:
+; AVX2OR512VL-NEXT:    vpbroadcastb %xmm1, %xmm1
+; AVX2OR512VL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX2OR512VL-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
 ; XOPAVX1:       # %bb.0:
@@ -1335,23 +1322,11 @@ define <16 x i8> @shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23(
 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
 ; SSE-NEXT:    retq
 ;
-; AVX1OR2-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23:
-; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX1OR2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; AVX1OR2-NEXT:    retq
-;
-; AVX512VLBW-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23:
-; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX512VLBW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; AVX512VLBW-NEXT:    retq
-;
-; AVX512VLVBMI-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23:
-; AVX512VLVBMI:       # %bb.0:
-; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,16,1,17,4,20,5,21,2,18,3,19,6,22,7,23]
-; AVX512VLVBMI-NEXT:    vpermt2b %xmm1, %xmm2, %xmm0
-; AVX512VLVBMI-NEXT:    retq
+; AVX-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <16 x i8> %val1, <16 x i8> %val2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23>
   ret <16 x i8> %shuffle
 }
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
index c72d736960f96c..f7baebf7c4e4ff 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -1017,23 +1017,11 @@ define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; SSE-NEXT:    retq
 ;
-; AVX1OR2-LABEL: shuffle_v8i16_0c1d2e3f:
-; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; AVX1OR2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1OR2-NEXT:    retq
-;
-; AVX512VL-SLOW-LABEL: shuffle_v8i16_0c1d2e3f:
-; AVX512VL-SLOW:       # %bb.0:
-; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; AVX512VL-SLOW-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX512VL-SLOW-NEXT:    retq
-;
-; AVX512VL-FAST-LABEL: shuffle_v8i16_0c1d2e3f:
-; AVX512VL-FAST:       # %bb.0:
-; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,12,1,13,2,14,3,15]
-; AVX512VL-FAST-NEXT:    vpermt2w %xmm1, %xmm2, %xmm0
-; AVX512VL-FAST-NEXT:    retq
+; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
   ret <8 x i16> %shuffle
 }
@@ -1059,23 +1047,11 @@ define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; SSE-NEXT:    retq
 ;
-; AVX1OR2-LABEL: shuffle_v8i16_48596a7b:
-; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; AVX1OR2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1OR2-NEXT:    retq
-;
-; AVX512VL-SLOW-LABEL: shuffle_v8i16_48596a7b:
-; AVX512VL-SLOW:       # %bb.0:
-; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; AVX512VL-SLOW-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX512VL-SLOW-NEXT:    retq
-;
-; AVX512VL-FAST-LABEL: shuffle_v8i16_48596a7b:
-; AVX512VL-FAST:       # %bb.0:
-; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,8,5,9,6,10,7,11]
-; AVX512VL-FAST-NEXT:    vpermt2w %xmm1, %xmm2, %xmm0
-; AVX512VL-FAST-NEXT:    retq
+; AVX-LABEL: shuffle_v8i16_48596a7b:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
   ret <8 x i16> %shuffle
 }
@@ -1424,23 +1400,11 @@ define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
 ; SSE41-NEXT:    retq
 ;
-; AVX1OR2-LABEL: shuffle_v8i16_012dXXXX:
-; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
-; AVX1OR2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
-; AVX1OR2-NEXT:    retq
-;
-; AVX512VL-SLOW-LABEL: shuffle_v8i16_012dXXXX:
-; AVX512VL-SLOW:       # %bb.0:
-; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
-; AVX512VL-SLOW-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
-; AVX512VL-SLOW-NEXT:    retq
-;
-; AVX512VL-FAST-LABEL: shuffle_v8i16_012dXXXX:
-; AVX512VL-FAST:       # %bb.0:
-; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,13,4,5,6,7]
-; AVX512VL-FAST-NEXT:    vpermt2w %xmm1, %xmm2, %xmm0
-; AVX512VL-FAST-NEXT:    retq
+; AVX-LABEL: shuffle_v8i16_012dXXXX:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x i16> %shuffle
 }
@@ -1475,24 +1439,11 @@ define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: shuffle_v8i16_XXXXcde3:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
-; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
-; AVX2-NEXT:    retq
-;
-; AVX512VL-SLOW-LABEL: shuffle_v8i16_XXXXcde3:
-; AVX512VL-SLOW:       # %bb.0:
-; AVX512VL-SLOW-NEXT:    vpbroadcastq %xmm0, %xmm0
-; AVX512VL-SLOW-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
-; AVX512VL-SLOW-NEXT:    retq
-;
-; AVX512VL-FAST-LABEL: shuffle_v8i16_XXXXcde3:
-; AVX512VL-FAST:       # %bb.0:
-; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,11]
-; AVX512VL-FAST-NEXT:    vpermi2w %xmm0, %xmm1, %xmm2
-; AVX512VL-FAST-NEXT:    vmovdqa %xmm2, %xmm0
-; AVX512VL-FAST-NEXT:    retq
+; AVX2OR512VL-LABEL: shuffle_v8i16_XXXXcde3:
+; AVX2OR512VL:       # %bb.0:
+; AVX2OR512VL-NEXT:    vpbroadcastq %xmm0, %xmm0
+; AVX2OR512VL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
+; AVX2OR512VL-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: shuffle_v8i16_XXXXcde3:
 ; XOPAVX1:       # %bb.0:
@@ -1533,24 +1484,11 @@ define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
 ; SSE41-NEXT:    retq
 ;
-; AVX1OR2-LABEL: shuffle_v8i16_cde3XXXX:
-; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; AVX1OR2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
-; AVX1OR2-NEXT:    retq
-;
-; AVX512VL-SLOW-LABEL: shuffle_v8i16_cde3XXXX:
-; AVX512VL-SLOW:       # %bb.0:
-; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; AVX512VL-SLOW-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
-; AVX512VL-SLOW-NEXT:    retq
-;
-; AVX512VL-FAST-LABEL: shuffle_v8i16_cde3XXXX:
-; AVX512VL-FAST:       # %bb.0:
-; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,6,11,4,5,6,7]
-; AVX512VL-FAST-NEXT:    vpermi2w %xmm0, %xmm1, %xmm2
-; AVX512VL-FAST-NEXT:    vmovdqa %xmm2, %xmm0
-; AVX512VL-FAST-NEXT:    retq
+; AVX-LABEL: shuffle_v8i16_cde3XXXX:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x i16> %shuffle
 }
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index 23bf91de6e7e8a..e3eed625dab3b9 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -4804,29 +4804,11 @@ define <4 x i64> @PR28136(<32 x i8> %a0, <32 x i8> %a1) {
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: PR28136:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
-; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT:    retq
-;
-; AVX512VLBW-LABEL: PR28136:
-; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
-; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX512VLBW-NEXT:    retq
-;
-; AVX512VLVBMI-SLOW-LABEL: PR28136:
-; AVX512VLVBMI-SLOW:       # %bb.0:
-; AVX512VLVBMI-SLOW-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
-; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX512VLVBMI-SLOW-NEXT:    retq
-;
-; AVX512VLVBMI-FAST-LABEL: PR28136:
-; AVX512VLVBMI-FAST:       # %bb.0:
-; AVX512VLVBMI-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,32,1,33,2,34,3,35,16,48,17,49,18,50,19,51,4,36,5,37,6,38,7,39,20,52,21,53,22,54,23,55]
-; AVX512VLVBMI-FAST-NEXT:    vpermt2b %ymm1, %ymm2, %ymm0
-; AVX512VLVBMI-FAST-NEXT:    retq
+; AVX2OR512VL-LABEL: PR28136:
+; AVX2OR512VL:       # %bb.0:
+; AVX2OR512VL-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; AVX2OR512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2OR512VL-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: PR28136:
 ; XOPAVX1:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index 2ad16f2e04c5b1..0132e901e6b3a3 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -1902,20 +1902,11 @@ define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable
 ; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: shuf_zext_8i16_to_4i64_offset2:
-; AVX512F:       # %bb.0: # %entry
-; AVX512F-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
-; AVX512F-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX512F-NEXT:    retq
-;
-; AVX512BW-LABEL: shuf_zext_8i16_to_4i64_offset2:
-; AVX512BW:       # %bb.0: # %entry
-; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,33,34,35,3,37,38,39,4,41,42,43,5,45,46,47]
-; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0
-; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512BW-NEXT:    retq
+; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
+; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX512-NEXT:    retq
 entry:
   %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
   %Z = bitcast <16 x i16> %B to <4 x i64>

From 1d54e75cf26a4c60b66659d5d9c62f4bb9452b03 Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara@apple.com>
Date: Tue, 29 Sep 2020 14:39:54 -0700
Subject: [PATCH 12/12] [GlobalISel] Fix multiply with overflow intrinsics
 legalization generating invalid MIR.

During lowering of G_UMULO and friends, the previous code moved the builder's
insertion point to be after the legalizing instruction. When that happened, if
there happened to be a "G_CONSTANT i32 0" immediately after, the CSEMIRBuilder
would try to find that constant during the buildConstant(zero) call, and since
it dominates itself would return the iterator unchanged, even though the def
of the constant was *after* the current insertion point. This resulted in the
compare being generated *before* the constant which it was using.

There's no need to modify the insertion point before building the mul-hi or
constant. Delaying moving the insert point ensures those are built/CSEd before
the G_ICMP is built.

Fixes PR47679

Differential Revision: https://reviews.llvm.org/D88514
---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |  5 +-
 .../AArch64/GlobalISel/legalize-mul.mir       | 68 ++++++++++++++++++-
 .../CodeGen/Mips/GlobalISel/legalizer/mul.mir |  2 +-
 .../CodeGen/Mips/GlobalISel/llvm-ir/mul.ll    | 12 ++--
 4 files changed, 76 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index e8bc4067c127e4..45ac2b7b671193 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2892,11 +2892,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
     MI.RemoveOperand(1);
     Observer.changedInstr(MI);
 
-    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
-
     auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
     auto Zero = MIRBuilder.buildConstant(Ty, 0);
 
+    // Move insert point forward so we can use the Res register if needed.
+    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+
     // For *signed* multiply, overflow is detected by checking:
     // (hi != (lo >> bitwidth-1))
     if (Opcode == TargetOpcode::G_SMULH) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
index 84c839f7b341bc..20af216aaeb5ed 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
@@ -28,8 +28,8 @@ body:             |
     ; CHECK-LABEL: name: test_smul_overflow
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
-    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
     ; CHECK: [[SMULH:%[0-9]+]]:_(s64) = G_SMULH [[COPY]], [[COPY1]]
+    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
     ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MUL]], [[C]](s64)
     ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SMULH]](s64), [[ASHR]]
@@ -51,9 +51,9 @@ body:             |
     ; CHECK-LABEL: name: test_umul_overflow
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
-    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
     ; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[COPY]], [[COPY1]]
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
     ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]]
     ; CHECK: $x0 = COPY [[MUL]](s64)
     ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
@@ -91,3 +91,67 @@ body:             |
     $q0 = COPY %2(<2 x s64>)
     RET_ReallyLR implicit $q0
 ...
+---
+name:            test_umulo_overflow_no_invalid_mir
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+  - { reg: '$x2' }
+frameInfo:
+  maxAlignment:    16
+stack:
+  - { id: 0, size: 8, alignment: 8 }
+  - { id: 1, size: 8, alignment: 8 }
+  - { id: 2, size: 16, alignment: 16 }
+  - { id: 3, size: 16, alignment: 8 }
+machineFunctionInfo: {}
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; Check that the overflow result doesn't generate incorrect MIR by using a G_CONSTANT 0
+    ; before it's been defined.
+    ; CHECK-LABEL: name: test_umulo_overflow_no_invalid_mir
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+    ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+    ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3
+    ; CHECK: G_STORE [[COPY2]](s64), [[FRAME_INDEX]](p0) :: (store 8)
+    ; CHECK: G_STORE [[COPY1]](s64), [[FRAME_INDEX1]](p0) :: (store 8)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load 8)
+    ; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[LOAD]], [[LOAD1]]
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[LOAD]], [[LOAD1]]
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]]
+    ; CHECK: G_STORE [[C]](s64), [[FRAME_INDEX2]](p0) :: (store 8, align 1)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
+    ; CHECK: $x0 = COPY [[MUL]](s64)
+    ; CHECK: $x1 = COPY [[AND]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:_(p0) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %25:_(s32) = G_CONSTANT i32 0
+    %3:_(p0) = G_FRAME_INDEX %stack.0
+    %4:_(p0) = G_FRAME_INDEX %stack.1
+    %6:_(p0) = G_FRAME_INDEX %stack.3
+    G_STORE %2(s64), %3(p0) :: (store 8)
+    G_STORE %1(s64), %4(p0) :: (store 8)
+    %7:_(s64) = G_LOAD %3(p0) :: (dereferenceable load 8)
+    %8:_(s64) = G_LOAD %4(p0) :: (dereferenceable load 8)
+    %9:_(s64), %10:_(s1) = G_UMULO %7, %8
+    %31:_(s64) = G_CONSTANT i64 0
+    G_STORE %31(s64), %6(p0) :: (store 8, align 1)
+    %16:_(s64) = G_ZEXT %10(s1)
+    $x0 = COPY %9(s64)
+    $x1 = COPY %16(s64)
+    RET_ReallyLR implicit $x0
+
+...
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
index c92a55d0af3225..b146aa5ff13d56 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
@@ -439,9 +439,9 @@ body:             |
     ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
     ; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
     ; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3
-    ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
     ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]]
     ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
     ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]]
     ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
index 659eadf181c02a..f7250ccde898fd 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
@@ -180,13 +180,13 @@ declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32)
 define void @umul_with_overflow(i32 %lhs, i32 %rhs, i32* %pmul, i1* %pcarry_flag) {
 ; MIPS32-LABEL: umul_with_overflow:
 ; MIPS32:       # %bb.0:
-; MIPS32-NEXT:    mul $1, $4, $5
 ; MIPS32-NEXT:    multu $4, $5
-; MIPS32-NEXT:    mfhi $2
-; MIPS32-NEXT:    sltu $2, $zero, $2
-; MIPS32-NEXT:    andi $2, $2, 1
-; MIPS32-NEXT:    sb $2, 0($7)
-; MIPS32-NEXT:    sw $1, 0($6)
+; MIPS32-NEXT:    mfhi $1
+; MIPS32-NEXT:    mul $2, $4, $5
+; MIPS32-NEXT:    sltu $1, $zero, $1
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    sb $1, 0($7)
+; MIPS32-NEXT:    sw $2, 0($6)
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
   %res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %lhs, i32 %rhs)