From 5c4fc581d5fe8427f03ec90b0d745453398aa3ad Mon Sep 17 00:00:00 2001 From: Amy Huang Date: Tue, 29 Sep 2020 16:19:08 -0700 Subject: [PATCH 01/12] [DebugInfo] Add types from constructor homing to the retained types list. Add class types to the retained types list to make sure they don't get dropped if the constructor is optimized out later. Differential Revision: https://reviews.llvm.org/D88522 --- clang/lib/CodeGen/CGDebugInfo.cpp | 2 +- clang/test/CodeGenCXX/debug-info-limited-ctor.cpp | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 27c584ff0795a9..88aace8b85dd18 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1726,7 +1726,7 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( // info is emitted. if (DebugKind == codegenoptions::DebugInfoConstructor) if (const CXXConstructorDecl *CD = dyn_cast(Method)) - completeClass(CD->getParent()); + completeUnusedClass(*CD->getParent()); llvm::DINodeArray TParamsArray = CollectFunctionTemplateParams(Method, Unit); llvm::DISubprogram *SP = DBuilder.createMethod( diff --git a/clang/test/CodeGenCXX/debug-info-limited-ctor.cpp b/clang/test/CodeGenCXX/debug-info-limited-ctor.cpp index cf2e89e35522f7..cf7adad6b44929 100644 --- a/clang/test/CodeGenCXX/debug-info-limited-ctor.cpp +++ b/clang/test/CodeGenCXX/debug-info-limited-ctor.cpp @@ -9,7 +9,7 @@ struct B { B(); } TestB; -// CHECK-DAG: !DICompositeType(tag: DW_TAG_structure_type, name: "C"{{.*}}DIFlagTypePassByValue +// CHECK-DAG: ![[C:[0-9]+]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "C"{{.*}}DIFlagTypePassByValue struct C { C() {} } TestC; @@ -73,3 +73,7 @@ void f(K k) {} void L() { auto func = [&]() {}; } + +// Check that types are being added to retained types list. +// CHECK-DAG: !DICompileUnit{{.*}}retainedTypes: ![[RETAINED:[0-9]+]] +// CHECK-DAG: ![[RETAINED]] = {{.*}}![[C]] From f71849c74ed58e5d9ed3681cc6294128098012dc Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Tue, 29 Sep 2020 17:07:06 -0700 Subject: [PATCH 02/12] [docs] Recommend dropLocation() over setDebugLoc(DebugLoc()) --- llvm/docs/HowToUpdateDebugInfo.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/HowToUpdateDebugInfo.rst b/llvm/docs/HowToUpdateDebugInfo.rst index 3283bfd893393b..7df2a8a2582759 100644 --- a/llvm/docs/HowToUpdateDebugInfo.rst +++ b/llvm/docs/HowToUpdateDebugInfo.rst @@ -117,7 +117,7 @@ When to drop an instruction location A transformation should drop debug locations if the rules for :ref:`preserving` and :ref:`merging` debug locations do not apply. The API to -use is ``Instruction::setDebugLoc()``. +use is ``Instruction::dropLocation()``. The purpose of this rule is to prevent erratic or misleading single-stepping behavior in situations in which an instruction has no clear, unambiguous From 616c68aab75016d5d7ebc0b79bb3c38405b18ae6 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 29 Sep 2020 14:38:56 -0700 Subject: [PATCH 03/12] [NFC][MSAN] Remove an attribute in test --- llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll index a8ce0561c3b87a..54493c9cdc8c5f 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll @@ -7,7 +7,7 @@ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -define <4 x i64> @test_mm256_abs_epi8(<4 x i64> noundef %a) local_unnamed_addr #0 { +define <4 x i64> @test_mm256_abs_epi8(<4 x i64> %a) local_unnamed_addr #0 { ; CHECK-LABEL: @test_mm256_abs_epi8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 From 795d94fdb9d2377452f86952dcf0921a6c68d2b5 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 29 Sep 2020 15:31:25 -0700 Subject: [PATCH 04/12] [NFC][Msan] Add llvm.fabs test llvm.fabs does not need a special handler as llvm.abs as its single argument type match the return type. --- .../MemorySanitizer/abs-vector.ll | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll index 54493c9cdc8c5f..d3b29d65f2f224 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -msan-check-access-address=0 -passes=msan 2>&1 | FileCheck %s -; RUN: opt < %s -S -msan-check-access-address=0 -msan | FileCheck %s -; RUN: opt < %s -S -msan-check-access-address=0 -msan-track-origins=2 -passes=msan 2>&1 | FileCheck %s --check-prefixes=CHECK,ORIGIN -; RUN: opt < %s -S -msan-check-access-address=0 -msan-track-origins=2 -msan | FileCheck %s --check-prefixes=CHECK,ORIGIN +; RUN: opt %s -S -msan-check-access-address=0 -passes=msan 2>&1 | FileCheck %s +; RUN: opt %s -S -msan-check-access-address=0 -msan | FileCheck %s +; RUN: opt %s -S -msan-check-access-address=0 -msan-track-origins=2 -passes=msan 2>&1 | FileCheck %s --check-prefixes=CHECK,ORIGIN +; RUN: opt %s -S -msan-check-access-address=0 -msan-track-origins=2 -msan | FileCheck %s --check-prefixes=CHECK,ORIGIN target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -73,9 +73,26 @@ entry: ret <4 x i64> %2 } +define <4 x double> @test_fabs(<4 x double> %a) local_unnamed_addr #0 { +; CHECK-LABEL: @test_fabs( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; ORIGIN-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__msan_param_origin_tls, i32 0, i32 0), align 4 +; CHECK: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> [[A:%.*]]) +; CHECK-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; ORIGIN-NEXT: store i32 [[TMP1]], i32* @__msan_retval_origin_tls, align 4 +; CHECK: ret <4 x double> [[TMP2]] +; +entry: + %0 = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %a) + ret <4 x double> %0 +} + declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1 immarg) #1 declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1 immarg) #1 declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1 immarg) #1 +declare <4 x double> @llvm.fabs.v4f64(<4 x double>) #1 attributes #0 = { nounwind readnone sanitize_memory } attributes #1 = { nounwind readnone speculatable willreturn } From afcf9c47c5e74a0b567531547b677ff1d383ae50 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Tue, 29 Sep 2020 17:08:42 -0700 Subject: [PATCH 05/12] Fix test failures with trunk clang - Make the consteval constructor for the zero type be noexcept - Don't expect three-way comparison of 0 against a comparison category to fail --- libcxx/include/compare | 2 +- .../cmp/cmp.categories.pre/zero_type.verify.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libcxx/include/compare b/libcxx/include/compare index c1cd81bb6fc1ac..596505f8860d4b 100644 --- a/libcxx/include/compare +++ b/libcxx/include/compare @@ -156,7 +156,7 @@ enum class _LIBCPP_ENUM_VIS _NCmpResult : signed char { struct _CmpUnspecifiedParam { _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEVAL - _CmpUnspecifiedParam(int _CmpUnspecifiedParam::*) {} + _CmpUnspecifiedParam(int _CmpUnspecifiedParam::*) noexcept {} template>> _CmpUnspecifiedParam(_Tp) = delete; diff --git a/libcxx/test/std/language.support/cmp/cmp.categories.pre/zero_type.verify.cpp b/libcxx/test/std/language.support/cmp/cmp.categories.pre/zero_type.verify.cpp index 40f6677d43c9be..fc21c03a3ddf07 100644 --- a/libcxx/test/std/language.support/cmp/cmp.categories.pre/zero_type.verify.cpp +++ b/libcxx/test/std/language.support/cmp/cmp.categories.pre/zero_type.verify.cpp @@ -46,9 +46,9 @@ void test_category(T v) { void(0 > v); void(v >= 0); void(0 >= v); -#ifndef _LIBCPP_HAS_NO_THREE_WAY_COMPARISON - void(v <=> 0); // expected-error 3 {{}} - void(0 <=> v); // expected-error 3 {{}} +#ifndef _LIBCPP_HAS_NO_SPACESHIP_OPERATOR + void(v <=> 0); + void(0 <=> v); #endif } From bd14d6ea1517c93ceecaec29dad016d9a122fa1b Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 29 Sep 2020 17:22:16 -0700 Subject: [PATCH 06/12] [lldb] Hoist -s (trace directory) argument out of LLDB_TEST_COMMON_ARGS (NFC) Give the trace directory argument its own variable (LLDB_TEST_TRACE_DIRECTORY) so that we can configure it in lit.site.cfg.py if we so desire. --- lldb/test/API/CMakeLists.txt | 7 +++++-- lldb/test/API/lit.cfg.py | 3 +++ lldb/test/API/lit.site.cfg.py.in | 1 + lldb/utils/lldb-dotest/CMakeLists.txt | 5 +++++ lldb/utils/lldb-dotest/lldb-dotest.in | 10 ++++++---- 5 files changed, 20 insertions(+), 6 deletions(-) diff --git a/lldb/test/API/CMakeLists.txt b/lldb/test/API/CMakeLists.txt index fe92012e37678b..f4802e2f5ca2cb 100644 --- a/lldb/test/API/CMakeLists.txt +++ b/lldb/test/API/CMakeLists.txt @@ -36,13 +36,14 @@ set(LLDB_TEST_USER_ARGS # hash of filename and .text section, there *will* be conflicts inside # the build directory. set(LLDB_TEST_COMMON_ARGS - -s - ${CMAKE_BINARY_DIR}/lldb-test-traces -S nm -u CXXFLAGS -u CFLAGS ) +# Configure the traces directory. +set(LLDB_TEST_TRACE_DIRECTORY "${PROJECT_BINARY_DIR}/lldb-test-traces" CACHE PATH "The test traces directory.") + # Set the path to the default lldb test executable. set(LLDB_DEFAULT_TEST_EXECUTABLE "${LLVM_RUNTIME_OUTPUT_INTDIR}/lldb${CMAKE_EXECUTABLE_SUFFIX}") @@ -141,6 +142,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_SOURCE_DIR "${LLDB_SOURCE_DIR}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_FRAMEWORK_DIR "${LLDB_FRAMEWORK_DIR}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_BUILD_DIRECTORY "${LLDB_TEST_BUILD_DIRECTORY}") + string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_TRACE_DIRECTORY "${LLDB_TEST_TRACE_DIRECTORY}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_EXECUTABLE "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_COMPILER "${LLDB_TEST_COMPILER}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_DSYMUTIL "${LLDB_TEST_DSYMUTIL}") @@ -170,6 +172,7 @@ endif() string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_DOTEST_ARGS "${LLDB_DOTEST_ARGS}") string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_SOURCE_DIR "${LLDB_SOURCE_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_BUILD_DIRECTORY "${LLDB_TEST_BUILD_DIRECTORY}") +string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_TRACE_DIRECTORY "${LLDB_TEST_TRACE_DIRECTORY}") string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_EXECUTABLE "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_COMPILER "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_DSYMUTIL "${LLDB_TEST_DSYMUTIL}") diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py index d78a1aae546752..a4d4d83fd366df 100644 --- a/lldb/test/API/lit.cfg.py +++ b/lldb/test/API/lit.cfg.py @@ -177,6 +177,9 @@ def delete_module_cache(path): if is_configured('lldb_build_directory'): dotest_cmd += ['--build-dir', config.lldb_build_directory] +if is_configured('lldb_trace_directory'): + dotest_cmd += ['-s', config.lldb_trace_directory] + if is_configured('lldb_module_cache'): delete_module_cache(config.lldb_module_cache) dotest_cmd += ['--lldb-module-cache-dir', config.lldb_module_cache] diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in index 271faf371f9d1d..0481e8fecc73a2 100644 --- a/lldb/test/API/lit.site.cfg.py.in +++ b/lldb/test/API/lit.site.cfg.py.in @@ -19,6 +19,7 @@ config.shared_libs = @LLVM_ENABLE_SHARED_LIBS@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" config.target_triple = "@TARGET_TRIPLE@" config.lldb_build_directory = "@LLDB_TEST_BUILD_DIRECTORY@" +config.lldb_trace_directory = "@LLDB_TEST_TRACE_DIRECTORY@" config.lldb_reproducer_directory = os.path.join("@LLDB_TEST_BUILD_DIRECTORY@", "reproducers") config.python_executable = "@Python3_EXECUTABLE@" config.dotest_args_str = "@LLDB_DOTEST_ARGS@" diff --git a/lldb/utils/lldb-dotest/CMakeLists.txt b/lldb/utils/lldb-dotest/CMakeLists.txt index 1001fbf04ebe79..cba04f3499b957 100644 --- a/lldb/utils/lldb-dotest/CMakeLists.txt +++ b/lldb/utils/lldb-dotest/CMakeLists.txt @@ -23,6 +23,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}") + string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_TRACE_DIRECTORY_CONFIGURED "${LLDB_TEST_TRACE_DIRECTORY}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") @@ -37,6 +38,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}") + string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_TRACE_DIRECTORY_CONFIGURED "${LLDB_TEST_TRACE_DIRECTORY}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") @@ -50,6 +52,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}") + string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_TRACE_DIRECTORY_CONFIGURED "${LLDB_TEST_TRACE_DIRECTORY}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") @@ -71,6 +74,7 @@ elseif(NOT "${CMAKE_CFG_INTDIR}" STREQUAL ".") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}") + string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_TRACE_DIRECTORY_CONFIGURED "${LLDB_TEST_TRACE_DIRECTORY}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") @@ -89,6 +93,7 @@ else() set(LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}") set(LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}") set(LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}") + set(LLDB_TEST_TRACE_DIRECTORY_CONFIGURED "${LLDB_TEST_TRACE_DIRECTORY}") set(LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}") set(LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") set(LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") diff --git a/lldb/utils/lldb-dotest/lldb-dotest.in b/lldb/utils/lldb-dotest/lldb-dotest.in index cfd73f5b32a6e4..d66968955a7409 100755 --- a/lldb/utils/lldb-dotest/lldb-dotest.in +++ b/lldb/utils/lldb-dotest/lldb-dotest.in @@ -3,7 +3,6 @@ import subprocess import sys dotest_path = '@LLDB_SOURCE_DIR_CONFIGURED@/test/API/dotest.py' -build_dir = '@LLDB_TEST_BUILD_DIRECTORY_CONFIGURED@' dotest_args_str = '@LLDB_DOTEST_ARGS_CONFIGURED@' arch = '@LLDB_TEST_ARCH@' executable = '@LLDB_TEST_EXECUTABLE_CONFIGURED@' @@ -12,9 +11,11 @@ dsymutil = '@LLDB_TEST_DSYMUTIL_CONFIGURED@' filecheck = '@LLDB_TEST_FILECHECK_CONFIGURED@' yaml2obj = '@LLDB_TEST_YAML2OBJ_CONFIGURED@' server = '@LLDB_TEST_SERVER_CONFIGURED@' -lldb_libs_dir = "@LLDB_LIBS_DIR_CONFIGURED@" -lldb_framework_dir = "@LLDB_FRAMEWORK_DIR_CONFIGURED@" +lldb_build_dir = '@LLDB_TEST_BUILD_DIRECTORY_CONFIGURED@' lldb_build_intel_pt = "@LLDB_BUILD_INTEL_PT@" +lldb_framework_dir = "@LLDB_FRAMEWORK_DIR_CONFIGURED@" +lldb_libs_dir = "@LLDB_LIBS_DIR_CONFIGURED@" +lldb_trace_dir = '@LLDB_TEST_TRACE_DIRECTORY_CONFIGURED@' if __name__ == '__main__': wrapper_args = sys.argv[1:] @@ -23,7 +24,8 @@ if __name__ == '__main__': cmd = [sys.executable, dotest_path] cmd.extend(['--arch', arch]) cmd.extend(dotest_args) - cmd.extend(['--build-dir', build_dir]) + cmd.extend(['-s', lldb_trace_dir]) + cmd.extend(['--build-dir', lldb_build_dir]) cmd.extend(['--executable', executable]) cmd.extend(['--compiler', compiler]) cmd.extend(['--dsymutil', dsymutil]) From 674f57870f4c8a7fd7b629bffc85b149cbefd3e0 Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Tue, 29 Sep 2020 17:37:36 -0700 Subject: [PATCH 07/12] [gardening] Replace some uses of setDebugLoc(DebugLoc()) with dropLocation(), NFC --- llvm/lib/Transforms/Scalar/LICM.cpp | 2 +- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 631fa2f27c5b3f..bc581e7ad40f32 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -2159,7 +2159,7 @@ bool llvm::promoteLoopAccessesToScalars( if (SawUnorderedAtomic) PreheaderLoad->setOrdering(AtomicOrdering::Unordered); PreheaderLoad->setAlignment(Alignment); - PreheaderLoad->setDebugLoc(DebugLoc()); + PreheaderLoad->dropLocation(); if (AATags) PreheaderLoad->setAAMetadata(AATags); SSA.AddAvailableValue(Preheader, PreheaderLoad); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 124a7c423e72cd..1672293380d7b1 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2218,7 +2218,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // be misleading while debugging. for (auto &I : *ThenBB) { if (!SpeculatedStoreValue || &I != SpeculatedStore) - I.setDebugLoc(DebugLoc()); + I.dropLocation(); I.dropUnknownNonDebugMetadata(); } @@ -2878,7 +2878,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, // When we fold the bonus instructions we want to make sure we // reset their debug locations in order to avoid stepping on dead // code caused by folding dead branches. - NewBonusInst->setDebugLoc(DebugLoc()); + NewBonusInst->dropLocation(); RemapInstruction(NewBonusInst, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); @@ -2902,7 +2902,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, // Reset the condition debug location to avoid jumping on dead code // as the result of folding dead branches. - CondInPred->setDebugLoc(DebugLoc()); + CondInPred->dropLocation(); RemapInstruction(CondInPred, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); From d04775e16bba456f0be0aaa7478959c5bfa22c41 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 29 Sep 2020 08:51:26 -0400 Subject: [PATCH 08/12] Add remquo, frexp and modf overload functions to HIP header --- clang/lib/Headers/__clang_hip_math.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h index 0c27ef60a06488..b72bb40ccdb674 100644 --- a/clang/lib/Headers/__clang_hip_math.h +++ b/clang/lib/Headers/__clang_hip_math.h @@ -1221,6 +1221,27 @@ __DEVICE__ inline _Float16 pow(_Float16 __base, int __iexp) { return __ocml_pown_f16(__base, __iexp); } + +__DEVICE__ +inline float remquo(float __x, float __y, int *__quo) { + return remquof(__x, __y, __quo); +} + +template +__DEVICE__ + typename __hip_enable_if::is_specialized && + std::numeric_limits<__T2>::is_specialized, + double>::type + remquo(__T1 __x, __T2 __y, int *__quo) { + return remquo((double)__x, (double)__y, __quo); +} + +__DEVICE__ +inline float frexp(float __x, int *__nptr) { return frexpf(__x, __nptr); } + +__DEVICE__ +inline float modf(float __x, float *__iptr) { return modff(__x, __iptr); } + #endif #pragma pop_macro("__DEF_FUN1") From c6b18cf9672bca4f61bb3ef401173742068e46ea Mon Sep 17 00:00:00 2001 From: Evandro Menezes Date: Tue, 29 Sep 2020 17:11:12 -0500 Subject: [PATCH 09/12] [RISCV] Use the extensions in the canonical order (NFC) Use the ISA extensions for specific processors in the conventional canonical order. --- llvm/lib/Target/RISCV/RISCV.td | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 578b393dc879ae..66eda3ba360cfb 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -231,16 +231,16 @@ def : ProcessorModel<"rocket-rv64", RocketModel, [Feature64Bit]>; def : ProcessorModel<"bullet-rv32", BulletModel, []>; def : ProcessorModel<"bullet-rv64", BulletModel, [Feature64Bit]>; -def : ProcessorModel<"sifive-e31", RocketModel, [FeatureStdExtA, - FeatureStdExtC, - FeatureStdExtM]>; +def : ProcessorModel<"sifive-e31", RocketModel, [FeatureStdExtM, + FeatureStdExtA, + FeatureStdExtC]>; def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit, + FeatureStdExtM, + FeatureStdExtF, FeatureStdExtA, - FeatureStdExtC, FeatureStdExtD, - FeatureStdExtF, - FeatureStdExtM]>; + FeatureStdExtC]>; //===----------------------------------------------------------------------===// // Define the RISC-V target. From 0a146a9d0bdd54411f0b0712e27481a4c280ae03 Mon Sep 17 00:00:00 2001 From: Hubert Tong Date: Tue, 29 Sep 2020 21:11:16 -0400 Subject: [PATCH 10/12] [AIX] asm output: use character literals in byte lists for strings This patch improves the assembly output produced for string literals by using character literals in byte lists. This provides the benefits of having printable characters appear as such in the assembly output and of having strings kept as logical units on the same line. Reviewed By: daltenty Differential Revision: https://reviews.llvm.org/D80953 --- llvm/include/llvm/MC/MCAsmInfo.h | 23 +++++ llvm/lib/MC/MCAsmInfoXCOFF.cpp | 2 + llvm/lib/MC/MCAsmStreamer.cpp | 94 ++++++++++++++----- llvm/test/CodeGen/PowerPC/aix-bytestring.ll | 7 ++ llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll | 5 +- .../PowerPC/aix-xcoff-mergeable-str.ll | 25 +---- llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll | 5 +- 7 files changed, 108 insertions(+), 53 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/aix-bytestring.ll diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index 0f9d503045d88a..2b889d0ed5fa9d 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -54,6 +54,15 @@ enum LCOMMType { NoAlignment, ByteAlignment, Log2Alignment }; /// This class is intended to be used as a base class for asm /// properties and features specific to the target. class MCAsmInfo { +public: + /// Assembly character literal syntax types. + enum AsmCharLiteralSyntax { + ACLS_Unknown, /// Unknown; character literals not used by LLVM for this + /// target. + ACLS_SingleQuotePrefix, /// The desired character is prefixed by a single + /// quote, e.g., `'A`. + }; + protected: //===------------------------------------------------------------------===// // Properties to be set by the target writer, used to configure asm printer. @@ -200,6 +209,16 @@ class MCAsmInfo { /// doesn't support this, it can be set to null. Defaults to "\t.asciz\t" const char *AscizDirective; + /// This directive accepts a comma-separated list of bytes for emission as a + /// string of bytes. For targets that do not support this, it shall be set to + /// null. Defaults to null. + const char *ByteListDirective = nullptr; + + /// Form used for character literals in the assembly syntax. Useful for + /// producing strings as byte lists. If a target does not use or support + /// this, it shall be set to ACLS_Unknown. Defaults to ACLS_Unknown. + AsmCharLiteralSyntax CharacterLiteralSyntax = ACLS_Unknown; + /// These directives are used to output some unit of integer data to the /// current section. If a data directive is set to null, smaller data /// directives will be used to emit the large sizes. Defaults to "\t.byte\t", @@ -562,6 +581,10 @@ class MCAsmInfo { } const char *getAsciiDirective() const { return AsciiDirective; } const char *getAscizDirective() const { return AscizDirective; } + const char *getByteListDirective() const { return ByteListDirective; } + AsmCharLiteralSyntax characterLiteralSyntax() const { + return CharacterLiteralSyntax; + } bool getAlignmentIsInBytes() const { return AlignmentIsInBytes; } unsigned getTextAlignFillValue() const { return TextAlignFillValue; } const char *getGlobalDirective() const { return GlobalDirective; } diff --git a/llvm/lib/MC/MCAsmInfoXCOFF.cpp b/llvm/lib/MC/MCAsmInfoXCOFF.cpp index b5c5bb3ace8edf..04982af4af31be 100644 --- a/llvm/lib/MC/MCAsmInfoXCOFF.cpp +++ b/llvm/lib/MC/MCAsmInfoXCOFF.cpp @@ -24,6 +24,8 @@ MCAsmInfoXCOFF::MCAsmInfoXCOFF() { ZeroDirectiveSupportsNonZeroValue = false; AsciiDirective = nullptr; // not supported AscizDirective = nullptr; // not supported + ByteListDirective = "\t.byte\t"; + CharacterLiteralSyntax = ACLS_SingleQuotePrefix; // Use .vbyte for data definition to avoid directives that apply an implicit // alignment. diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 647197d8de4d11..8d96935b220599 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -971,6 +971,47 @@ void MCAsmStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, static inline char toOctal(int X) { return (X&7)+'0'; } +static void PrintByteList(StringRef Data, raw_ostream &OS, + MCAsmInfo::AsmCharLiteralSyntax ACLS) { + assert(!Data.empty() && "Cannot generate an empty list."); + const auto printCharacterInOctal = [&OS](unsigned char C) { + OS << '0'; + OS << toOctal(C >> 6); + OS << toOctal(C >> 3); + OS << toOctal(C >> 0); + }; + const auto printOneCharacterFor = [printCharacterInOctal]( + auto printOnePrintingCharacter) { + return [printCharacterInOctal, printOnePrintingCharacter](unsigned char C) { + if (isPrint(C)) { + printOnePrintingCharacter(static_cast(C)); + return; + } + printCharacterInOctal(C); + }; + }; + const auto printCharacterList = [Data, &OS](const auto &printOneCharacter) { + const auto BeginPtr = Data.begin(), EndPtr = Data.end(); + for (const unsigned char C : make_range(BeginPtr, EndPtr - 1)) { + printOneCharacter(C); + OS << ','; + } + printOneCharacter(*(EndPtr - 1)); + }; + switch (ACLS) { + case MCAsmInfo::ACLS_Unknown: + printCharacterList(printCharacterInOctal); + return; + case MCAsmInfo::ACLS_SingleQuotePrefix: + printCharacterList(printOneCharacterFor([&OS](char C) { + const char AsmCharLitBuf[2] = {'\'', C}; + OS << StringRef(AsmCharLitBuf, sizeof(AsmCharLitBuf)); + })); + return; + } + llvm_unreachable("Invalid AsmCharLiteralSyntax value!"); +} + static void PrintQuotedString(StringRef Data, raw_ostream &OS) { OS << '"'; @@ -1009,33 +1050,42 @@ void MCAsmStreamer::emitBytes(StringRef Data) { "Cannot emit contents before setting section!"); if (Data.empty()) return; - // If only single byte is provided or no ascii or asciz directives is - // supported, emit as vector of 8bits data. - if (Data.size() == 1 || - !(MAI->getAscizDirective() || MAI->getAsciiDirective())) { - if (MCTargetStreamer *TS = getTargetStreamer()) { - TS->emitRawBytes(Data); + const auto emitAsString = [this](StringRef Data) { + // If the data ends with 0 and the target supports .asciz, use it, otherwise + // use .ascii or a byte-list directive + if (MAI->getAscizDirective() && Data.back() == 0) { + OS << MAI->getAscizDirective(); + Data = Data.substr(0, Data.size() - 1); + } else if (LLVM_LIKELY(MAI->getAsciiDirective())) { + OS << MAI->getAsciiDirective(); + } else if (MAI->getByteListDirective()) { + OS << MAI->getByteListDirective(); + PrintByteList(Data, OS, MAI->characterLiteralSyntax()); + EmitEOL(); + return true; } else { - const char *Directive = MAI->getData8bitsDirective(); - for (const unsigned char C : Data.bytes()) { - OS << Directive << (unsigned)C; - EmitEOL(); - } + return false; } + + PrintQuotedString(Data, OS); + EmitEOL(); + return true; + }; + + if (Data.size() != 1 && emitAsString(Data)) return; - } - // If the data ends with 0 and the target supports .asciz, use it, otherwise - // use .ascii - if (MAI->getAscizDirective() && Data.back() == 0) { - OS << MAI->getAscizDirective(); - Data = Data.substr(0, Data.size()-1); - } else { - OS << MAI->getAsciiDirective(); + // Only single byte is provided or no ascii, asciz, or byte-list directives + // are applicable. Emit as vector of individual 8bits data elements. + if (MCTargetStreamer *TS = getTargetStreamer()) { + TS->emitRawBytes(Data); + return; + } + const char *Directive = MAI->getData8bitsDirective(); + for (const unsigned char C : Data.bytes()) { + OS << Directive << (unsigned)C; + EmitEOL(); } - - PrintQuotedString(Data, OS); - EmitEOL(); } void MCAsmStreamer::emitBinaryData(StringRef Data) { diff --git a/llvm/test/CodeGen/PowerPC/aix-bytestring.ll b/llvm/test/CodeGen/PowerPC/aix-bytestring.ll new file mode 100644 index 00000000000000..443c019c9e30cf --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-bytestring.ll @@ -0,0 +1,7 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s + +@str = constant [256 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10\11\12\13\14\15\16\17\18\19\1A\1B\1C\1D\1E\1F !\22#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\7F\80\81\82\83\84\85\86\87\88\89\8A\8B\8C\8D\8E\8F\90\91\92\93\94\95\96\97\98\99\9A\9B\9C\9D\9E\9F\A0\A1\A2\A3\A4\A5\A6\A7\A8\A9\AA\AB\AC\AD\AE\AF\B0\B1\B2\B3\B4\B5\B6\B7\B8\B9\BA\BB\BC\BD\BE\BF\C0\C1\C2\C3\C4\C5\C6\C7\C8\C9\CA\CB\CC\CD\CE\CF\D0\D1\D2\D3\D4\D5\D6\D7\D8\D9\DA\DB\DC\DD\DE\DF\E0\E1\E2\E3\E4\E5\E6\E7\E8\E9\EA\EB\EC\ED\EE\EF\F0\F1\F2\F3\F4\F5\F6\F7\F8\F9\FA\FB\FC\FD\FE\FF\00", align 1 + +; CHECK-LABEL:str: +; CHECK-NEXT: .byte 0001,0002,0003,0004,0005,0006,0007,0010,0011,0012,0013,0014,0015,0016,0017,0020,0021,0022,0023,0024,0025,0026,0027,0030,0031,0032,0033,0034,0035,0036,0037,' ,'!,'",'#,'$,'%,'&,'','(,'),'*,'+,',,'-,'.,'/,'0,'1,'2,'3,'4,'5,'6,'7,'8,'9,':,';,'<,'=,'>,'?,'@,'A,'B,'C,'D,'E,'F,'G,'H,'I,'J,'K,'L,'M,'N,'O,'P,'Q,'R,'S,'T,'U,'V,'W,'X,'Y,'Z,'[,'\,'],'^,'_,'`,'a,'b,'c,'d,'e,'f,'g,'h,'i,'j,'k,'l,'m,'n,'o,'p,'q,'r,'s,'t,'u,'v,'w,'x,'y,'z,'{,'|,'},'~,0177,0200,0201,0202,0203,0204,0205,0206,0207,0210,0211,0212,0213,0214,0215,0216,0217,0220,0221,0222,0223,0224,0225,0226,0227,0230,0231,0232,0233,0234,0235,0236,0237,0240,0241,0242,0243,0244,0245,0246,0247,0250,0251,0252,0253,0254,0255,0256,0257,0260,0261,0262,0263,0264,0265,0266,0267,0270,0271,0272,0273,0274,0275,0276,0277,0300,0301,0302,0303,0304,0305,0306,0307,0310,0311,0312,0313,0314,0315,0316,0317,0320,0321,0322,0323,0324,0325,0326,0327,0330,0331,0332,0333,0334,0335,0336,0337,0340,0341,0342,0343,0344,0345,0346,0347,0350,0351,0352,0353,0354,0355,0356,0357,0360,0361,0362,0363,0364,0365,0366,0367,0370,0371,0372,0373,0374,0375,0376,0377,0000 diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll index 4083bd58fe98b0..88c8b08bdb59f1 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll @@ -86,10 +86,7 @@ ; CHECK: .globl chrarray ; CHECK-NEXT: chrarray: -; CHECK-NEXT: .byte 97 -; CHECK-NEXT: .byte 98 -; CHECK-NEXT: .byte 99 -; CHECK-NEXT: .byte 100 +; CHECK-NEXT: .byte 'a,'b,'c,'d ; CHECK: .globl dblarr ; CHECK-NEXT: .align 3 diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll index 42ead4b9b4de7b..0d29857fd1556e 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll @@ -41,30 +41,9 @@ entry: ; CHECK-NEXT: .vbyte 4, 0 # 0x0 ; CHECK-NEXT: .csect .rodata.str1.1[RO],2 ; CHECK-NEXT: L..strA: -; CHECK-NEXT: .byte 104 -; CHECK-NEXT: .byte 101 -; CHECK-NEXT: .byte 108 -; CHECK-NEXT: .byte 108 -; CHECK-NEXT: .byte 111 -; CHECK-NEXT: .byte 32 -; CHECK-NEXT: .byte 119 -; CHECK-NEXT: .byte 111 -; CHECK-NEXT: .byte 114 -; CHECK-NEXT: .byte 108 -; CHECK-NEXT: .byte 100 -; CHECK-NEXT: .byte 33 -; CHECK-NEXT: .byte 10 -; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 'h,'e,'l,'l,'o,' ,'w,'o,'r,'l,'d,'!,0012,0000 ; CHECK-NEXT: L...str: -; CHECK-NEXT: .byte 97 -; CHECK-NEXT: .byte 98 -; CHECK-NEXT: .byte 99 -; CHECK-NEXT: .byte 100 -; CHECK-NEXT: .byte 101 -; CHECK-NEXT: .byte 102 -; CHECK-NEXT: .byte 103 -; CHECK-NEXT: .byte 104 -; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 'a,'b,'c,'d,'e,'f,'g,'h,0000 ; CHECKOBJ: 00000010 <.rodata.str2.2>: ; CHECKOBJ-NEXT: 10: 01 08 01 10 diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll index dddbe2ba089e8d..a7bb018966429f 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll @@ -53,10 +53,7 @@ ; CHECK64-NEXT: .vbyte 8, 0x408c200000000000 ; CHECK-NEXT: .globl const_chrarray ; CHECK-NEXT: const_chrarray: -; CHECK-NEXT: .byte 97 -; CHECK-NEXT: .byte 98 -; CHECK-NEXT: .byte 99 -; CHECK-NEXT: .byte 100 +; CHECK-NEXT: .byte 'a,'b,'c,'d ; CHECK-NEXT: .globl const_dblarr ; CHECK-NEXT: .align 3 ; CHECK-NEXT: const_dblarr: From 618a890b72f874cbc41168737d03f724f58805fc Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 29 Sep 2020 10:51:49 -0700 Subject: [PATCH 11/12] [X86] Increase the depth threshold required to form VPERMI2W/VPERMI2B in shuffle combining These instructions are implemented with two port 5 uops and one port 015 uop so they are more complicated that most shuffles. This patch increases the depth threshold for when we form them during shuffle combining to try to limit increasing the number of uops especially on port 5. Differential Revision: https://reviews.llvm.org/D88503 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 19 +-- .../CodeGen/X86/min-legal-vector-width.ll | 36 +++--- .../CodeGen/X86/vector-shuffle-128-v16.ll | 45 ++----- .../test/CodeGen/X86/vector-shuffle-128-v8.ll | 112 ++++-------------- .../CodeGen/X86/vector-shuffle-256-v32.ll | 28 +---- llvm/test/CodeGen/X86/vector-zext.ll | 19 +-- 6 files changed, 71 insertions(+), 188 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2a7f028d378966..4b3adc7dcfbc9b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35351,6 +35351,9 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, // Depth threshold above which we can efficiently use variable mask shuffles. int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 1 : 2; AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask; + // VPERMI2W/VPERMI2B are 3 uops on Skylake and Icelake so we require a + // higher depth before combining them. + bool AllowBWIVPERMV3 = (Depth >= 2 || HasVariableMask); bool MaskContainsZeros = isAnyZero(Mask); @@ -35387,9 +35390,9 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32 || MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || - (Subtarget.hasBWI() && + (Subtarget.hasBWI() && AllowBWIVPERMV3 && (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || - (Subtarget.hasVBMI() && + (Subtarget.hasVBMI() && AllowBWIVPERMV3 && (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { // Adjust shuffle mask - replace SM_SentinelZero with second source index. for (unsigned i = 0; i != NumMaskElts; ++i) @@ -35416,9 +35419,9 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32 || MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) || - (Subtarget.hasBWI() && + (Subtarget.hasBWI() && AllowBWIVPERMV3 && (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || - (Subtarget.hasVBMI() && + (Subtarget.hasVBMI() && AllowBWIVPERMV3 && (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { V1 = DAG.getBitcast(MaskVT, V1); V2 = DAG.getBitcast(MaskVT, V2); @@ -35588,10 +35591,10 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 || MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32 || MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || - (Subtarget.hasBWI() && (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16 || - MaskVT == MVT::v32i16)) || - (Subtarget.hasVBMI() && (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8 || - MaskVT == MVT::v64i8)))) { + (Subtarget.hasBWI() && AllowBWIVPERMV3 && + (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || + (Subtarget.hasVBMI() && AllowBWIVPERMV3 && + (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { V1 = DAG.getBitcast(MaskVT, V1); V2 = DAG.getBitcast(MaskVT, V2); Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll index e5240d5e246a6c..a39fbf878fd9f7 100644 --- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll +++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll @@ -857,10 +857,10 @@ define <8 x i16> @trunc_v8i64_v8i16(<8 x i64>* %x) nounwind "min-legal-vector-wi define <8 x i32> @trunc_v8i64_v8i32_zeroes(<8 x i64>* %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v8i64_v8i32_zeroes: ; CHECK: # %bb.0: -; CHECK-NEXT: vpsrlq $48, 32(%rdi), %ymm1 -; CHECK-NEXT: vpsrlq $48, (%rdi), %ymm2 -; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30] -; CHECK-NEXT: vpermi2w %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: vpsrlq $48, 32(%rdi), %ymm0 +; CHECK-NEXT: vpsrlq $48, (%rdi), %ymm1 +; CHECK-NEXT: vpackusdw %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; CHECK-NEXT: retq %a = load <8 x i64>, <8 x i64>* %x %b = lshr <8 x i64> %a, @@ -920,9 +920,10 @@ define <8 x i32> @trunc_v8i64_v8i32_sign(<8 x i64>* %x) nounwind "min-legal-vect define <16 x i16> @trunc_v16i32_v16i16_sign(<16 x i32>* %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v16i32_v16i16_sign: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm1 -; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31] -; CHECK-NEXT: vpermi2w 32(%rdi), %ymm1, %ymm0 +; CHECK-NEXT: vpsrad $16, 32(%rdi), %ymm0 +; CHECK-NEXT: vpsrad $16, (%rdi), %ymm1 +; CHECK-NEXT: vpackssdw %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; CHECK-NEXT: retq %a = load <16 x i32>, <16 x i32>* %x %b = ashr <16 x i32> %a, @@ -931,20 +932,13 @@ define <16 x i16> @trunc_v16i32_v16i16_sign(<16 x i32>* %x) nounwind "min-legal- } define <32 x i8> @trunc_v32i16_v32i8_sign(<32 x i16>* %x) nounwind "min-legal-vector-width"="256" { -; CHECK-AVX512-LABEL: trunc_v32i16_v32i8_sign: -; CHECK-AVX512: # %bb.0: -; CHECK-AVX512-NEXT: vpsraw $8, 32(%rdi), %ymm0 -; CHECK-AVX512-NEXT: vpsraw $8, (%rdi), %ymm1 -; CHECK-AVX512-NEXT: vpacksswb %ymm0, %ymm1, %ymm0 -; CHECK-AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; CHECK-AVX512-NEXT: retq -; -; CHECK-VBMI-LABEL: trunc_v32i16_v32i8_sign: -; CHECK-VBMI: # %bb.0: -; CHECK-VBMI-NEXT: vmovdqa (%rdi), %ymm1 -; CHECK-VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63] -; CHECK-VBMI-NEXT: vpermi2b 32(%rdi), %ymm1, %ymm0 -; CHECK-VBMI-NEXT: retq +; CHECK-LABEL: trunc_v32i16_v32i8_sign: +; CHECK: # %bb.0: +; CHECK-NEXT: vpsraw $8, 32(%rdi), %ymm0 +; CHECK-NEXT: vpsraw $8, (%rdi), %ymm1 +; CHECK-NEXT: vpacksswb %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; CHECK-NEXT: retq %a = load <32 x i16>, <32 x i16>* %x %b = ashr <32 x i16> %a, %c = trunc <32 x i16> %b to <32 x i8> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll index fb300a88b4120b..ee3cf43e8f2f7f 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -304,24 +304,11 @@ define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07( ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1 -; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; AVX2-NEXT: retq -; -; AVX512VLBW-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: -; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; AVX512VLBW-NEXT: retq -; -; AVX512VLVBMI-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: -; AVX512VLVBMI: # %bb.0: -; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [0,16,0,17,0,18,0,19,0,20,0,21,0,22,0,23] -; AVX512VLVBMI-NEXT: vpermi2b %xmm0, %xmm1, %xmm2 -; AVX512VLVBMI-NEXT: vmovdqa %xmm2, %xmm0 -; AVX512VLVBMI-NEXT: retq +; AVX2OR512VL-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: +; AVX2OR512VL: # %bb.0: +; AVX2OR512VL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX2OR512VL-NEXT: retq ; ; XOPAVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: ; XOPAVX1: # %bb.0: @@ -1335,23 +1322,11 @@ define <16 x i8> @shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23( ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] ; SSE-NEXT: retq ; -; AVX1OR2-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23: -; AVX1OR2: # %bb.0: -; AVX1OR2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] -; AVX1OR2-NEXT: retq -; -; AVX512VLBW-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23: -; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; AVX512VLBW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] -; AVX512VLBW-NEXT: retq -; -; AVX512VLVBMI-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23: -; AVX512VLVBMI: # %bb.0: -; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [0,16,1,17,4,20,5,21,2,18,3,19,6,22,7,23] -; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0 -; AVX512VLVBMI-NEXT: retq +; AVX-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23: +; AVX: # %bb.0: +; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %val1, <16 x i8> %val2, <16 x i32> ret <16 x i8> %shuffle } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll index c72d736960f96c..f7baebf7c4e4ff 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -1017,23 +1017,11 @@ define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) { ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE-NEXT: retq ; -; AVX1OR2-LABEL: shuffle_v8i16_0c1d2e3f: -; AVX1OR2: # %bb.0: -; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; AVX1OR2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1OR2-NEXT: retq -; -; AVX512VL-SLOW-LABEL: shuffle_v8i16_0c1d2e3f: -; AVX512VL-SLOW: # %bb.0: -; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; AVX512VL-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX512VL-SLOW-NEXT: retq -; -; AVX512VL-FAST-LABEL: shuffle_v8i16_0c1d2e3f: -; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,12,1,13,2,14,3,15] -; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0 -; AVX512VL-FAST-NEXT: retq +; AVX-LABEL: shuffle_v8i16_0c1d2e3f: +; AVX: # %bb.0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1059,23 +1047,11 @@ define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) { ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE-NEXT: retq ; -; AVX1OR2-LABEL: shuffle_v8i16_48596a7b: -; AVX1OR2: # %bb.0: -; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; AVX1OR2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1OR2-NEXT: retq -; -; AVX512VL-SLOW-LABEL: shuffle_v8i16_48596a7b: -; AVX512VL-SLOW: # %bb.0: -; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; AVX512VL-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX512VL-SLOW-NEXT: retq -; -; AVX512VL-FAST-LABEL: shuffle_v8i16_48596a7b: -; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [4,8,5,9,6,10,7,11] -; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0 -; AVX512VL-FAST-NEXT: retq +; AVX-LABEL: shuffle_v8i16_48596a7b: +; AVX: # %bb.0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1424,23 +1400,11 @@ define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] ; SSE41-NEXT: retq ; -; AVX1OR2-LABEL: shuffle_v8i16_012dXXXX: -; AVX1OR2: # %bb.0: -; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] -; AVX1OR2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] -; AVX1OR2-NEXT: retq -; -; AVX512VL-SLOW-LABEL: shuffle_v8i16_012dXXXX: -; AVX512VL-SLOW: # %bb.0: -; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] -; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] -; AVX512VL-SLOW-NEXT: retq -; -; AVX512VL-FAST-LABEL: shuffle_v8i16_012dXXXX: -; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,13,4,5,6,7] -; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0 -; AVX512VL-FAST-NEXT: retq +; AVX-LABEL: shuffle_v8i16_012dXXXX: +; AVX: # %bb.0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1475,24 +1439,11 @@ define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) { ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v8i16_XXXXcde3: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0 -; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] -; AVX2-NEXT: retq -; -; AVX512VL-SLOW-LABEL: shuffle_v8i16_XXXXcde3: -; AVX512VL-SLOW: # %bb.0: -; AVX512VL-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0 -; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] -; AVX512VL-SLOW-NEXT: retq -; -; AVX512VL-FAST-LABEL: shuffle_v8i16_XXXXcde3: -; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,11] -; AVX512VL-FAST-NEXT: vpermi2w %xmm0, %xmm1, %xmm2 -; AVX512VL-FAST-NEXT: vmovdqa %xmm2, %xmm0 -; AVX512VL-FAST-NEXT: retq +; AVX2OR512VL-LABEL: shuffle_v8i16_XXXXcde3: +; AVX2OR512VL: # %bb.0: +; AVX2OR512VL-NEXT: vpbroadcastq %xmm0, %xmm0 +; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] +; AVX2OR512VL-NEXT: retq ; ; XOPAVX1-LABEL: shuffle_v8i16_XXXXcde3: ; XOPAVX1: # %bb.0: @@ -1533,24 +1484,11 @@ define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] ; SSE41-NEXT: retq ; -; AVX1OR2-LABEL: shuffle_v8i16_cde3XXXX: -; AVX1OR2: # %bb.0: -; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; AVX1OR2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] -; AVX1OR2-NEXT: retq -; -; AVX512VL-SLOW-LABEL: shuffle_v8i16_cde3XXXX: -; AVX512VL-SLOW: # %bb.0: -; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] -; AVX512VL-SLOW-NEXT: retq -; -; AVX512VL-FAST-LABEL: shuffle_v8i16_cde3XXXX: -; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,6,11,4,5,6,7] -; AVX512VL-FAST-NEXT: vpermi2w %xmm0, %xmm1, %xmm2 -; AVX512VL-FAST-NEXT: vmovdqa %xmm2, %xmm0 -; AVX512VL-FAST-NEXT: retq +; AVX-LABEL: shuffle_v8i16_cde3XXXX: +; AVX: # %bb.0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index 23bf91de6e7e8a..e3eed625dab3b9 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -4804,29 +4804,11 @@ define <4 x i64> @PR28136(<32 x i8> %a0, <32 x i8> %a1) { ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: PR28136: -; AVX2: # %bb.0: -; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX2-NEXT: retq -; -; AVX512VLBW-LABEL: PR28136: -; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX512VLBW-NEXT: retq -; -; AVX512VLVBMI-SLOW-LABEL: PR28136: -; AVX512VLVBMI-SLOW: # %bb.0: -; AVX512VLVBMI-SLOW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX512VLVBMI-SLOW-NEXT: retq -; -; AVX512VLVBMI-FAST-LABEL: PR28136: -; AVX512VLVBMI-FAST: # %bb.0: -; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,32,1,33,2,34,3,35,16,48,17,49,18,50,19,51,4,36,5,37,6,38,7,39,20,52,21,53,22,54,23,55] -; AVX512VLVBMI-FAST-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 -; AVX512VLVBMI-FAST-NEXT: retq +; AVX2OR512VL-LABEL: PR28136: +; AVX2OR512VL: # %bb.0: +; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] +; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2OR512VL-NEXT: retq ; ; XOPAVX1-LABEL: PR28136: ; XOPAVX1: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll index 2ad16f2e04c5b1..0132e901e6b3a3 100644 --- a/llvm/test/CodeGen/X86/vector-zext.ll +++ b/llvm/test/CodeGen/X86/vector-zext.ll @@ -1902,20 +1902,11 @@ define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; AVX2-NEXT: retq ; -; AVX512F-LABEL: shuf_zext_8i16_to_4i64_offset2: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] -; AVX512F-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: shuf_zext_8i16_to_4i64_offset2: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [2,33,34,35,3,37,38,39,4,41,42,43,5,45,46,47] -; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512BW-NEXT: vpermt2w %zmm2, %zmm1, %zmm0 -; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; AVX512BW-NEXT: retq +; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] +; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX512-NEXT: retq entry: %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> %Z = bitcast <16 x i16> %B to <4 x i64> From 1d54e75cf26a4c60b66659d5d9c62f4bb9452b03 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Tue, 29 Sep 2020 14:39:54 -0700 Subject: [PATCH 12/12] [GlobalISel] Fix multiply with overflow intrinsics legalization generating invalid MIR. During lowering of G_UMULO and friends, the previous code moved the builder's insertion point to be after the legalizing instruction. When that happened, if there happened to be a "G_CONSTANT i32 0" immediately after, the CSEMIRBuilder would try to find that constant during the buildConstant(zero) call, and since it dominates itself would return the iterator unchanged, even though the def of the constant was *after* the current insertion point. This resulted in the compare being generated *before* the constant which it was using. There's no need to modify the insertion point before building the mul-hi or constant. Delaying moving the insert point ensures those are built/CSEd before the G_ICMP is built. Fixes PR47679 Differential Revision: https://reviews.llvm.org/D88514 --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 5 +- .../AArch64/GlobalISel/legalize-mul.mir | 68 ++++++++++++++++++- .../CodeGen/Mips/GlobalISel/legalizer/mul.mir | 2 +- .../CodeGen/Mips/GlobalISel/llvm-ir/mul.ll | 12 ++-- 4 files changed, 76 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e8bc4067c127e4..45ac2b7b671193 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2892,11 +2892,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { MI.RemoveOperand(1); Observer.changedInstr(MI); - MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); - auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS}); auto Zero = MIRBuilder.buildConstant(Ty, 0); + // Move insert point forward so we can use the Res register if needed. + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + // For *signed* multiply, overflow is detected by checking: // (hi != (lo >> bitwidth-1)) if (Opcode == TargetOpcode::G_SMULH) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir index 84c839f7b341bc..20af216aaeb5ed 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir @@ -28,8 +28,8 @@ body: | ; CHECK-LABEL: name: test_smul_overflow ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] ; CHECK: [[SMULH:%[0-9]+]]:_(s64) = G_SMULH [[COPY]], [[COPY1]] + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MUL]], [[C]](s64) ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SMULH]](s64), [[ASHR]] @@ -51,9 +51,9 @@ body: | ; CHECK-LABEL: name: test_umul_overflow ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] ; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[COPY]], [[COPY1]] ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]] ; CHECK: $x0 = COPY [[MUL]](s64) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) @@ -91,3 +91,67 @@ body: | $q0 = COPY %2(<2 x s64>) RET_ReallyLR implicit $q0 ... +--- +name: test_umulo_overflow_no_invalid_mir +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x2' } +frameInfo: + maxAlignment: 16 +stack: + - { id: 0, size: 8, alignment: 8 } + - { id: 1, size: 8, alignment: 8 } + - { id: 2, size: 16, alignment: 16 } + - { id: 3, size: 16, alignment: 8 } +machineFunctionInfo: {} +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; Check that the overflow result doesn't generate incorrect MIR by using a G_CONSTANT 0 + ; before it's been defined. + ; CHECK-LABEL: name: test_umulo_overflow_no_invalid_mir + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3 + ; CHECK: G_STORE [[COPY2]](s64), [[FRAME_INDEX]](p0) :: (store 8) + ; CHECK: G_STORE [[COPY1]](s64), [[FRAME_INDEX1]](p0) :: (store 8) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load 8) + ; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[LOAD]], [[LOAD1]] + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[LOAD]], [[LOAD1]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]] + ; CHECK: G_STORE [[C]](s64), [[FRAME_INDEX2]](p0) :: (store 8, align 1) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] + ; CHECK: $x0 = COPY [[MUL]](s64) + ; CHECK: $x1 = COPY [[AND]](s64) + ; CHECK: RET_ReallyLR implicit $x0 + %0:_(p0) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %25:_(s32) = G_CONSTANT i32 0 + %3:_(p0) = G_FRAME_INDEX %stack.0 + %4:_(p0) = G_FRAME_INDEX %stack.1 + %6:_(p0) = G_FRAME_INDEX %stack.3 + G_STORE %2(s64), %3(p0) :: (store 8) + G_STORE %1(s64), %4(p0) :: (store 8) + %7:_(s64) = G_LOAD %3(p0) :: (dereferenceable load 8) + %8:_(s64) = G_LOAD %4(p0) :: (dereferenceable load 8) + %9:_(s64), %10:_(s1) = G_UMULO %7, %8 + %31:_(s64) = G_CONSTANT i64 0 + G_STORE %31(s64), %6(p0) :: (store 8, align 1) + %16:_(s64) = G_ZEXT %10(s1) + $x0 = COPY %9(s64) + $x1 = COPY %16(s64) + RET_ReallyLR implicit $x0 + +... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir index c92a55d0af3225..b146aa5ff13d56 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir @@ -439,9 +439,9 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 ; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll index 659eadf181c02a..f7250ccde898fd 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll @@ -180,13 +180,13 @@ declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) define void @umul_with_overflow(i32 %lhs, i32 %rhs, i32* %pmul, i1* %pcarry_flag) { ; MIPS32-LABEL: umul_with_overflow: ; MIPS32: # %bb.0: -; MIPS32-NEXT: mul $1, $4, $5 ; MIPS32-NEXT: multu $4, $5 -; MIPS32-NEXT: mfhi $2 -; MIPS32-NEXT: sltu $2, $zero, $2 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: sb $2, 0($7) -; MIPS32-NEXT: sw $1, 0($6) +; MIPS32-NEXT: mfhi $1 +; MIPS32-NEXT: mul $2, $4, $5 +; MIPS32-NEXT: sltu $1, $zero, $1 +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sb $1, 0($7) +; MIPS32-NEXT: sw $2, 0($6) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop %res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %lhs, i32 %rhs)