diff --git a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt
index 8fcbfbe40b2313..c74c4051ade72c 100644
--- a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt
@@ -19,6 +19,7 @@ add_clang_library(clangTidyModernizeModule
  RawStringLiteralCheck.cpp
  RedundantVoidArgCheck.cpp
  ReplaceAutoPtrCheck.cpp
+  ReplaceDisallowCopyAndAssignMacroCheck.cpp
  ReplaceRandomShuffleCheck.cpp
  ReturnBracedInitListCheck.cpp
  ShrinkToFitCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp
index 6280f9c991e8ac..d9ccd2cd0ad7fb 100644
--- a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp
@@ -21,6 +21,7 @@
 #include "RawStringLiteralCheck.h"
 #include "RedundantVoidArgCheck.h"
 #include "ReplaceAutoPtrCheck.h"
+#include "ReplaceDisallowCopyAndAssignMacroCheck.h"
 #include "ReplaceRandomShuffleCheck.h"
 #include "ReturnBracedInitListCheck.h"
 #include "ShrinkToFitCheck.h"
@@ -67,6 +68,8 @@ class ModernizeModule : public ClangTidyModule {
         "modernize-redundant-void-arg");
     CheckFactories.registerCheck<ReplaceAutoPtrCheck>(
         "modernize-replace-auto-ptr");
+    CheckFactories.registerCheck<ReplaceDisallowCopyAndAssignMacroCheck>(
+        "modernize-replace-disallow-copy-and-assign-macro");
     CheckFactories.registerCheck<ReplaceRandomShuffleCheck>(
         "modernize-replace-random-shuffle");
     CheckFactories.registerCheck<ReturnBracedInitListCheck>(
diff --git a/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.cpp b/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.cpp
new file mode 100644
index 00000000000000..2219a3c477b361
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.cpp
@@ -0,0 +1,90 @@
+//===--- ReplaceDisallowCopyAndAssignMacroCheck.cpp - clang-tidy ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ReplaceDisallowCopyAndAssignMacroCheck.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Lex/MacroArgs.h"
+#include "llvm/Support/FormatVariadic.h"
+
+namespace clang {
+namespace tidy {
+namespace modernize {
+
+namespace {
+
+class ReplaceDisallowCopyAndAssignMacroCallbacks : public PPCallbacks {
+public:
+  explicit ReplaceDisallowCopyAndAssignMacroCallbacks(
+      ReplaceDisallowCopyAndAssignMacroCheck &Check, Preprocessor &PP)
+      : Check(Check), PP(PP) {}
+
+  void MacroExpands(const Token &MacroNameTok, const MacroDefinition &MD,
+                    SourceRange Range, const MacroArgs *Args) override {
+    IdentifierInfo *Info = MacroNameTok.getIdentifierInfo();
+    if (!Info || !Args || Args->getNumMacroArguments() != 1)
+      return;
+    if (Info->getName() != Check.getMacroName())
+      return;
+    // The first argument to the DISALLOW_COPY_AND_ASSIGN macro is expected to
+    // be the class name.
+    const Token *ClassNameTok = Args->getUnexpArgument(0);
+    if (Args->ArgNeedsPreexpansion(ClassNameTok, PP))
+      // For now we only support simple arguments that don't need to be
+      // pre-expanded.
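+      // For example, DISALLOW_COPY_AND_ASSIGN(TESTCLASS) where TESTCLASS is
+      // itself a macro would need pre-expansion; such uses are skipped (see
+      // the NEEDS_PREEXPANSION case in the test file below).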
+      return;
+    clang::IdentifierInfo *ClassIdent = ClassNameTok->getIdentifierInfo();
+    if (!ClassIdent)
+      return;
+
+    std::string Replacement = llvm::formatv(
+        R"cpp({0}(const {0} &) = delete;
+const {0} &operator=(const {0} &) = delete{1})cpp",
+        ClassIdent->getName(), shouldAppendSemi(Range) ? ";" : "");
+
+    Check.diag(MacroNameTok.getLocation(),
+               "prefer deleting copy constructor and assignment operator over "
+               "using macro '%0'")
+        << Check.getMacroName()
+        << FixItHint::CreateReplacement(
+               PP.getSourceManager().getExpansionRange(Range), Replacement);
+  }
+
+private:
+  /// \returns \c true if the next token after the given \p MacroLoc is \b not
+  /// a semicolon.
+  bool shouldAppendSemi(SourceRange MacroLoc) {
+    llvm::Optional<Token> Next = Lexer::findNextToken(
+        MacroLoc.getEnd(), PP.getSourceManager(), PP.getLangOpts());
+    return !(Next && Next->is(tok::semi));
+  }
+
+  ReplaceDisallowCopyAndAssignMacroCheck &Check;
+  Preprocessor &PP;
+};
+} // namespace
+
+ReplaceDisallowCopyAndAssignMacroCheck::ReplaceDisallowCopyAndAssignMacroCheck(
+    StringRef Name, ClangTidyContext *Context)
+    : ClangTidyCheck(Name, Context),
+      MacroName(Options.get("MacroName", "DISALLOW_COPY_AND_ASSIGN")) {}
+
+void ReplaceDisallowCopyAndAssignMacroCheck::registerPPCallbacks(
+    const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) {
+  PP->addPPCallbacks(
+      ::std::make_unique<ReplaceDisallowCopyAndAssignMacroCallbacks>(
+          *this, *ModuleExpanderPP));
+}
+
+void ReplaceDisallowCopyAndAssignMacroCheck::storeOptions(
+    ClangTidyOptions::OptionMap &Opts) {
+  Options.store(Opts, "MacroName", MacroName);
+}
+
+} // namespace modernize
+} // namespace tidy
+} // namespace clang
diff --git a/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.h b/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.h
new file mode 100644
index 00000000000000..818b6aa270fda3
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.h
@@ -0,0 +1,62 @@
+//===--- ReplaceDisallowCopyAndAssignMacroCheck.h - clang-tidy --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REPLACEDISALLOWCOPYANDASSIGNMACROCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REPLACEDISALLOWCOPYANDASSIGNMACROCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace modernize {
+
+/// This check finds macro expansions of ``DISALLOW_COPY_AND_ASSIGN(Type)`` and
+/// replaces them with a deleted copy constructor and a deleted assignment
+/// operator.
+///
+/// Before:
+/// ~~~{.cpp}
+///   class Foo {
+///   private:
+///     DISALLOW_COPY_AND_ASSIGN(Foo);
+///   };
+/// ~~~
+///
+/// After:
+/// ~~~{.cpp}
+///   class Foo {
+///   private:
+///     Foo(const Foo &) = delete;
+///     const Foo &operator=(const Foo &) = delete;
+///   };
+/// ~~~
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/modernize-replace-disallow-copy-and-assign-macro.html
+class ReplaceDisallowCopyAndAssignMacroCheck : public ClangTidyCheck {
+public:
+  ReplaceDisallowCopyAndAssignMacroCheck(StringRef Name,
+                                         ClangTidyContext *Context);
+  bool isLanguageVersionSupported(const LangOptions &LangOpts) const override {
+    return LangOpts.CPlusPlus11;
+  }
+  void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP,
+                           Preprocessor *ModuleExpanderPP) override;
+  void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
+
+  const std::string &getMacroName() const { return MacroName; }
+
+private:
+  const std::string MacroName;
+};
+
+} // namespace modernize
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REPLACEDISALLOWCOPYANDASSIGNMACROCHECK_H
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 8da24a93d7f4dd..bd898de446b9d4 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -136,6 +136,12 @@ New checks
  Finds includes of system libc headers not provided by the compiler within
  llvm-libc implementations.
 
+- New :doc:`modernize-replace-disallow-copy-and-assign-macro
+  <clang-tidy/checks/modernize-replace-disallow-copy-and-assign-macro>` check.
+
+  Finds macro expansions of ``DISALLOW_COPY_AND_ASSIGN`` and replaces them with
+  a deleted copy constructor and a deleted assignment operator.
+
- New :doc:`objc-dealloc-in-category
  <clang-tidy/checks/objc-dealloc-in-category>` check.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index 3794aa5bc3d848..17331605aa64e1 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -218,6 +218,7 @@ Clang-Tidy Checks
   `modernize-raw-string-literal <modernize-raw-string-literal>`_, "Yes"
   `modernize-redundant-void-arg <modernize-redundant-void-arg>`_, "Yes"
   `modernize-replace-auto-ptr <modernize-replace-auto-ptr>`_, "Yes"
+   `modernize-replace-disallow-copy-and-assign-macro <modernize-replace-disallow-copy-and-assign-macro>`_, "Yes"
   `modernize-replace-random-shuffle <modernize-replace-random-shuffle>`_, "Yes"
   `modernize-return-braced-init-list <modernize-return-braced-init-list>`_, "Yes"
   `modernize-shrink-to-fit <modernize-shrink-to-fit>`_, "Yes"
diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize-replace-disallow-copy-and-assign-macro.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize-replace-disallow-copy-and-assign-macro.rst
new file mode 100644
index 00000000000000..6717c928506a78
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/modernize-replace-disallow-copy-and-assign-macro.rst
@@ -0,0 +1,52 @@
+.. title:: clang-tidy - modernize-replace-disallow-copy-and-assign-macro
+
+modernize-replace-disallow-copy-and-assign-macro
+================================================
+
+Finds macro expansions of ``DISALLOW_COPY_AND_ASSIGN(Type)`` and replaces them
+with a deleted copy constructor and a deleted assignment operator.
+
+Before the ``delete`` keyword was introduced in C++11 it was common practice to
+declare a copy constructor and an assignment operator as private members. This
+effectively makes them unusable from the public API of a class.
+
+With the advent of the ``delete`` keyword in C++11 we can abandon the
+``private`` access of the copy constructor and the assignment operator and
+delete the methods entirely.
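+
+For reference, such a macro typically declares the two special members without
+defining them (a sketch of a common definition; exact macro bodies vary
+between codebases):
+
+.. code-block:: c++
+
+  #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+    TypeName(const TypeName &);              \
+    TypeName &operator=(const TypeName &)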
+
+When running this check on code like this:
+
+.. code-block:: c++
+
+  class Foo {
+  private:
+    DISALLOW_COPY_AND_ASSIGN(Foo);
+  };
+
+It will be transformed to this:
+
+.. code-block:: c++
+
+  class Foo {
+  private:
+    Foo(const Foo &) = delete;
+    const Foo &operator=(const Foo &) = delete;
+  };
+
+Known Limitations
+-----------------
+
+* Notice that the migration example above leaves the ``private`` access
+  specification untouched. You might want to run the check
+  :doc:`modernize-use-equals-delete <modernize-use-equals-delete>` to get
+  warnings for deleted functions in private sections.
+
+Options
+-------
+
+.. option:: MacroName
+
+   A string specifying the macro name whose expansion will be replaced.
+   Default is `DISALLOW_COPY_AND_ASSIGN`.
+
+See: https://en.cppreference.com/w/cpp/language/function#Deleted_functions
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize-replace-disallow-copy-and-assign-macro.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize-replace-disallow-copy-and-assign-macro.cpp
new file mode 100644
index 00000000000000..50b0c57d3b5298
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize-replace-disallow-copy-and-assign-macro.cpp
@@ -0,0 +1,79 @@
+// RUN: %check_clang_tidy -format-style=LLVM -check-suffix=DEFAULT %s \
+// RUN:   modernize-replace-disallow-copy-and-assign-macro %t
+
+// RUN: %check_clang_tidy -format-style=LLVM -check-suffix=DIFFERENT-NAME %s \
+// RUN:   modernize-replace-disallow-copy-and-assign-macro %t \
+// RUN:   -config="{CheckOptions: [ \
+// RUN:     {key: modernize-replace-disallow-copy-and-assign-macro.MacroName, \
+// RUN:      value: MY_MACRO_NAME}]}"
+
+// RUN: %check_clang_tidy -format-style=LLVM -check-suffix=FINALIZE %s \
+// RUN:   modernize-replace-disallow-copy-and-assign-macro %t \
+// RUN:   -config="{CheckOptions: [ \
+// RUN:     {key: modernize-replace-disallow-copy-and-assign-macro.MacroName, \
+// RUN:      value: DISALLOW_COPY_AND_ASSIGN_FINALIZE}]}"
+
+// RUN: clang-tidy %s -checks="-*,modernize-replace-disallow-copy-and-assign-macro" \
+// RUN:   -config="{CheckOptions: [ \
+// RUN:     {key: modernize-replace-disallow-copy-and-assign-macro.MacroName, \
+// RUN:      value: DISALLOW_COPY_AND_ASSIGN_MORE_ARGUMENTS}]}" | count 0
+
+// RUN: clang-tidy %s -checks="-*,modernize-replace-disallow-copy-and-assign-macro" \
+// RUN:   -config="{CheckOptions: [ \
+// RUN:     {key: modernize-replace-disallow-copy-and-assign-macro.MacroName, \
+// RUN:      value: DISALLOW_COPY_AND_ASSIGN_NEEDS_PREEXPANSION}]}" | count 0
+
+// Note: the last two tests expect no diagnostics, but FileCheck cannot handle
+// that, hence the use of | count 0.
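+
+// Several of the macro definitions below are deliberately left empty: the
+// check matches an expansion by macro name and replaces the entire expansion
+// range, so the macro body itself does not affect the fix-its under test.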
+
+#define DISALLOW_COPY_AND_ASSIGN(TypeName)
+
+class TestClass1 {
+private:
+  DISALLOW_COPY_AND_ASSIGN(TestClass1);
+};
+// CHECK-MESSAGES-DEFAULT: :[[@LINE-2]]:3: warning: prefer deleting copy constructor and assignment operator over using macro 'DISALLOW_COPY_AND_ASSIGN' [modernize-replace-disallow-copy-and-assign-macro]
+// CHECK-FIXES-DEFAULT: {{^}}  TestClass1(const TestClass1 &) = delete;{{$}}
+// CHECK-FIXES-DEFAULT-NEXT: {{^}}  const TestClass1 &operator=(const TestClass1 &) = delete;{{$}}
+
+#define MY_MACRO_NAME(TypeName)
+
+class TestClass2 {
+private:
+  MY_MACRO_NAME(TestClass2);
+};
+// CHECK-MESSAGES-DIFFERENT-NAME: :[[@LINE-2]]:3: warning: prefer deleting copy constructor and assignment operator over using macro 'MY_MACRO_NAME' [modernize-replace-disallow-copy-and-assign-macro]
+// CHECK-FIXES-DIFFERENT-NAME: {{^}}  TestClass2(const TestClass2 &) = delete;{{$}}
+// CHECK-FIXES-DIFFERENT-NAME-NEXT: {{^}}  const TestClass2 &operator=(const TestClass2 &) = delete;{{$}}
+
+#define DISALLOW_COPY_AND_ASSIGN_FINALIZE(TypeName) \
+  TypeName(const TypeName &) = delete;              \
+  const TypeName &operator=(const TypeName &) = delete;
+
+class TestClass3 {
+private:
+  // Note that the macro can be used here without a trailing semicolon because
+  // the macro definition above already ends with one. Therefore our
+  // replacement must also end with a semicolon.
+  DISALLOW_COPY_AND_ASSIGN_FINALIZE(TestClass3)
+};
+// CHECK-MESSAGES-FINALIZE: :[[@LINE-2]]:3: warning: prefer deleting copy constructor and assignment operator over using macro 'DISALLOW_COPY_AND_ASSIGN_FINALIZE' [modernize-replace-disallow-copy-and-assign-macro]
+// CHECK-FIXES-FINALIZE: {{^}}  TestClass3(const TestClass3 &) = delete;{{$}}
+// CHECK-FIXES-FINALIZE-NEXT: {{^}}  const TestClass3 &operator=(const TestClass3 &) = delete;{{$}}
+
+#define DISALLOW_COPY_AND_ASSIGN_MORE_ARGUMENTS(A, B)
+
+class TestClass4 {
+private:
+  DISALLOW_COPY_AND_ASSIGN_MORE_ARGUMENTS(TestClass4, TestClass4);
+};
+// CHECK-MESSAGES-MORE-ARGUMENTS-NOT: warning: prefer deleting copy constructor and assignment operator over using macro 'DISALLOW_COPY_AND_ASSIGN_MORE_ARGUMENTS'
+
+#define DISALLOW_COPY_AND_ASSIGN_NEEDS_PREEXPANSION(A)
+#define TESTCLASS TestClass5
+
+class TestClass5 {
+private:
+  DISALLOW_COPY_AND_ASSIGN_NEEDS_PREEXPANSION(TESTCLASS);
+};
+// CHECK-MESSAGES-NEEDS-PREEXPANSION-NOT: warning: prefer deleting copy constructor and assignment operator over using macro 'DISALLOW_COPY_AND_ASSIGN_NEEDS_PREEXPANSION'
diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst
index 4fde6034975e18..1ebcab1651d056 100644
--- a/clang/docs/ClangCommandLineReference.rst
+++ b/clang/docs/ClangCommandLineReference.rst
@@ -3051,6 +3051,12 @@ WebAssembly
 .. option:: -munimplemented-simd128, -mno-unimplemented-simd128
 
+.. option:: -mexec-model=<arg>
+
+Select between "command" and "reactor" executable models. Commands have a main
+function which scopes the lifetime of the program. Reactors are activated and
+remain active until explicitly terminated.
+
 X86
 ---
 .. option:: -m3dnow, -mno-3dnow
diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 185ba2f4b4c1df..6c39f6aab1b9c1 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -4560,7 +4560,7 @@ inline bool IsEnumDeclScoped(EnumDecl *ED) {
 /// The new name looks like this:
 ///   <name> + OpenMPVariantManglingSeparatorStr + <mangled OMPTraitInfo>
 static constexpr StringRef getOpenMPVariantManglingSeparatorStr() {
-  return ".ompvariant";
+  return "$ompvariant";
 }
 
 } // namespace clang
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 05a9121592efdc..add71fb7dfd752 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -171,6 +171,10 @@ def m_ppc_Features_Group : OptionGroup<"<ppc features group>">,
                            Group<m_Group>, DocName<"PowerPC">;
 def m_wasm_Features_Group : OptionGroup<"<wasm features group>">,
                             Group<m_Group>, DocName<"WebAssembly">;
+// The features added by this group will not be added to target features.
+// These are explicitly handled.
+def m_wasm_Features_Driver_Group : OptionGroup<"<wasm driver features group>">,
+                                   Group<m_Group>, DocName<"WebAssembly Driver">;
 def m_x86_Features_Group : OptionGroup<"<x86 features group>">,
                            Group<m_Group>, Flags<[CoreOption]>,
                            DocName<"X86">;
 def m_riscv_Features_Group : OptionGroup<"<riscv features group>">,
                              Group<m_Group>,
@@ -2456,6 +2460,9 @@ def mtail_call : Flag<["-"], "mtail-call">, Group<m_wasm_Features_Group>;
 def mno_tail_call : Flag<["-"], "mno-tail-call">, Group<m_wasm_Features_Group>;
 def mreference_types : Flag<["-"], "mreference-types">, Group<m_wasm_Features_Group>;
 def mno_reference_types : Flag<["-"], "mno-reference-types">, Group<m_wasm_Features_Group>;
+def mexec_model_EQ : Joined<["-"], "mexec-model=">, Group<m_wasm_Features_Driver_Group>,
+                     Values<"command,reactor">,
+                     HelpText<"Execution model (WebAssembly only)">;
 
 def mamdgpu_debugger_abi : Joined<["-"], "mamdgpu-debugger-abi=">,
   Flags<[HelpHidden]>,
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index fa1c80fc6bbf9e..bcbe916820dc7e 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -2167,22 +2167,21 @@ std::string OMPTraitInfo::getMangledName() const {
   std::string MangledName;
   llvm::raw_string_ostream OS(MangledName);
   for (const OMPTraitSet &Set : Sets) {
-    OS << '.' << 'S' << unsigned(Set.Kind);
+    OS << '$' << 'S' << unsigned(Set.Kind);
     for (const OMPTraitSelector &Selector : Set.Selectors) {
       bool AllowsTraitScore = false;
       bool RequiresProperty = false;
       isValidTraitSelectorForTraitSet(
          Selector.Kind, Set.Kind, AllowsTraitScore, RequiresProperty);
-      OS << '.' << 's' << unsigned(Selector.Kind);
+      OS << '$' << 's' << unsigned(Selector.Kind);
       if (!RequiresProperty ||
           Selector.Kind == TraitSelector::user_condition)
         continue;
       for (const OMPTraitProperty &Property : Selector.Properties)
-        OS << '.' << 'P'
-           << getOpenMPContextTraitPropertyName(Property.Kind);
+        OS << '$' << 'P' << getOpenMPContextTraitPropertyName(Property.Kind);
     }
   }
   return OS.str();
@@ -2191,7 +2190,7 @@ std::string OMPTraitInfo::getMangledName() const {
 OMPTraitInfo::OMPTraitInfo(StringRef MangledName) {
   unsigned long U;
   do {
-    if (!MangledName.consume_front(".S"))
+    if (!MangledName.consume_front("$S"))
       break;
    if (MangledName.consumeInteger(10, U))
      break;
@@ -2199,7 +2198,7 @@ OMPTraitInfo::OMPTraitInfo(StringRef MangledName) {
     OMPTraitSet &Set = Sets.back();
     Set.Kind = TraitSet(U);
     do {
-      if (!MangledName.consume_front(".s"))
+      if (!MangledName.consume_front("$s"))
         break;
      if (MangledName.consumeInteger(10, U))
        break;
@@ -2207,11 +2206,11 @@ OMPTraitInfo::OMPTraitInfo(StringRef MangledName) {
       OMPTraitSelector &Selector = Set.Selectors.back();
       Selector.Kind = TraitSelector(U);
       do {
-        if (!MangledName.consume_front(".P"))
+        if (!MangledName.consume_front("$P"))
           break;
         Selector.Properties.push_back(OMPTraitProperty());
         OMPTraitProperty &Property = Selector.Properties.back();
-        std::pair<StringRef, StringRef> PropRestPair = MangledName.split('.');
+        std::pair<StringRef, StringRef> PropRestPair = MangledName.split('$');
         Property.Kind =
             getOpenMPContextTraitPropertyKind(Set.Kind, PropRestPair.first);
         MangledName = PropRestPair.second;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index e47cb178792be6..b87490a6a85898 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -1605,8 +1605,7 @@ void X86TargetInfo::getCPUSpecificCPUDispatchFeatures(
 bool X86TargetInfo::validateCpuIs(StringRef FeatureStr) const {
   return llvm::StringSwitch<bool>(FeatureStr)
 #define X86_VENDOR(ENUM, STRING) .Case(STRING, true)
-#define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS) \
-  .Cases(STR, ALIAS, true)
+#define X86_CPU_TYPE_COMPAT_ALIAS(ENUM, ALIAS) .Case(ALIAS, true)
 #define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) .Case(STR, true)
 #define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) .Case(STR, true)
 #include "llvm/Support/X86TargetParser.def"
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 948d31312bd81c..a73245ad829c43 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -11375,8 +11375,8 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
   std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
 #define X86_VENDOR(ENUM, STRING) \
   .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
-#define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS) \
-  .Cases(STR, ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
+#define X86_CPU_TYPE_COMPAT_ALIAS(ENUM, ALIAS) \
+  .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
 #define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) \
   .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
 #define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) \
diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp
index 461ec75d17f693..48f9a9b603dbe7 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -69,8 +69,26 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   Args.AddAllArgs(CmdArgs, options::OPT_u);
   ToolChain.AddFilePathLibArgs(Args, CmdArgs);
 
+  const char *Crt1 = "crt1.o";
+  const char *Entry = NULL;
+  if (const Arg *A = Args.getLastArg(options::OPT_mexec_model_EQ)) {
+    StringRef CM = A->getValue();
+    if (CM == "command") {
+      // Use default values.
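+      // The default crt1.o provides a _start entry point that runs main()
+      // once and then exits, which is what the "command" model denotes (an
+      // assumption about the WASI sysroot's startup object, not checked by
+      // the driver).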
+ } else if (CM == "reactor") { + Crt1 = "crt1-reactor.o"; + Entry = "_initialize"; + } else { + ToolChain.getDriver().Diag(diag::err_drv_invalid_argument_to_option) + << CM << A->getOption().getName(); + } + } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt1.o"))); + CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(Crt1))); + if (Entry) { + CmdArgs.push_back(Args.MakeArgString("--entry")); + CmdArgs.push_back(Args.MakeArgString(Entry)); + } AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); diff --git a/clang/test/Driver/wasm-toolchain.c b/clang/test/Driver/wasm-toolchain.c index 343c1b001ef8f0..8300a81614e321 100644 --- a/clang/test/Driver/wasm-toolchain.c +++ b/clang/test/Driver/wasm-toolchain.c @@ -107,3 +107,14 @@ // RUN: %clang %s -### -fsanitize=address -target wasm32-unknown-emscripten 2>&1 | FileCheck -check-prefix=CHECK-ASAN-EMSCRIPTEN %s // CHECK-ASAN-EMSCRIPTEN: "-fsanitize=address" // CHECK-ASAN-EMSCRIPTEN: "-fsanitize-address-globals-dead-stripping" + +// Basic exec-model tests. + +// RUN: %clang %s -### -no-canonical-prefixes -target wasm32-unknown-unknown -mexec-model=command 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-COMMAND %s +// CHECK-COMMAND: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" +// CHECK-COMMAND: wasm-ld{{.*}}" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" + +// RUN: %clang %s -### -no-canonical-prefixes -target wasm32-unknown-unknown -mexec-model=reactor 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-REACTOR %s +// CHECK-REACTOR: wasm-ld{{.*}}" {{.*}} "--entry" "_initialize" {{.*}} diff --git a/clang/test/OpenMP/nvptx_declare_variant_name_mangling.cpp b/clang/test/OpenMP/nvptx_declare_variant_name_mangling.cpp new file mode 100644 index 00000000000000..6a9ce799d01e42 --- /dev/null +++ b/clang/test/OpenMP/nvptx_declare_variant_name_mangling.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=50 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-version=50 | FileCheck %s --implicit-check-not='call i32 {@_Z3bazv|@_Z3barv}' +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t -fopenmp-version=50 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - -fopenmp-version=50 | FileCheck %s --implicit-check-not='call i32 {@_Z3bazv|@_Z3barv}' +// expected-no-diagnostics + +// CHECK-DAG: @_Z3barv +// CHECK-DAG: @_Z3bazv +// CHECK-DAG: @"_Z54bar$ompvariant$S2$s8$Pnvptx$Pnvptx64$S3$s10$Pmatch_anyv" +// CHECK-DAG: @"_Z54baz$ompvariant$S2$s8$Pnvptx$Pnvptx64$S3$s10$Pmatch_anyv" +// CHECK-DAG: call i32 @"_Z54bar$ompvariant$S2$s8$Pnvptx$Pnvptx64$S3$s10$Pmatch_anyv"() +// CHECK-DAG: call i32 @"_Z54baz$ompvariant$S2$s8$Pnvptx$Pnvptx64$S3$s10$Pmatch_anyv"() + +#ifndef HEADER +#define HEADER + +#pragma omp declare target + +int bar() { return 1; } + +int baz() { return 5; } + +#pragma omp begin declare variant match(device = {arch(nvptx, nvptx64)}, 
implementation = {extension(match_any)})
+
+int bar() { return 2; }
+
+int baz() { return 6; }
+
+#pragma omp end declare variant
+
+#pragma omp end declare target
+
+int main() {
+  int res;
+#pragma omp target map(from \
+                       : res)
+  res = bar() + baz();
+  return res;
+}
+
+#endif
\ No newline at end of file
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp
index 7550545ea6fa2f..9a0053a59a644e 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp
@@ -388,7 +388,7 @@ void GetThreadStackTopAndBottom(bool at_initialization, uptr *stack_top,
   // pthread_get_stacksize_np() returns an incorrect stack size for the main
   // thread on Mavericks. See
   // https://github.com/google/sanitizers/issues/261
-  if ((GetMacosVersion() >= MACOS_VERSION_MAVERICKS) && at_initialization &&
+  if ((GetMacosAlignedVersion() >= MacosVersion(10, 9)) && at_initialization &&
       stacksize == (1 << 19)) {
     struct rlimit rl;
     CHECK_EQ(getrlimit(RLIMIT_STACK, &rl), 0);
@@ -607,53 +607,27 @@ HandleSignalMode GetHandleSignalMode(int signum) {
   return result;
 }
 
-MacosVersion cached_macos_version = MACOS_VERSION_UNINITIALIZED;
-
-MacosVersion GetMacosVersionInternal() {
-  int mib[2] = { CTL_KERN, KERN_OSRELEASE };
-  char version[100];
-  uptr len = 0, maxlen = sizeof(version) / sizeof(version[0]);
-  for (uptr i = 0; i < maxlen; i++) version[i] = '\0';
-  // Get the version length.
-  CHECK_NE(internal_sysctl(mib, 2, 0, &len, 0, 0), -1);
-  CHECK_LT(len, maxlen);
-  CHECK_NE(internal_sysctl(mib, 2, version, &len, 0, 0), -1);
-
-  // Expect <major>.<minor>(.<patch>)
-  CHECK_GE(len, 3);
-  const char *p = version;
-  int major = internal_simple_strtoll(p, &p, /*base=*/10);
-  if (*p != '.') return MACOS_VERSION_UNKNOWN;
-  p += 1;
-  int minor = internal_simple_strtoll(p, &p, /*base=*/10);
-  if (*p != '.') return MACOS_VERSION_UNKNOWN;
-
-  switch (major) {
-    case 11: return MACOS_VERSION_LION;
-    case 12: return MACOS_VERSION_MOUNTAIN_LION;
-    case 13: return MACOS_VERSION_MAVERICKS;
-    case 14: return MACOS_VERSION_YOSEMITE;
-    case 15: return MACOS_VERSION_EL_CAPITAN;
-    case 16: return MACOS_VERSION_SIERRA;
-    case 17: return MACOS_VERSION_HIGH_SIERRA;
-    case 18: return MACOS_VERSION_MOJAVE;
-    case 19: return MACOS_VERSION_CATALINA;
-    default:
-      if (major < 9) return MACOS_VERSION_UNKNOWN;
-      return MACOS_VERSION_UNKNOWN_NEWER;
-  }
+static MacosVersion GetMacosAlignedVersionInternal() {
+  u16 kernel_major = GetDarwinKernelVersion().major;
+  const u16 version_offset = 4;
+  CHECK_GE(kernel_major, version_offset);
+  u16 macos_major = kernel_major - version_offset;
+  return MacosVersion(10, macos_major);
 }
 
-MacosVersion GetMacosVersion() {
-  atomic_uint32_t *cache =
-      reinterpret_cast<atomic_uint32_t *>(&cached_macos_version);
-  MacosVersion result =
-      static_cast<MacosVersion>(atomic_load(cache, memory_order_acquire));
-  if (result == MACOS_VERSION_UNINITIALIZED) {
-    result = GetMacosVersionInternal();
-    atomic_store(cache, result, memory_order_release);
+static_assert(sizeof(MacosVersion) == sizeof(atomic_uint32_t::Type),
              "MacosVersion cache size");
+static atomic_uint32_t cached_macos_version;
+
+MacosVersion GetMacosAlignedVersion() {
+  atomic_uint32_t::Type result =
+      atomic_load(&cached_macos_version, memory_order_acquire);
+  if (!result) {
+    MacosVersion version = GetMacosAlignedVersionInternal();
+    result = *reinterpret_cast<atomic_uint32_t::Type *>(&version);
+    atomic_store(&cached_macos_version, result, memory_order_release);
  }
-  return result;
+  return *reinterpret_cast<MacosVersion *>(&result);
 }
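+
+// For example, Darwin kernel 13.x corresponds to macOS 10.9 (Mavericks) and
+// 18.x to macOS 10.14 (Mojave): the macOS minor version is the Darwin major
+// version minus 4, which is exactly what version_offset above encodes.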
DarwinKernelVersion GetDarwinKernelVersion() {
@@ -719,7 +693,7 @@ void LogFullErrorReport(const char *buffer) {
 #if !SANITIZER_GO
   // Log with os_trace. This will make it into the crash log.
 #if SANITIZER_OS_TRACE
-  if (GetMacosVersion() >= MACOS_VERSION_YOSEMITE) {
+  if (GetMacosAlignedVersion() >= MacosVersion(10, 10)) {
     // os_trace requires the message (format parameter) to be a string literal.
     if (internal_strncmp(SanitizerToolName, "AddressSanitizer",
                          sizeof("AddressSanitizer") - 1) == 0)
@@ -866,7 +840,7 @@ bool DyldNeedsEnvVariable() {
   if (!&dyldVersionNumber) return true;
   // If running on OS X 10.11+ or iOS 9.0+, dyld will interpose even if
   // DYLD_INSERT_LIBRARIES is not set. However, checking OS version via
-  // GetMacosVersion() doesn't work for the simulator. Let's instead check
+  // GetMacosAlignedVersion() doesn't work for the simulator. Let's instead check
  // `dyldVersionNumber`, which is exported by dyld, against a known version
  // number from the first OS release where this appeared.
   return dyldVersionNumber < kMinDyldVersionWithAutoInterposition;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.h b/compiler-rt/lib/sanitizer_common/sanitizer_mac.h
index 34dc2c05dcf4b7..806aba955288cc 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.h
@@ -30,37 +30,33 @@ struct MemoryMappingLayoutData {
   bool current_instrumented;
 };
 
-enum MacosVersion {
-  MACOS_VERSION_UNINITIALIZED = 0,
-  MACOS_VERSION_UNKNOWN,
-  MACOS_VERSION_LION,  // macOS 10.7; oldest currently supported
-  MACOS_VERSION_MOUNTAIN_LION,
-  MACOS_VERSION_MAVERICKS,
-  MACOS_VERSION_YOSEMITE,
-  MACOS_VERSION_EL_CAPITAN,
-  MACOS_VERSION_SIERRA,
-  MACOS_VERSION_HIGH_SIERRA,
-  MACOS_VERSION_MOJAVE,
-  MACOS_VERSION_CATALINA,
-  MACOS_VERSION_UNKNOWN_NEWER
-};
-
-struct DarwinKernelVersion {
+template <typename VersionType>
+struct VersionBase {
   u16 major;
   u16 minor;
 
-  DarwinKernelVersion(u16 major, u16 minor) : major(major), minor(minor) {}
+  VersionBase(u16 major, u16 minor) : major(major), minor(minor) {}
 
-  bool operator==(const DarwinKernelVersion &other) const {
+  bool operator==(const VersionType &other) const {
     return major == other.major && minor == other.minor;
   }
-  bool operator>=(const DarwinKernelVersion &other) const {
+  bool operator>=(const VersionType &other) const {
    return major > other.major ||
            (major == other.major && minor >= other.minor);
  }
 };
 
-MacosVersion GetMacosVersion();
+struct MacosVersion : VersionBase<MacosVersion> {
+  MacosVersion(u16 ten, u16 major) : VersionBase(ten, major) {
+    CHECK_EQ(ten, 10);
+  }
+};
+
+struct DarwinKernelVersion : VersionBase<DarwinKernelVersion> {
+  DarwinKernelVersion(u16 major, u16 minor) : VersionBase(major, minor) {}
+};
+
+MacosVersion GetMacosAlignedVersion();
 DarwinKernelVersion GetDarwinKernelVersion();
 char **GetEnviron();
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp
index cc233408d0cebf..29cbf62acd5cce 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp
@@ -123,7 +123,7 @@ class AtosSymbolizerProcess : public SymbolizerProcess {
     argv[i++] = path_to_binary;
     argv[i++] = "-p";
     argv[i++] = &pid_str_[0];
-    if (GetMacosVersion() == MACOS_VERSION_MAVERICKS) {
+    if (GetMacosAlignedVersion() == MacosVersion(10, 9)) {
       // On Mavericks atos prints a deprecation warning which we suppress by
       // passing -d. The warning isn't present on other OSX versions, even the
       // newer ones.
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp index f92ecc5e40f6dc..eea52a34e97f4b 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp @@ -258,7 +258,7 @@ void InitializePlatform() { pthread_introspection_hook_install(&my_pthread_introspection_hook); #endif - if (GetMacosVersion() >= MACOS_VERSION_MOJAVE) { + if (GetMacosAlignedVersion() >= MacosVersion(10, 14)) { // Libsystem currently uses a process-global key; this might change. const unsigned kTLSLongjmpXorKeySlot = 0x7; longjmp_xor_key = (uptr)pthread_getspecific(kTLSLongjmpXorKeySlot); @@ -267,7 +267,7 @@ void InitializePlatform() { #ifdef __aarch64__ # define LONG_JMP_SP_ENV_SLOT \ - ((GetMacosVersion() >= MACOS_VERSION_MOJAVE) ? 12 : 13) + ((GetMacosAlignedVersion() >= MacosVersion(10, 14)) ? 12 : 13) #else # define LONG_JMP_SP_ENV_SLOT 2 #endif diff --git a/lldb/source/Expression/IRInterpreter.cpp b/lldb/source/Expression/IRInterpreter.cpp index 6f61a4bd26bf07..0af767116b746a 100644 --- a/lldb/source/Expression/IRInterpreter.cpp +++ b/lldb/source/Expression/IRInterpreter.cpp @@ -433,8 +433,6 @@ static const char *unsupported_opcode_error = "Interpreter doesn't handle one of the expression's opcodes"; static const char *unsupported_operand_error = "Interpreter doesn't handle one of the expression's operands"; -// static const char *interpreter_initialization_error = "Interpreter couldn't -// be initialized"; static const char *interpreter_internal_error = "Interpreter encountered an internal error"; static const char *bad_value_error = @@ -444,8 +442,6 @@ static const char *memory_allocation_error = static const char *memory_write_error = "Interpreter couldn't write to memory"; static const char *memory_read_error = "Interpreter couldn't read from memory"; static const char *infinite_loop_error = "Interpreter ran for too many cycles"; -// static const char *bad_result_error = "Result of expression -// is in bad memory"; static const char *too_many_functions_error = "Interpreter doesn't handle modules with multiple function bodies."; diff --git a/lldb/test/API/functionalities/thread/state_after_expression/Makefile b/lldb/test/API/functionalities/thread/state_after_expression/Makefile new file mode 100644 index 00000000000000..d7aace51bc8253 --- /dev/null +++ b/lldb/test/API/functionalities/thread/state_after_expression/Makefile @@ -0,0 +1,6 @@ +CXX_SOURCES := main.cpp +CFLAGS_EXTRAS := -std=c++11 + +ENABLE_THREADS := YES + +include Makefile.rules diff --git a/lldb/test/API/functionalities/thread/state_after_expression/TestStateAfterExpression.py b/lldb/test/API/functionalities/thread/state_after_expression/TestStateAfterExpression.py new file mode 100644 index 00000000000000..082b556dbdce83 --- /dev/null +++ b/lldb/test/API/functionalities/thread/state_after_expression/TestStateAfterExpression.py @@ -0,0 +1,53 @@ +""" +Make sure the stop reason of a thread that did not run +during an expression is not changed by running the expression +""" + + + +import lldb +import lldbsuite.test.lldbutil as lldbutil +from lldbsuite.test.lldbtest import * + + +class TestStopReasonAfterExpression(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + def test_thread_state_after_expr(self): + self.build() + self.main_source_file = lldb.SBFileSpec("main.cpp") + self.do_test() + + def do_test(self): + (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(self, + "Set a breakpoint here", self.main_source_file) + 
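+
+        # run_to_source_breakpoint builds and launches the target, then stops
+        # at the first thread hitting the source-pattern breakpoint. Both
+        # functions in main.cpp contain the pattern, so the breakpoint should
+        # resolve to two locations, which is verified below.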
+
+        self.assertEqual(bkpt.GetNumLocations(), 2, "Got two locations")
+
+        # So now thread holds the main thread.  Continue to hit the
+        # breakpoint again on the spawned thread:
+
+        threads = lldbutil.continue_to_breakpoint(process, bkpt)
+        self.assertEqual(len(threads), 1, "Hit the breakpoint the second time")
+        other_thread = threads[0]
+
+        self.assertNotEqual(thread.GetThreadID(), other_thread.GetThreadID(),
+                            "A different thread")
+        # Run an expression ONLY on thread.  Don't let other_thread run:
+        options = lldb.SBExpressionOptions()
+        options.SetTryAllThreads(False)
+        options.SetStopOthers(True)
+
+        result = thread.frames[0].EvaluateExpression('(int) printf("Hello\\n")', options)
+        self.assertTrue(result.GetError().Success(),
+                        "Expression failed: '%s'"%(result.GetError().GetCString()))
+
+        stop_reason = other_thread.GetStopReason()
+
+        self.assertEqual(stop_reason, lldb.eStopReasonBreakpoint,
+                         "Still records stopped at breakpoint: %s"
+                         %(lldbutil.stop_reason_to_str(stop_reason)))
+        self.assertEqual(other_thread.GetStopReasonDataAtIndex(0), 1,
+                         "Still records stopped at right breakpoint")
diff --git a/lldb/test/API/functionalities/thread/state_after_expression/main.cpp b/lldb/test/API/functionalities/thread/state_after_expression/main.cpp
new file mode 100644
index 00000000000000..338232ece63269
--- /dev/null
+++ b/lldb/test/API/functionalities/thread/state_after_expression/main.cpp
@@ -0,0 +1,14 @@
+#include <thread>
+
+void thread_func() {
+  // Set a breakpoint here
+}
+
+int
+main()
+{
+  // Set a breakpoint here
+  std::thread stopped_thread(thread_func);
+  stopped_thread.join();
+  return 0;
+}
diff --git a/llvm/include/llvm/IR/MatrixBuilder.h b/llvm/include/llvm/IR/MatrixBuilder.h
index e6cfa7a26f216b..20aa47ef601301 100644
--- a/llvm/include/llvm/IR/MatrixBuilder.h
+++ b/llvm/include/llvm/IR/MatrixBuilder.h
@@ -49,7 +49,7 @@ template <class IRBuilderTy> class MatrixBuilder {
     PointerType *PtrTy = cast<PointerType>(DataPtr->getType());
     Type *EltTy = PtrTy->getElementType();
 
-    Type *RetType = VectorType::get(EltTy, Rows * Columns);
+    auto *RetType = FixedVectorType::get(EltTy, Rows * Columns);
 
     Value *Ops[] = {DataPtr, Stride, B.getInt32(Rows), B.getInt32(Columns)};
     Type *OverloadedTypes[] = {RetType, PtrTy};
@@ -82,8 +82,8 @@ template <class IRBuilderTy> class MatrixBuilder {
   CallInst *CreateMatrixTranspose(Value *Matrix, unsigned Rows,
                                   unsigned Columns, const Twine &Name = "") {
     auto *OpType = cast<VectorType>(Matrix->getType());
-    Type *ReturnType =
-        VectorType::get(OpType->getElementType(), Rows * Columns);
+    auto *ReturnType =
+        FixedVectorType::get(OpType->getElementType(), Rows * Columns);
 
     Type *OverloadedTypes[] = {ReturnType};
     Value *Ops[] = {Matrix, B.getInt32(Rows), B.getInt32(Columns)};
@@ -101,8 +101,8 @@ template <class IRBuilderTy> class MatrixBuilder {
     auto *LHSType = cast<VectorType>(LHS->getType());
    auto *RHSType = cast<VectorType>(RHS->getType());
 
-    Type *ReturnType =
-        VectorType::get(LHSType->getElementType(), LHSRows * RHSColumns);
+    auto *ReturnType =
+        FixedVectorType::get(LHSType->getElementType(), LHSRows * RHSColumns);
 
     Value *Ops[] = {LHS, RHS, B.getInt32(LHSRows), B.getInt32(LHSColumns),
                     B.getInt32(RHSColumns)};
diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def
index c826f590b71f3c..aef189a562a5b0 100644
--- a/llvm/include/llvm/Support/X86TargetParser.def
+++ b/llvm/include/llvm/Support/X86TargetParser.def
@@ -19,12 +19,6 @@
 X86_VENDOR(VENDOR_INTEL, "intel")
 X86_VENDOR(VENDOR_AMD, "amd")
 #undef X86_VENDOR
 
-// This macro is used to implement CPU types that have an alias. As of now
-// there is only ever one alias.
-#ifndef X86_CPU_TYPE_COMPAT_WITH_ALIAS
-#define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS) X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR)
-#endif
-
 // This macro is used for cpu types present in compiler-rt/libgcc.
 #ifndef X86_CPU_TYPE_COMPAT
 #define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) X86_CPU_TYPE(ARCHNAME, ENUM)
 #endif
 
 #ifndef X86_CPU_TYPE
 #define X86_CPU_TYPE(ARCHNAME, ENUM)
 #endif
+
+#ifndef X86_CPU_TYPE_COMPAT_ALIAS
+#define X86_CPU_TYPE_COMPAT_ALIAS(ENUM, STR)
+#endif
+
 // The first part of this list must match what is implemented in libgcc and
 // compiler-rt. Clang uses this to know how to implement __builtin_cpu_is.
-X86_CPU_TYPE_COMPAT_WITH_ALIAS("bonnell",    INTEL_BONNELL,    "bonnell",    "atom")
-X86_CPU_TYPE_COMPAT           ("core2",      INTEL_CORE2,      "core2")
-X86_CPU_TYPE_COMPAT           ("nehalem",    INTEL_COREI7,     "corei7")
-X86_CPU_TYPE_COMPAT_WITH_ALIAS("amdfam10",   AMDFAM10H,        "amdfam10h",  "amdfam10")
-X86_CPU_TYPE_COMPAT_WITH_ALIAS("bdver1",     AMDFAM15H,        "amdfam15h",  "amdfam15")
-X86_CPU_TYPE_COMPAT_WITH_ALIAS("silvermont", INTEL_SILVERMONT, "silvermont", "slm")
-X86_CPU_TYPE_COMPAT           ("knl",        INTEL_KNL,        "knl")
-X86_CPU_TYPE_COMPAT           ("btver1",     AMD_BTVER1,       "btver1")
-X86_CPU_TYPE_COMPAT           ("btver2",     AMD_BTVER2,       "btver2")
-X86_CPU_TYPE_COMPAT           ("znver1",     AMDFAM17H,        "amdfam17h")
-X86_CPU_TYPE_COMPAT           ("knm",        INTEL_KNM,        "knm")
-X86_CPU_TYPE_COMPAT           ("goldmont",   INTEL_GOLDMONT,   "goldmont")
-X86_CPU_TYPE_COMPAT           ("goldmont-plus", INTEL_GOLDMONT_PLUS, "goldmont-plus")
-X86_CPU_TYPE_COMPAT           ("tremont",    INTEL_TREMONT,    "tremont")
+X86_CPU_TYPE_COMPAT("bonnell", INTEL_BONNELL, "bonnell")
+X86_CPU_TYPE_COMPAT("core2", INTEL_CORE2, "core2")
+X86_CPU_TYPE_COMPAT("nehalem", INTEL_COREI7, "corei7")
+X86_CPU_TYPE_COMPAT("amdfam10", AMDFAM10H, "amdfam10h")
+X86_CPU_TYPE_COMPAT("bdver1", AMDFAM15H, "amdfam15h")
+X86_CPU_TYPE_COMPAT("silvermont", INTEL_SILVERMONT, "silvermont")
+X86_CPU_TYPE_COMPAT("knl", INTEL_KNL, "knl")
+X86_CPU_TYPE_COMPAT("btver1", AMD_BTVER1, "btver1")
+X86_CPU_TYPE_COMPAT("btver2", AMD_BTVER2, "btver2")
+X86_CPU_TYPE_COMPAT("znver1", AMDFAM17H, "amdfam17h")
+X86_CPU_TYPE_COMPAT("knm", INTEL_KNM, "knm")
+X86_CPU_TYPE_COMPAT("goldmont", INTEL_GOLDMONT, "goldmont")
+X86_CPU_TYPE_COMPAT("goldmont-plus", INTEL_GOLDMONT_PLUS, "goldmont-plus")
+X86_CPU_TYPE_COMPAT("tremont", INTEL_TREMONT, "tremont")
 // Entries below this are not in libgcc/compiler-rt.
-X86_CPU_TYPE ("i386", INTEL_i386)
-X86_CPU_TYPE ("i486", INTEL_i486)
-X86_CPU_TYPE ("pentium", INTEL_PENTIUM)
-X86_CPU_TYPE ("pentium-mmx", INTEL_PENTIUM_MMX)
-X86_CPU_TYPE ("pentiumpro", INTEL_PENTIUM_PRO)
-X86_CPU_TYPE ("pentium2", INTEL_PENTIUM_II)
-X86_CPU_TYPE ("pentium3", INTEL_PENTIUM_III)
-X86_CPU_TYPE ("pentium4", INTEL_PENTIUM_IV)
-X86_CPU_TYPE ("pentium-m", INTEL_PENTIUM_M)
-X86_CPU_TYPE ("yonah", INTEL_CORE_DUO)
-X86_CPU_TYPE ("nocona", INTEL_NOCONA)
-X86_CPU_TYPE ("prescott", INTEL_PRESCOTT)
-X86_CPU_TYPE ("i486", AMD_i486)
-X86_CPU_TYPE ("pentium", AMDPENTIUM)
-X86_CPU_TYPE ("athlon", AMD_ATHLON)
-X86_CPU_TYPE ("athlon-xp", AMD_ATHLON_XP)
-X86_CPU_TYPE ("k8", AMD_K8)
-X86_CPU_TYPE ("k8-sse3", AMD_K8SSE3)
-#undef X86_CPU_TYPE_COMPAT_WITH_ALIAS
+X86_CPU_TYPE ("i386", INTEL_i386)
+X86_CPU_TYPE ("i486", INTEL_i486)
+X86_CPU_TYPE ("pentium", INTEL_PENTIUM)
+X86_CPU_TYPE ("pentium-mmx", INTEL_PENTIUM_MMX)
+X86_CPU_TYPE ("pentiumpro", INTEL_PENTIUM_PRO)
+X86_CPU_TYPE ("pentium2", INTEL_PENTIUM_II)
+X86_CPU_TYPE ("pentium3", INTEL_PENTIUM_III)
+X86_CPU_TYPE ("pentium4", INTEL_PENTIUM_IV)
+X86_CPU_TYPE ("pentium-m", INTEL_PENTIUM_M)
+X86_CPU_TYPE ("yonah", INTEL_CORE_DUO)
+X86_CPU_TYPE ("nocona", INTEL_NOCONA)
+X86_CPU_TYPE ("prescott", INTEL_PRESCOTT)
+X86_CPU_TYPE ("i486", AMD_i486)
+X86_CPU_TYPE ("pentium", AMDPENTIUM)
+X86_CPU_TYPE ("athlon", AMD_ATHLON)
+X86_CPU_TYPE ("athlon-xp", AMD_ATHLON_XP)
+X86_CPU_TYPE ("k8", AMD_K8)
+X86_CPU_TYPE ("k8-sse3", AMD_K8SSE3)
+
+// Alternate names supported by __builtin_cpu_is and target multiversioning.
+X86_CPU_TYPE_COMPAT_ALIAS(INTEL_BONNELL, "atom")
+X86_CPU_TYPE_COMPAT_ALIAS(AMDFAM10H, "amdfam10")
+X86_CPU_TYPE_COMPAT_ALIAS(AMDFAM15H, "amdfam15")
+X86_CPU_TYPE_COMPAT_ALIAS(INTEL_SILVERMONT, "slm")
+
+#undef X86_CPU_TYPE_COMPAT_ALIAS
 #undef X86_CPU_TYPE_COMPAT
 #undef X86_CPU_TYPE
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 46a1311a206c0a..cd65ba840eabc6 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -106,13 +106,8 @@ namespace {
     /// that it is alive across blocks.
     BitVector MayLiveAcrossBlocks;
 
-    /// State of a physical register.
-    enum RegState {
-      /// A disabled register is not available for allocation, but an alias may
-      /// be in use. A register can only be moved out of the disabled state if
-      /// all aliases are disabled.
-      regDisabled,
-
+    /// State of a register unit.
+    enum RegUnitState {
      /// A free register is not currently in use and can be allocated
      /// immediately without checking aliases.
      regFree,
@@ -126,8 +121,8 @@ namespace {
      /// register. In that case, LiveVirtRegs contains the inverse mapping.
    };
 
-    /// Maps each physical register to a RegState enum or a virtual register.
-    std::vector<unsigned> PhysRegState;
+    /// Maps each physical register to a RegUnitState enum or virtual register.
+    std::vector<unsigned> RegUnitStates;
 
    SmallVector<Register, 16> VirtDead;
    SmallVector<MachineInstr *, 32> Coalesced;
 
@@ -138,6 +133,7 @@
    RegUnitSet UsedInInstr;
 
    void setPhysRegState(MCPhysReg PhysReg, unsigned NewState);
+    bool isPhysRegFree(MCPhysReg PhysReg) const;
 
    /// Mark a physreg as used in this instruction.
void markRegUsedInInstr(MCPhysReg PhysReg) { @@ -189,6 +185,7 @@ namespace { bool isLastUseOfLocalReg(const MachineOperand &MO) const; void addKillFlag(const LiveReg &LRI); + bool verifyRegStateMapping(const LiveReg &LR) const; void killVirtReg(LiveReg &LR); void killVirtReg(Register VirtReg); void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR); @@ -196,7 +193,7 @@ namespace { void usePhysReg(MachineOperand &MO); void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg, - RegState NewState); + unsigned NewState); unsigned calcSpillCost(MCPhysReg PhysReg) const; void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg); @@ -229,7 +226,8 @@ namespace { bool mayLiveOut(Register VirtReg); bool mayLiveIn(Register VirtReg); - void dumpState(); + void printRegUnitState(unsigned State) const; + void dumpState() const; }; } // end anonymous namespace @@ -240,7 +238,16 @@ INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false, false) void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) { - PhysRegState[PhysReg] = NewState; + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) + RegUnitStates[*UI] = NewState; +} + +bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const { + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + if (RegUnitStates[*UI] != regFree) + return false; + } + return true; } /// This allocates space for the specified virtual register to be held on the @@ -384,12 +391,21 @@ void RegAllocFast::addKillFlag(const LiveReg &LR) { } } +bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const { + for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) { + if (RegUnitStates[*UI] != LR.VirtReg) + return false; + } + + return true; +} + /// Mark virtreg as no longer available. void RegAllocFast::killVirtReg(LiveReg &LR) { + assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); addKillFlag(LR); - assert(PhysRegState[LR.PhysReg] == LR.VirtReg && - "Broken RegState mapping"); - setPhysRegState(LR.PhysReg, regFree); + MCPhysReg PhysReg = LR.PhysReg; + setPhysRegState(PhysReg, regFree); LR.PhysReg = 0; } @@ -416,7 +432,9 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, /// Do the actual work of spilling. void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) { - assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping"); + assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); + + MCPhysReg PhysReg = LR.PhysReg; if (LR.Dirty) { // If this physreg is used by the instruction, we want to kill it on the @@ -424,7 +442,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) { bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI; LR.Dirty = false; - spill(MI, LR.VirtReg, LR.PhysReg, SpillKill); + spill(MI, LR.VirtReg, PhysReg, SpillKill); if (SpillKill) LR.LastUse = nullptr; // Don't kill register again @@ -460,53 +478,16 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { assert(PhysReg.isPhysical() && "Bad usePhysReg operand"); markRegUsedInInstr(PhysReg); - switch (PhysRegState[PhysReg]) { - case regDisabled: - break; - case regReserved: - PhysRegState[PhysReg] = regFree; - LLVM_FALLTHROUGH; - case regFree: - MO.setIsKill(); - return; - default: - // The physreg was allocated to a virtual register. That means the value we - // wanted has been clobbered. - llvm_unreachable("Instruction uses an allocated register"); - } - // Maybe a superregister is reserved? 
- for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { - MCPhysReg Alias = *AI; - switch (PhysRegState[Alias]) { - case regDisabled: - break; + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + switch (RegUnitStates[*UI]) { case regReserved: - // Either PhysReg is a subregister of Alias and we mark the - // whole register as free, or PhysReg is the superregister of - // Alias and we mark all the aliases as disabled before freeing - // PhysReg. - // In the latter case, since PhysReg was disabled, this means that - // its value is defined only by physical sub-registers. This check - // is performed by the assert of the default case in this loop. - // Note: The value of the superregister may only be partial - // defined, that is why regDisabled is a valid state for aliases. - assert((TRI->isSuperRegister(PhysReg, Alias) || - TRI->isSuperRegister(Alias, PhysReg)) && - "Instruction is not using a subregister of a reserved register"); + RegUnitStates[*UI] = regFree; LLVM_FALLTHROUGH; case regFree: - if (TRI->isSuperRegister(PhysReg, Alias)) { - // Leave the superregister in the working set. - setPhysRegState(Alias, regFree); - MO.getParent()->addRegisterKilled(Alias, TRI, true); - return; - } - // Some other alias was in the working set - clear it. - setPhysRegState(Alias, regDisabled); break; default: - llvm_unreachable("Instruction uses an alias of an allocated register"); + llvm_unreachable("Unexpected reg unit state"); } } @@ -519,38 +500,20 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { /// similar to defineVirtReg except the physreg is reserved instead of /// allocated. void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, - MCPhysReg PhysReg, RegState NewState) { - markRegUsedInInstr(PhysReg); - switch (Register VirtReg = PhysRegState[PhysReg]) { - case regDisabled: - break; - default: - spillVirtReg(MI, VirtReg); - LLVM_FALLTHROUGH; - case regFree: - case regReserved: - setPhysRegState(PhysReg, NewState); - return; - } - - // This is a disabled register, disable all aliases. - setPhysRegState(PhysReg, NewState); - for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { - MCPhysReg Alias = *AI; - switch (Register VirtReg = PhysRegState[Alias]) { - case regDisabled: - break; + MCPhysReg PhysReg, unsigned NewState) { + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + switch (unsigned VirtReg = RegUnitStates[*UI]) { default: spillVirtReg(MI, VirtReg); - LLVM_FALLTHROUGH; + break; case regFree: case regReserved: - setPhysRegState(Alias, regDisabled); - if (TRI->isSuperRegister(PhysReg, Alias)) - return; break; } } + + markRegUsedInInstr(PhysReg); + setPhysRegState(PhysReg, NewState); } /// Return the cost of spilling clearing out PhysReg and aliases so it is free @@ -563,46 +526,24 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { << " is already used in instr.\n"); return spillImpossible; } - switch (Register VirtReg = PhysRegState[PhysReg]) { - case regDisabled: - break; - case regFree: - return 0; - case regReserved: - LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " - << printReg(PhysReg, TRI) << " is reserved already.\n"); - return spillImpossible; - default: { - LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); - assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && - "Missing VirtReg entry"); - return LRI->Dirty ? spillDirty : spillClean; - } - } - // This is a disabled register, add up cost of aliases. 
- LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n"); - unsigned Cost = 0; - for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { - MCPhysReg Alias = *AI; - switch (Register VirtReg = PhysRegState[Alias]) { - case regDisabled: - break; + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + switch (unsigned VirtReg = RegUnitStates[*UI]) { case regFree: - ++Cost; break; case regReserved: + LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " + << printReg(PhysReg, TRI) << " is reserved already.\n"); return spillImpossible; default: { LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && "Missing VirtReg entry"); - Cost += LRI->Dirty ? spillDirty : spillClean; - break; + return LRI->Dirty ? spillDirty : spillClean; } } } - return Cost; + return 0; } /// This method updates local state so that we know that PhysReg is the @@ -909,9 +850,17 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, if (!Reg || !Reg.isPhysical()) continue; markRegUsedInInstr(Reg); - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - if (ThroughRegs.count(PhysRegState[*AI])) - definePhysReg(MI, *AI, regFree); + + for (MCRegUnitIterator UI(Reg, TRI); UI.isValid(); ++UI) { + if (!ThroughRegs.count(RegUnitStates[*UI])) + continue; + + // Need to spill any aliasing registers. + for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) { + for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) { + definePhysReg(MI, *SI, regFree); + } + } } } @@ -975,37 +924,40 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, } #ifndef NDEBUG -void RegAllocFast::dumpState() { - for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { - if (PhysRegState[Reg] == regDisabled) continue; - dbgs() << " " << printReg(Reg, TRI); - switch(PhysRegState[Reg]) { + +void RegAllocFast::dumpState() const { + for (unsigned Unit = 1, UnitE = TRI->getNumRegUnits(); Unit != UnitE; + ++Unit) { + switch (unsigned VirtReg = RegUnitStates[Unit]) { case regFree: break; case regReserved: - dbgs() << "*"; + dbgs() << " " << printRegUnit(Unit, TRI) << "[P]"; break; default: { - dbgs() << '=' << printReg(PhysRegState[Reg]); - LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]); - assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && - "Missing VirtReg entry"); - if (LRI->Dirty) - dbgs() << "*"; - assert(LRI->PhysReg == Reg && "Bad inverse map"); + dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg); + LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); + assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry"); + if (I->Dirty) + dbgs() << "[D]"; + assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present"); break; } } } dbgs() << '\n'; // Check that LiveVirtRegs is the inverse. 
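  // That is, every LiveVirtRegs entry with an assigned physreg must have that
  // assignment reflected in the physical-register state for all of its units.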
- for (LiveRegMap::iterator i = LiveVirtRegs.begin(), - e = LiveVirtRegs.end(); i != e; ++i) { - if (!i->PhysReg) - continue; - assert(i->VirtReg.isVirtual() && "Bad map key"); - assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value"); - assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); + for (const LiveReg &LR : LiveVirtRegs) { + Register VirtReg = LR.VirtReg; + assert(VirtReg.isVirtual() && "Bad map key"); + MCPhysReg PhysReg = LR.PhysReg; + if (PhysReg != 0) { + assert(Register::isPhysicalRegister(PhysReg) && + "mapped to physreg"); + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + assert(RegUnitStates[*UI] == VirtReg && "inverse map valid"); + } + } } } #endif @@ -1247,7 +1199,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { this->MBB = &MBB; LLVM_DEBUG(dbgs() << "\nAllocating " << MBB); - PhysRegState.assign(TRI->getNumRegs(), regDisabled); + RegUnitStates.assign(TRI->getNumRegUnits(), regFree); assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); MachineBasicBlock::iterator MII = MBB.begin(); diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index d7a4f44fef600c..4beb9b642ef3cb 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -42,7 +42,7 @@ static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID, // Check whether this is an old version of the function, which received // v4f32 arguments. Type *Arg0Type = F->getFunctionType()->getParamType(0); - if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4)) + if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4)) return false; // Yes, it's old, replace it with new version. @@ -903,7 +903,7 @@ static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, unsigned NumElts = ResultTy->getNumElements() * 8; // Bitcast from a 64-bit element type to a byte element type. - Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts); + Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts); Op = Builder.CreateBitCast(Op, VecTy, "cast"); // We'll be shuffling in zeroes. @@ -937,7 +937,7 @@ static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned NumElts = ResultTy->getNumElements() * 8; // Bitcast from a 64-bit element type to a byte element type. - Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts); + Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts); Op = Builder.CreateBitCast(Op, VecTy, "cast"); // We'll be shuffling in zeroes. 
@@ -965,8 +965,8 @@ static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts) { - llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(), - cast<IntegerType>(Mask->getType())->getBitWidth()); + llvm::VectorType *MaskTy = FixedVectorType::get( + Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth()); Mask = Builder.CreateBitCast(Mask, MaskTy); // If we have less than 8 elements, then the starting mask was an i8 and @@ -1002,9 +1002,8 @@ static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, if (C->isAllOnesValue()) return Op0; - llvm::VectorType *MaskTy = - llvm::VectorType::get(Builder.getInt1Ty(), - Mask->getType()->getIntegerBitWidth()); + auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), + Mask->getType()->getIntegerBitWidth()); Mask = Builder.CreateBitCast(Mask, MaskTy); Mask = Builder.CreateExtractElement(Mask, (uint64_t)0); return Builder.CreateSelect(Mask, Op0, Op1); @@ -1371,9 +1370,11 @@ static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI, Value *Cmp; if (CC == 3) { - Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts)); + Cmp = Constant::getNullValue( + FixedVectorType::get(Builder.getInt1Ty(), NumElts)); } else if (CC == 7) { - Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts)); + Cmp = Constant::getAllOnesValue( + FixedVectorType::get(Builder.getInt1Ty(), NumElts)); } else { ICmpInst::Predicate Pred; switch (CC) { @@ -1756,7 +1757,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Value *Arg0 = CI->getArgOperand(0); Value *Arg1 = CI->getArgOperand(1); - Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); + auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2); Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0); Value *BC = Builder.CreateBitCast(Arg0, @@ -2161,7 +2162,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3}); } Rep = Builder.CreateBitCast( - Rep, VectorType::get(Type::getHalfTy(C), NumDstElts)); + Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts)); Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps"); if (CI->getNumArgOperands() >= 3) Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, @@ -2335,7 +2336,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle. Type *EltTy = cast<VectorType>(CI->getType())->getElementType(); unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits(); - Type *VT = VectorType::get(EltTy, NumSrcElts); + auto *VT = FixedVectorType::get(EltTy, NumSrcElts); Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0), PointerType::getUnqual(VT)); Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1)); @@ -3658,13 +3659,13 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // So, the only thing required is a bitcast for both arguments. // First, check the arguments have the old type.
Value *Arg0 = CI->getArgOperand(0); - if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4)) + if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4)) return; // Old intrinsic, add bitcasts Value *Arg1 = CI->getArgOperand(1); - Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); + auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2); Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast"); Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index d090eaac3fb5e9..3fb49e94870fcc 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -881,7 +881,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, // Undefined shuffle mask -> undefined value. if (all_of(Mask, [](int Elt) { return Elt == UndefMaskElem; })) { - return UndefValue::get(VectorType::get(EltTy, MaskNumElts)); + return UndefValue::get(FixedVectorType::get(EltTy, MaskNumElts)); } // If the mask is all zeros this is a splat, no need to go through all @@ -2287,13 +2287,13 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C, Type *OrigGEPTy = PointerType::get(Ty, PtrTy->getAddressSpace()); Type *GEPTy = PointerType::get(Ty, PtrTy->getAddressSpace()); if (VectorType *VT = dyn_cast<VectorType>(C->getType())) - GEPTy = VectorType::get(OrigGEPTy, VT->getNumElements()); + GEPTy = FixedVectorType::get(OrigGEPTy, VT->getNumElements()); // The GEP returns a vector of pointers when one or more of // its arguments is a vector. for (unsigned i = 0, e = Idxs.size(); i != e; ++i) { if (auto *VT = dyn_cast<VectorType>(Idxs[i]->getType())) { - GEPTy = VectorType::get(OrigGEPTy, VT->getNumElements()); + GEPTy = FixedVectorType::get(OrigGEPTy, VT->getNumElements()); break; } } @@ -2528,7 +2528,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C, // overflow trouble. Type *ExtendedTy = Type::getIntNTy(Div->getContext(), CommonExtendedWidth); if (UseVector) - ExtendedTy = VectorType::get( + ExtendedTy = FixedVectorType::get( ExtendedTy, IsPrevIdxVector ? cast<VectorType>(PrevIdx->getType())->getNumElements() diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index 88971d89bf4c79..daa15bb2a9b053 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -1187,13 +1187,13 @@ ConstantVector::ConstantVector(VectorType *T, ArrayRef<Constant *> V) Constant *ConstantVector::get(ArrayRef<Constant *> V) { if (Constant *C = getImpl(V)) return C; - VectorType *Ty = VectorType::get(V.front()->getType(), V.size()); + auto *Ty = FixedVectorType::get(V.front()->getType(), V.size()); return Ty->getContext().pImpl->VectorConstants.getOrCreate(Ty, V); } Constant *ConstantVector::getImpl(ArrayRef<Constant *> V) { assert(!V.empty() && "Vectors can't be empty"); - VectorType *T = VectorType::get(V.front()->getType(), V.size()); + auto *T = FixedVectorType::get(V.front()->getType(), V.size()); // If this is an all-undef or all-zero vector, return a // ConstantAggregateZero or UndefValue. @@ -1960,7 +1960,7 @@ Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy, Type *MidTy = PointerType::get(DstElemTy, SrcScalarTy->getAddressSpace()); if (VectorType *VT = dyn_cast<VectorType>(DstTy)) { // Handle vectors of pointers.
- MidTy = VectorType::get(MidTy, VT->getNumElements()); + MidTy = FixedVectorType::get(MidTy, VT->getNumElements()); } C = getBitCast(C, MidTy); } @@ -2742,32 +2742,32 @@ Constant *ConstantDataArray::getString(LLVMContext &Context, /// count and element type matching the ArrayRef passed in. Note that this /// can return a ConstantAggregateZero object. Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint8_t> Elts){ - Type *Ty = VectorType::get(Type::getInt8Ty(Context), Elts.size()); + auto *Ty = FixedVectorType::get(Type::getInt8Ty(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); return getImpl(StringRef(Data, Elts.size() * 1), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){ - Type *Ty = VectorType::get(Type::getInt16Ty(Context), Elts.size()); + auto *Ty = FixedVectorType::get(Type::getInt16Ty(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); return getImpl(StringRef(Data, Elts.size() * 2), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){ - Type *Ty = VectorType::get(Type::getInt32Ty(Context), Elts.size()); + auto *Ty = FixedVectorType::get(Type::getInt32Ty(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); return getImpl(StringRef(Data, Elts.size() * 4), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint64_t> Elts) { - Type *Ty = VectorType::get(Type::getInt64Ty(Context), Elts.size()); + auto *Ty = FixedVectorType::get(Type::getInt64Ty(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); return getImpl(StringRef(Data, Elts.size() * 8), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<float> Elts) { - Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size()); + auto *Ty = FixedVectorType::get(Type::getFloatTy(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); return getImpl(StringRef(Data, Elts.size() * 4), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<double> Elts) { - Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size()); + auto *Ty = FixedVectorType::get(Type::getDoubleTy(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); return getImpl(StringRef(Data, Elts.size() * 8), Ty); } @@ -2782,14 +2782,14 @@ Constant *ConstantDataVector::getFP(Type *ElementType, ArrayRef<uint16_t> Elts) { assert((ElementType->isHalfTy() || ElementType->isBFloatTy()) && "Element type is not a 16-bit float type"); - Type *Ty = VectorType::get(ElementType, Elts.size()); + auto *Ty = FixedVectorType::get(ElementType, Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); return getImpl(StringRef(Data, Elts.size() * 2), Ty); } Constant *ConstantDataVector::getFP(Type *ElementType, ArrayRef<uint32_t> Elts) { assert(ElementType->isFloatTy() && "Element type is not a 32-bit float type"); - Type *Ty = VectorType::get(ElementType, Elts.size()); + auto *Ty = FixedVectorType::get(ElementType, Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); return getImpl(StringRef(Data, Elts.size() * 4), Ty); } @@ -2797,7 +2797,7 @@ Constant *ConstantDataVector::getFP(Type *ElementType, ArrayRef<uint64_t> Elts) { assert(ElementType->isDoubleTy() && "Element type is not a 64-bit float type"); - Type *Ty = VectorType::get(ElementType, Elts.size()); + auto *Ty = FixedVectorType::get(ElementType, Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); return getImpl(StringRef(Data, Elts.size() * 8), Ty); } diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index dbaf62251ce35a..52755f820b662c 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -756,7 +756,7 @@ LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace) { } LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount) { - return wrap(VectorType::get(unwrap(ElementType), ElementCount)); + return wrap(FixedVectorType::get(unwrap(ElementType), ElementCount)); } LLVMTypeRef LLVMGetElementType(LLVMTypeRef WrappedTy) { diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index 87563d988ed79a..0fe3199b9b49c5 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -792,7 +792,7 @@ Type *DataLayout::getIntPtrType(Type *Ty) const { unsigned NumBits = getPointerTypeSizeInBits(Ty); IntegerType *IntTy = IntegerType::get(Ty->getContext(), NumBits); if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) - return VectorType::get(IntTy, VecTy->getNumElements()); + return FixedVectorType::get(IntTy, VecTy->getNumElements()); return IntTy; } @@ -814,7 +814,7 @@ Type *DataLayout::getIndexType(Type *Ty) const { unsigned NumBits = getIndexTypeSizeInBits(Ty); IntegerType *IntTy = IntegerType::get(Ty->getContext(), NumBits); if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) - return VectorType::get(IntTy, VecTy->getNumElements()); + return FixedVectorType::get(IntTy, VecTy->getNumElements()); return IntTy; } diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index ef75d5c732163b..883f00fb75c658 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -525,11 +525,11 @@ CallInst *IRBuilderBase::CreateMaskedGather(Value *Ptrs, Align Alignment, auto PtrsTy = cast<VectorType>(Ptrs->getType()); auto PtrTy = cast<PointerType>(PtrsTy->getElementType()); unsigned NumElts = PtrsTy->getNumElements(); - Type *DataTy = VectorType::get(PtrTy->getElementType(), NumElts); + auto *DataTy = FixedVectorType::get(PtrTy->getElementType(), NumElts); if (!Mask) - Mask = Constant::getAllOnesValue(VectorType::get(Type::getInt1Ty(Context), - NumElts)); + Mask = Constant::getAllOnesValue( + FixedVectorType::get(Type::getInt1Ty(Context), NumElts)); if (!PassThru) PassThru = UndefValue::get(DataTy); @@ -564,8 +564,8 @@ CallInst *IRBuilderBase::CreateMaskedScatter(Value *Data, Value *Ptrs, #endif if (!Mask) - Mask = Constant::getAllOnesValue(VectorType::get(Type::getInt1Ty(Context), - NumElts)); + Mask = Constant::getAllOnesValue( + FixedVectorType::get(Type::getInt1Ty(Context), NumElts)); Type *OverloadedTypes[] = {DataTy, PtrsTy}; Value *Ops[] = {Data, Ptrs, getInt32(Alignment.value()), Mask}; @@ -994,12 +994,13 @@ Value *IRBuilderBase::CreateVectorSplat(unsigned NumElts, Value *V, // First insert it into an undef vector so we can shuffle it. Type *I32Ty = getInt32Ty(); - Value *Undef = UndefValue::get(VectorType::get(V->getType(), NumElts)); + Value *Undef = UndefValue::get(FixedVectorType::get(V->getType(), NumElts)); V = CreateInsertElement(Undef, V, ConstantInt::get(I32Ty, 0), Name + ".splatinsert"); // Shuffle the value across the desired number of elements.
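// (Illustration, not from the patch: CreateVectorSplat builds IR of the form
//   %ins   = insertelement <N x T> undef, T %v, i32 0
//   %splat = shufflevector <N x T> %ins, <N x T> undef, <N x i32> zeroinitializer
// The all-zero mask replicates lane 0 into every result lane, which is why an
// all-zeros mask vector is constructed below.)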
- Value *Zeros = ConstantAggregateZero::get(VectorType::get(I32Ty, NumElts)); + Value *Zeros = + ConstantAggregateZero::get(FixedVectorType::get(I32Ty, NumElts)); return CreateShuffleVector(V, Undef, Zeros, Name + ".splat"); } diff --git a/llvm/lib/Target/WebAssembly/CMakeLists.txt b/llvm/lib/Target/WebAssembly/CMakeLists.txt index 04742424fde594..b730172715d93e 100644 --- a/llvm/lib/Target/WebAssembly/CMakeLists.txt +++ b/llvm/lib/Target/WebAssembly/CMakeLists.txt @@ -24,6 +24,7 @@ add_llvm_target(WebAssemblyCodeGen WebAssemblyExceptionInfo.cpp WebAssemblyExplicitLocals.cpp WebAssemblyFastISel.cpp + WebAssemblyFixBrTableDefaults.cpp WebAssemblyFixIrreducibleControlFlow.cpp WebAssemblyFixFunctionBitcasts.cpp WebAssemblyFrameLowering.cpp diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 79988b0f539315..971b50ee557897 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -459,6 +459,18 @@ inline bool isCallIndirect(unsigned Opc) { } } +inline bool isBrTable(const MachineInstr &MI) { + switch (MI.getOpcode()) { + case WebAssembly::BR_TABLE_I32: + case WebAssembly::BR_TABLE_I32_S: + case WebAssembly::BR_TABLE_I64: + case WebAssembly::BR_TABLE_I64_S: + return true; + default: + return false; + } +} + inline bool isMarker(unsigned Opc) { switch (Opc) { case WebAssembly::BLOCK: diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.h b/llvm/lib/Target/WebAssembly/WebAssembly.h index f36ca7d6e22798..9ce02f7731e087 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.h +++ b/llvm/lib/Target/WebAssembly/WebAssembly.h @@ -44,6 +44,7 @@ FunctionPass *createWebAssemblyOptimizeLiveIntervals(); FunctionPass *createWebAssemblyMemIntrinsicResults(); FunctionPass *createWebAssemblyRegStackify(); FunctionPass *createWebAssemblyRegColoring(); +FunctionPass *createWebAssemblyFixBrTableDefaults(); FunctionPass *createWebAssemblyFixIrreducibleControlFlow(); FunctionPass *createWebAssemblyLateEHPrepare(); FunctionPass *createWebAssemblyCFGSort(); @@ -68,6 +69,7 @@ void initializeWebAssemblyOptimizeLiveIntervalsPass(PassRegistry &); void initializeWebAssemblyMemIntrinsicResultsPass(PassRegistry &); void initializeWebAssemblyRegStackifyPass(PassRegistry &); void initializeWebAssemblyRegColoringPass(PassRegistry &); +void initializeWebAssemblyFixBrTableDefaultsPass(PassRegistry &); void initializeWebAssemblyFixIrreducibleControlFlowPass(PassRegistry &); void initializeWebAssemblyLateEHPreparePass(PassRegistry &); void initializeWebAssemblyExceptionInfoPass(PassRegistry &); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp new file mode 100644 index 00000000000000..25b37db1c4c8b1 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp @@ -0,0 +1,129 @@ +//=- WebAssemblyFixBrTableDefaults.cpp - Fix br_table default branch targets -// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file This file implements a pass that eliminates redundant range checks +/// guarding br_table instructions. 
Since jump tables on most targets cannot +/// handle out of range indices, LLVM emits these checks before most jump +/// tables. But br_table takes a default branch target as an argument, so it +/// does not need the range checks. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Pass.h" + +using namespace llvm; + +#define DEBUG_TYPE "wasm-fix-br-table-defaults" + +namespace { + +class WebAssemblyFixBrTableDefaults final : public MachineFunctionPass { + StringRef getPassName() const override { + return "WebAssembly Fix br_table Defaults"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyFixBrTableDefaults() : MachineFunctionPass(ID) {} +}; + +char WebAssemblyFixBrTableDefaults::ID = 0; + +// `MI` is a br_table instruction missing its default target argument. This +// function finds and adds the default target argument and removes any redundant +// range check preceding the br_table. +MachineBasicBlock *fixBrTable(MachineInstr &MI, MachineBasicBlock *MBB, + MachineFunction &MF) { + // Get the header block, which contains the redundant range check. + assert(MBB->pred_size() == 1 && "Expected a single guard predecessor"); + auto *HeaderMBB = *MBB->pred_begin(); + + // Find the conditional jump to the default target. If it doesn't exist, the + // default target is unreachable anyway, so we can choose anything. + auto JumpMII = --HeaderMBB->end(); + while (JumpMII->getOpcode() != WebAssembly::BR_IF && + JumpMII != HeaderMBB->begin()) { + --JumpMII; + } + if (JumpMII->getOpcode() == WebAssembly::BR_IF) { + // Install the default target and remove the jumps in the header. + auto *DefaultMBB = JumpMII->getOperand(0).getMBB(); + assert(DefaultMBB != MBB && "Expected conditional jump to default target"); + MI.addOperand(MF, MachineOperand::CreateMBB(DefaultMBB)); + HeaderMBB->erase(JumpMII, HeaderMBB->end()); + } else { + // Arbitrarily choose the first jump target as the default. + auto *SomeMBB = MI.getOperand(1).getMBB(); + MI.addOperand(MachineOperand::CreateMBB(SomeMBB)); + } + + // Splice the jump table into the header. + HeaderMBB->splice(HeaderMBB->end(), MBB, MBB->begin(), MBB->end()); + + // Update CFG to skip the old jump table block. Remove shared successors + // before transferring to avoid duplicated successors. 
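// (Expanded rationale, inferred from the code rather than stated in the patch
// notes: transferSuccessorsAndUpdatePHIs appends all of MBB's successors to
// HeaderMBB, so any successor the two blocks already share would appear in
// HeaderMBB's successor list twice unless it is removed first.)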
+ HeaderMBB->removeSuccessor(MBB); + for (auto &Succ : MBB->successors()) + if (HeaderMBB->isSuccessor(Succ)) + HeaderMBB->removeSuccessor(Succ); + HeaderMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // Remove the old jump table block from the function + MF.erase(MBB); + + return HeaderMBB; +} + +bool WebAssemblyFixBrTableDefaults::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "********** Fixing br_table Default Targets **********\n" + "********** Function: " + << MF.getName() << '\n'); + + bool Changed = false; + SmallPtrSet<MachineBasicBlock *, 16> MBBSet; + for (auto &MBB : MF) + MBBSet.insert(&MBB); + + while (!MBBSet.empty()) { + MachineBasicBlock *MBB = *MBBSet.begin(); + MBBSet.erase(MBB); + for (auto &MI : *MBB) { + if (WebAssembly::isBrTable(MI)) { + auto *Fixed = fixBrTable(MI, MBB, MF); + MBBSet.erase(Fixed); + Changed = true; + break; + } + } + } + + if (Changed) { + // We rewrote part of the function; recompute relevant things. + MF.RenumberBlocks(); + return true; + } + + return false; +} + +} // end anonymous namespace + +INITIALIZE_PASS(WebAssemblyFixBrTableDefaults, DEBUG_TYPE, + "Removes range checks and sets br_table default targets", false, + false); + +FunctionPass *llvm::createWebAssemblyFixBrTableDefaults() { + return new WebAssemblyFixBrTableDefaults(); +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 1b85ecd91a6445..83e4ed737b90f9 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -1279,11 +1279,8 @@ SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op, for (auto MBB : MBBs) Ops.push_back(DAG.getBasicBlock(MBB)); - // TODO: For now, we just pick something arbitrary for a default case for now. - // We really want to sniff out the guard and put in the real default case (and - // delete the guard). - Ops.push_back(DAG.getBasicBlock(MBBs[0])); - + // Do not add the default case for now. It will be added in + // WebAssemblyFixBrTableDefaults. return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index ca098427980f1a..7bf655c925a45d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -406,6 +406,10 @@ bool WebAssemblyPassConfig::addInstSelector() { // it's inconvenient to collect. Collect it now, and update the immediate // operands. addPass(createWebAssemblySetP2AlignOperands()); + + // Eliminate range checks and add default targets to br_table instructions.
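+ // (Placement note, an inference from the surrounding code: the pass runs
+ // immediately after instruction selection, while each br_table still sits
+ // in its own block behind the single guard predecessor that fixBrTable
+ // asserts on, and before later CFG passes reshape the blocks.)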
+ addPass(createWebAssemblyFixBrTableDefaults()); + return false; } diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll index 392af063eb8a02..7c546936ba27a9 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll @@ -4,8 +4,8 @@ define i32 @fptosi_wh(half %a) nounwind ssp { entry: ; CHECK-LABEL: fptosi_wh -; CHECK: fcvt s1, h0 -; CHECK: fcvtzs [[REG:w[0-9]+]], s1 +; CHECK: fcvt s0, h0 +; CHECK: fcvtzs [[REG:w[0-9]+]], s0 ; CHECK: mov w0, [[REG]] %conv = fptosi half %a to i32 ret i32 %conv @@ -15,8 +15,8 @@ entry: define i32 @fptoui_swh(half %a) nounwind ssp { entry: ; CHECK-LABEL: fptoui_swh -; CHECK: fcvt s1, h0 -; CHECK: fcvtzu [[REG:w[0-9]+]], s1 +; CHECK: fcvt s0, h0 +; CHECK: fcvtzu [[REG:w[0-9]+]], s0 ; CHECK: mov w0, [[REG]] %conv = fptoui half %a to i32 ret i32 %conv diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll index ed03aec07e7da8..d8abf14c1366b5 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll @@ -54,8 +54,8 @@ entry: ; CHECK: ldrh w8, [sp, #12] ; CHECK: str w8, [sp, #8] ; CHECK: ldr w8, [sp, #8] -; CHECK: mov x9, x8 -; CHECK: str x9, [sp] +; CHECK: ; kill: def $x8 killed $w8 +; CHECK: str x8, [sp] ; CHECK: ldr x0, [sp] ; CHECK: ret %a.addr = alloca i8, align 1 @@ -109,8 +109,8 @@ entry: ; CHECK: strh w8, [sp, #12] ; CHECK: ldrsh w8, [sp, #12] ; CHECK: str w8, [sp, #8] -; CHECK: ldrsw x9, [sp, #8] -; CHECK: str x9, [sp] +; CHECK: ldrsw x8, [sp, #8] +; CHECK: str x8, [sp] ; CHECK: ldr x0, [sp] ; CHECK: ret %a.addr = alloca i8, align 1 diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll index 6b3e8d747d43d1..e1e889b906c01c 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -285,11 +285,11 @@ define i16 @to_half(float %in) { ; FAST: // %bb.0: ; FAST-NEXT: sub sp, sp, #16 // =16 ; FAST-NEXT: .cfi_def_cfa_offset 16 -; FAST-NEXT: fcvt h1, s0 +; FAST-NEXT: fcvt h0, s0 ; FAST-NEXT: // implicit-def: $w0 -; FAST-NEXT: fmov s0, w0 -; FAST-NEXT: mov.16b v0, v1 -; FAST-NEXT: fmov w8, s0 +; FAST-NEXT: fmov s1, w0 +; FAST-NEXT: mov.16b v1, v0 +; FAST-NEXT: fmov w8, s1 ; FAST-NEXT: mov w0, w8 ; FAST-NEXT: str w0, [sp, #12] // 4-byte Folded Spill ; FAST-NEXT: mov w0, w8 diff --git a/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll b/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll index 8d62fb3556661e..22e3ccf2b12095 100644 --- a/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll +++ b/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll @@ -15,8 +15,7 @@ ; CHECK-LABEL: foo: ; CHECK: sub ; CHECK-DAG: mov x[[SP:[0-9]+]], sp -; CHECK-DAG: mov [[TMP:w[0-9]+]], #4104 -; CHECK: mov w[[OFFSET:[0-9]+]], [[TMP]] +; CHECK-DAG: mov w[[OFFSET:[0-9]+]], #4104 ; CHECK: strb w0, [x[[SP]], x[[OFFSET]]] define void @foo(i8 %in) { diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll index 40ef3b00da6d44..e26b1c94710490 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll @@ -69,15 +69,15 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) { ; GCN: renamable $vgpr30 = COPY killed renamable $vgpr14 ; GCN: renamable 
$vgpr31 = COPY killed renamable $vgpr15 ; GCN: renamable $vgpr32 = COPY killed renamable $vgpr16 - ; GCN: renamable $sgpr20_sgpr21 = S_MOV_B64 $exec + ; GCN: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN: renamable $vgpr1 = IMPLICIT_DEF - ; GCN: renamable $sgpr22_sgpr23 = IMPLICIT_DEF + ; GCN: renamable $sgpr2_sgpr3 = IMPLICIT_DEF ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) ; GCN: SI_SPILL_S128_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 16 into %stack.1, align 4, addrspace 5) ; GCN: SI_SPILL_V512_SAVE killed $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 64 into %stack.2, align 4, addrspace 5) - ; GCN: SI_SPILL_S64_SAVE killed $sgpr20_sgpr21, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5) + ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5) ; GCN: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) - ; GCN: SI_SPILL_S64_SAVE killed $sgpr22_sgpr23, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) + ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.5, align 4, addrspace 5) @@ -91,8 +91,8 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) { ; GCN: renamable $vgpr18 = V_MOV_B32_e32 undef $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0 ; GCN: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode ; GCN: renamable $vgpr19 = COPY renamable $vgpr18 - ; GCN: renamable $sgpr6_sgpr7 = COPY renamable $sgpr4_sgpr5 - ; GCN: SI_SPILL_S64_SAVE killed $sgpr6_sgpr7, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) + ; GCN: renamable $sgpr2_sgpr3 = COPY renamable $sgpr4_sgpr5 + ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.6, align 4, addrspace 5) ; GCN: SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll index 
e991c550c6be03..b119ffd303e08f 100644 --- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -11,7 +11,7 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, i32 %in) #0 { ; GCN-LABEL: spill_sgprs_to_multiple_vgprs: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dword s2, s[0:1], 0xb +; GCN-NEXT: s_load_dword s0, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND @@ -42,352 +42,354 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[84:91] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 0 -; GCN-NEXT: v_writelane_b32 v0, s5, 1 -; GCN-NEXT: v_writelane_b32 v0, s6, 2 -; GCN-NEXT: v_writelane_b32 v0, s7, 3 -; GCN-NEXT: v_writelane_b32 v0, s8, 4 -; GCN-NEXT: v_writelane_b32 v0, s9, 5 -; GCN-NEXT: v_writelane_b32 v0, s10, 6 -; GCN-NEXT: v_writelane_b32 v0, s11, 7 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:11] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 8 -; GCN-NEXT: v_writelane_b32 v0, s5, 9 -; GCN-NEXT: v_writelane_b32 v0, s6, 10 -; GCN-NEXT: v_writelane_b32 v0, s7, 11 -; GCN-NEXT: v_writelane_b32 v0, s8, 12 -; GCN-NEXT: v_writelane_b32 v0, s9, 13 -; GCN-NEXT: v_writelane_b32 v0, s10, 14 -; GCN-NEXT: v_writelane_b32 v0, s11, 15 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:11] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 16 -; GCN-NEXT: v_writelane_b32 v0, s5, 17 -; GCN-NEXT: v_writelane_b32 v0, s6, 18 -; GCN-NEXT: v_writelane_b32 v0, s7, 19 -; GCN-NEXT: v_writelane_b32 v0, s8, 20 -; GCN-NEXT: v_writelane_b32 v0, s9, 21 -; GCN-NEXT: v_writelane_b32 v0, s10, 22 -; GCN-NEXT: v_writelane_b32 v0, s11, 23 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:11] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 24 -; GCN-NEXT: v_writelane_b32 v0, s5, 25 -; GCN-NEXT: v_writelane_b32 v0, s6, 26 -; GCN-NEXT: v_writelane_b32 v0, s7, 27 -; GCN-NEXT: v_writelane_b32 v0, s8, 28 -; GCN-NEXT: v_writelane_b32 v0, s9, 29 -; GCN-NEXT: v_writelane_b32 v0, s10, 30 -; GCN-NEXT: v_writelane_b32 v0, s11, 31 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:11] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 32 -; GCN-NEXT: v_writelane_b32 v0, s5, 33 -; GCN-NEXT: v_writelane_b32 v0, s6, 34 -; GCN-NEXT: v_writelane_b32 v0, s7, 35 -; GCN-NEXT: v_writelane_b32 v0, s8, 36 -; GCN-NEXT: v_writelane_b32 v0, s9, 37 -; GCN-NEXT: v_writelane_b32 v0, s10, 38 -; GCN-NEXT: v_writelane_b32 v0, s11, 39 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:11] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 40 -; GCN-NEXT: v_writelane_b32 v0, s5, 41 -; GCN-NEXT: v_writelane_b32 v0, s6, 42 -; GCN-NEXT: v_writelane_b32 v0, s7, 43 -; GCN-NEXT: v_writelane_b32 v0, s8, 44 -; GCN-NEXT: v_writelane_b32 v0, s9, 45 -; GCN-NEXT: v_writelane_b32 v0, s10, 46 -; GCN-NEXT: v_writelane_b32 v0, s11, 47 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:11] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 48 -; GCN-NEXT: v_writelane_b32 v0, s5, 49 -; GCN-NEXT: v_writelane_b32 v0, s6, 50 -; GCN-NEXT: v_writelane_b32 v0, s7, 51 -; GCN-NEXT: v_writelane_b32 v0, s8, 52 -; GCN-NEXT: v_writelane_b32 v0, s9, 53 -; GCN-NEXT: v_writelane_b32 v0, s10, 54 -; GCN-NEXT: v_writelane_b32 v0, s11, 55 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:11] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s3, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_cmp_lg_u32 s2, s3 -; GCN-NEXT: 
v_writelane_b32 v0, s12, 56 -; GCN-NEXT: v_writelane_b32 v0, s13, 57 -; GCN-NEXT: v_writelane_b32 v0, s14, 58 -; GCN-NEXT: v_writelane_b32 v0, s15, 59 -; GCN-NEXT: v_writelane_b32 v0, s16, 60 -; GCN-NEXT: v_writelane_b32 v0, s17, 61 -; GCN-NEXT: v_writelane_b32 v0, s18, 62 -; GCN-NEXT: v_writelane_b32 v0, s19, 63 -; GCN-NEXT: v_writelane_b32 v1, s20, 0 -; GCN-NEXT: v_writelane_b32 v1, s21, 1 -; GCN-NEXT: v_writelane_b32 v1, s22, 2 -; GCN-NEXT: v_writelane_b32 v1, s23, 3 -; GCN-NEXT: v_writelane_b32 v1, s24, 4 -; GCN-NEXT: v_writelane_b32 v1, s25, 5 -; GCN-NEXT: v_writelane_b32 v1, s26, 6 -; GCN-NEXT: v_writelane_b32 v1, s27, 7 -; GCN-NEXT: v_writelane_b32 v1, s36, 8 -; GCN-NEXT: v_writelane_b32 v1, s37, 9 -; GCN-NEXT: v_writelane_b32 v1, s38, 10 -; GCN-NEXT: v_writelane_b32 v1, s39, 11 -; GCN-NEXT: v_writelane_b32 v1, s40, 12 -; GCN-NEXT: v_writelane_b32 v1, s41, 13 -; GCN-NEXT: v_writelane_b32 v1, s42, 14 -; GCN-NEXT: v_writelane_b32 v1, s43, 15 -; GCN-NEXT: v_writelane_b32 v1, s44, 16 -; GCN-NEXT: v_writelane_b32 v1, s45, 17 -; GCN-NEXT: v_writelane_b32 v1, s46, 18 -; GCN-NEXT: v_writelane_b32 v1, s47, 19 -; GCN-NEXT: v_writelane_b32 v1, s48, 20 -; GCN-NEXT: v_writelane_b32 v1, s49, 21 -; GCN-NEXT: v_writelane_b32 v1, s50, 22 -; GCN-NEXT: v_writelane_b32 v1, s51, 23 -; GCN-NEXT: v_writelane_b32 v1, s52, 24 -; GCN-NEXT: v_writelane_b32 v1, s53, 25 -; GCN-NEXT: v_writelane_b32 v1, s54, 26 -; GCN-NEXT: v_writelane_b32 v1, s55, 27 -; GCN-NEXT: v_writelane_b32 v1, s56, 28 -; GCN-NEXT: v_writelane_b32 v1, s57, 29 -; GCN-NEXT: v_writelane_b32 v1, s58, 30 -; GCN-NEXT: v_writelane_b32 v1, s59, 31 -; GCN-NEXT: v_writelane_b32 v1, s60, 32 -; GCN-NEXT: v_writelane_b32 v1, s61, 33 -; GCN-NEXT: v_writelane_b32 v1, s62, 34 -; GCN-NEXT: v_writelane_b32 v1, s63, 35 -; GCN-NEXT: v_writelane_b32 v1, s64, 36 -; GCN-NEXT: v_writelane_b32 v1, s65, 37 -; GCN-NEXT: v_writelane_b32 v1, s66, 38 -; GCN-NEXT: v_writelane_b32 v1, s67, 39 -; GCN-NEXT: v_writelane_b32 v1, s68, 40 -; GCN-NEXT: v_writelane_b32 v1, s69, 41 -; GCN-NEXT: v_writelane_b32 v1, s70, 42 -; GCN-NEXT: v_writelane_b32 v1, s71, 43 -; GCN-NEXT: v_writelane_b32 v1, s72, 44 -; GCN-NEXT: v_writelane_b32 v1, s73, 45 -; GCN-NEXT: v_writelane_b32 v1, s74, 46 -; GCN-NEXT: v_writelane_b32 v1, s75, 47 -; GCN-NEXT: v_writelane_b32 v1, s76, 48 -; GCN-NEXT: v_writelane_b32 v1, s77, 49 -; GCN-NEXT: v_writelane_b32 v1, s78, 50 -; GCN-NEXT: v_writelane_b32 v1, s79, 51 -; GCN-NEXT: v_writelane_b32 v1, s80, 52 -; GCN-NEXT: v_writelane_b32 v1, s81, 53 -; GCN-NEXT: v_writelane_b32 v1, s82, 54 -; GCN-NEXT: v_writelane_b32 v1, s83, 55 -; GCN-NEXT: v_writelane_b32 v1, s84, 56 -; GCN-NEXT: v_writelane_b32 v1, s85, 57 -; GCN-NEXT: v_writelane_b32 v1, s86, 58 -; GCN-NEXT: v_writelane_b32 v1, s87, 59 -; GCN-NEXT: v_writelane_b32 v1, s88, 60 -; GCN-NEXT: v_writelane_b32 v1, s89, 61 -; GCN-NEXT: v_writelane_b32 v1, s90, 62 -; GCN-NEXT: v_writelane_b32 v1, s91, 63 -; GCN-NEXT: v_writelane_b32 v2, s4, 0 -; GCN-NEXT: v_writelane_b32 v2, s5, 1 -; GCN-NEXT: v_writelane_b32 v2, s6, 2 -; GCN-NEXT: v_writelane_b32 v2, s7, 3 -; GCN-NEXT: v_writelane_b32 v2, s8, 4 -; GCN-NEXT: v_writelane_b32 v2, s9, 5 -; GCN-NEXT: v_writelane_b32 v2, s10, 6 -; GCN-NEXT: v_writelane_b32 v2, s11, 7 +; GCN-NEXT: v_writelane_b32 v0, s0, 0 +; GCN-NEXT: v_writelane_b32 v0, s4, 1 +; GCN-NEXT: v_writelane_b32 v0, s5, 2 +; GCN-NEXT: v_writelane_b32 v0, s6, 3 +; GCN-NEXT: v_writelane_b32 v0, s7, 4 +; GCN-NEXT: v_writelane_b32 v0, s8, 5 +; GCN-NEXT: v_writelane_b32 v0, s9, 6 +; GCN-NEXT: v_writelane_b32 v0, s10, 
7 +; GCN-NEXT: v_writelane_b32 v0, s11, 8 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s0, 9 +; GCN-NEXT: v_writelane_b32 v0, s1, 10 +; GCN-NEXT: v_writelane_b32 v0, s2, 11 +; GCN-NEXT: v_writelane_b32 v0, s3, 12 +; GCN-NEXT: v_writelane_b32 v0, s4, 13 +; GCN-NEXT: v_writelane_b32 v0, s5, 14 +; GCN-NEXT: v_writelane_b32 v0, s6, 15 +; GCN-NEXT: v_writelane_b32 v0, s7, 16 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s0, 17 +; GCN-NEXT: v_writelane_b32 v0, s1, 18 +; GCN-NEXT: v_writelane_b32 v0, s2, 19 +; GCN-NEXT: v_writelane_b32 v0, s3, 20 +; GCN-NEXT: v_writelane_b32 v0, s4, 21 +; GCN-NEXT: v_writelane_b32 v0, s5, 22 +; GCN-NEXT: v_writelane_b32 v0, s6, 23 +; GCN-NEXT: v_writelane_b32 v0, s7, 24 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s0, 25 +; GCN-NEXT: v_writelane_b32 v0, s1, 26 +; GCN-NEXT: v_writelane_b32 v0, s2, 27 +; GCN-NEXT: v_writelane_b32 v0, s3, 28 +; GCN-NEXT: v_writelane_b32 v0, s4, 29 +; GCN-NEXT: v_writelane_b32 v0, s5, 30 +; GCN-NEXT: v_writelane_b32 v0, s6, 31 +; GCN-NEXT: v_writelane_b32 v0, s7, 32 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s0, 33 +; GCN-NEXT: v_writelane_b32 v0, s1, 34 +; GCN-NEXT: v_writelane_b32 v0, s2, 35 +; GCN-NEXT: v_writelane_b32 v0, s3, 36 +; GCN-NEXT: v_writelane_b32 v0, s4, 37 +; GCN-NEXT: v_writelane_b32 v0, s5, 38 +; GCN-NEXT: v_writelane_b32 v0, s6, 39 +; GCN-NEXT: v_writelane_b32 v0, s7, 40 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s0, 41 +; GCN-NEXT: v_writelane_b32 v0, s1, 42 +; GCN-NEXT: v_writelane_b32 v0, s2, 43 +; GCN-NEXT: v_writelane_b32 v0, s3, 44 +; GCN-NEXT: v_writelane_b32 v0, s4, 45 +; GCN-NEXT: v_writelane_b32 v0, s5, 46 +; GCN-NEXT: v_writelane_b32 v0, s6, 47 +; GCN-NEXT: v_writelane_b32 v0, s7, 48 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s0, 49 +; GCN-NEXT: v_writelane_b32 v0, s1, 50 +; GCN-NEXT: v_writelane_b32 v0, s2, 51 +; GCN-NEXT: v_writelane_b32 v0, s3, 52 +; GCN-NEXT: v_writelane_b32 v0, s4, 53 +; GCN-NEXT: v_writelane_b32 v0, s5, 54 +; GCN-NEXT: v_writelane_b32 v0, s6, 55 +; GCN-NEXT: v_writelane_b32 v0, s7, 56 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_mov_b32 s8, 0 +; GCN-NEXT: v_readlane_b32 s9, v0, 0 +; GCN-NEXT: s_cmp_lg_u32 s9, s8 +; GCN-NEXT: v_writelane_b32 v0, s12, 57 +; GCN-NEXT: v_writelane_b32 v0, s13, 58 +; GCN-NEXT: v_writelane_b32 v0, s14, 59 +; GCN-NEXT: v_writelane_b32 v0, s15, 60 +; GCN-NEXT: v_writelane_b32 v0, s16, 61 +; GCN-NEXT: v_writelane_b32 v0, s17, 62 +; GCN-NEXT: v_writelane_b32 v0, s18, 63 +; GCN-NEXT: v_writelane_b32 v1, s19, 0 +; GCN-NEXT: v_writelane_b32 v1, s20, 1 +; GCN-NEXT: v_writelane_b32 v1, s21, 2 +; GCN-NEXT: v_writelane_b32 v1, s22, 3 +; GCN-NEXT: v_writelane_b32 v1, s23, 4 +; GCN-NEXT: v_writelane_b32 v1, s24, 5 +; GCN-NEXT: v_writelane_b32 v1, s25, 6 +; GCN-NEXT: v_writelane_b32 v1, s26, 7 +; GCN-NEXT: v_writelane_b32 v1, s27, 8 +; GCN-NEXT: v_writelane_b32 v1, s36, 9 +; GCN-NEXT: v_writelane_b32 v1, s37, 10 +; GCN-NEXT: v_writelane_b32 v1, s38, 11 +; GCN-NEXT: v_writelane_b32 v1, s39, 12 +; GCN-NEXT: v_writelane_b32 v1, s40, 13 +; GCN-NEXT: v_writelane_b32 v1, s41, 14 +; GCN-NEXT: v_writelane_b32 v1, s42, 15 +; GCN-NEXT: v_writelane_b32 v1, s43, 16 
+; GCN-NEXT: v_writelane_b32 v1, s44, 17 +; GCN-NEXT: v_writelane_b32 v1, s45, 18 +; GCN-NEXT: v_writelane_b32 v1, s46, 19 +; GCN-NEXT: v_writelane_b32 v1, s47, 20 +; GCN-NEXT: v_writelane_b32 v1, s48, 21 +; GCN-NEXT: v_writelane_b32 v1, s49, 22 +; GCN-NEXT: v_writelane_b32 v1, s50, 23 +; GCN-NEXT: v_writelane_b32 v1, s51, 24 +; GCN-NEXT: v_writelane_b32 v1, s52, 25 +; GCN-NEXT: v_writelane_b32 v1, s53, 26 +; GCN-NEXT: v_writelane_b32 v1, s54, 27 +; GCN-NEXT: v_writelane_b32 v1, s55, 28 +; GCN-NEXT: v_writelane_b32 v1, s56, 29 +; GCN-NEXT: v_writelane_b32 v1, s57, 30 +; GCN-NEXT: v_writelane_b32 v1, s58, 31 +; GCN-NEXT: v_writelane_b32 v1, s59, 32 +; GCN-NEXT: v_writelane_b32 v1, s60, 33 +; GCN-NEXT: v_writelane_b32 v1, s61, 34 +; GCN-NEXT: v_writelane_b32 v1, s62, 35 +; GCN-NEXT: v_writelane_b32 v1, s63, 36 +; GCN-NEXT: v_writelane_b32 v1, s64, 37 +; GCN-NEXT: v_writelane_b32 v1, s65, 38 +; GCN-NEXT: v_writelane_b32 v1, s66, 39 +; GCN-NEXT: v_writelane_b32 v1, s67, 40 +; GCN-NEXT: v_writelane_b32 v1, s68, 41 +; GCN-NEXT: v_writelane_b32 v1, s69, 42 +; GCN-NEXT: v_writelane_b32 v1, s70, 43 +; GCN-NEXT: v_writelane_b32 v1, s71, 44 +; GCN-NEXT: v_writelane_b32 v1, s72, 45 +; GCN-NEXT: v_writelane_b32 v1, s73, 46 +; GCN-NEXT: v_writelane_b32 v1, s74, 47 +; GCN-NEXT: v_writelane_b32 v1, s75, 48 +; GCN-NEXT: v_writelane_b32 v1, s76, 49 +; GCN-NEXT: v_writelane_b32 v1, s77, 50 +; GCN-NEXT: v_writelane_b32 v1, s78, 51 +; GCN-NEXT: v_writelane_b32 v1, s79, 52 +; GCN-NEXT: v_writelane_b32 v1, s80, 53 +; GCN-NEXT: v_writelane_b32 v1, s81, 54 +; GCN-NEXT: v_writelane_b32 v1, s82, 55 +; GCN-NEXT: v_writelane_b32 v1, s83, 56 +; GCN-NEXT: v_writelane_b32 v1, s84, 57 +; GCN-NEXT: v_writelane_b32 v1, s85, 58 +; GCN-NEXT: v_writelane_b32 v1, s86, 59 +; GCN-NEXT: v_writelane_b32 v1, s87, 60 +; GCN-NEXT: v_writelane_b32 v1, s88, 61 +; GCN-NEXT: v_writelane_b32 v1, s89, 62 +; GCN-NEXT: v_writelane_b32 v1, s90, 63 +; GCN-NEXT: v_writelane_b32 v2, s91, 0 +; GCN-NEXT: v_writelane_b32 v2, s0, 1 +; GCN-NEXT: v_writelane_b32 v2, s1, 2 +; GCN-NEXT: v_writelane_b32 v2, s2, 3 +; GCN-NEXT: v_writelane_b32 v2, s3, 4 +; GCN-NEXT: v_writelane_b32 v2, s4, 5 +; GCN-NEXT: v_writelane_b32 v2, s5, 6 +; GCN-NEXT: v_writelane_b32 v2, s6, 7 +; GCN-NEXT: v_writelane_b32 v2, s7, 8 ; GCN-NEXT: s_cbranch_scc1 BB0_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-NEXT: v_readlane_b32 s2, v0, 2 -; GCN-NEXT: v_readlane_b32 s3, v0, 3 -; GCN-NEXT: v_readlane_b32 s4, v0, 4 -; GCN-NEXT: v_readlane_b32 s5, v0, 5 -; GCN-NEXT: v_readlane_b32 s6, v0, 6 -; GCN-NEXT: v_readlane_b32 s7, v0, 7 +; GCN-NEXT: v_readlane_b32 s0, v0, 1 +; GCN-NEXT: v_readlane_b32 s1, v0, 2 +; GCN-NEXT: v_readlane_b32 s2, v0, 3 +; GCN-NEXT: v_readlane_b32 s3, v0, 4 +; GCN-NEXT: v_readlane_b32 s4, v0, 5 +; GCN-NEXT: v_readlane_b32 s5, v0, 6 +; GCN-NEXT: v_readlane_b32 s6, v0, 7 +; GCN-NEXT: v_readlane_b32 s7, v0, 8 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 56 -; GCN-NEXT: v_readlane_b32 s1, v0, 57 -; GCN-NEXT: v_readlane_b32 s2, v0, 58 -; GCN-NEXT: v_readlane_b32 s3, v0, 59 -; GCN-NEXT: v_readlane_b32 s4, v0, 60 -; GCN-NEXT: v_readlane_b32 s5, v0, 61 -; GCN-NEXT: v_readlane_b32 s6, v0, 62 -; GCN-NEXT: v_readlane_b32 s7, v0, 63 +; GCN-NEXT: v_readlane_b32 s0, v0, 57 +; GCN-NEXT: v_readlane_b32 s1, v0, 58 +; GCN-NEXT: v_readlane_b32 s2, v0, 59 +; GCN-NEXT: v_readlane_b32 s3, v0, 60 +; GCN-NEXT: v_readlane_b32 s4, v0, 61 +; GCN-NEXT: v_readlane_b32 
s5, v0, 62 +; GCN-NEXT: v_readlane_b32 s6, v0, 63 +; GCN-NEXT: v_readlane_b32 s7, v1, 0 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 0 -; GCN-NEXT: v_readlane_b32 s1, v1, 1 -; GCN-NEXT: v_readlane_b32 s2, v1, 2 -; GCN-NEXT: v_readlane_b32 s3, v1, 3 -; GCN-NEXT: v_readlane_b32 s4, v1, 4 -; GCN-NEXT: v_readlane_b32 s5, v1, 5 -; GCN-NEXT: v_readlane_b32 s6, v1, 6 -; GCN-NEXT: v_readlane_b32 s7, v1, 7 +; GCN-NEXT: v_readlane_b32 s0, v1, 1 +; GCN-NEXT: v_readlane_b32 s1, v1, 2 +; GCN-NEXT: v_readlane_b32 s2, v1, 3 +; GCN-NEXT: v_readlane_b32 s3, v1, 4 +; GCN-NEXT: v_readlane_b32 s4, v1, 5 +; GCN-NEXT: v_readlane_b32 s5, v1, 6 +; GCN-NEXT: v_readlane_b32 s6, v1, 7 +; GCN-NEXT: v_readlane_b32 s7, v1, 8 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 8 -; GCN-NEXT: v_readlane_b32 s1, v1, 9 -; GCN-NEXT: v_readlane_b32 s2, v1, 10 -; GCN-NEXT: v_readlane_b32 s3, v1, 11 -; GCN-NEXT: v_readlane_b32 s4, v1, 12 -; GCN-NEXT: v_readlane_b32 s5, v1, 13 -; GCN-NEXT: v_readlane_b32 s6, v1, 14 -; GCN-NEXT: v_readlane_b32 s7, v1, 15 +; GCN-NEXT: v_readlane_b32 s0, v1, 9 +; GCN-NEXT: v_readlane_b32 s1, v1, 10 +; GCN-NEXT: v_readlane_b32 s2, v1, 11 +; GCN-NEXT: v_readlane_b32 s3, v1, 12 +; GCN-NEXT: v_readlane_b32 s4, v1, 13 +; GCN-NEXT: v_readlane_b32 s5, v1, 14 +; GCN-NEXT: v_readlane_b32 s6, v1, 15 +; GCN-NEXT: v_readlane_b32 s7, v1, 16 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 16 -; GCN-NEXT: v_readlane_b32 s1, v1, 17 -; GCN-NEXT: v_readlane_b32 s2, v1, 18 -; GCN-NEXT: v_readlane_b32 s3, v1, 19 -; GCN-NEXT: v_readlane_b32 s4, v1, 20 -; GCN-NEXT: v_readlane_b32 s5, v1, 21 -; GCN-NEXT: v_readlane_b32 s6, v1, 22 -; GCN-NEXT: v_readlane_b32 s7, v1, 23 +; GCN-NEXT: v_readlane_b32 s0, v1, 17 +; GCN-NEXT: v_readlane_b32 s1, v1, 18 +; GCN-NEXT: v_readlane_b32 s2, v1, 19 +; GCN-NEXT: v_readlane_b32 s3, v1, 20 +; GCN-NEXT: v_readlane_b32 s4, v1, 21 +; GCN-NEXT: v_readlane_b32 s5, v1, 22 +; GCN-NEXT: v_readlane_b32 s6, v1, 23 +; GCN-NEXT: v_readlane_b32 s7, v1, 24 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 24 -; GCN-NEXT: v_readlane_b32 s1, v1, 25 -; GCN-NEXT: v_readlane_b32 s2, v1, 26 -; GCN-NEXT: v_readlane_b32 s3, v1, 27 -; GCN-NEXT: v_readlane_b32 s4, v1, 28 -; GCN-NEXT: v_readlane_b32 s5, v1, 29 -; GCN-NEXT: v_readlane_b32 s6, v1, 30 -; GCN-NEXT: v_readlane_b32 s7, v1, 31 +; GCN-NEXT: v_readlane_b32 s0, v1, 25 +; GCN-NEXT: v_readlane_b32 s1, v1, 26 +; GCN-NEXT: v_readlane_b32 s2, v1, 27 +; GCN-NEXT: v_readlane_b32 s3, v1, 28 +; GCN-NEXT: v_readlane_b32 s4, v1, 29 +; GCN-NEXT: v_readlane_b32 s5, v1, 30 +; GCN-NEXT: v_readlane_b32 s6, v1, 31 +; GCN-NEXT: v_readlane_b32 s7, v1, 32 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 32 -; GCN-NEXT: v_readlane_b32 s1, v1, 33 -; GCN-NEXT: v_readlane_b32 s2, v1, 34 -; GCN-NEXT: v_readlane_b32 s3, v1, 35 -; GCN-NEXT: v_readlane_b32 s4, v1, 36 -; GCN-NEXT: v_readlane_b32 s5, v1, 37 -; GCN-NEXT: v_readlane_b32 s6, v1, 38 -; GCN-NEXT: v_readlane_b32 s7, v1, 39 +; GCN-NEXT: v_readlane_b32 s0, v1, 33 +; GCN-NEXT: v_readlane_b32 s1, v1, 34 +; GCN-NEXT: v_readlane_b32 s2, v1, 35 +; GCN-NEXT: v_readlane_b32 s3, v1, 36 +; GCN-NEXT: v_readlane_b32 s4, v1, 37 +; GCN-NEXT: v_readlane_b32 s5, v1, 38 +; GCN-NEXT: v_readlane_b32 s6, v1, 39 +; GCN-NEXT: v_readlane_b32 s7, v1, 40 ; 
GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 40 -; GCN-NEXT: v_readlane_b32 s1, v1, 41 -; GCN-NEXT: v_readlane_b32 s2, v1, 42 -; GCN-NEXT: v_readlane_b32 s3, v1, 43 -; GCN-NEXT: v_readlane_b32 s4, v1, 44 -; GCN-NEXT: v_readlane_b32 s5, v1, 45 -; GCN-NEXT: v_readlane_b32 s6, v1, 46 -; GCN-NEXT: v_readlane_b32 s7, v1, 47 +; GCN-NEXT: v_readlane_b32 s0, v1, 41 +; GCN-NEXT: v_readlane_b32 s1, v1, 42 +; GCN-NEXT: v_readlane_b32 s2, v1, 43 +; GCN-NEXT: v_readlane_b32 s3, v1, 44 +; GCN-NEXT: v_readlane_b32 s4, v1, 45 +; GCN-NEXT: v_readlane_b32 s5, v1, 46 +; GCN-NEXT: v_readlane_b32 s6, v1, 47 +; GCN-NEXT: v_readlane_b32 s7, v1, 48 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 48 -; GCN-NEXT: v_readlane_b32 s1, v1, 49 -; GCN-NEXT: v_readlane_b32 s2, v1, 50 -; GCN-NEXT: v_readlane_b32 s3, v1, 51 -; GCN-NEXT: v_readlane_b32 s4, v1, 52 -; GCN-NEXT: v_readlane_b32 s5, v1, 53 -; GCN-NEXT: v_readlane_b32 s6, v1, 54 -; GCN-NEXT: v_readlane_b32 s7, v1, 55 +; GCN-NEXT: v_readlane_b32 s0, v1, 49 +; GCN-NEXT: v_readlane_b32 s1, v1, 50 +; GCN-NEXT: v_readlane_b32 s2, v1, 51 +; GCN-NEXT: v_readlane_b32 s3, v1, 52 +; GCN-NEXT: v_readlane_b32 s4, v1, 53 +; GCN-NEXT: v_readlane_b32 s5, v1, 54 +; GCN-NEXT: v_readlane_b32 s6, v1, 55 +; GCN-NEXT: v_readlane_b32 s7, v1, 56 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 56 -; GCN-NEXT: v_readlane_b32 s1, v1, 57 -; GCN-NEXT: v_readlane_b32 s2, v1, 58 -; GCN-NEXT: v_readlane_b32 s3, v1, 59 -; GCN-NEXT: v_readlane_b32 s4, v1, 60 -; GCN-NEXT: v_readlane_b32 s5, v1, 61 -; GCN-NEXT: v_readlane_b32 s6, v1, 62 -; GCN-NEXT: v_readlane_b32 s7, v1, 63 +; GCN-NEXT: v_readlane_b32 s0, v1, 57 +; GCN-NEXT: v_readlane_b32 s1, v1, 58 +; GCN-NEXT: v_readlane_b32 s2, v1, 59 +; GCN-NEXT: v_readlane_b32 s3, v1, 60 +; GCN-NEXT: v_readlane_b32 s4, v1, 61 +; GCN-NEXT: v_readlane_b32 s5, v1, 62 +; GCN-NEXT: v_readlane_b32 s6, v1, 63 +; GCN-NEXT: v_readlane_b32 s7, v2, 0 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 8 -; GCN-NEXT: v_readlane_b32 s1, v0, 9 -; GCN-NEXT: v_readlane_b32 s2, v0, 10 -; GCN-NEXT: v_readlane_b32 s3, v0, 11 -; GCN-NEXT: v_readlane_b32 s4, v0, 12 -; GCN-NEXT: v_readlane_b32 s5, v0, 13 -; GCN-NEXT: v_readlane_b32 s6, v0, 14 -; GCN-NEXT: v_readlane_b32 s7, v0, 15 +; GCN-NEXT: v_readlane_b32 s0, v0, 9 +; GCN-NEXT: v_readlane_b32 s1, v0, 10 +; GCN-NEXT: v_readlane_b32 s2, v0, 11 +; GCN-NEXT: v_readlane_b32 s3, v0, 12 +; GCN-NEXT: v_readlane_b32 s4, v0, 13 +; GCN-NEXT: v_readlane_b32 s5, v0, 14 +; GCN-NEXT: v_readlane_b32 s6, v0, 15 +; GCN-NEXT: v_readlane_b32 s7, v0, 16 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 16 -; GCN-NEXT: v_readlane_b32 s1, v0, 17 -; GCN-NEXT: v_readlane_b32 s2, v0, 18 -; GCN-NEXT: v_readlane_b32 s3, v0, 19 -; GCN-NEXT: v_readlane_b32 s4, v0, 20 -; GCN-NEXT: v_readlane_b32 s5, v0, 21 -; GCN-NEXT: v_readlane_b32 s6, v0, 22 -; GCN-NEXT: v_readlane_b32 s7, v0, 23 +; GCN-NEXT: v_readlane_b32 s0, v0, 17 +; GCN-NEXT: v_readlane_b32 s1, v0, 18 +; GCN-NEXT: v_readlane_b32 s2, v0, 19 +; GCN-NEXT: v_readlane_b32 s3, v0, 20 +; GCN-NEXT: v_readlane_b32 s4, v0, 21 +; GCN-NEXT: v_readlane_b32 s5, v0, 22 +; GCN-NEXT: v_readlane_b32 s6, v0, 23 +; GCN-NEXT: v_readlane_b32 s7, v0, 24 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: 
v_readlane_b32 s0, v0, 24 -; GCN-NEXT: v_readlane_b32 s1, v0, 25 -; GCN-NEXT: v_readlane_b32 s2, v0, 26 -; GCN-NEXT: v_readlane_b32 s3, v0, 27 -; GCN-NEXT: v_readlane_b32 s4, v0, 28 -; GCN-NEXT: v_readlane_b32 s5, v0, 29 -; GCN-NEXT: v_readlane_b32 s6, v0, 30 -; GCN-NEXT: v_readlane_b32 s7, v0, 31 +; GCN-NEXT: v_readlane_b32 s0, v0, 25 +; GCN-NEXT: v_readlane_b32 s1, v0, 26 +; GCN-NEXT: v_readlane_b32 s2, v0, 27 +; GCN-NEXT: v_readlane_b32 s3, v0, 28 +; GCN-NEXT: v_readlane_b32 s4, v0, 29 +; GCN-NEXT: v_readlane_b32 s5, v0, 30 +; GCN-NEXT: v_readlane_b32 s6, v0, 31 +; GCN-NEXT: v_readlane_b32 s7, v0, 32 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 32 -; GCN-NEXT: v_readlane_b32 s1, v0, 33 -; GCN-NEXT: v_readlane_b32 s2, v0, 34 -; GCN-NEXT: v_readlane_b32 s3, v0, 35 -; GCN-NEXT: v_readlane_b32 s4, v0, 36 -; GCN-NEXT: v_readlane_b32 s5, v0, 37 -; GCN-NEXT: v_readlane_b32 s6, v0, 38 -; GCN-NEXT: v_readlane_b32 s7, v0, 39 +; GCN-NEXT: v_readlane_b32 s0, v0, 33 +; GCN-NEXT: v_readlane_b32 s1, v0, 34 +; GCN-NEXT: v_readlane_b32 s2, v0, 35 +; GCN-NEXT: v_readlane_b32 s3, v0, 36 +; GCN-NEXT: v_readlane_b32 s4, v0, 37 +; GCN-NEXT: v_readlane_b32 s5, v0, 38 +; GCN-NEXT: v_readlane_b32 s6, v0, 39 +; GCN-NEXT: v_readlane_b32 s7, v0, 40 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 40 -; GCN-NEXT: v_readlane_b32 s1, v0, 41 -; GCN-NEXT: v_readlane_b32 s2, v0, 42 -; GCN-NEXT: v_readlane_b32 s3, v0, 43 -; GCN-NEXT: v_readlane_b32 s4, v0, 44 -; GCN-NEXT: v_readlane_b32 s5, v0, 45 -; GCN-NEXT: v_readlane_b32 s6, v0, 46 -; GCN-NEXT: v_readlane_b32 s7, v0, 47 +; GCN-NEXT: v_readlane_b32 s0, v0, 41 +; GCN-NEXT: v_readlane_b32 s1, v0, 42 +; GCN-NEXT: v_readlane_b32 s2, v0, 43 +; GCN-NEXT: v_readlane_b32 s3, v0, 44 +; GCN-NEXT: v_readlane_b32 s4, v0, 45 +; GCN-NEXT: v_readlane_b32 s5, v0, 46 +; GCN-NEXT: v_readlane_b32 s6, v0, 47 +; GCN-NEXT: v_readlane_b32 s7, v0, 48 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 48 -; GCN-NEXT: v_readlane_b32 s1, v0, 49 -; GCN-NEXT: v_readlane_b32 s2, v0, 50 -; GCN-NEXT: v_readlane_b32 s3, v0, 51 -; GCN-NEXT: v_readlane_b32 s4, v0, 52 -; GCN-NEXT: v_readlane_b32 s5, v0, 53 -; GCN-NEXT: v_readlane_b32 s6, v0, 54 -; GCN-NEXT: v_readlane_b32 s7, v0, 55 +; GCN-NEXT: v_readlane_b32 s0, v0, 49 +; GCN-NEXT: v_readlane_b32 s1, v0, 50 +; GCN-NEXT: v_readlane_b32 s2, v0, 51 +; GCN-NEXT: v_readlane_b32 s3, v0, 52 +; GCN-NEXT: v_readlane_b32 s4, v0, 53 +; GCN-NEXT: v_readlane_b32 s5, v0, 54 +; GCN-NEXT: v_readlane_b32 s6, v0, 55 +; GCN-NEXT: v_readlane_b32 s7, v0, 56 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v2, 0 -; GCN-NEXT: v_readlane_b32 s1, v2, 1 -; GCN-NEXT: v_readlane_b32 s2, v2, 2 -; GCN-NEXT: v_readlane_b32 s3, v2, 3 -; GCN-NEXT: v_readlane_b32 s4, v2, 4 -; GCN-NEXT: v_readlane_b32 s5, v2, 5 -; GCN-NEXT: v_readlane_b32 s6, v2, 6 -; GCN-NEXT: v_readlane_b32 s7, v2, 7 +; GCN-NEXT: v_readlane_b32 s0, v2, 1 +; GCN-NEXT: v_readlane_b32 s1, v2, 2 +; GCN-NEXT: v_readlane_b32 s2, v2, 3 +; GCN-NEXT: v_readlane_b32 s3, v2, 4 +; GCN-NEXT: v_readlane_b32 s4, v2, 5 +; GCN-NEXT: v_readlane_b32 s5, v2, 6 +; GCN-NEXT: v_readlane_b32 s6, v2, 7 +; GCN-NEXT: v_readlane_b32 s7, v2, 8 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND @@ -442,193 +444,195 @@ ret: define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32 
%in) #1 { ; GCN-LABEL: split_sgpr_spill_2_vgprs: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dword s2, s[0:1], 0xb +; GCN-NEXT: s_load_dword s0, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[36:51] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 0 -; GCN-NEXT: v_writelane_b32 v0, s5, 1 -; GCN-NEXT: v_writelane_b32 v0, s6, 2 -; GCN-NEXT: v_writelane_b32 v0, s7, 3 -; GCN-NEXT: v_writelane_b32 v0, s8, 4 -; GCN-NEXT: v_writelane_b32 v0, s9, 5 -; GCN-NEXT: v_writelane_b32 v0, s10, 6 -; GCN-NEXT: v_writelane_b32 v0, s11, 7 -; GCN-NEXT: v_writelane_b32 v0, s12, 8 -; GCN-NEXT: v_writelane_b32 v0, s13, 9 -; GCN-NEXT: v_writelane_b32 v0, s14, 10 -; GCN-NEXT: v_writelane_b32 v0, s15, 11 -; GCN-NEXT: v_writelane_b32 v0, s16, 12 -; GCN-NEXT: v_writelane_b32 v0, s17, 13 -; GCN-NEXT: v_writelane_b32 v0, s18, 14 -; GCN-NEXT: v_writelane_b32 v0, s19, 15 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:19] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 16 -; GCN-NEXT: v_writelane_b32 v0, s5, 17 -; GCN-NEXT: v_writelane_b32 v0, s6, 18 -; GCN-NEXT: v_writelane_b32 v0, s7, 19 -; GCN-NEXT: v_writelane_b32 v0, s8, 20 -; GCN-NEXT: v_writelane_b32 v0, s9, 21 -; GCN-NEXT: v_writelane_b32 v0, s10, 22 -; GCN-NEXT: v_writelane_b32 v0, s11, 23 -; GCN-NEXT: v_writelane_b32 v0, s12, 24 -; GCN-NEXT: v_writelane_b32 v0, s13, 25 -; GCN-NEXT: v_writelane_b32 v0, s14, 26 -; GCN-NEXT: v_writelane_b32 v0, s15, 27 -; GCN-NEXT: v_writelane_b32 v0, s16, 28 -; GCN-NEXT: v_writelane_b32 v0, s17, 29 -; GCN-NEXT: v_writelane_b32 v0, s18, 30 -; GCN-NEXT: v_writelane_b32 v0, s19, 31 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:19] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[20:27] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:1] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s3, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_cmp_lg_u32 s2, s3 -; GCN-NEXT: v_writelane_b32 v0, s36, 32 -; GCN-NEXT: v_writelane_b32 v0, s37, 33 -; GCN-NEXT: v_writelane_b32 v0, s38, 34 -; GCN-NEXT: v_writelane_b32 v0, s39, 35 -; GCN-NEXT: v_writelane_b32 v0, s40, 36 -; GCN-NEXT: v_writelane_b32 v0, s41, 37 -; GCN-NEXT: v_writelane_b32 v0, s42, 38 -; GCN-NEXT: v_writelane_b32 v0, s43, 39 -; GCN-NEXT: v_writelane_b32 v0, s44, 40 -; GCN-NEXT: v_writelane_b32 v0, s45, 41 -; GCN-NEXT: v_writelane_b32 v0, s46, 42 -; GCN-NEXT: v_writelane_b32 v0, s47, 43 -; GCN-NEXT: v_writelane_b32 v0, s48, 44 -; GCN-NEXT: v_writelane_b32 v0, s49, 45 -; GCN-NEXT: v_writelane_b32 v0, s50, 46 -; GCN-NEXT: v_writelane_b32 v0, s51, 47 -; GCN-NEXT: v_writelane_b32 v0, s4, 48 -; GCN-NEXT: v_writelane_b32 v0, s5, 49 -; GCN-NEXT: v_writelane_b32 v0, s6, 50 -; GCN-NEXT: v_writelane_b32 v0, s7, 51 -; GCN-NEXT: v_writelane_b32 v0, s8, 52 -; GCN-NEXT: v_writelane_b32 v0, s9, 53 -; GCN-NEXT: v_writelane_b32 v0, s10, 54 -; GCN-NEXT: v_writelane_b32 v0, s11, 55 -; GCN-NEXT: v_writelane_b32 v0, s12, 56 -; GCN-NEXT: v_writelane_b32 v0, s13, 57 -; GCN-NEXT: v_writelane_b32 v0, s14, 58 -; GCN-NEXT: v_writelane_b32 v0, s15, 59 -; GCN-NEXT: v_writelane_b32 v0, s16, 60 -; GCN-NEXT: v_writelane_b32 v0, s17, 61 -; GCN-NEXT: v_writelane_b32 v0, s18, 62 -; GCN-NEXT: v_writelane_b32 v0, s19, 63 -; GCN-NEXT: v_writelane_b32 v1, s20, 0 -; GCN-NEXT: v_writelane_b32 v1, s21, 1 -; GCN-NEXT: v_writelane_b32 v1, s22, 2 -; GCN-NEXT: v_writelane_b32 v1, s23, 3 -; GCN-NEXT: v_writelane_b32 v1, s24, 4 -; GCN-NEXT: v_writelane_b32 v1, s25, 5 -; GCN-NEXT: v_writelane_b32 
v1, s26, 6 -; GCN-NEXT: v_writelane_b32 v1, s27, 7 -; GCN-NEXT: v_writelane_b32 v1, s0, 8 -; GCN-NEXT: v_writelane_b32 v1, s1, 9 +; GCN-NEXT: v_writelane_b32 v0, s0, 0 +; GCN-NEXT: v_writelane_b32 v0, s4, 1 +; GCN-NEXT: v_writelane_b32 v0, s5, 2 +; GCN-NEXT: v_writelane_b32 v0, s6, 3 +; GCN-NEXT: v_writelane_b32 v0, s7, 4 +; GCN-NEXT: v_writelane_b32 v0, s8, 5 +; GCN-NEXT: v_writelane_b32 v0, s9, 6 +; GCN-NEXT: v_writelane_b32 v0, s10, 7 +; GCN-NEXT: v_writelane_b32 v0, s11, 8 +; GCN-NEXT: v_writelane_b32 v0, s12, 9 +; GCN-NEXT: v_writelane_b32 v0, s13, 10 +; GCN-NEXT: v_writelane_b32 v0, s14, 11 +; GCN-NEXT: v_writelane_b32 v0, s15, 12 +; GCN-NEXT: v_writelane_b32 v0, s16, 13 +; GCN-NEXT: v_writelane_b32 v0, s17, 14 +; GCN-NEXT: v_writelane_b32 v0, s18, 15 +; GCN-NEXT: v_writelane_b32 v0, s19, 16 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:15] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[16:31] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s0, 17 +; GCN-NEXT: v_writelane_b32 v0, s1, 18 +; GCN-NEXT: v_writelane_b32 v0, s2, 19 +; GCN-NEXT: v_writelane_b32 v0, s3, 20 +; GCN-NEXT: v_writelane_b32 v0, s4, 21 +; GCN-NEXT: v_writelane_b32 v0, s5, 22 +; GCN-NEXT: v_writelane_b32 v0, s6, 23 +; GCN-NEXT: v_writelane_b32 v0, s7, 24 +; GCN-NEXT: v_writelane_b32 v0, s8, 25 +; GCN-NEXT: v_writelane_b32 v0, s9, 26 +; GCN-NEXT: v_writelane_b32 v0, s10, 27 +; GCN-NEXT: v_writelane_b32 v0, s11, 28 +; GCN-NEXT: v_writelane_b32 v0, s12, 29 +; GCN-NEXT: v_writelane_b32 v0, s13, 30 +; GCN-NEXT: v_writelane_b32 v0, s14, 31 +; GCN-NEXT: v_writelane_b32 v0, s15, 32 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[8:9] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_mov_b32 s10, 0 +; GCN-NEXT: v_readlane_b32 s11, v0, 0 +; GCN-NEXT: s_cmp_lg_u32 s11, s10 +; GCN-NEXT: v_writelane_b32 v0, s36, 33 +; GCN-NEXT: v_writelane_b32 v0, s37, 34 +; GCN-NEXT: v_writelane_b32 v0, s38, 35 +; GCN-NEXT: v_writelane_b32 v0, s39, 36 +; GCN-NEXT: v_writelane_b32 v0, s40, 37 +; GCN-NEXT: v_writelane_b32 v0, s41, 38 +; GCN-NEXT: v_writelane_b32 v0, s42, 39 +; GCN-NEXT: v_writelane_b32 v0, s43, 40 +; GCN-NEXT: v_writelane_b32 v0, s44, 41 +; GCN-NEXT: v_writelane_b32 v0, s45, 42 +; GCN-NEXT: v_writelane_b32 v0, s46, 43 +; GCN-NEXT: v_writelane_b32 v0, s47, 44 +; GCN-NEXT: v_writelane_b32 v0, s48, 45 +; GCN-NEXT: v_writelane_b32 v0, s49, 46 +; GCN-NEXT: v_writelane_b32 v0, s50, 47 +; GCN-NEXT: v_writelane_b32 v0, s51, 48 +; GCN-NEXT: v_writelane_b32 v0, s16, 49 +; GCN-NEXT: v_writelane_b32 v0, s17, 50 +; GCN-NEXT: v_writelane_b32 v0, s18, 51 +; GCN-NEXT: v_writelane_b32 v0, s19, 52 +; GCN-NEXT: v_writelane_b32 v0, s20, 53 +; GCN-NEXT: v_writelane_b32 v0, s21, 54 +; GCN-NEXT: v_writelane_b32 v0, s22, 55 +; GCN-NEXT: v_writelane_b32 v0, s23, 56 +; GCN-NEXT: v_writelane_b32 v0, s24, 57 +; GCN-NEXT: v_writelane_b32 v0, s25, 58 +; GCN-NEXT: v_writelane_b32 v0, s26, 59 +; GCN-NEXT: v_writelane_b32 v0, s27, 60 +; GCN-NEXT: v_writelane_b32 v0, s28, 61 +; GCN-NEXT: v_writelane_b32 v0, s29, 62 +; GCN-NEXT: v_writelane_b32 v0, s30, 63 +; GCN-NEXT: v_writelane_b32 v1, s31, 0 +; GCN-NEXT: v_writelane_b32 v1, s0, 1 +; GCN-NEXT: v_writelane_b32 v1, s1, 2 +; GCN-NEXT: v_writelane_b32 v1, s2, 3 +; GCN-NEXT: v_writelane_b32 v1, s3, 4 +; GCN-NEXT: v_writelane_b32 v1, s4, 5 +; GCN-NEXT: v_writelane_b32 v1, s5, 6 +; GCN-NEXT: v_writelane_b32 v1, s6, 7 +; GCN-NEXT: v_writelane_b32 v1, s7, 8 +; GCN-NEXT: v_writelane_b32 v1, s8, 9 +; GCN-NEXT: v_writelane_b32 v1, 
s9, 10 ; GCN-NEXT: s_cbranch_scc1 BB1_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-NEXT: v_readlane_b32 s2, v0, 2 -; GCN-NEXT: v_readlane_b32 s3, v0, 3 -; GCN-NEXT: v_readlane_b32 s4, v0, 4 -; GCN-NEXT: v_readlane_b32 s5, v0, 5 -; GCN-NEXT: v_readlane_b32 s6, v0, 6 -; GCN-NEXT: v_readlane_b32 s7, v0, 7 -; GCN-NEXT: v_readlane_b32 s8, v0, 8 -; GCN-NEXT: v_readlane_b32 s9, v0, 9 -; GCN-NEXT: v_readlane_b32 s10, v0, 10 -; GCN-NEXT: v_readlane_b32 s11, v0, 11 -; GCN-NEXT: v_readlane_b32 s12, v0, 12 -; GCN-NEXT: v_readlane_b32 s13, v0, 13 -; GCN-NEXT: v_readlane_b32 s14, v0, 14 -; GCN-NEXT: v_readlane_b32 s15, v0, 15 +; GCN-NEXT: v_readlane_b32 s0, v0, 1 +; GCN-NEXT: v_readlane_b32 s1, v0, 2 +; GCN-NEXT: v_readlane_b32 s2, v0, 3 +; GCN-NEXT: v_readlane_b32 s3, v0, 4 +; GCN-NEXT: v_readlane_b32 s4, v0, 5 +; GCN-NEXT: v_readlane_b32 s5, v0, 6 +; GCN-NEXT: v_readlane_b32 s6, v0, 7 +; GCN-NEXT: v_readlane_b32 s7, v0, 8 +; GCN-NEXT: v_readlane_b32 s8, v0, 9 +; GCN-NEXT: v_readlane_b32 s9, v0, 10 +; GCN-NEXT: v_readlane_b32 s10, v0, 11 +; GCN-NEXT: v_readlane_b32 s11, v0, 12 +; GCN-NEXT: v_readlane_b32 s12, v0, 13 +; GCN-NEXT: v_readlane_b32 s13, v0, 14 +; GCN-NEXT: v_readlane_b32 s14, v0, 15 +; GCN-NEXT: v_readlane_b32 s15, v0, 16 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 32 -; GCN-NEXT: v_readlane_b32 s1, v0, 33 -; GCN-NEXT: v_readlane_b32 s2, v0, 34 -; GCN-NEXT: v_readlane_b32 s3, v0, 35 -; GCN-NEXT: v_readlane_b32 s4, v0, 36 -; GCN-NEXT: v_readlane_b32 s5, v0, 37 -; GCN-NEXT: v_readlane_b32 s6, v0, 38 -; GCN-NEXT: v_readlane_b32 s7, v0, 39 -; GCN-NEXT: v_readlane_b32 s8, v0, 40 -; GCN-NEXT: v_readlane_b32 s9, v0, 41 -; GCN-NEXT: v_readlane_b32 s10, v0, 42 -; GCN-NEXT: v_readlane_b32 s11, v0, 43 -; GCN-NEXT: v_readlane_b32 s12, v0, 44 -; GCN-NEXT: v_readlane_b32 s13, v0, 45 -; GCN-NEXT: v_readlane_b32 s14, v0, 46 -; GCN-NEXT: v_readlane_b32 s15, v0, 47 +; GCN-NEXT: v_readlane_b32 s0, v0, 33 +; GCN-NEXT: v_readlane_b32 s1, v0, 34 +; GCN-NEXT: v_readlane_b32 s2, v0, 35 +; GCN-NEXT: v_readlane_b32 s3, v0, 36 +; GCN-NEXT: v_readlane_b32 s4, v0, 37 +; GCN-NEXT: v_readlane_b32 s5, v0, 38 +; GCN-NEXT: v_readlane_b32 s6, v0, 39 +; GCN-NEXT: v_readlane_b32 s7, v0, 40 +; GCN-NEXT: v_readlane_b32 s8, v0, 41 +; GCN-NEXT: v_readlane_b32 s9, v0, 42 +; GCN-NEXT: v_readlane_b32 s10, v0, 43 +; GCN-NEXT: v_readlane_b32 s11, v0, 44 +; GCN-NEXT: v_readlane_b32 s12, v0, 45 +; GCN-NEXT: v_readlane_b32 s13, v0, 46 +; GCN-NEXT: v_readlane_b32 s14, v0, 47 +; GCN-NEXT: v_readlane_b32 s15, v0, 48 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 16 -; GCN-NEXT: v_readlane_b32 s1, v0, 17 -; GCN-NEXT: v_readlane_b32 s2, v0, 18 -; GCN-NEXT: v_readlane_b32 s3, v0, 19 -; GCN-NEXT: v_readlane_b32 s4, v0, 20 -; GCN-NEXT: v_readlane_b32 s5, v0, 21 -; GCN-NEXT: v_readlane_b32 s6, v0, 22 -; GCN-NEXT: v_readlane_b32 s7, v0, 23 -; GCN-NEXT: v_readlane_b32 s8, v0, 24 -; GCN-NEXT: v_readlane_b32 s9, v0, 25 -; GCN-NEXT: v_readlane_b32 s10, v0, 26 -; GCN-NEXT: v_readlane_b32 s11, v0, 27 -; GCN-NEXT: v_readlane_b32 s12, v0, 28 -; GCN-NEXT: v_readlane_b32 s13, v0, 29 -; GCN-NEXT: v_readlane_b32 s14, v0, 30 -; GCN-NEXT: v_readlane_b32 s15, v0, 31 +; GCN-NEXT: v_readlane_b32 s0, v0, 17 +; GCN-NEXT: v_readlane_b32 s1, v0, 18 +; GCN-NEXT: v_readlane_b32 s2, v0, 19 +; GCN-NEXT: v_readlane_b32 s3, v0, 20 +; GCN-NEXT: v_readlane_b32 s4, v0, 21 +; GCN-NEXT: 
v_readlane_b32 s5, v0, 22 +; GCN-NEXT: v_readlane_b32 s6, v0, 23 +; GCN-NEXT: v_readlane_b32 s7, v0, 24 +; GCN-NEXT: v_readlane_b32 s8, v0, 25 +; GCN-NEXT: v_readlane_b32 s9, v0, 26 +; GCN-NEXT: v_readlane_b32 s10, v0, 27 +; GCN-NEXT: v_readlane_b32 s11, v0, 28 +; GCN-NEXT: v_readlane_b32 s12, v0, 29 +; GCN-NEXT: v_readlane_b32 s13, v0, 30 +; GCN-NEXT: v_readlane_b32 s14, v0, 31 +; GCN-NEXT: v_readlane_b32 s15, v0, 32 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s16, v1, 0 -; GCN-NEXT: v_readlane_b32 s17, v1, 1 -; GCN-NEXT: v_readlane_b32 s18, v1, 2 -; GCN-NEXT: v_readlane_b32 s19, v1, 3 -; GCN-NEXT: v_readlane_b32 s20, v1, 4 -; GCN-NEXT: v_readlane_b32 s21, v1, 5 -; GCN-NEXT: v_readlane_b32 s22, v1, 6 -; GCN-NEXT: v_readlane_b32 s23, v1, 7 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[16:23] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s24, v1, 8 -; GCN-NEXT: v_readlane_b32 s25, v1, 9 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[24:25] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 48 -; GCN-NEXT: v_readlane_b32 s1, v0, 49 -; GCN-NEXT: v_readlane_b32 s2, v0, 50 -; GCN-NEXT: v_readlane_b32 s3, v0, 51 -; GCN-NEXT: v_readlane_b32 s4, v0, 52 -; GCN-NEXT: v_readlane_b32 s5, v0, 53 -; GCN-NEXT: v_readlane_b32 s6, v0, 54 -; GCN-NEXT: v_readlane_b32 s7, v0, 55 -; GCN-NEXT: v_readlane_b32 s8, v0, 56 -; GCN-NEXT: v_readlane_b32 s9, v0, 57 -; GCN-NEXT: v_readlane_b32 s10, v0, 58 -; GCN-NEXT: v_readlane_b32 s11, v0, 59 -; GCN-NEXT: v_readlane_b32 s12, v0, 60 -; GCN-NEXT: v_readlane_b32 s13, v0, 61 -; GCN-NEXT: v_readlane_b32 s14, v0, 62 -; GCN-NEXT: v_readlane_b32 s15, v0, 63 +; GCN-NEXT: v_readlane_b32 s0, v1, 1 +; GCN-NEXT: v_readlane_b32 s1, v1, 2 +; GCN-NEXT: v_readlane_b32 s2, v1, 3 +; GCN-NEXT: v_readlane_b32 s3, v1, 4 +; GCN-NEXT: v_readlane_b32 s4, v1, 5 +; GCN-NEXT: v_readlane_b32 s5, v1, 6 +; GCN-NEXT: v_readlane_b32 s6, v1, 7 +; GCN-NEXT: v_readlane_b32 s7, v1, 8 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_readlane_b32 s0, v1, 9 +; GCN-NEXT: v_readlane_b32 s1, v1, 10 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[0:1] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_readlane_b32 s0, v0, 49 +; GCN-NEXT: v_readlane_b32 s1, v0, 50 +; GCN-NEXT: v_readlane_b32 s2, v0, 51 +; GCN-NEXT: v_readlane_b32 s3, v0, 52 +; GCN-NEXT: v_readlane_b32 s4, v0, 53 +; GCN-NEXT: v_readlane_b32 s5, v0, 54 +; GCN-NEXT: v_readlane_b32 s6, v0, 55 +; GCN-NEXT: v_readlane_b32 s7, v0, 56 +; GCN-NEXT: v_readlane_b32 s8, v0, 57 +; GCN-NEXT: v_readlane_b32 s9, v0, 58 +; GCN-NEXT: v_readlane_b32 s10, v0, 59 +; GCN-NEXT: v_readlane_b32 s11, v0, 60 +; GCN-NEXT: v_readlane_b32 s12, v0, 61 +; GCN-NEXT: v_readlane_b32 s13, v0, 62 +; GCN-NEXT: v_readlane_b32 s14, v0, 63 +; GCN-NEXT: v_readlane_b32 s15, v1, 0 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND @@ -663,13 +667,13 @@ ret: define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 { ; GCN-LABEL: no_vgprs_last_sgpr_spill: ; GCN: ; %bb.0: -; GCN-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0 -; GCN-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1 -; GCN-NEXT: s_mov_b32 s22, -1 -; GCN-NEXT: s_mov_b32 s23, 0xe8f000 -; GCN-NEXT: s_add_u32 s20, s20, s3 -; GCN-NEXT: s_addc_u32 s21, s21, 0 -; GCN-NEXT: s_load_dword s2, s[0:1], 0xb +; GCN-NEXT: s_mov_b32 s56, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s57, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s58, -1 +; GCN-NEXT: s_mov_b32 s59, 0xe8f000 +; GCN-NEXT: s_add_u32 s56, s56, s3 +; 
GCN-NEXT: s_addc_u32 s57, s57, 0 +; GCN-NEXT: s_load_dword s0, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART @@ -688,177 +692,179 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[36:51] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v31, s4, 0 -; GCN-NEXT: v_writelane_b32 v31, s5, 1 -; GCN-NEXT: v_writelane_b32 v31, s6, 2 -; GCN-NEXT: v_writelane_b32 v31, s7, 3 -; GCN-NEXT: v_writelane_b32 v31, s8, 4 -; GCN-NEXT: v_writelane_b32 v31, s9, 5 -; GCN-NEXT: v_writelane_b32 v31, s10, 6 -; GCN-NEXT: v_writelane_b32 v31, s11, 7 -; GCN-NEXT: v_writelane_b32 v31, s12, 8 -; GCN-NEXT: v_writelane_b32 v31, s13, 9 -; GCN-NEXT: v_writelane_b32 v31, s14, 10 -; GCN-NEXT: v_writelane_b32 v31, s15, 11 -; GCN-NEXT: v_writelane_b32 v31, s16, 12 -; GCN-NEXT: v_writelane_b32 v31, s17, 13 -; GCN-NEXT: v_writelane_b32 v31, s18, 14 -; GCN-NEXT: v_writelane_b32 v31, s19, 15 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:19] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v31, s4, 16 -; GCN-NEXT: v_writelane_b32 v31, s5, 17 -; GCN-NEXT: v_writelane_b32 v31, s6, 18 -; GCN-NEXT: v_writelane_b32 v31, s7, 19 -; GCN-NEXT: v_writelane_b32 v31, s8, 20 -; GCN-NEXT: v_writelane_b32 v31, s9, 21 -; GCN-NEXT: v_writelane_b32 v31, s10, 22 -; GCN-NEXT: v_writelane_b32 v31, s11, 23 -; GCN-NEXT: v_writelane_b32 v31, s12, 24 -; GCN-NEXT: v_writelane_b32 v31, s13, 25 -; GCN-NEXT: v_writelane_b32 v31, s14, 26 -; GCN-NEXT: v_writelane_b32 v31, s15, 27 -; GCN-NEXT: v_writelane_b32 v31, s16, 28 -; GCN-NEXT: v_writelane_b32 v31, s17, 29 -; GCN-NEXT: v_writelane_b32 v31, s18, 30 -; GCN-NEXT: v_writelane_b32 v31, s19, 31 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:19] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:1] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s3, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_cmp_lg_u32 s2, s3 -; GCN-NEXT: v_writelane_b32 v31, s36, 32 -; GCN-NEXT: v_writelane_b32 v31, s37, 33 -; GCN-NEXT: v_writelane_b32 v31, s38, 34 -; GCN-NEXT: v_writelane_b32 v31, s39, 35 -; GCN-NEXT: v_writelane_b32 v31, s40, 36 -; GCN-NEXT: v_writelane_b32 v31, s41, 37 -; GCN-NEXT: v_writelane_b32 v31, s42, 38 -; GCN-NEXT: v_writelane_b32 v31, s43, 39 -; GCN-NEXT: v_writelane_b32 v31, s44, 40 -; GCN-NEXT: v_writelane_b32 v31, s45, 41 -; GCN-NEXT: v_writelane_b32 v31, s46, 42 -; GCN-NEXT: v_writelane_b32 v31, s47, 43 -; GCN-NEXT: v_writelane_b32 v31, s48, 44 -; GCN-NEXT: v_writelane_b32 v31, s49, 45 -; GCN-NEXT: v_writelane_b32 v31, s50, 46 -; GCN-NEXT: v_writelane_b32 v31, s51, 47 -; GCN-NEXT: v_writelane_b32 v31, s4, 48 -; GCN-NEXT: v_writelane_b32 v31, s5, 49 -; GCN-NEXT: v_writelane_b32 v31, s6, 50 -; GCN-NEXT: v_writelane_b32 v31, s7, 51 -; GCN-NEXT: v_writelane_b32 v31, s8, 52 -; GCN-NEXT: v_writelane_b32 v31, s9, 53 -; GCN-NEXT: v_writelane_b32 v31, s10, 54 -; GCN-NEXT: v_writelane_b32 v31, s11, 55 -; GCN-NEXT: v_writelane_b32 v31, s12, 56 -; GCN-NEXT: v_writelane_b32 v31, s13, 57 -; GCN-NEXT: v_writelane_b32 v31, s14, 58 -; GCN-NEXT: v_writelane_b32 v31, s15, 59 -; GCN-NEXT: v_writelane_b32 v31, s16, 60 -; GCN-NEXT: v_writelane_b32 v31, s17, 61 -; GCN-NEXT: v_writelane_b32 v31, s18, 62 -; GCN-NEXT: v_writelane_b32 v31, s19, 63 -; GCN-NEXT: buffer_store_dword v0, off, s[20:23], 0 -; GCN-NEXT: v_writelane_b32 v0, s0, 0 -; GCN-NEXT: v_writelane_b32 v0, s1, 1 -; GCN-NEXT: s_mov_b64 s[0:1], exec -; GCN-NEXT: s_mov_b64 exec, 3 -; GCN-NEXT: buffer_store_dword v0, off, s[20:23], 0 
offset:4 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[0:1] -; GCN-NEXT: buffer_load_dword v0, off, s[20:23], 0 +; GCN-NEXT: v_writelane_b32 v31, s0, 0 +; GCN-NEXT: v_writelane_b32 v31, s4, 1 +; GCN-NEXT: v_writelane_b32 v31, s5, 2 +; GCN-NEXT: v_writelane_b32 v31, s6, 3 +; GCN-NEXT: v_writelane_b32 v31, s7, 4 +; GCN-NEXT: v_writelane_b32 v31, s8, 5 +; GCN-NEXT: v_writelane_b32 v31, s9, 6 +; GCN-NEXT: v_writelane_b32 v31, s10, 7 +; GCN-NEXT: v_writelane_b32 v31, s11, 8 +; GCN-NEXT: v_writelane_b32 v31, s12, 9 +; GCN-NEXT: v_writelane_b32 v31, s13, 10 +; GCN-NEXT: v_writelane_b32 v31, s14, 11 +; GCN-NEXT: v_writelane_b32 v31, s15, 12 +; GCN-NEXT: v_writelane_b32 v31, s16, 13 +; GCN-NEXT: v_writelane_b32 v31, s17, 14 +; GCN-NEXT: v_writelane_b32 v31, s18, 15 +; GCN-NEXT: v_writelane_b32 v31, s19, 16 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:15] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[16:31] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[34:35] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_mov_b32 s33, 0 +; GCN-NEXT: v_readlane_b32 s52, v31, 0 +; GCN-NEXT: s_cmp_lg_u32 s52, s33 +; GCN-NEXT: v_writelane_b32 v31, s36, 17 +; GCN-NEXT: v_writelane_b32 v31, s37, 18 +; GCN-NEXT: v_writelane_b32 v31, s38, 19 +; GCN-NEXT: v_writelane_b32 v31, s39, 20 +; GCN-NEXT: v_writelane_b32 v31, s40, 21 +; GCN-NEXT: v_writelane_b32 v31, s41, 22 +; GCN-NEXT: v_writelane_b32 v31, s42, 23 +; GCN-NEXT: v_writelane_b32 v31, s43, 24 +; GCN-NEXT: v_writelane_b32 v31, s44, 25 +; GCN-NEXT: v_writelane_b32 v31, s45, 26 +; GCN-NEXT: v_writelane_b32 v31, s46, 27 +; GCN-NEXT: v_writelane_b32 v31, s47, 28 +; GCN-NEXT: v_writelane_b32 v31, s48, 29 +; GCN-NEXT: v_writelane_b32 v31, s49, 30 +; GCN-NEXT: v_writelane_b32 v31, s50, 31 +; GCN-NEXT: v_writelane_b32 v31, s51, 32 +; GCN-NEXT: v_writelane_b32 v31, s0, 33 +; GCN-NEXT: v_writelane_b32 v31, s1, 34 +; GCN-NEXT: v_writelane_b32 v31, s2, 35 +; GCN-NEXT: v_writelane_b32 v31, s3, 36 +; GCN-NEXT: v_writelane_b32 v31, s4, 37 +; GCN-NEXT: v_writelane_b32 v31, s5, 38 +; GCN-NEXT: v_writelane_b32 v31, s6, 39 +; GCN-NEXT: v_writelane_b32 v31, s7, 40 +; GCN-NEXT: v_writelane_b32 v31, s8, 41 +; GCN-NEXT: v_writelane_b32 v31, s9, 42 +; GCN-NEXT: v_writelane_b32 v31, s10, 43 +; GCN-NEXT: v_writelane_b32 v31, s11, 44 +; GCN-NEXT: v_writelane_b32 v31, s12, 45 +; GCN-NEXT: v_writelane_b32 v31, s13, 46 +; GCN-NEXT: v_writelane_b32 v31, s14, 47 +; GCN-NEXT: v_writelane_b32 v31, s15, 48 +; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 +; GCN-NEXT: v_writelane_b32 v0, s16, 0 +; GCN-NEXT: v_writelane_b32 v0, s17, 1 +; GCN-NEXT: v_writelane_b32 v0, s18, 2 +; GCN-NEXT: v_writelane_b32 v0, s19, 3 +; GCN-NEXT: v_writelane_b32 v0, s20, 4 +; GCN-NEXT: v_writelane_b32 v0, s21, 5 +; GCN-NEXT: v_writelane_b32 v0, s22, 6 +; GCN-NEXT: v_writelane_b32 v0, s23, 7 +; GCN-NEXT: v_writelane_b32 v0, s24, 8 +; GCN-NEXT: v_writelane_b32 v0, s25, 9 +; GCN-NEXT: v_writelane_b32 v0, s26, 10 +; GCN-NEXT: v_writelane_b32 v0, s27, 11 +; GCN-NEXT: v_writelane_b32 v0, s28, 12 +; GCN-NEXT: v_writelane_b32 v0, s29, 13 +; GCN-NEXT: v_writelane_b32 v0, s30, 14 +; GCN-NEXT: v_writelane_b32 v0, s31, 15 +; GCN-NEXT: s_mov_b64 s[16:17], exec +; GCN-NEXT: s_mov_b64 exec, 0xffff +; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[16:17] +; GCN-NEXT: v_writelane_b32 v31, s34, 49 +; GCN-NEXT: v_writelane_b32 v31, s35, 50 +; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 ; GCN-NEXT: s_cbranch_scc1 BB2_2 ; 
GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v31, 0 -; GCN-NEXT: v_readlane_b32 s1, v31, 1 -; GCN-NEXT: v_readlane_b32 s2, v31, 2 -; GCN-NEXT: v_readlane_b32 s3, v31, 3 -; GCN-NEXT: v_readlane_b32 s4, v31, 4 -; GCN-NEXT: v_readlane_b32 s5, v31, 5 -; GCN-NEXT: v_readlane_b32 s6, v31, 6 -; GCN-NEXT: v_readlane_b32 s7, v31, 7 -; GCN-NEXT: v_readlane_b32 s8, v31, 8 -; GCN-NEXT: v_readlane_b32 s9, v31, 9 -; GCN-NEXT: v_readlane_b32 s10, v31, 10 -; GCN-NEXT: v_readlane_b32 s11, v31, 11 -; GCN-NEXT: v_readlane_b32 s12, v31, 12 -; GCN-NEXT: v_readlane_b32 s13, v31, 13 -; GCN-NEXT: v_readlane_b32 s14, v31, 14 -; GCN-NEXT: v_readlane_b32 s15, v31, 15 +; GCN-NEXT: v_readlane_b32 s0, v31, 1 +; GCN-NEXT: v_readlane_b32 s1, v31, 2 +; GCN-NEXT: v_readlane_b32 s2, v31, 3 +; GCN-NEXT: v_readlane_b32 s3, v31, 4 +; GCN-NEXT: v_readlane_b32 s4, v31, 5 +; GCN-NEXT: v_readlane_b32 s5, v31, 6 +; GCN-NEXT: v_readlane_b32 s6, v31, 7 +; GCN-NEXT: v_readlane_b32 s7, v31, 8 +; GCN-NEXT: v_readlane_b32 s8, v31, 9 +; GCN-NEXT: v_readlane_b32 s9, v31, 10 +; GCN-NEXT: v_readlane_b32 s10, v31, 11 +; GCN-NEXT: v_readlane_b32 s11, v31, 12 +; GCN-NEXT: v_readlane_b32 s12, v31, 13 +; GCN-NEXT: v_readlane_b32 s13, v31, 14 +; GCN-NEXT: v_readlane_b32 s14, v31, 15 +; GCN-NEXT: v_readlane_b32 s15, v31, 16 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 32 -; GCN-NEXT: v_readlane_b32 s1, v31, 33 -; GCN-NEXT: v_readlane_b32 s2, v31, 34 -; GCN-NEXT: v_readlane_b32 s3, v31, 35 -; GCN-NEXT: v_readlane_b32 s4, v31, 36 -; GCN-NEXT: v_readlane_b32 s5, v31, 37 -; GCN-NEXT: v_readlane_b32 s6, v31, 38 -; GCN-NEXT: v_readlane_b32 s7, v31, 39 -; GCN-NEXT: v_readlane_b32 s8, v31, 40 -; GCN-NEXT: v_readlane_b32 s9, v31, 41 -; GCN-NEXT: v_readlane_b32 s10, v31, 42 -; GCN-NEXT: v_readlane_b32 s11, v31, 43 -; GCN-NEXT: v_readlane_b32 s12, v31, 44 -; GCN-NEXT: v_readlane_b32 s13, v31, 45 -; GCN-NEXT: v_readlane_b32 s14, v31, 46 -; GCN-NEXT: v_readlane_b32 s15, v31, 47 +; GCN-NEXT: v_readlane_b32 s0, v31, 17 +; GCN-NEXT: v_readlane_b32 s1, v31, 18 +; GCN-NEXT: v_readlane_b32 s2, v31, 19 +; GCN-NEXT: v_readlane_b32 s3, v31, 20 +; GCN-NEXT: v_readlane_b32 s4, v31, 21 +; GCN-NEXT: v_readlane_b32 s5, v31, 22 +; GCN-NEXT: v_readlane_b32 s6, v31, 23 +; GCN-NEXT: v_readlane_b32 s7, v31, 24 +; GCN-NEXT: v_readlane_b32 s8, v31, 25 +; GCN-NEXT: v_readlane_b32 s9, v31, 26 +; GCN-NEXT: v_readlane_b32 s10, v31, 27 +; GCN-NEXT: v_readlane_b32 s11, v31, 28 +; GCN-NEXT: v_readlane_b32 s12, v31, 29 +; GCN-NEXT: v_readlane_b32 s13, v31, 30 +; GCN-NEXT: v_readlane_b32 s14, v31, 31 +; GCN-NEXT: v_readlane_b32 s15, v31, 32 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 16 -; GCN-NEXT: v_readlane_b32 s1, v31, 17 -; GCN-NEXT: v_readlane_b32 s2, v31, 18 -; GCN-NEXT: v_readlane_b32 s3, v31, 19 -; GCN-NEXT: v_readlane_b32 s4, v31, 20 -; GCN-NEXT: v_readlane_b32 s5, v31, 21 -; GCN-NEXT: v_readlane_b32 s6, v31, 22 -; GCN-NEXT: v_readlane_b32 s7, v31, 23 -; GCN-NEXT: v_readlane_b32 s8, v31, 24 -; GCN-NEXT: v_readlane_b32 s9, v31, 25 -; GCN-NEXT: v_readlane_b32 s10, v31, 26 -; GCN-NEXT: v_readlane_b32 s11, v31, 27 -; GCN-NEXT: v_readlane_b32 s12, v31, 28 -; GCN-NEXT: v_readlane_b32 s13, v31, 29 -; GCN-NEXT: v_readlane_b32 s14, v31, 30 -; GCN-NEXT: v_readlane_b32 s15, v31, 31 +; GCN-NEXT: v_readlane_b32 s0, v31, 33 +; GCN-NEXT: v_readlane_b32 s1, v31, 34 +; GCN-NEXT: v_readlane_b32 s2, v31, 35 +; GCN-NEXT: v_readlane_b32 s3, v31, 36 +; GCN-NEXT: 
v_readlane_b32 s4, v31, 37 +; GCN-NEXT: v_readlane_b32 s5, v31, 38 +; GCN-NEXT: v_readlane_b32 s6, v31, 39 +; GCN-NEXT: v_readlane_b32 s7, v31, 40 +; GCN-NEXT: v_readlane_b32 s8, v31, 41 +; GCN-NEXT: v_readlane_b32 s9, v31, 42 +; GCN-NEXT: v_readlane_b32 s10, v31, 43 +; GCN-NEXT: v_readlane_b32 s11, v31, 44 +; GCN-NEXT: v_readlane_b32 s12, v31, 45 +; GCN-NEXT: v_readlane_b32 s13, v31, 46 +; GCN-NEXT: v_readlane_b32 s14, v31, 47 +; GCN-NEXT: v_readlane_b32 s15, v31, 48 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 48 -; GCN-NEXT: v_readlane_b32 s1, v31, 49 -; GCN-NEXT: v_readlane_b32 s2, v31, 50 -; GCN-NEXT: v_readlane_b32 s3, v31, 51 -; GCN-NEXT: v_readlane_b32 s4, v31, 52 -; GCN-NEXT: v_readlane_b32 s5, v31, 53 -; GCN-NEXT: v_readlane_b32 s6, v31, 54 -; GCN-NEXT: v_readlane_b32 s7, v31, 55 -; GCN-NEXT: v_readlane_b32 s8, v31, 56 -; GCN-NEXT: v_readlane_b32 s9, v31, 57 -; GCN-NEXT: v_readlane_b32 s10, v31, 58 -; GCN-NEXT: v_readlane_b32 s11, v31, 59 -; GCN-NEXT: v_readlane_b32 s12, v31, 60 -; GCN-NEXT: v_readlane_b32 s13, v31, 61 -; GCN-NEXT: v_readlane_b32 s14, v31, 62 -; GCN-NEXT: v_readlane_b32 s15, v31, 63 +; GCN-NEXT: s_mov_b64 s[0:1], exec +; GCN-NEXT: s_mov_b64 exec, 0xffff +; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[0:1] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readlane_b32 s0, v0, 0 +; GCN-NEXT: v_readlane_b32 s1, v0, 1 +; GCN-NEXT: v_readlane_b32 s2, v0, 2 +; GCN-NEXT: v_readlane_b32 s3, v0, 3 +; GCN-NEXT: v_readlane_b32 s4, v0, 4 +; GCN-NEXT: v_readlane_b32 s5, v0, 5 +; GCN-NEXT: v_readlane_b32 s6, v0, 6 +; GCN-NEXT: v_readlane_b32 s7, v0, 7 +; GCN-NEXT: v_readlane_b32 s8, v0, 8 +; GCN-NEXT: v_readlane_b32 s9, v0, 9 +; GCN-NEXT: v_readlane_b32 s10, v0, 10 +; GCN-NEXT: v_readlane_b32 s11, v0, 11 +; GCN-NEXT: v_readlane_b32 s12, v0, 12 +; GCN-NEXT: v_readlane_b32 s13, v0, 13 +; GCN-NEXT: v_readlane_b32 s14, v0, 14 +; GCN-NEXT: v_readlane_b32 s15, v0, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b64 s[16:17], exec -; GCN-NEXT: s_mov_b64 exec, 3 -; GCN-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s16, v0, 0 -; GCN-NEXT: v_readlane_b32 s17, v0, 1 +; GCN-NEXT: v_readlane_b32 s0, v31, 49 +; GCN-NEXT: v_readlane_b32 s1, v31, 50 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[16:17] +; GCN-NEXT: ; use s[0:1] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: BB2_2: ; %ret ; GCN-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll index a03318ead716c2..9b629a5f911103 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll @@ -77,6 +77,101 @@ endif: ; preds = %else, %if ret void } +; Force save and restore of m0 during SMEM spill +; GCN-LABEL: {{^}}m0_unavailable_spill: + +; GCN: ; def m0, 1 + +; GCN: s_mov_b32 m0, s0 +; GCN: v_interp_mov_f32 + +; GCN: ; clobber m0 + +; TOSMEM: s_mov_b32 s2, m0 +; TOSMEM: s_add_u32 m0, s3, 0x100 +; TOSMEM-NEXT: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill +; TOSMEM: s_mov_b32 m0, s2 + +; TOSMEM: s_mov_b64 exec, +; TOSMEM: s_cbranch_execz +; TOSMEM: s_branch + +; TOSMEM: BB{{[0-9]+_[0-9]+}}: +; TOSMEM: s_add_u32 m0, s3, 0x100 +; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte 
Folded Reload + +; GCN-NOT: v_readlane_b32 m0 +; GCN-NOT: s_buffer_store_dword m0 +; GCN-NOT: s_buffer_load_dword m0 +define amdgpu_kernel void @m0_unavailable_spill(i32 %m0.arg) #0 { +main_body: + %m0 = call i32 asm sideeffect "; def $0, 1", "={m0}"() #0 + %tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %m0.arg) + call void asm sideeffect "; clobber $0", "~{m0}"() #0 + %cmp = fcmp ueq float 0.000000e+00, %tmp + br i1 %cmp, label %if, label %else + +if: ; preds = %main_body + store volatile i32 8, i32 addrspace(1)* undef + br label %endif + +else: ; preds = %main_body + store volatile i32 11, i32 addrspace(1)* undef + br label %endif + +endif: + ret void +} + +; GCN-LABEL: {{^}}restore_m0_lds: +; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]] +; TOSMEM: s_cmp_eq_u32 +; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it +; FIXME-TOSMEM-NOT: m0 +; TOSMEM: s_add_u32 m0, s3, 0x100 +; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill +; FIXME-TOSMEM-NOT: m0 +; TOSMEM: s_add_u32 m0, s3, 0x200 +; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill +; FIXME-TOSMEM-NOT: m0 +; TOSMEM: s_cbranch_scc1 + +; TOSMEM: s_mov_b32 m0, -1 + +; TOSMEM: s_mov_b32 s2, m0 +; TOSMEM: s_add_u32 m0, s3, 0x200 +; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload +; TOSMEM: s_mov_b32 m0, s2 +; TOSMEM: s_waitcnt lgkmcnt(0) + +; TOSMEM: ds_write_b64 + +; FIXME-TOSMEM-NOT: m0 +; TOSMEM: s_add_u32 m0, s3, 0x100 +; TOSMEM: s_buffer_load_dword s2, s[88:91], m0 ; 4-byte Folded Reload +; FIXME-TOSMEM-NOT: m0 +; TOSMEM: s_waitcnt lgkmcnt(0) +; TOSMEM-NOT: m0 +; TOSMEM: s_mov_b32 m0, s2 +; TOSMEM: ; use m0 + +; TOSMEM: s_dcache_wb +; TOSMEM: s_endpgm +define amdgpu_kernel void @restore_m0_lds(i32 %arg) { + %m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0 + %sval = load volatile i64, i64 addrspace(4)* undef + %cmp = icmp eq i32 %arg, 0 + br i1 %cmp, label %ret, label %bb + +bb: + store volatile i64 %sval, i64 addrspace(3)* undef + call void asm sideeffect "; use $0", "{m0}"(i32 %m0) #0 + br label %ret + +ret: + ret void +} + declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0 diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll index c69a9f58965e71..5fcbfea19971e0 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll @@ -90,10 +90,10 @@ define i32 @called(i32 %a) noinline { } define amdgpu_kernel void @call(<4 x i32> inreg %tmp14, i32 inreg %arg) { -; GFX9-O0: v_mov_b32_e32 v0, s2 +; GFX9-O0: v_mov_b32_e32 v0, s0 ; GFX9-O3: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s1 ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_not_b64 exec, exec %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0) @@ -136,8 +136,8 @@ define amdgpu_kernel void @call_i64(<4 x i32> inreg %tmp14, i64 inreg %arg) { ; GFX9-O0: buffer_store_dword v1 ; GFX9: s_swappc_b64 %tmp134 = call i64 @called_i64(i64 %tmp107) -; GFX9-O0: buffer_load_dword v6 -; GFX9-O0: buffer_load_dword v7 +; GFX9-O0: buffer_load_dword v4 +; GFX9-O0: buffer_load_dword v5 %tmp136 = add i64 %tmp134, %tmp107 %tmp137 = tail call i64 
@llvm.amdgcn.wwm.i64(i64 %tmp136) %tmp138 = bitcast i64 %tmp137 to <2 x i32> diff --git a/llvm/test/CodeGen/ARM/legalize-bitcast.ll b/llvm/test/CodeGen/ARM/legalize-bitcast.ll index 478ff985bf4755..529775df5fd7d0 100644 --- a/llvm/test/CodeGen/ARM/legalize-bitcast.ll +++ b/llvm/test/CodeGen/ARM/legalize-bitcast.ll @@ -49,9 +49,9 @@ define i16 @int_to_vec(i80 %in) { ; CHECK-NEXT: vmov.32 d16[0], r0 ; CHECK-NEXT: @ implicit-def: $q9 ; CHECK-NEXT: vmov.f64 d18, d16 -; CHECK-NEXT: vrev32.16 q9, q9 -; CHECK-NEXT: @ kill: def $d18 killed $d18 killed $q9 -; CHECK-NEXT: vmov.u16 r0, d18[0] +; CHECK-NEXT: vrev32.16 q8, q9 +; CHECK-NEXT: @ kill: def $d16 killed $d16 killed $q8 +; CHECK-NEXT: vmov.u16 r0, d16[0] ; CHECK-NEXT: bx lr %vec = bitcast i80 %in to <5 x i16> %e0 = extractelement <5 x i16> %vec, i32 0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll index c63f24ea692ce7..a98c6eb9fd6cb8 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll @@ -235,15 +235,15 @@ define i32 @f64tou32(double %a) { ; FP32-NEXT: mfc1 $1, $f0 ; FP32-NEXT: lui $2, 16864 ; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f2 -; FP32-NEXT: mtc1 $2, $f3 -; FP32-NEXT: sub.d $f4, $f12, $f2 -; FP32-NEXT: trunc.w.d $f0, $f4 -; FP32-NEXT: mfc1 $2, $f0 +; FP32-NEXT: mtc1 $3, $f0 +; FP32-NEXT: mtc1 $2, $f1 +; FP32-NEXT: sub.d $f2, $f12, $f0 +; FP32-NEXT: trunc.w.d $f2, $f2 +; FP32-NEXT: mfc1 $2, $f2 ; FP32-NEXT: lui $3, 32768 ; FP32-NEXT: xor $2, $2, $3 ; FP32-NEXT: addiu $3, $zero, 1 -; FP32-NEXT: c.ult.d $f12, $f2 +; FP32-NEXT: c.ult.d $f12, $f0 ; FP32-NEXT: movf $3, $zero, $fcc0 ; FP32-NEXT: andi $3, $3, 1 ; FP32-NEXT: movn $2, $1, $3 @@ -256,15 +256,15 @@ define i32 @f64tou32(double %a) { ; FP64-NEXT: mfc1 $1, $f0 ; FP64-NEXT: lui $2, 16864 ; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f1 -; FP64-NEXT: mthc1 $2, $f1 -; FP64-NEXT: sub.d $f2, $f12, $f1 -; FP64-NEXT: trunc.w.d $f0, $f2 -; FP64-NEXT: mfc1 $2, $f0 +; FP64-NEXT: mtc1 $3, $f0 +; FP64-NEXT: mthc1 $2, $f0 +; FP64-NEXT: sub.d $f1, $f12, $f0 +; FP64-NEXT: trunc.w.d $f1, $f1 +; FP64-NEXT: mfc1 $2, $f1 ; FP64-NEXT: lui $3, 32768 ; FP64-NEXT: xor $2, $2, $3 ; FP64-NEXT: addiu $3, $zero, 1 -; FP64-NEXT: c.ult.d $f12, $f1 +; FP64-NEXT: c.ult.d $f12, $f0 ; FP64-NEXT: movf $3, $zero, $fcc0 ; FP64-NEXT: andi $3, $3, 1 ; FP64-NEXT: movn $2, $1, $3 @@ -282,15 +282,15 @@ define zeroext i16 @f64tou16(double %a) { ; FP32-NEXT: mfc1 $1, $f0 ; FP32-NEXT: lui $2, 16864 ; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f2 -; FP32-NEXT: mtc1 $2, $f3 -; FP32-NEXT: sub.d $f4, $f12, $f2 -; FP32-NEXT: trunc.w.d $f0, $f4 -; FP32-NEXT: mfc1 $2, $f0 +; FP32-NEXT: mtc1 $3, $f0 +; FP32-NEXT: mtc1 $2, $f1 +; FP32-NEXT: sub.d $f2, $f12, $f0 +; FP32-NEXT: trunc.w.d $f2, $f2 +; FP32-NEXT: mfc1 $2, $f2 ; FP32-NEXT: lui $3, 32768 ; FP32-NEXT: xor $2, $2, $3 ; FP32-NEXT: addiu $3, $zero, 1 -; FP32-NEXT: c.ult.d $f12, $f2 +; FP32-NEXT: c.ult.d $f12, $f0 ; FP32-NEXT: movf $3, $zero, $fcc0 ; FP32-NEXT: andi $3, $3, 1 ; FP32-NEXT: movn $2, $1, $3 @@ -304,15 +304,15 @@ define zeroext i16 @f64tou16(double %a) { ; FP64-NEXT: mfc1 $1, $f0 ; FP64-NEXT: lui $2, 16864 ; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f1 -; FP64-NEXT: mthc1 $2, $f1 -; FP64-NEXT: sub.d $f2, $f12, $f1 -; FP64-NEXT: trunc.w.d $f0, $f2 -; FP64-NEXT: mfc1 $2, $f0 +; FP64-NEXT: mtc1 $3, $f0 +; FP64-NEXT: mthc1 $2, $f0 +; FP64-NEXT: sub.d 
$f1, $f12, $f0 +; FP64-NEXT: trunc.w.d $f1, $f1 +; FP64-NEXT: mfc1 $2, $f1 ; FP64-NEXT: lui $3, 32768 ; FP64-NEXT: xor $2, $2, $3 ; FP64-NEXT: addiu $3, $zero, 1 -; FP64-NEXT: c.ult.d $f12, $f1 +; FP64-NEXT: c.ult.d $f12, $f0 ; FP64-NEXT: movf $3, $zero, $fcc0 ; FP64-NEXT: andi $3, $3, 1 ; FP64-NEXT: movn $2, $1, $3 @@ -331,15 +331,15 @@ define zeroext i8 @f64tou8(double %a) { ; FP32-NEXT: mfc1 $1, $f0 ; FP32-NEXT: lui $2, 16864 ; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f2 -; FP32-NEXT: mtc1 $2, $f3 -; FP32-NEXT: sub.d $f4, $f12, $f2 -; FP32-NEXT: trunc.w.d $f0, $f4 -; FP32-NEXT: mfc1 $2, $f0 +; FP32-NEXT: mtc1 $3, $f0 +; FP32-NEXT: mtc1 $2, $f1 +; FP32-NEXT: sub.d $f2, $f12, $f0 +; FP32-NEXT: trunc.w.d $f2, $f2 +; FP32-NEXT: mfc1 $2, $f2 ; FP32-NEXT: lui $3, 32768 ; FP32-NEXT: xor $2, $2, $3 ; FP32-NEXT: addiu $3, $zero, 1 -; FP32-NEXT: c.ult.d $f12, $f2 +; FP32-NEXT: c.ult.d $f12, $f0 ; FP32-NEXT: movf $3, $zero, $fcc0 ; FP32-NEXT: andi $3, $3, 1 ; FP32-NEXT: movn $2, $1, $3 @@ -353,15 +353,15 @@ define zeroext i8 @f64tou8(double %a) { ; FP64-NEXT: mfc1 $1, $f0 ; FP64-NEXT: lui $2, 16864 ; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f1 -; FP64-NEXT: mthc1 $2, $f1 -; FP64-NEXT: sub.d $f2, $f12, $f1 -; FP64-NEXT: trunc.w.d $f0, $f2 -; FP64-NEXT: mfc1 $2, $f0 +; FP64-NEXT: mtc1 $3, $f0 +; FP64-NEXT: mthc1 $2, $f0 +; FP64-NEXT: sub.d $f1, $f12, $f0 +; FP64-NEXT: trunc.w.d $f1, $f1 +; FP64-NEXT: mfc1 $2, $f1 ; FP64-NEXT: lui $3, 32768 ; FP64-NEXT: xor $2, $2, $3 ; FP64-NEXT: addiu $3, $zero, 1 -; FP64-NEXT: c.ult.d $f12, $f1 +; FP64-NEXT: c.ult.d $f12, $f0 ; FP64-NEXT: movf $3, $zero, $fcc0 ; FP64-NEXT: andi $3, $3, 1 ; FP64-NEXT: movn $2, $1, $3 diff --git a/llvm/test/CodeGen/Mips/atomic-min-max.ll b/llvm/test/CodeGen/Mips/atomic-min-max.ll index a6200851940cd4..646af650c00e79 100644 --- a/llvm/test/CodeGen/Mips/atomic-min-max.ll +++ b/llvm/test/CodeGen/Mips/atomic-min-max.ll @@ -1154,26 +1154,26 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 65535 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $6, $zero, $3 +; MIPS64-NEXT: nor $4, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB4_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $8, 0($1) -; MIPS64-NEXT: slt $11, $8, $5 -; MIPS64-NEXT: move $9, $8 -; MIPS64-NEXT: movn $9, $5, $11 -; MIPS64-NEXT: and $9, $9, $3 -; MIPS64-NEXT: and $10, $8, $6 -; MIPS64-NEXT: or $10, $10, $9 -; MIPS64-NEXT: sc $10, 0($1) -; MIPS64-NEXT: beqz $10, .LBB4_1 +; MIPS64-NEXT: ll $7, 0($1) +; MIPS64-NEXT: slt $10, $7, $5 +; MIPS64-NEXT: move $8, $7 +; MIPS64-NEXT: movn $8, $5, $10 +; MIPS64-NEXT: and $8, $8, $3 +; MIPS64-NEXT: and $9, $7, $4 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($1) +; MIPS64-NEXT: beqz $9, .LBB4_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $7, $8, $3 -; MIPS64-NEXT: srlv $7, $7, $2 -; MIPS64-NEXT: seh $7, $7 +; MIPS64-NEXT: and $6, $7, $3 +; MIPS64-NEXT: srlv $6, $6, $2 +; MIPS64-NEXT: seh $6, $6 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1194,26 +1194,26 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 65535 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $6, $zero, 
$3 +; MIPS64R6-NEXT: nor $4, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB4_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $8, 0($1) -; MIPS64R6-NEXT: slt $11, $8, $5 -; MIPS64R6-NEXT: seleqz $9, $8, $11 -; MIPS64R6-NEXT: selnez $11, $5, $11 -; MIPS64R6-NEXT: or $9, $9, $11 -; MIPS64R6-NEXT: and $9, $9, $3 -; MIPS64R6-NEXT: and $10, $8, $6 -; MIPS64R6-NEXT: or $10, $10, $9 -; MIPS64R6-NEXT: sc $10, 0($1) -; MIPS64R6-NEXT: beqzc $10, .LBB4_1 +; MIPS64R6-NEXT: ll $7, 0($1) +; MIPS64R6-NEXT: slt $10, $7, $5 +; MIPS64R6-NEXT: seleqz $8, $7, $10 +; MIPS64R6-NEXT: selnez $10, $5, $10 +; MIPS64R6-NEXT: or $8, $8, $10 +; MIPS64R6-NEXT: and $8, $8, $3 +; MIPS64R6-NEXT: and $9, $7, $4 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($1) +; MIPS64R6-NEXT: beqzc $9, .LBB4_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $7, $8, $3 -; MIPS64R6-NEXT: srlv $7, $7, $2 -; MIPS64R6-NEXT: seh $7, $7 +; MIPS64R6-NEXT: and $6, $7, $3 +; MIPS64R6-NEXT: srlv $6, $6, $2 +; MIPS64R6-NEXT: seh $6, $6 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1232,28 +1232,28 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 65535 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $6, $zero, $3 +; MIPS64EL-NEXT: nor $4, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB4_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $8, 0($1) -; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: ll $7, 0($1) +; MIPS64EL-NEXT: and $7, $7, $3 ; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $11, $8, $5 -; MIPS64EL-NEXT: move $9, $8 -; MIPS64EL-NEXT: movn $9, $5, $11 -; MIPS64EL-NEXT: and $9, $9, $3 -; MIPS64EL-NEXT: and $10, $8, $6 -; MIPS64EL-NEXT: or $10, $10, $9 -; MIPS64EL-NEXT: sc $10, 0($1) -; MIPS64EL-NEXT: beqz $10, .LBB4_1 +; MIPS64EL-NEXT: slt $10, $7, $5 +; MIPS64EL-NEXT: move $8, $7 +; MIPS64EL-NEXT: movn $8, $5, $10 +; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: and $9, $7, $4 +; MIPS64EL-NEXT: or $9, $9, $8 +; MIPS64EL-NEXT: sc $9, 0($1) +; MIPS64EL-NEXT: beqz $9, .LBB4_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $7, $8, $3 -; MIPS64EL-NEXT: srlv $7, $7, $2 -; MIPS64EL-NEXT: seh $7, $7 +; MIPS64EL-NEXT: and $6, $7, $3 +; MIPS64EL-NEXT: srlv $6, $6, $2 +; MIPS64EL-NEXT: seh $6, $6 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1273,28 +1273,28 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $6, $zero, $3 +; MIPS64ELR6-NEXT: nor $4, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB4_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $8, 0($1) -; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: ll $7, 0($1) +; MIPS64ELR6-NEXT: and $7, $7, $3 ; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $11, $8, $5 -; MIPS64ELR6-NEXT: seleqz $9, $8, $11 -; 
MIPS64ELR6-NEXT: selnez $11, $5, $11 -; MIPS64ELR6-NEXT: or $9, $9, $11 -; MIPS64ELR6-NEXT: and $9, $9, $3 -; MIPS64ELR6-NEXT: and $10, $8, $6 -; MIPS64ELR6-NEXT: or $10, $10, $9 -; MIPS64ELR6-NEXT: sc $10, 0($1) -; MIPS64ELR6-NEXT: beqzc $10, .LBB4_1 +; MIPS64ELR6-NEXT: slt $10, $7, $5 +; MIPS64ELR6-NEXT: seleqz $8, $7, $10 +; MIPS64ELR6-NEXT: selnez $10, $5, $10 +; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: and $9, $7, $4 +; MIPS64ELR6-NEXT: or $9, $9, $8 +; MIPS64ELR6-NEXT: sc $9, 0($1) +; MIPS64ELR6-NEXT: beqzc $9, .LBB4_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $7, $8, $3 -; MIPS64ELR6-NEXT: srlv $7, $7, $2 -; MIPS64ELR6-NEXT: seh $7, $7 +; MIPS64ELR6-NEXT: and $6, $7, $3 +; MIPS64ELR6-NEXT: srlv $6, $6, $2 +; MIPS64ELR6-NEXT: seh $6, $6 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1635,26 +1635,26 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 65535 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $6, $zero, $3 +; MIPS64-NEXT: nor $4, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB5_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $8, 0($1) -; MIPS64-NEXT: slt $11, $8, $5 -; MIPS64-NEXT: move $9, $8 -; MIPS64-NEXT: movz $9, $5, $11 -; MIPS64-NEXT: and $9, $9, $3 -; MIPS64-NEXT: and $10, $8, $6 -; MIPS64-NEXT: or $10, $10, $9 -; MIPS64-NEXT: sc $10, 0($1) -; MIPS64-NEXT: beqz $10, .LBB5_1 +; MIPS64-NEXT: ll $7, 0($1) +; MIPS64-NEXT: slt $10, $7, $5 +; MIPS64-NEXT: move $8, $7 +; MIPS64-NEXT: movz $8, $5, $10 +; MIPS64-NEXT: and $8, $8, $3 +; MIPS64-NEXT: and $9, $7, $4 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($1) +; MIPS64-NEXT: beqz $9, .LBB5_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $7, $8, $3 -; MIPS64-NEXT: srlv $7, $7, $2 -; MIPS64-NEXT: seh $7, $7 +; MIPS64-NEXT: and $6, $7, $3 +; MIPS64-NEXT: srlv $6, $6, $2 +; MIPS64-NEXT: seh $6, $6 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1675,26 +1675,26 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 65535 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $6, $zero, $3 +; MIPS64R6-NEXT: nor $4, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB5_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $8, 0($1) -; MIPS64R6-NEXT: slt $11, $8, $5 -; MIPS64R6-NEXT: selnez $9, $8, $11 -; MIPS64R6-NEXT: seleqz $11, $5, $11 -; MIPS64R6-NEXT: or $9, $9, $11 -; MIPS64R6-NEXT: and $9, $9, $3 -; MIPS64R6-NEXT: and $10, $8, $6 -; MIPS64R6-NEXT: or $10, $10, $9 -; MIPS64R6-NEXT: sc $10, 0($1) -; MIPS64R6-NEXT: beqzc $10, .LBB5_1 +; MIPS64R6-NEXT: ll $7, 0($1) +; MIPS64R6-NEXT: slt $10, $7, $5 +; MIPS64R6-NEXT: selnez $8, $7, $10 +; MIPS64R6-NEXT: seleqz $10, $5, $10 +; MIPS64R6-NEXT: or $8, $8, $10 +; MIPS64R6-NEXT: and $8, $8, $3 +; MIPS64R6-NEXT: and $9, $7, $4 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($1) +; MIPS64R6-NEXT: beqzc $9, .LBB5_1 
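; The sequence checked above is the standard ll/sc retry loop MIPS64R6 uses
; to emulate a subword atomic min: ll loads the containing 32-bit word, slt
; plus selnez/seleqz select the smaller value, the and/or pair splices it
; back into the word under the halfword mask, and sc re-stores it, with
; beqzc looping to .LBB5_1 until the conditional store succeeds. The updated
; CHECK lines only renumber the temporaries ($6-$11 down to $4-$10); the
; loop structure itself is unchanged.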
; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $7, $8, $3 -; MIPS64R6-NEXT: srlv $7, $7, $2 -; MIPS64R6-NEXT: seh $7, $7 +; MIPS64R6-NEXT: and $6, $7, $3 +; MIPS64R6-NEXT: srlv $6, $6, $2 +; MIPS64R6-NEXT: seh $6, $6 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1713,28 +1713,28 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 65535 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $6, $zero, $3 +; MIPS64EL-NEXT: nor $4, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB5_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $8, 0($1) -; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: ll $7, 0($1) +; MIPS64EL-NEXT: and $7, $7, $3 ; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $11, $8, $5 -; MIPS64EL-NEXT: move $9, $8 -; MIPS64EL-NEXT: movz $9, $5, $11 -; MIPS64EL-NEXT: and $9, $9, $3 -; MIPS64EL-NEXT: and $10, $8, $6 -; MIPS64EL-NEXT: or $10, $10, $9 -; MIPS64EL-NEXT: sc $10, 0($1) -; MIPS64EL-NEXT: beqz $10, .LBB5_1 +; MIPS64EL-NEXT: slt $10, $7, $5 +; MIPS64EL-NEXT: move $8, $7 +; MIPS64EL-NEXT: movz $8, $5, $10 +; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: and $9, $7, $4 +; MIPS64EL-NEXT: or $9, $9, $8 +; MIPS64EL-NEXT: sc $9, 0($1) +; MIPS64EL-NEXT: beqz $9, .LBB5_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $7, $8, $3 -; MIPS64EL-NEXT: srlv $7, $7, $2 -; MIPS64EL-NEXT: seh $7, $7 +; MIPS64EL-NEXT: and $6, $7, $3 +; MIPS64EL-NEXT: srlv $6, $6, $2 +; MIPS64EL-NEXT: seh $6, $6 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1754,28 +1754,28 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $6, $zero, $3 +; MIPS64ELR6-NEXT: nor $4, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB5_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $8, 0($1) -; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: ll $7, 0($1) +; MIPS64ELR6-NEXT: and $7, $7, $3 ; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $11, $8, $5 -; MIPS64ELR6-NEXT: selnez $9, $8, $11 -; MIPS64ELR6-NEXT: seleqz $11, $5, $11 -; MIPS64ELR6-NEXT: or $9, $9, $11 -; MIPS64ELR6-NEXT: and $9, $9, $3 -; MIPS64ELR6-NEXT: and $10, $8, $6 -; MIPS64ELR6-NEXT: or $10, $10, $9 -; MIPS64ELR6-NEXT: sc $10, 0($1) -; MIPS64ELR6-NEXT: beqzc $10, .LBB5_1 +; MIPS64ELR6-NEXT: slt $10, $7, $5 +; MIPS64ELR6-NEXT: selnez $8, $7, $10 +; MIPS64ELR6-NEXT: seleqz $10, $5, $10 +; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: and $9, $7, $4 +; MIPS64ELR6-NEXT: or $9, $9, $8 +; MIPS64ELR6-NEXT: sc $9, 0($1) +; MIPS64ELR6-NEXT: beqzc $9, .LBB5_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $7, $8, $3 -; MIPS64ELR6-NEXT: srlv $7, $7, $2 -; MIPS64ELR6-NEXT: seh $7, $7 +; MIPS64ELR6-NEXT: and $6, $7, $3 +; MIPS64ELR6-NEXT: srlv $6, $6, $2 +; MIPS64ELR6-NEXT: seh $6, $6 ; MIPS64ELR6-NEXT: # 
%bb.3: # %entry -; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2116,26 +2116,26 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 65535 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $6, $zero, $3 +; MIPS64-NEXT: nor $4, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB6_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $8, 0($1) -; MIPS64-NEXT: sltu $11, $8, $5 -; MIPS64-NEXT: move $9, $8 -; MIPS64-NEXT: movn $9, $5, $11 -; MIPS64-NEXT: and $9, $9, $3 -; MIPS64-NEXT: and $10, $8, $6 -; MIPS64-NEXT: or $10, $10, $9 -; MIPS64-NEXT: sc $10, 0($1) -; MIPS64-NEXT: beqz $10, .LBB6_1 +; MIPS64-NEXT: ll $7, 0($1) +; MIPS64-NEXT: sltu $10, $7, $5 +; MIPS64-NEXT: move $8, $7 +; MIPS64-NEXT: movn $8, $5, $10 +; MIPS64-NEXT: and $8, $8, $3 +; MIPS64-NEXT: and $9, $7, $4 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($1) +; MIPS64-NEXT: beqz $9, .LBB6_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $7, $8, $3 -; MIPS64-NEXT: srlv $7, $7, $2 -; MIPS64-NEXT: seh $7, $7 +; MIPS64-NEXT: and $6, $7, $3 +; MIPS64-NEXT: srlv $6, $6, $2 +; MIPS64-NEXT: seh $6, $6 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2156,26 +2156,26 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 65535 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $6, $zero, $3 +; MIPS64R6-NEXT: nor $4, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB6_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $8, 0($1) -; MIPS64R6-NEXT: sltu $11, $8, $5 -; MIPS64R6-NEXT: seleqz $9, $8, $11 -; MIPS64R6-NEXT: selnez $11, $5, $11 -; MIPS64R6-NEXT: or $9, $9, $11 -; MIPS64R6-NEXT: and $9, $9, $3 -; MIPS64R6-NEXT: and $10, $8, $6 -; MIPS64R6-NEXT: or $10, $10, $9 -; MIPS64R6-NEXT: sc $10, 0($1) -; MIPS64R6-NEXT: beqzc $10, .LBB6_1 +; MIPS64R6-NEXT: ll $7, 0($1) +; MIPS64R6-NEXT: sltu $10, $7, $5 +; MIPS64R6-NEXT: seleqz $8, $7, $10 +; MIPS64R6-NEXT: selnez $10, $5, $10 +; MIPS64R6-NEXT: or $8, $8, $10 +; MIPS64R6-NEXT: and $8, $8, $3 +; MIPS64R6-NEXT: and $9, $7, $4 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($1) +; MIPS64R6-NEXT: beqzc $9, .LBB6_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $7, $8, $3 -; MIPS64R6-NEXT: srlv $7, $7, $2 -; MIPS64R6-NEXT: seh $7, $7 +; MIPS64R6-NEXT: and $6, $7, $3 +; MIPS64R6-NEXT: srlv $6, $6, $2 +; MIPS64R6-NEXT: seh $6, $6 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2194,28 +2194,28 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 65535 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $6, $zero, $3 +; MIPS64EL-NEXT: nor $4, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB6_1: # %entry ; MIPS64EL-NEXT: 
# =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $8, 0($1) -; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: ll $7, 0($1) +; MIPS64EL-NEXT: and $7, $7, $3 ; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $11, $8, $5 -; MIPS64EL-NEXT: move $9, $8 -; MIPS64EL-NEXT: movn $9, $5, $11 -; MIPS64EL-NEXT: and $9, $9, $3 -; MIPS64EL-NEXT: and $10, $8, $6 -; MIPS64EL-NEXT: or $10, $10, $9 -; MIPS64EL-NEXT: sc $10, 0($1) -; MIPS64EL-NEXT: beqz $10, .LBB6_1 +; MIPS64EL-NEXT: sltu $10, $7, $5 +; MIPS64EL-NEXT: move $8, $7 +; MIPS64EL-NEXT: movn $8, $5, $10 +; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: and $9, $7, $4 +; MIPS64EL-NEXT: or $9, $9, $8 +; MIPS64EL-NEXT: sc $9, 0($1) +; MIPS64EL-NEXT: beqz $9, .LBB6_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $7, $8, $3 -; MIPS64EL-NEXT: srlv $7, $7, $2 -; MIPS64EL-NEXT: seh $7, $7 +; MIPS64EL-NEXT: and $6, $7, $3 +; MIPS64EL-NEXT: srlv $6, $6, $2 +; MIPS64EL-NEXT: seh $6, $6 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2235,28 +2235,28 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $6, $zero, $3 +; MIPS64ELR6-NEXT: nor $4, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB6_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $8, 0($1) -; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: ll $7, 0($1) +; MIPS64ELR6-NEXT: and $7, $7, $3 ; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $11, $8, $5 -; MIPS64ELR6-NEXT: seleqz $9, $8, $11 -; MIPS64ELR6-NEXT: selnez $11, $5, $11 -; MIPS64ELR6-NEXT: or $9, $9, $11 -; MIPS64ELR6-NEXT: and $9, $9, $3 -; MIPS64ELR6-NEXT: and $10, $8, $6 -; MIPS64ELR6-NEXT: or $10, $10, $9 -; MIPS64ELR6-NEXT: sc $10, 0($1) -; MIPS64ELR6-NEXT: beqzc $10, .LBB6_1 +; MIPS64ELR6-NEXT: sltu $10, $7, $5 +; MIPS64ELR6-NEXT: seleqz $8, $7, $10 +; MIPS64ELR6-NEXT: selnez $10, $5, $10 +; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: and $9, $7, $4 +; MIPS64ELR6-NEXT: or $9, $9, $8 +; MIPS64ELR6-NEXT: sc $9, 0($1) +; MIPS64ELR6-NEXT: beqzc $9, .LBB6_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $7, $8, $3 -; MIPS64ELR6-NEXT: srlv $7, $7, $2 -; MIPS64ELR6-NEXT: seh $7, $7 +; MIPS64ELR6-NEXT: and $6, $7, $3 +; MIPS64ELR6-NEXT: srlv $6, $6, $2 +; MIPS64ELR6-NEXT: seh $6, $6 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2597,26 +2597,26 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 65535 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $6, $zero, $3 +; MIPS64-NEXT: nor $4, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB7_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $8, 0($1) -; MIPS64-NEXT: sltu $11, $8, $5 -; MIPS64-NEXT: move $9, $8 -; MIPS64-NEXT: movz $9, $5, $11 -; MIPS64-NEXT: and $9, $9, $3 -; MIPS64-NEXT: and $10, $8, $6 -; MIPS64-NEXT: or $10, 
$10, $9 -; MIPS64-NEXT: sc $10, 0($1) -; MIPS64-NEXT: beqz $10, .LBB7_1 +; MIPS64-NEXT: ll $7, 0($1) +; MIPS64-NEXT: sltu $10, $7, $5 +; MIPS64-NEXT: move $8, $7 +; MIPS64-NEXT: movz $8, $5, $10 +; MIPS64-NEXT: and $8, $8, $3 +; MIPS64-NEXT: and $9, $7, $4 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($1) +; MIPS64-NEXT: beqz $9, .LBB7_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $7, $8, $3 -; MIPS64-NEXT: srlv $7, $7, $2 -; MIPS64-NEXT: seh $7, $7 +; MIPS64-NEXT: and $6, $7, $3 +; MIPS64-NEXT: srlv $6, $6, $2 +; MIPS64-NEXT: seh $6, $6 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2637,26 +2637,26 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 65535 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $6, $zero, $3 +; MIPS64R6-NEXT: nor $4, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB7_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $8, 0($1) -; MIPS64R6-NEXT: sltu $11, $8, $5 -; MIPS64R6-NEXT: selnez $9, $8, $11 -; MIPS64R6-NEXT: seleqz $11, $5, $11 -; MIPS64R6-NEXT: or $9, $9, $11 -; MIPS64R6-NEXT: and $9, $9, $3 -; MIPS64R6-NEXT: and $10, $8, $6 -; MIPS64R6-NEXT: or $10, $10, $9 -; MIPS64R6-NEXT: sc $10, 0($1) -; MIPS64R6-NEXT: beqzc $10, .LBB7_1 +; MIPS64R6-NEXT: ll $7, 0($1) +; MIPS64R6-NEXT: sltu $10, $7, $5 +; MIPS64R6-NEXT: selnez $8, $7, $10 +; MIPS64R6-NEXT: seleqz $10, $5, $10 +; MIPS64R6-NEXT: or $8, $8, $10 +; MIPS64R6-NEXT: and $8, $8, $3 +; MIPS64R6-NEXT: and $9, $7, $4 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($1) +; MIPS64R6-NEXT: beqzc $9, .LBB7_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $7, $8, $3 -; MIPS64R6-NEXT: srlv $7, $7, $2 -; MIPS64R6-NEXT: seh $7, $7 +; MIPS64R6-NEXT: and $6, $7, $3 +; MIPS64R6-NEXT: srlv $6, $6, $2 +; MIPS64R6-NEXT: seh $6, $6 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2675,28 +2675,28 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 65535 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $6, $zero, $3 +; MIPS64EL-NEXT: nor $4, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB7_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $8, 0($1) -; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: ll $7, 0($1) +; MIPS64EL-NEXT: and $7, $7, $3 ; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $11, $8, $5 -; MIPS64EL-NEXT: move $9, $8 -; MIPS64EL-NEXT: movz $9, $5, $11 -; MIPS64EL-NEXT: and $9, $9, $3 -; MIPS64EL-NEXT: and $10, $8, $6 -; MIPS64EL-NEXT: or $10, $10, $9 -; MIPS64EL-NEXT: sc $10, 0($1) -; MIPS64EL-NEXT: beqz $10, .LBB7_1 +; MIPS64EL-NEXT: sltu $10, $7, $5 +; MIPS64EL-NEXT: move $8, $7 +; MIPS64EL-NEXT: movz $8, $5, $10 +; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: and $9, $7, $4 +; MIPS64EL-NEXT: or $9, $9, $8 +; MIPS64EL-NEXT: sc $9, 0($1) +; MIPS64EL-NEXT: beqz $9, .LBB7_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $7, $8, $3 -; 
MIPS64EL-NEXT: srlv $7, $7, $2 -; MIPS64EL-NEXT: seh $7, $7 +; MIPS64EL-NEXT: and $6, $7, $3 +; MIPS64EL-NEXT: srlv $6, $6, $2 +; MIPS64EL-NEXT: seh $6, $6 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2716,28 +2716,28 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $6, $zero, $3 +; MIPS64ELR6-NEXT: nor $4, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB7_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $8, 0($1) -; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: ll $7, 0($1) +; MIPS64ELR6-NEXT: and $7, $7, $3 ; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $11, $8, $5 -; MIPS64ELR6-NEXT: selnez $9, $8, $11 -; MIPS64ELR6-NEXT: seleqz $11, $5, $11 -; MIPS64ELR6-NEXT: or $9, $9, $11 -; MIPS64ELR6-NEXT: and $9, $9, $3 -; MIPS64ELR6-NEXT: and $10, $8, $6 -; MIPS64ELR6-NEXT: or $10, $10, $9 -; MIPS64ELR6-NEXT: sc $10, 0($1) -; MIPS64ELR6-NEXT: beqzc $10, .LBB7_1 +; MIPS64ELR6-NEXT: sltu $10, $7, $5 +; MIPS64ELR6-NEXT: selnez $8, $7, $10 +; MIPS64ELR6-NEXT: seleqz $10, $5, $10 +; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: and $9, $7, $4 +; MIPS64ELR6-NEXT: or $9, $9, $8 +; MIPS64ELR6-NEXT: sc $9, 0($1) +; MIPS64ELR6-NEXT: beqzc $9, .LBB7_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $7, $8, $3 -; MIPS64ELR6-NEXT: srlv $7, $7, $2 -; MIPS64ELR6-NEXT: seh $7, $7 +; MIPS64ELR6-NEXT: and $6, $7, $3 +; MIPS64ELR6-NEXT: srlv $6, $6, $2 +; MIPS64ELR6-NEXT: seh $6, $6 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3079,26 +3079,26 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 255 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $6, $zero, $3 +; MIPS64-NEXT: nor $4, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB8_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $8, 0($1) -; MIPS64-NEXT: slt $11, $8, $5 -; MIPS64-NEXT: move $9, $8 -; MIPS64-NEXT: movn $9, $5, $11 -; MIPS64-NEXT: and $9, $9, $3 -; MIPS64-NEXT: and $10, $8, $6 -; MIPS64-NEXT: or $10, $10, $9 -; MIPS64-NEXT: sc $10, 0($1) -; MIPS64-NEXT: beqz $10, .LBB8_1 +; MIPS64-NEXT: ll $7, 0($1) +; MIPS64-NEXT: slt $10, $7, $5 +; MIPS64-NEXT: move $8, $7 +; MIPS64-NEXT: movn $8, $5, $10 +; MIPS64-NEXT: and $8, $8, $3 +; MIPS64-NEXT: and $9, $7, $4 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($1) +; MIPS64-NEXT: beqz $9, .LBB8_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $7, $8, $3 -; MIPS64-NEXT: srlv $7, $7, $2 -; MIPS64-NEXT: seh $7, $7 +; MIPS64-NEXT: and $6, $7, $3 +; MIPS64-NEXT: srlv $6, $6, $2 +; MIPS64-NEXT: seh $6, $6 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ 
-3119,26 +3119,26 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 255 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $6, $zero, $3 +; MIPS64R6-NEXT: nor $4, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB8_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $8, 0($1) -; MIPS64R6-NEXT: slt $11, $8, $5 -; MIPS64R6-NEXT: seleqz $9, $8, $11 -; MIPS64R6-NEXT: selnez $11, $5, $11 -; MIPS64R6-NEXT: or $9, $9, $11 -; MIPS64R6-NEXT: and $9, $9, $3 -; MIPS64R6-NEXT: and $10, $8, $6 -; MIPS64R6-NEXT: or $10, $10, $9 -; MIPS64R6-NEXT: sc $10, 0($1) -; MIPS64R6-NEXT: beqzc $10, .LBB8_1 +; MIPS64R6-NEXT: ll $7, 0($1) +; MIPS64R6-NEXT: slt $10, $7, $5 +; MIPS64R6-NEXT: seleqz $8, $7, $10 +; MIPS64R6-NEXT: selnez $10, $5, $10 +; MIPS64R6-NEXT: or $8, $8, $10 +; MIPS64R6-NEXT: and $8, $8, $3 +; MIPS64R6-NEXT: and $9, $7, $4 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($1) +; MIPS64R6-NEXT: beqzc $9, .LBB8_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $7, $8, $3 -; MIPS64R6-NEXT: srlv $7, $7, $2 -; MIPS64R6-NEXT: seh $7, $7 +; MIPS64R6-NEXT: and $6, $7, $3 +; MIPS64R6-NEXT: srlv $6, $6, $2 +; MIPS64R6-NEXT: seh $6, $6 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3157,28 +3157,28 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 255 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $6, $zero, $3 +; MIPS64EL-NEXT: nor $4, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB8_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $8, 0($1) -; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: ll $7, 0($1) +; MIPS64EL-NEXT: and $7, $7, $3 ; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $11, $8, $5 -; MIPS64EL-NEXT: move $9, $8 -; MIPS64EL-NEXT: movn $9, $5, $11 -; MIPS64EL-NEXT: and $9, $9, $3 -; MIPS64EL-NEXT: and $10, $8, $6 -; MIPS64EL-NEXT: or $10, $10, $9 -; MIPS64EL-NEXT: sc $10, 0($1) -; MIPS64EL-NEXT: beqz $10, .LBB8_1 +; MIPS64EL-NEXT: slt $10, $7, $5 +; MIPS64EL-NEXT: move $8, $7 +; MIPS64EL-NEXT: movn $8, $5, $10 +; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: and $9, $7, $4 +; MIPS64EL-NEXT: or $9, $9, $8 +; MIPS64EL-NEXT: sc $9, 0($1) +; MIPS64EL-NEXT: beqz $9, .LBB8_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $7, $8, $3 -; MIPS64EL-NEXT: srlv $7, $7, $2 -; MIPS64EL-NEXT: seh $7, $7 +; MIPS64EL-NEXT: and $6, $7, $3 +; MIPS64EL-NEXT: srlv $6, $6, $2 +; MIPS64EL-NEXT: seh $6, $6 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3198,28 +3198,28 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 255 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $6, $zero, $3 +; MIPS64ELR6-NEXT: nor $4, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB8_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $8, 0($1) -; 
MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: ll $7, 0($1) +; MIPS64ELR6-NEXT: and $7, $7, $3 ; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $11, $8, $5 -; MIPS64ELR6-NEXT: seleqz $9, $8, $11 -; MIPS64ELR6-NEXT: selnez $11, $5, $11 -; MIPS64ELR6-NEXT: or $9, $9, $11 -; MIPS64ELR6-NEXT: and $9, $9, $3 -; MIPS64ELR6-NEXT: and $10, $8, $6 -; MIPS64ELR6-NEXT: or $10, $10, $9 -; MIPS64ELR6-NEXT: sc $10, 0($1) -; MIPS64ELR6-NEXT: beqzc $10, .LBB8_1 +; MIPS64ELR6-NEXT: slt $10, $7, $5 +; MIPS64ELR6-NEXT: seleqz $8, $7, $10 +; MIPS64ELR6-NEXT: selnez $10, $5, $10 +; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: and $9, $7, $4 +; MIPS64ELR6-NEXT: or $9, $9, $8 +; MIPS64ELR6-NEXT: sc $9, 0($1) +; MIPS64ELR6-NEXT: beqzc $9, .LBB8_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $7, $8, $3 -; MIPS64ELR6-NEXT: srlv $7, $7, $2 -; MIPS64ELR6-NEXT: seh $7, $7 +; MIPS64ELR6-NEXT: and $6, $7, $3 +; MIPS64ELR6-NEXT: srlv $6, $6, $2 +; MIPS64ELR6-NEXT: seh $6, $6 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3560,26 +3560,26 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 255 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $6, $zero, $3 +; MIPS64-NEXT: nor $4, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB9_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $8, 0($1) -; MIPS64-NEXT: slt $11, $8, $5 -; MIPS64-NEXT: move $9, $8 -; MIPS64-NEXT: movz $9, $5, $11 -; MIPS64-NEXT: and $9, $9, $3 -; MIPS64-NEXT: and $10, $8, $6 -; MIPS64-NEXT: or $10, $10, $9 -; MIPS64-NEXT: sc $10, 0($1) -; MIPS64-NEXT: beqz $10, .LBB9_1 +; MIPS64-NEXT: ll $7, 0($1) +; MIPS64-NEXT: slt $10, $7, $5 +; MIPS64-NEXT: move $8, $7 +; MIPS64-NEXT: movz $8, $5, $10 +; MIPS64-NEXT: and $8, $8, $3 +; MIPS64-NEXT: and $9, $7, $4 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($1) +; MIPS64-NEXT: beqz $9, .LBB9_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $7, $8, $3 -; MIPS64-NEXT: srlv $7, $7, $2 -; MIPS64-NEXT: seh $7, $7 +; MIPS64-NEXT: and $6, $7, $3 +; MIPS64-NEXT: srlv $6, $6, $2 +; MIPS64-NEXT: seh $6, $6 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3600,26 +3600,26 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 255 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $6, $zero, $3 +; MIPS64R6-NEXT: nor $4, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB9_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $8, 0($1) -; MIPS64R6-NEXT: slt $11, $8, $5 -; MIPS64R6-NEXT: selnez $9, $8, $11 -; MIPS64R6-NEXT: seleqz $11, $5, $11 -; MIPS64R6-NEXT: or $9, $9, $11 -; MIPS64R6-NEXT: and $9, $9, $3 -; MIPS64R6-NEXT: and $10, $8, $6 -; MIPS64R6-NEXT: or $10, $10, $9 -; MIPS64R6-NEXT: sc $10, 0($1) -; MIPS64R6-NEXT: beqzc $10, .LBB9_1 +; MIPS64R6-NEXT: ll $7, 0($1) +; MIPS64R6-NEXT: slt $10, $7, $5 +; MIPS64R6-NEXT: selnez $8, $7, $10 +; MIPS64R6-NEXT: seleqz $10, $5, 
$10 +; MIPS64R6-NEXT: or $8, $8, $10 +; MIPS64R6-NEXT: and $8, $8, $3 +; MIPS64R6-NEXT: and $9, $7, $4 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($1) +; MIPS64R6-NEXT: beqzc $9, .LBB9_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $7, $8, $3 -; MIPS64R6-NEXT: srlv $7, $7, $2 -; MIPS64R6-NEXT: seh $7, $7 +; MIPS64R6-NEXT: and $6, $7, $3 +; MIPS64R6-NEXT: srlv $6, $6, $2 +; MIPS64R6-NEXT: seh $6, $6 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3638,28 +3638,28 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 255 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $6, $zero, $3 +; MIPS64EL-NEXT: nor $4, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB9_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $8, 0($1) -; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: ll $7, 0($1) +; MIPS64EL-NEXT: and $7, $7, $3 ; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $11, $8, $5 -; MIPS64EL-NEXT: move $9, $8 -; MIPS64EL-NEXT: movz $9, $5, $11 -; MIPS64EL-NEXT: and $9, $9, $3 -; MIPS64EL-NEXT: and $10, $8, $6 -; MIPS64EL-NEXT: or $10, $10, $9 -; MIPS64EL-NEXT: sc $10, 0($1) -; MIPS64EL-NEXT: beqz $10, .LBB9_1 +; MIPS64EL-NEXT: slt $10, $7, $5 +; MIPS64EL-NEXT: move $8, $7 +; MIPS64EL-NEXT: movz $8, $5, $10 +; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: and $9, $7, $4 +; MIPS64EL-NEXT: or $9, $9, $8 +; MIPS64EL-NEXT: sc $9, 0($1) +; MIPS64EL-NEXT: beqz $9, .LBB9_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $7, $8, $3 -; MIPS64EL-NEXT: srlv $7, $7, $2 -; MIPS64EL-NEXT: seh $7, $7 +; MIPS64EL-NEXT: and $6, $7, $3 +; MIPS64EL-NEXT: srlv $6, $6, $2 +; MIPS64EL-NEXT: seh $6, $6 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3679,28 +3679,28 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 255 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $6, $zero, $3 +; MIPS64ELR6-NEXT: nor $4, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB9_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $8, 0($1) -; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: ll $7, 0($1) +; MIPS64ELR6-NEXT: and $7, $7, $3 ; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $11, $8, $5 -; MIPS64ELR6-NEXT: selnez $9, $8, $11 -; MIPS64ELR6-NEXT: seleqz $11, $5, $11 -; MIPS64ELR6-NEXT: or $9, $9, $11 -; MIPS64ELR6-NEXT: and $9, $9, $3 -; MIPS64ELR6-NEXT: and $10, $8, $6 -; MIPS64ELR6-NEXT: or $10, $10, $9 -; MIPS64ELR6-NEXT: sc $10, 0($1) -; MIPS64ELR6-NEXT: beqzc $10, .LBB9_1 +; MIPS64ELR6-NEXT: slt $10, $7, $5 +; MIPS64ELR6-NEXT: selnez $8, $7, $10 +; MIPS64ELR6-NEXT: seleqz $10, $5, $10 +; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: and $9, $7, $4 +; MIPS64ELR6-NEXT: or $9, $9, $8 +; MIPS64ELR6-NEXT: sc $9, 0($1) +; MIPS64ELR6-NEXT: beqzc $9, .LBB9_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $7, $8, $3 -; 
MIPS64ELR6-NEXT: srlv $7, $7, $2 -; MIPS64ELR6-NEXT: seh $7, $7 +; MIPS64ELR6-NEXT: and $6, $7, $3 +; MIPS64ELR6-NEXT: srlv $6, $6, $2 +; MIPS64ELR6-NEXT: seh $6, $6 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4041,26 +4041,26 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 255 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $6, $zero, $3 +; MIPS64-NEXT: nor $4, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB10_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $8, 0($1) -; MIPS64-NEXT: sltu $11, $8, $5 -; MIPS64-NEXT: move $9, $8 -; MIPS64-NEXT: movn $9, $5, $11 -; MIPS64-NEXT: and $9, $9, $3 -; MIPS64-NEXT: and $10, $8, $6 -; MIPS64-NEXT: or $10, $10, $9 -; MIPS64-NEXT: sc $10, 0($1) -; MIPS64-NEXT: beqz $10, .LBB10_1 +; MIPS64-NEXT: ll $7, 0($1) +; MIPS64-NEXT: sltu $10, $7, $5 +; MIPS64-NEXT: move $8, $7 +; MIPS64-NEXT: movn $8, $5, $10 +; MIPS64-NEXT: and $8, $8, $3 +; MIPS64-NEXT: and $9, $7, $4 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($1) +; MIPS64-NEXT: beqz $9, .LBB10_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $7, $8, $3 -; MIPS64-NEXT: srlv $7, $7, $2 -; MIPS64-NEXT: seh $7, $7 +; MIPS64-NEXT: and $6, $7, $3 +; MIPS64-NEXT: srlv $6, $6, $2 +; MIPS64-NEXT: seh $6, $6 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4081,26 +4081,26 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 255 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $6, $zero, $3 +; MIPS64R6-NEXT: nor $4, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB10_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $8, 0($1) -; MIPS64R6-NEXT: sltu $11, $8, $5 -; MIPS64R6-NEXT: seleqz $9, $8, $11 -; MIPS64R6-NEXT: selnez $11, $5, $11 -; MIPS64R6-NEXT: or $9, $9, $11 -; MIPS64R6-NEXT: and $9, $9, $3 -; MIPS64R6-NEXT: and $10, $8, $6 -; MIPS64R6-NEXT: or $10, $10, $9 -; MIPS64R6-NEXT: sc $10, 0($1) -; MIPS64R6-NEXT: beqzc $10, .LBB10_1 +; MIPS64R6-NEXT: ll $7, 0($1) +; MIPS64R6-NEXT: sltu $10, $7, $5 +; MIPS64R6-NEXT: seleqz $8, $7, $10 +; MIPS64R6-NEXT: selnez $10, $5, $10 +; MIPS64R6-NEXT: or $8, $8, $10 +; MIPS64R6-NEXT: and $8, $8, $3 +; MIPS64R6-NEXT: and $9, $7, $4 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($1) +; MIPS64R6-NEXT: beqzc $9, .LBB10_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $7, $8, $3 -; MIPS64R6-NEXT: srlv $7, $7, $2 -; MIPS64R6-NEXT: seh $7, $7 +; MIPS64R6-NEXT: and $6, $7, $3 +; MIPS64R6-NEXT: srlv $6, $6, $2 +; MIPS64R6-NEXT: seh $6, $6 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4119,28 +4119,28 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 255 ; 
MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $6, $zero, $3 +; MIPS64EL-NEXT: nor $4, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB10_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $8, 0($1) -; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: ll $7, 0($1) +; MIPS64EL-NEXT: and $7, $7, $3 ; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $11, $8, $5 -; MIPS64EL-NEXT: move $9, $8 -; MIPS64EL-NEXT: movn $9, $5, $11 -; MIPS64EL-NEXT: and $9, $9, $3 -; MIPS64EL-NEXT: and $10, $8, $6 -; MIPS64EL-NEXT: or $10, $10, $9 -; MIPS64EL-NEXT: sc $10, 0($1) -; MIPS64EL-NEXT: beqz $10, .LBB10_1 +; MIPS64EL-NEXT: sltu $10, $7, $5 +; MIPS64EL-NEXT: move $8, $7 +; MIPS64EL-NEXT: movn $8, $5, $10 +; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: and $9, $7, $4 +; MIPS64EL-NEXT: or $9, $9, $8 +; MIPS64EL-NEXT: sc $9, 0($1) +; MIPS64EL-NEXT: beqz $9, .LBB10_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $7, $8, $3 -; MIPS64EL-NEXT: srlv $7, $7, $2 -; MIPS64EL-NEXT: seh $7, $7 +; MIPS64EL-NEXT: and $6, $7, $3 +; MIPS64EL-NEXT: srlv $6, $6, $2 +; MIPS64EL-NEXT: seh $6, $6 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4160,28 +4160,28 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 255 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $6, $zero, $3 +; MIPS64ELR6-NEXT: nor $4, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB10_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $8, 0($1) -; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: ll $7, 0($1) +; MIPS64ELR6-NEXT: and $7, $7, $3 ; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $11, $8, $5 -; MIPS64ELR6-NEXT: seleqz $9, $8, $11 -; MIPS64ELR6-NEXT: selnez $11, $5, $11 -; MIPS64ELR6-NEXT: or $9, $9, $11 -; MIPS64ELR6-NEXT: and $9, $9, $3 -; MIPS64ELR6-NEXT: and $10, $8, $6 -; MIPS64ELR6-NEXT: or $10, $10, $9 -; MIPS64ELR6-NEXT: sc $10, 0($1) -; MIPS64ELR6-NEXT: beqzc $10, .LBB10_1 +; MIPS64ELR6-NEXT: sltu $10, $7, $5 +; MIPS64ELR6-NEXT: seleqz $8, $7, $10 +; MIPS64ELR6-NEXT: selnez $10, $5, $10 +; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: and $9, $7, $4 +; MIPS64ELR6-NEXT: or $9, $9, $8 +; MIPS64ELR6-NEXT: sc $9, 0($1) +; MIPS64ELR6-NEXT: beqzc $9, .LBB10_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $7, $8, $3 -; MIPS64ELR6-NEXT: srlv $7, $7, $2 -; MIPS64ELR6-NEXT: seh $7, $7 +; MIPS64ELR6-NEXT: and $6, $7, $3 +; MIPS64ELR6-NEXT: srlv $6, $6, $2 +; MIPS64ELR6-NEXT: seh $6, $6 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4522,26 +4522,26 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 255 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $6, $zero, $3 +; MIPS64-NEXT: nor $4, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB11_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $8, 0($1) 
-; MIPS64-NEXT: sltu $11, $8, $5 -; MIPS64-NEXT: move $9, $8 -; MIPS64-NEXT: movz $9, $5, $11 -; MIPS64-NEXT: and $9, $9, $3 -; MIPS64-NEXT: and $10, $8, $6 -; MIPS64-NEXT: or $10, $10, $9 -; MIPS64-NEXT: sc $10, 0($1) -; MIPS64-NEXT: beqz $10, .LBB11_1 +; MIPS64-NEXT: ll $7, 0($1) +; MIPS64-NEXT: sltu $10, $7, $5 +; MIPS64-NEXT: move $8, $7 +; MIPS64-NEXT: movz $8, $5, $10 +; MIPS64-NEXT: and $8, $8, $3 +; MIPS64-NEXT: and $9, $7, $4 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($1) +; MIPS64-NEXT: beqz $9, .LBB11_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $7, $8, $3 -; MIPS64-NEXT: srlv $7, $7, $2 -; MIPS64-NEXT: seh $7, $7 +; MIPS64-NEXT: and $6, $7, $3 +; MIPS64-NEXT: srlv $6, $6, $2 +; MIPS64-NEXT: seh $6, $6 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4562,26 +4562,26 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 255 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $6, $zero, $3 +; MIPS64R6-NEXT: nor $4, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB11_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $8, 0($1) -; MIPS64R6-NEXT: sltu $11, $8, $5 -; MIPS64R6-NEXT: selnez $9, $8, $11 -; MIPS64R6-NEXT: seleqz $11, $5, $11 -; MIPS64R6-NEXT: or $9, $9, $11 -; MIPS64R6-NEXT: and $9, $9, $3 -; MIPS64R6-NEXT: and $10, $8, $6 -; MIPS64R6-NEXT: or $10, $10, $9 -; MIPS64R6-NEXT: sc $10, 0($1) -; MIPS64R6-NEXT: beqzc $10, .LBB11_1 +; MIPS64R6-NEXT: ll $7, 0($1) +; MIPS64R6-NEXT: sltu $10, $7, $5 +; MIPS64R6-NEXT: selnez $8, $7, $10 +; MIPS64R6-NEXT: seleqz $10, $5, $10 +; MIPS64R6-NEXT: or $8, $8, $10 +; MIPS64R6-NEXT: and $8, $8, $3 +; MIPS64R6-NEXT: and $9, $7, $4 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($1) +; MIPS64R6-NEXT: beqzc $9, .LBB11_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $7, $8, $3 -; MIPS64R6-NEXT: srlv $7, $7, $2 -; MIPS64R6-NEXT: seh $7, $7 +; MIPS64R6-NEXT: and $6, $7, $3 +; MIPS64R6-NEXT: srlv $6, $6, $2 +; MIPS64R6-NEXT: seh $6, $6 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4600,28 +4600,28 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 255 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $6, $zero, $3 +; MIPS64EL-NEXT: nor $4, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB11_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $8, 0($1) -; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: ll $7, 0($1) +; MIPS64EL-NEXT: and $7, $7, $3 ; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $11, $8, $5 -; MIPS64EL-NEXT: move $9, $8 -; MIPS64EL-NEXT: movz $9, $5, $11 -; MIPS64EL-NEXT: and $9, $9, $3 -; MIPS64EL-NEXT: and $10, $8, $6 -; MIPS64EL-NEXT: or $10, $10, $9 -; MIPS64EL-NEXT: sc $10, 0($1) -; MIPS64EL-NEXT: beqz $10, .LBB11_1 +; MIPS64EL-NEXT: sltu $10, $7, $5 +; MIPS64EL-NEXT: move $8, $7 +; MIPS64EL-NEXT: movz $8, $5, $10 +; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: and $9, $7, $4 +; MIPS64EL-NEXT: or 
$9, $9, $8 +; MIPS64EL-NEXT: sc $9, 0($1) +; MIPS64EL-NEXT: beqz $9, .LBB11_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $7, $8, $3 -; MIPS64EL-NEXT: srlv $7, $7, $2 -; MIPS64EL-NEXT: seh $7, $7 +; MIPS64EL-NEXT: and $6, $7, $3 +; MIPS64EL-NEXT: srlv $6, $6, $2 +; MIPS64EL-NEXT: seh $6, $6 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4641,28 +4641,28 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 255 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $6, $zero, $3 +; MIPS64ELR6-NEXT: nor $4, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB11_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $8, 0($1) -; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: ll $7, 0($1) +; MIPS64ELR6-NEXT: and $7, $7, $3 ; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $11, $8, $5 -; MIPS64ELR6-NEXT: selnez $9, $8, $11 -; MIPS64ELR6-NEXT: seleqz $11, $5, $11 -; MIPS64ELR6-NEXT: or $9, $9, $11 -; MIPS64ELR6-NEXT: and $9, $9, $3 -; MIPS64ELR6-NEXT: and $10, $8, $6 -; MIPS64ELR6-NEXT: or $10, $10, $9 -; MIPS64ELR6-NEXT: sc $10, 0($1) -; MIPS64ELR6-NEXT: beqzc $10, .LBB11_1 +; MIPS64ELR6-NEXT: sltu $10, $7, $5 +; MIPS64ELR6-NEXT: selnez $8, $7, $10 +; MIPS64ELR6-NEXT: seleqz $10, $5, $10 +; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: and $9, $7, $4 +; MIPS64ELR6-NEXT: or $9, $9, $8 +; MIPS64ELR6-NEXT: sc $9, 0($1) +; MIPS64ELR6-NEXT: beqzc $9, .LBB11_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $7, $8, $3 -; MIPS64ELR6-NEXT: srlv $7, $7, $2 -; MIPS64ELR6-NEXT: seh $7, $7 +; MIPS64ELR6-NEXT: and $6, $7, $3 +; MIPS64ELR6-NEXT: srlv $6, $6, $2 +; MIPS64ELR6-NEXT: seh $6, $6 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/Mips/atomic.ll b/llvm/test/CodeGen/Mips/atomic.ll index 3846fda47b1389..59ff83e4969cce 100644 --- a/llvm/test/CodeGen/Mips/atomic.ll +++ b/llvm/test/CodeGen/Mips/atomic.ll @@ -2559,28 +2559,28 @@ define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind { ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 3 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: sllv $4, $4, $3 +; MIPS64R6O0-NEXT: andi $1, $1, 3 +; MIPS64R6O0-NEXT: xori $1, $1, 3 +; MIPS64R6O0-NEXT: sll $1, $1, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 255 +; MIPS64R6O0-NEXT: sllv $3, $3, $1 +; MIPS64R6O0-NEXT: nor $5, $zero, $3 +; MIPS64R6O0-NEXT: sllv $4, $4, $1 ; MIPS64R6O0-NEXT: .LBB8_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($2) -; MIPS64R6O0-NEXT: addu $9, $8, $4 -; MIPS64R6O0-NEXT: and $9, $9, $5 -; MIPS64R6O0-NEXT: and $10, $8, $6 -; MIPS64R6O0-NEXT: or $10, $10, $9 -; MIPS64R6O0-NEXT: sc $10, 0($2) -; MIPS64R6O0-NEXT: 
beqzc $10, .LBB8_1 +; MIPS64R6O0-NEXT: ll $7, 0($2) +; MIPS64R6O0-NEXT: addu $8, $7, $4 +; MIPS64R6O0-NEXT: and $8, $8, $3 +; MIPS64R6O0-NEXT: and $9, $7, $5 +; MIPS64R6O0-NEXT: or $9, $9, $8 +; MIPS64R6O0-NEXT: sc $9, 0($2) +; MIPS64R6O0-NEXT: beqzc $9, .LBB8_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $7, $8, $5 -; MIPS64R6O0-NEXT: srlv $7, $7, $3 -; MIPS64R6O0-NEXT: seb $7, $7 +; MIPS64R6O0-NEXT: and $6, $7, $3 +; MIPS64R6O0-NEXT: srlv $6, $6, $1 +; MIPS64R6O0-NEXT: seb $6, $6 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -3075,28 +3075,28 @@ define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind { ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 3 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: sllv $4, $4, $3 +; MIPS64R6O0-NEXT: andi $1, $1, 3 +; MIPS64R6O0-NEXT: xori $1, $1, 3 +; MIPS64R6O0-NEXT: sll $1, $1, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 255 +; MIPS64R6O0-NEXT: sllv $3, $3, $1 +; MIPS64R6O0-NEXT: nor $5, $zero, $3 +; MIPS64R6O0-NEXT: sllv $4, $4, $1 ; MIPS64R6O0-NEXT: .LBB9_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($2) -; MIPS64R6O0-NEXT: subu $9, $8, $4 -; MIPS64R6O0-NEXT: and $9, $9, $5 -; MIPS64R6O0-NEXT: and $10, $8, $6 -; MIPS64R6O0-NEXT: or $10, $10, $9 -; MIPS64R6O0-NEXT: sc $10, 0($2) -; MIPS64R6O0-NEXT: beqzc $10, .LBB9_1 +; MIPS64R6O0-NEXT: ll $7, 0($2) +; MIPS64R6O0-NEXT: subu $8, $7, $4 +; MIPS64R6O0-NEXT: and $8, $8, $3 +; MIPS64R6O0-NEXT: and $9, $7, $5 +; MIPS64R6O0-NEXT: or $9, $9, $8 +; MIPS64R6O0-NEXT: sc $9, 0($2) +; MIPS64R6O0-NEXT: beqzc $9, .LBB9_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $7, $8, $5 -; MIPS64R6O0-NEXT: srlv $7, $7, $3 -; MIPS64R6O0-NEXT: seb $7, $7 +; MIPS64R6O0-NEXT: and $6, $7, $3 +; MIPS64R6O0-NEXT: srlv $6, $6, $1 +; MIPS64R6O0-NEXT: seb $6, $6 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -3601,29 +3601,29 @@ define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind { ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 3 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: sllv $4, $4, $3 +; MIPS64R6O0-NEXT: andi $1, $1, 3 +; MIPS64R6O0-NEXT: xori $1, $1, 3 +; MIPS64R6O0-NEXT: sll $1, $1, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 255 +; MIPS64R6O0-NEXT: sllv $3, $3, $1 +; MIPS64R6O0-NEXT: nor $5, $zero, $3 +; MIPS64R6O0-NEXT: sllv $4, $4, $1 ; MIPS64R6O0-NEXT: .LBB10_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($2) -; MIPS64R6O0-NEXT: and $9, $8, $4 -; MIPS64R6O0-NEXT: nor $9, $zero, $9 -; MIPS64R6O0-NEXT: and $9, $9, $5 -; MIPS64R6O0-NEXT: 
and $10, $8, $6 -; MIPS64R6O0-NEXT: or $10, $10, $9 -; MIPS64R6O0-NEXT: sc $10, 0($2) -; MIPS64R6O0-NEXT: beqzc $10, .LBB10_1 +; MIPS64R6O0-NEXT: ll $7, 0($2) +; MIPS64R6O0-NEXT: and $8, $7, $4 +; MIPS64R6O0-NEXT: nor $8, $zero, $8 +; MIPS64R6O0-NEXT: and $8, $8, $3 +; MIPS64R6O0-NEXT: and $9, $7, $5 +; MIPS64R6O0-NEXT: or $9, $9, $8 +; MIPS64R6O0-NEXT: sc $9, 0($2) +; MIPS64R6O0-NEXT: beqzc $9, .LBB10_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $7, $8, $5 -; MIPS64R6O0-NEXT: srlv $7, $7, $3 -; MIPS64R6O0-NEXT: seb $7, $7 +; MIPS64R6O0-NEXT: and $6, $7, $3 +; MIPS64R6O0-NEXT: srlv $6, $6, $1 +; MIPS64R6O0-NEXT: seb $6, $6 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -4115,27 +4115,27 @@ define signext i8 @AtomicSwap8(i8 signext %newval) nounwind { ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 3 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: sllv $4, $4, $3 +; MIPS64R6O0-NEXT: andi $1, $1, 3 +; MIPS64R6O0-NEXT: xori $1, $1, 3 +; MIPS64R6O0-NEXT: sll $1, $1, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 255 +; MIPS64R6O0-NEXT: sllv $3, $3, $1 +; MIPS64R6O0-NEXT: nor $5, $zero, $3 +; MIPS64R6O0-NEXT: sllv $4, $4, $1 ; MIPS64R6O0-NEXT: .LBB11_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($2) -; MIPS64R6O0-NEXT: and $9, $4, $5 -; MIPS64R6O0-NEXT: and $10, $8, $6 -; MIPS64R6O0-NEXT: or $10, $10, $9 -; MIPS64R6O0-NEXT: sc $10, 0($2) -; MIPS64R6O0-NEXT: beqzc $10, .LBB11_1 +; MIPS64R6O0-NEXT: ll $7, 0($2) +; MIPS64R6O0-NEXT: and $8, $4, $3 +; MIPS64R6O0-NEXT: and $9, $7, $5 +; MIPS64R6O0-NEXT: or $9, $9, $8 +; MIPS64R6O0-NEXT: sc $9, 0($2) +; MIPS64R6O0-NEXT: beqzc $9, .LBB11_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $7, $8, $5 -; MIPS64R6O0-NEXT: srlv $7, $7, $3 -; MIPS64R6O0-NEXT: seb $7, $7 +; MIPS64R6O0-NEXT: and $6, $7, $3 +; MIPS64R6O0-NEXT: srlv $6, $6, $1 +; MIPS64R6O0-NEXT: seb $6, $6 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -4666,32 +4666,32 @@ define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwi ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 3 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $6, $zero, 255 -; MIPS64R6O0-NEXT: sllv $6, $6, $3 -; MIPS64R6O0-NEXT: nor $7, $zero, $6 +; MIPS64R6O0-NEXT: andi $1, $1, 3 +; MIPS64R6O0-NEXT: xori $1, $1, 3 +; MIPS64R6O0-NEXT: sll $1, $1, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 255 +; MIPS64R6O0-NEXT: sllv $3, $3, $1 +; MIPS64R6O0-NEXT: nor $6, $zero, $3 ; MIPS64R6O0-NEXT: andi $4, $4, 255 -; MIPS64R6O0-NEXT: sllv $4, $4, $3 +; MIPS64R6O0-NEXT: sllv $4, $4, $1 ; MIPS64R6O0-NEXT: andi $5, $5, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 +; MIPS64R6O0-NEXT: sllv $5, $5, $1 ; 
MIPS64R6O0-NEXT: .LBB12_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $9, 0($2) -; MIPS64R6O0-NEXT: and $10, $9, $6 -; MIPS64R6O0-NEXT: bnec $10, $4, .LBB12_3 +; MIPS64R6O0-NEXT: ll $8, 0($2) +; MIPS64R6O0-NEXT: and $9, $8, $3 +; MIPS64R6O0-NEXT: bnec $9, $4, .LBB12_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1 -; MIPS64R6O0-NEXT: and $9, $9, $7 -; MIPS64R6O0-NEXT: or $9, $9, $5 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB12_1 +; MIPS64R6O0-NEXT: and $8, $8, $6 +; MIPS64R6O0-NEXT: or $8, $8, $5 +; MIPS64R6O0-NEXT: sc $8, 0($2) +; MIPS64R6O0-NEXT: beqzc $8, .LBB12_1 ; MIPS64R6O0-NEXT: .LBB12_3: # %entry -; MIPS64R6O0-NEXT: srlv $8, $10, $3 -; MIPS64R6O0-NEXT: seb $8, $8 +; MIPS64R6O0-NEXT: srlv $7, $9, $1 +; MIPS64R6O0-NEXT: seb $7, $7 ; MIPS64R6O0-NEXT: # %bb.4: # %entry -; MIPS64R6O0-NEXT: sw $8, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: # %entry ; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 @@ -5236,28 +5236,28 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n ; MIPS64R6O0-NEXT: sll $2, $2, 3 ; MIPS64R6O0-NEXT: ori $3, $zero, 255 ; MIPS64R6O0-NEXT: sllv $3, $3, $2 -; MIPS64R6O0-NEXT: nor $7, $zero, $3 -; MIPS64R6O0-NEXT: andi $8, $5, 255 -; MIPS64R6O0-NEXT: sllv $8, $8, $2 +; MIPS64R6O0-NEXT: nor $4, $zero, $3 +; MIPS64R6O0-NEXT: andi $7, $5, 255 +; MIPS64R6O0-NEXT: sllv $7, $7, $2 ; MIPS64R6O0-NEXT: andi $6, $6, 255 ; MIPS64R6O0-NEXT: sllv $6, $6, $2 ; MIPS64R6O0-NEXT: .LBB13_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $10, 0($1) -; MIPS64R6O0-NEXT: and $11, $10, $3 -; MIPS64R6O0-NEXT: bnec $11, $8, .LBB13_3 +; MIPS64R6O0-NEXT: ll $9, 0($1) +; MIPS64R6O0-NEXT: and $10, $9, $3 +; MIPS64R6O0-NEXT: bnec $10, $7, .LBB13_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1 -; MIPS64R6O0-NEXT: and $10, $10, $7 -; MIPS64R6O0-NEXT: or $10, $10, $6 -; MIPS64R6O0-NEXT: sc $10, 0($1) -; MIPS64R6O0-NEXT: beqzc $10, .LBB13_1 +; MIPS64R6O0-NEXT: and $9, $9, $4 +; MIPS64R6O0-NEXT: or $9, $9, $6 +; MIPS64R6O0-NEXT: sc $9, 0($1) +; MIPS64R6O0-NEXT: beqzc $9, .LBB13_1 ; MIPS64R6O0-NEXT: .LBB13_3: # %entry -; MIPS64R6O0-NEXT: srlv $9, $11, $2 -; MIPS64R6O0-NEXT: seb $9, $9 +; MIPS64R6O0-NEXT: srlv $8, $10, $2 +; MIPS64R6O0-NEXT: seb $8, $8 ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $9, 8($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: # %entry ; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -5775,28 +5775,28 @@ define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind { ; MIPS64R6O0-NEXT: ld $1, %got_disp(z)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 2 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 65535 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: sllv $4, $4, $3 +; MIPS64R6O0-NEXT: andi $1, $1, 3 +; MIPS64R6O0-NEXT: xori $1, $1, 2 +; MIPS64R6O0-NEXT: sll $1, $1, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 65535 +; MIPS64R6O0-NEXT: sllv $3, $3, $1 +; MIPS64R6O0-NEXT: nor $5, $zero, $3 +; 
MIPS64R6O0-NEXT: sllv $4, $4, $1 ; MIPS64R6O0-NEXT: .LBB14_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($2) -; MIPS64R6O0-NEXT: addu $9, $8, $4 -; MIPS64R6O0-NEXT: and $9, $9, $5 -; MIPS64R6O0-NEXT: and $10, $8, $6 -; MIPS64R6O0-NEXT: or $10, $10, $9 -; MIPS64R6O0-NEXT: sc $10, 0($2) -; MIPS64R6O0-NEXT: beqzc $10, .LBB14_1 +; MIPS64R6O0-NEXT: ll $7, 0($2) +; MIPS64R6O0-NEXT: addu $8, $7, $4 +; MIPS64R6O0-NEXT: and $8, $8, $3 +; MIPS64R6O0-NEXT: and $9, $7, $5 +; MIPS64R6O0-NEXT: or $9, $9, $8 +; MIPS64R6O0-NEXT: sc $9, 0($2) +; MIPS64R6O0-NEXT: beqzc $9, .LBB14_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $7, $8, $5 -; MIPS64R6O0-NEXT: srlv $7, $7, $3 -; MIPS64R6O0-NEXT: seh $7, $7 +; MIPS64R6O0-NEXT: and $6, $7, $3 +; MIPS64R6O0-NEXT: srlv $6, $6, $1 +; MIPS64R6O0-NEXT: seh $6, $6 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seh $2, $1 @@ -6359,33 +6359,33 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) { ; MIPS64R6O0-NEXT: sll $3, $5, 0 ; MIPS64R6O0-NEXT: addu $2, $3, $2 ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: daddiu $8, $zero, -4 -; MIPS64R6O0-NEXT: and $8, $4, $8 -; MIPS64R6O0-NEXT: andi $3, $4, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 2 -; MIPS64R6O0-NEXT: sll $3, $3, 3 +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $3, $4, $3 +; MIPS64R6O0-NEXT: andi $4, $4, 3 +; MIPS64R6O0-NEXT: xori $4, $4, 2 +; MIPS64R6O0-NEXT: sll $4, $4, 3 ; MIPS64R6O0-NEXT: ori $5, $zero, 65535 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 +; MIPS64R6O0-NEXT: sllv $5, $5, $4 ; MIPS64R6O0-NEXT: nor $6, $zero, $5 ; MIPS64R6O0-NEXT: andi $7, $2, 65535 -; MIPS64R6O0-NEXT: sllv $7, $7, $3 +; MIPS64R6O0-NEXT: sllv $7, $7, $4 ; MIPS64R6O0-NEXT: andi $1, $1, 65535 -; MIPS64R6O0-NEXT: sllv $1, $1, $3 +; MIPS64R6O0-NEXT: sllv $1, $1, $4 ; MIPS64R6O0-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $10, 0($8) -; MIPS64R6O0-NEXT: and $11, $10, $5 -; MIPS64R6O0-NEXT: bnec $11, $7, .LBB15_3 +; MIPS64R6O0-NEXT: ll $9, 0($3) +; MIPS64R6O0-NEXT: and $10, $9, $5 +; MIPS64R6O0-NEXT: bnec $10, $7, .LBB15_3 ; MIPS64R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; MIPS64R6O0-NEXT: and $10, $10, $6 -; MIPS64R6O0-NEXT: or $10, $10, $1 -; MIPS64R6O0-NEXT: sc $10, 0($8) -; MIPS64R6O0-NEXT: beqzc $10, .LBB15_1 +; MIPS64R6O0-NEXT: and $9, $9, $6 +; MIPS64R6O0-NEXT: or $9, $9, $1 +; MIPS64R6O0-NEXT: sc $9, 0($3) +; MIPS64R6O0-NEXT: beqzc $9, .LBB15_1 ; MIPS64R6O0-NEXT: .LBB15_3: -; MIPS64R6O0-NEXT: srlv $9, $11, $3 -; MIPS64R6O0-NEXT: seh $9, $9 +; MIPS64R6O0-NEXT: srlv $8, $10, $4 +; MIPS64R6O0-NEXT: seh $8, $8 ; MIPS64R6O0-NEXT: # %bb.4: ; MIPS64R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $9, 8($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seh $2, $1 @@ -7145,8 +7145,8 @@ define i32 @zeroreg() nounwind { ; MIPS64R6O0-NEXT: sc $6, 0($1) ; MIPS64R6O0-NEXT: beqzc $6, .LBB17_1 ; MIPS64R6O0-NEXT: .LBB17_3: # %entry -; MIPS64R6O0-NEXT: xor $2, $5, $3 -; MIPS64R6O0-NEXT: sltiu $2, $2, 1 +; MIPS64R6O0-NEXT: xor $1, $5, $3 +; MIPS64R6O0-NEXT: sltiu $2, $1, 1 ; MIPS64R6O0-NEXT: sync ; MIPS64R6O0-NEXT: jrc $ra ; diff --git 
a/llvm/test/CodeGen/Mips/implicit-sret.ll b/llvm/test/CodeGen/Mips/implicit-sret.ll index e86cec37d51008..b9f6568e40c926 100644 --- a/llvm/test/CodeGen/Mips/implicit-sret.ll +++ b/llvm/test/CodeGen/Mips/implicit-sret.ll @@ -48,8 +48,8 @@ define internal { i32, i128, i64 } @implicit_sret_impl() unnamed_addr nounwind { ; CHECK-NEXT: sd $zero, 8($4) ; CHECK-NEXT: daddiu $3, $zero, 30 ; CHECK-NEXT: sd $3, 24($4) -; CHECK-NEXT: addiu $5, $zero, 10 -; CHECK-NEXT: sw $5, 0($4) +; CHECK-NEXT: addiu $3, $zero, 10 +; CHECK-NEXT: sw $3, 0($4) ; CHECK-NEXT: jr $ra ; CHECK-NEXT: nop ret { i32, i128, i64 } { i32 10, i128 20, i64 30 } @@ -70,10 +70,12 @@ define internal void @test2() unnamed_addr nounwind { ; CHECK-NEXT: lw $3, 4($sp) ; CHECK-NEXT: # implicit-def: $a0_64 ; CHECK-NEXT: move $4, $3 -; CHECK-NEXT: # implicit-def: $a1_64 -; CHECK-NEXT: move $5, $2 -; CHECK-NEXT: # implicit-def: $a2_64 -; CHECK-NEXT: move $6, $1 +; CHECK-NEXT: # implicit-def: $v1_64 +; CHECK-NEXT: move $3, $2 +; CHECK-NEXT: # implicit-def: $v0_64 +; CHECK-NEXT: move $2, $1 +; CHECK-NEXT: move $5, $3 +; CHECK-NEXT: move $6, $2 ; CHECK-NEXT: jal use_sret2 ; CHECK-NEXT: nop ; CHECK-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/addegluecrash.ll b/llvm/test/CodeGen/PowerPC/addegluecrash.ll index a1d9805458368b..c38f377869f866 100644 --- a/llvm/test/CodeGen/PowerPC/addegluecrash.ll +++ b/llvm/test/CodeGen/PowerPC/addegluecrash.ll @@ -21,11 +21,11 @@ define void @bn_mul_comba8(i64* nocapture %r, i64* nocapture readonly %a, i64* n ; CHECK-NEXT: addze 5, 5 ; CHECK-NEXT: add 4, 5, 4 ; CHECK-NEXT: cmpld 7, 4, 5 -; CHECK-NEXT: mfocrf 10, 1 -; CHECK-NEXT: rlwinm 10, 10, 29, 31, 31 -; CHECK-NEXT: # implicit-def: $x4 -; CHECK-NEXT: mr 4, 10 -; CHECK-NEXT: clrldi 4, 4, 32 +; CHECK-NEXT: mfocrf 4, 1 +; CHECK-NEXT: rlwinm 4, 4, 29, 31, 31 +; CHECK-NEXT: # implicit-def: $x5 +; CHECK-NEXT: mr 5, 4 +; CHECK-NEXT: clrldi 4, 5, 32 ; CHECK-NEXT: std 4, 0(3) ; CHECK-NEXT: blr %1 = load i64, i64* %a, align 8 diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll index c79980345d64d5..8b4e3640ef6bc5 100644 --- a/llvm/test/CodeGen/PowerPC/vsx.ll +++ b/llvm/test/CodeGen/PowerPC/vsx.ll @@ -1548,8 +1548,8 @@ define <2 x i64> @test46(<2 x float> %a) { ; CHECK-FISL-NEXT: ld r3, -24(r1) ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs1, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs1, vs1 +; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 +; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test46: @@ -1616,8 +1616,8 @@ define <2 x i64> @test47(<2 x float> %a) { ; CHECK-FISL-NEXT: ld r3, -24(r1) ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs1, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs1, vs1 +; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 +; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test47: @@ -1859,13 +1859,13 @@ define <2 x i64> @test60(<2 x i64> %a, <2 x i64> %b) { ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r4, -20(r1) -; CHECK-FISL-NEXT: ld r3, -40(r1) -; CHECK-FISL-NEXT: sld r3, r3, r4 +; CHECK-FISL-NEXT: lwz r3, -20(r1) +; CHECK-FISL-NEXT: ld r4, -40(r1) +; CHECK-FISL-NEXT: sld r3, r4, r3 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r4, -28(r1) -; CHECK-FISL-NEXT: ld r3, -48(r1) -; CHECK-FISL-NEXT: sld r3, r3, r4 +; CHECK-FISL-NEXT: lwz r3, -28(r1) +; 
CHECK-FISL-NEXT: ld r4, -48(r1) +; CHECK-FISL-NEXT: sld r3, r4, r3 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 ; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 @@ -1925,13 +1925,13 @@ define <2 x i64> @test61(<2 x i64> %a, <2 x i64> %b) { ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r4, -20(r1) -; CHECK-FISL-NEXT: ld r3, -40(r1) -; CHECK-FISL-NEXT: srd r3, r3, r4 +; CHECK-FISL-NEXT: lwz r3, -20(r1) +; CHECK-FISL-NEXT: ld r4, -40(r1) +; CHECK-FISL-NEXT: srd r3, r4, r3 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r4, -28(r1) -; CHECK-FISL-NEXT: ld r3, -48(r1) -; CHECK-FISL-NEXT: srd r3, r3, r4 +; CHECK-FISL-NEXT: lwz r3, -28(r1) +; CHECK-FISL-NEXT: ld r4, -48(r1) +; CHECK-FISL-NEXT: srd r3, r4, r3 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 ; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 @@ -1991,13 +1991,13 @@ define <2 x i64> @test62(<2 x i64> %a, <2 x i64> %b) { ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r4, -20(r1) -; CHECK-FISL-NEXT: ld r3, -40(r1) -; CHECK-FISL-NEXT: srad r3, r3, r4 +; CHECK-FISL-NEXT: lwz r3, -20(r1) +; CHECK-FISL-NEXT: ld r4, -40(r1) +; CHECK-FISL-NEXT: srad r3, r4, r3 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r4, -28(r1) -; CHECK-FISL-NEXT: ld r3, -48(r1) -; CHECK-FISL-NEXT: srad r3, r3, r4 +; CHECK-FISL-NEXT: lwz r3, -28(r1) +; CHECK-FISL-NEXT: ld r4, -48(r1) +; CHECK-FISL-NEXT: srad r3, r4, r3 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 ; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 @@ -2426,12 +2426,12 @@ define <2 x i32> @test80(i32 %v) { ; CHECK-FISL: # %bb.0: ; CHECK-FISL-NEXT: # kill: def $r3 killed $r3 killed $x3 ; CHECK-FISL-NEXT: stw r3, -16(r1) -; CHECK-FISL-NEXT: addi r4, r1, -16 -; CHECK-FISL-NEXT: lxvw4x vs0, 0, r4 +; CHECK-FISL-NEXT: addi r3, r1, -16 +; CHECK-FISL-NEXT: lxvw4x vs0, 0, r3 ; CHECK-FISL-NEXT: xxspltw v2, vs0, 0 -; CHECK-FISL-NEXT: addis r4, r2, .LCPI65_0@toc@ha -; CHECK-FISL-NEXT: addi r4, r4, .LCPI65_0@toc@l -; CHECK-FISL-NEXT: lxvw4x v3, 0, r4 +; CHECK-FISL-NEXT: addis r3, r2, .LCPI65_0@toc@ha +; CHECK-FISL-NEXT: addi r3, r3, .LCPI65_0@toc@l +; CHECK-FISL-NEXT: lxvw4x v3, 0, r3 ; CHECK-FISL-NEXT: vadduwm v2, v2, v3 ; CHECK-FISL-NEXT: blr ; diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify.ll index 8c16a465730837..ff04ddc247eac6 100644 --- a/llvm/test/CodeGen/WebAssembly/cfg-stackify.ll +++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify.ll @@ -382,14 +382,14 @@ if.end: ; CHECK-LABEL: test4: ; CHECK-NEXT: .functype test4 (i32) -> (){{$}} -; CHECK: block {{$}} ; CHECK-NEXT: block {{$}} -; CHECK: br_if 0, $pop{{[0-9]+}}{{$}} -; CHECK: br 1{{$}} -; CHECK-NEXT: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: block {{$}} +; CHECK-NEXT: br_table $0, 1, 1, 1, 1, 1, 0{{$}} +; CHECK-NEXT: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: end_block{{$}} -; CHECK-NEXT: br_table $0, 0, 0, 0, 0, 0, 0{{$}} -; CHECK-NEXT: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: i32.const $push[[C:[0-9]+]]=, 622{{$}} +; CHECK-NEXT: i32.eq $drop=, $0, $pop[[C]]{{$}} +; CHECK-NEXT: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: end_block{{$}} ; CHECK-NEXT: return{{$}} define void @test4(i32 %t) { @@ -649,20 +649,16 @@ end: ; CHECK: br_if 0, {{[^,]+}}{{$}} ; CHECK-NEXT: end_loop{{$}} ; CHECK-NEXT: block {{$}} -; CHECK: br_if 0, {{[^,]+}}{{$}} -; CHECK: br 3{{$}} -; CHECK-NEXT: .LBB{{[0-9]+}}_7: -; CHECK-NEXT: end_block{{$}} 
-; CHECK: block {{$}} -; CHECK-NEXT: br_table $0, 0, 3, 1, 2, 0 -; CHECK-NEXT: .LBB{{[0-9]+}}_8: +; CHECK-NOT: br_if +; CHECK: br_table $pop{{[^,]+}}, 0, 3, 1, 2, 3 +; CHECK-NEXT: .LBB{{[0-9]+}}_6: ; CHECK-NEXT: end_block{{$}} ; CHECK-NEXT: end_loop{{$}} ; CHECK-NEXT: return{{$}} -; CHECK-NEXT: .LBB{{[0-9]+}}_9: +; CHECK-NEXT: .LBB{{[0-9]+}}_7: ; CHECK-NEXT: end_block{{$}} ; CHECK: br 0{{$}} -; CHECK-NEXT: .LBB{{[0-9]+}}_10: +; CHECK-NEXT: .LBB{{[0-9]+}}_8: ; CHECK-NEXT: end_loop{{$}} define void @test10() { bb0: @@ -767,25 +763,22 @@ bb8: ; CHECK-LABEL: test12: ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: loop {{$}} ; CHECK-NEXT: block {{$}} +; CHECK-NEXT: loop {{$}} ; CHECK-NEXT: block {{$}} ; CHECK-NEXT: block {{$}} +; CHECK: br_table {{[^,]+}}, 1, 3, 3, 3, 1, 0{{$}} +; CHECK-NEXT: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: end_block{{$}} ; CHECK: br_if 0, {{[^,]+}}{{$}} ; CHECK: br_if 2, {{[^,]+}}{{$}} -; CHECK: br_if 1, {{[^,]+}}{{$}} -; CHECK-NEXT: br 2{{$}} ; CHECK-NEXT: .LBB{{[0-9]+}}_4: ; CHECK-NEXT: end_block{{$}} -; CHECK-NEXT: br_table $2, 1, 0, 0, 0, 1, 1{{$}} +; CHECK: br 0{{$}} ; CHECK-NEXT: .LBB{{[0-9]+}}_5: +; CHECK-NEXT: end_loop{{$}} ; CHECK-NEXT: end_block{{$}} ; CHECK-NEXT: return{{$}} -; CHECK-NEXT: .LBB{{[0-9]+}}_6: -; CHECK-NEXT: end_block{{$}} -; CHECK: br 0{{$}} -; CHECK-NEXT: .LBB{{[0-9]+}}_7: -; CHECK-NEXT: end_loop{{$}} define void @test12(i8* %arg) { bb: br label %bb1 diff --git a/llvm/test/CodeGen/WebAssembly/indirectbr.ll b/llvm/test/CodeGen/WebAssembly/indirectbr.ll index d32f941cbeb2a8..da737613f30fa7 100644 --- a/llvm/test/CodeGen/WebAssembly/indirectbr.ll +++ b/llvm/test/CodeGen/WebAssembly/indirectbr.ll @@ -13,20 +13,36 @@ target triple = "wasm32" ; Just check the barest skeleton of the structure ; CHECK-LABEL: test1: +; CHECK: block +; CHECK: block +; CHECK: block +; CHECK: block ; CHECK: i32.load -; CHECK: i32.load $[[DEST:.+]]= +; CHECK: i32.load +; CHECK: i32.const +; CHECK: i32.add $push[[DEST:.+]]= +; CHECK: br_table $pop[[DEST]] +; CHECK: end_block +; CHECK: end_block +; CHECK: end_block +; CHECK: end_block ; CHECK: loop ; CHECK: block ; CHECK: block +; CHECK: block +; CHECK: block +; CHECK: br_table ${{[^,]+}}, 0, 1, 2, 2 +; CHECK: end_block ; CHECK: end_block +; CHECK: end_block +; CHECK: block ; CHECK: block ; CHECK: block -; CHECK: br_table $[[DEST]] +; CHECK: br_table ${{[^,]+}}, 1, 2, 0 +; CHECK: end_block ; CHECK: end_block ; CHECK: end_block -; CHECK: i32.load $[[DEST]]= ; CHECK: end_loop - ; CHECK: test1.targets: ; CHECK-NEXT: .int32 ; CHECK-NEXT: .int32 diff --git a/llvm/test/CodeGen/WebAssembly/stack-insts.ll b/llvm/test/CodeGen/WebAssembly/stack-insts.ll index c4ccdddf406fa7..506f43c1a6e516 100644 --- a/llvm/test/CodeGen/WebAssembly/stack-insts.ll +++ b/llvm/test/CodeGen/WebAssembly/stack-insts.ll @@ -8,7 +8,7 @@ declare void @foo1() ; Tests if br_table is printed correctly with a tab. 
; CHECK-LABEL: test0: -; CHECK: br_table {0, 1, 0, 1, 0} +; CHECK: br_table {0, 1, 0, 1, 2} define void @test0(i32 %n) { entry: switch i32 %n, label %sw.epilog [ diff --git a/llvm/test/CodeGen/WebAssembly/switch-unreachable-default.ll b/llvm/test/CodeGen/WebAssembly/switch-unreachable-default.ll new file mode 100644 index 00000000000000..f862a453f45739 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/switch-unreachable-default.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; Test that switches are lowered correctly in the presence of an +; unreachable default branch target. + +; CHECK-LABEL: foo: +; CHECK-NEXT: .functype foo (i32) -> (i32) +; CHECK-NEXT: block +; CHECK-NEXT: block +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: br_table {0, 1, 0} +; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: end_block +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: return +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: end_block +; CHECK-NEXT: i32.const 1 +; CHECK-NEXT: end_function +define i32 @foo(i32 %x) { +entry: + switch i32 %x, label %unreachable [ + i32 0, label %bb0 + i32 1, label %bb1 + ] + +bb0: + ret i32 0 + +bb1: + ret i32 1 + +unreachable: + unreachable +} diff --git a/llvm/test/CodeGen/WebAssembly/switch.ll b/llvm/test/CodeGen/WebAssembly/switch.ll index 1b0dfc8e56b436..3a9da703e78988 100644 --- a/llvm/test/CodeGen/WebAssembly/switch.ll +++ b/llvm/test/CodeGen/WebAssembly/switch.ll @@ -21,20 +21,20 @@ declare void @foo5() ; CHECK: block {{$}} ; CHECK: block {{$}} ; CHECK: block {{$}} -; CHECK: br_table {{[^,]+}}, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 4, 5, 0{{$}} -; CHECK: .LBB{{[0-9]+}}_2: +; CHECK: br_table {{[^,]+}}, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6{{$}} +; CHECK: .LBB{{[0-9]+}}_1: ; CHECK: call foo0{{$}} -; CHECK: .LBB{{[0-9]+}}_3: +; CHECK: .LBB{{[0-9]+}}_2: ; CHECK: call foo1{{$}} -; CHECK: .LBB{{[0-9]+}}_4: +; CHECK: .LBB{{[0-9]+}}_3: ; CHECK: call foo2{{$}} -; CHECK: .LBB{{[0-9]+}}_5: +; CHECK: .LBB{{[0-9]+}}_4: ; CHECK: call foo3{{$}} -; CHECK: .LBB{{[0-9]+}}_6: +; CHECK: .LBB{{[0-9]+}}_5: ; CHECK: call foo4{{$}} -; CHECK: .LBB{{[0-9]+}}_7: +; CHECK: .LBB{{[0-9]+}}_6: ; CHECK: call foo5{{$}} -; CHECK: .LBB{{[0-9]+}}_8: +; CHECK: .LBB{{[0-9]+}}_7: ; CHECK: return{{$}} define void @bar32(i32 %n) { entry: @@ -101,20 +101,20 @@ sw.epilog: ; preds = %entry, %sw.bb.5, %s ; CHECK: block {{$}} ; CHECK: block {{$}} ; CHECK: block {{$}} -; CHECK: br_table {{[^,]+}}, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 4, 5, 0{{$}} -; CHECK: .LBB{{[0-9]+}}_2: +; CHECK: br_table {{[^,]+}}, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6{{$}} +; CHECK: .LBB{{[0-9]+}}_1: ; CHECK: call foo0{{$}} -; CHECK: .LBB{{[0-9]+}}_3: +; CHECK: .LBB{{[0-9]+}}_2: ; CHECK: call foo1{{$}} -; CHECK: .LBB{{[0-9]+}}_4: +; CHECK: .LBB{{[0-9]+}}_3: ; CHECK: call foo2{{$}} -; CHECK: .LBB{{[0-9]+}}_5: +; CHECK: .LBB{{[0-9]+}}_4: ; CHECK: call foo3{{$}} -; CHECK: .LBB{{[0-9]+}}_6: +; CHECK: .LBB{{[0-9]+}}_5: ; CHECK: call foo4{{$}} -; CHECK: .LBB{{[0-9]+}}_7: +; CHECK: .LBB{{[0-9]+}}_6: ; CHECK: call foo5{{$}} -; CHECK: .LBB{{[0-9]+}}_8: +; CHECK: .LBB{{[0-9]+}}_7: ; CHECK: return{{$}} define void @bar64(i64 %n) { entry: diff --git a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll index 48ad2a2c077705..b5635c7e0f067c 100644 --- 
a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll +++ b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll @@ -8,34 +8,34 @@ define i32 @z() nounwind ssp { ; CHECK-LABEL: z: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: pushl %ebx ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: subl $144, %esp +; CHECK-NEXT: subl $148, %esp ; CHECK-NEXT: movl L___stack_chk_guard$non_lazy_ptr, %eax ; CHECK-NEXT: movl (%eax), %eax ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movb $48, {{[0-9]+}}(%esp) -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl -; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp) +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al +; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp) ; CHECK-NEXT: movb $15, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: movl $8, %edx -; CHECK-NEXT: leal {{[0-9]+}}(%esp), %esi -; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: movl $8, %ecx +; CHECK-NEXT: leal {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: movl %eax, %edi -; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %edx, %esi ; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: addl $36, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload ; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; CHECK-NEXT: movl %edx, %esi ; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl -; CHECK-NEXT: movb %bl, 32(%eax) -; CHECK-NEXT: movb %bl, 68(%eax) +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movb %cl, 32(%eax) +; CHECK-NEXT: movb %cl, 68(%eax) ; CHECK-NEXT: calll _f ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -50,10 +50,9 @@ define i32 @z() nounwind ssp { ; CHECK-NEXT: jne LBB0_3 ; CHECK-NEXT: ## %bb.2: ## %SP_return ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload -; CHECK-NEXT: addl $144, %esp +; CHECK-NEXT: addl $148, %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi -; CHECK-NEXT: popl %ebx ; CHECK-NEXT: retl ; CHECK-NEXT: LBB0_3: ## %CallStackCheckFailBlk ; CHECK-NEXT: calll ___stack_chk_fail diff --git a/llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll b/llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll index 96ceb1985810ed..6a43e864e965d3 100644 --- a/llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll +++ b/llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -O0 -no-integrated-as | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -O0 -no-integrated-as -verify-machineinstrs | FileCheck %s ; PR7509 target triple = "i386-apple-darwin10" %asmtype = type { i32, i8*, i32, i32 } @@ -6,13 +7,27 @@ target triple = "i386-apple-darwin10" ; Arguments 1 and 4 must be the same. No other output arguments may be ; allocated %eax. 
-; CHECK: InlineAsm Start
-; CHECK: arg1 %[[A1:...]]
-; CHECK-NOT: ax
-; CHECK: arg4 %[[A1]]
-; CHECK: InlineAsm End
-
 define i32 @func(i8* %s) nounwind ssp {
+; CHECK-LABEL: func:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; CHECK-NEXT: movl (%esp), %ecx ## 4-byte Reload
+; CHECK-NEXT: ## InlineAsm Start
+; CHECK-NEXT: arg0 %eax
+; CHECK-NEXT: arg1 %ecx
+; CHECK-NEXT: arg2 %edx
+; CHECK-NEXT: arg3 %esi
+; CHECK-NEXT: arg4 %ecx
+; CHECK-NEXT: ## InlineAsm End
+; CHECK-NEXT: movl %ecx, %edi
+; CHECK-NEXT: addl $4, %esp
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %edi
+; CHECK-NEXT: retl
 entry:
 %0 = tail call %asmtype asm "arg0 $0\0A\09arg1 $1\0A\09arg2 $2\0A\09arg3 $3\0A\09arg4 $4", "={ax},=r,=r,=r,1,~{dirflag},~{fpsr},~{flags}"(i8* %s) nounwind, !srcloc !0 ; <%0> [#uses=1]
 %asmresult = extractvalue %asmtype %0, 0 ; [#uses=1]
diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll
index 16fde4074ea0e3..7a1f34c65c183d 100644
--- a/llvm/test/CodeGen/X86/atomic-unordered.ll
+++ b/llvm/test/CodeGen/X86/atomic-unordered.ll
@@ -126,8 +126,8 @@ define void @narrow_writeback_and(i64* %ptr) {
 ; CHECK-O0-NEXT: movq (%rdi), %rax
 ; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
 ; CHECK-O0-NEXT: andl $-256, %eax
-; CHECK-O0-NEXT: movl %eax, %ecx
-; CHECK-O0-NEXT: movq %rcx, (%rdi)
+; CHECK-O0-NEXT: # kill: def $rax killed $eax
+; CHECK-O0-NEXT: movq %rax, (%rdi)
 ; CHECK-O0-NEXT: retq
 ;
 ; CHECK-O3-LABEL: narrow_writeback_and:
@@ -231,10 +231,10 @@ define i128 @load_i128(i128* %ptr) {
 ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-O0-NEXT: .cfi_offset %rbx, -16
 ; CHECK-O0-NEXT: xorl %eax, %eax
-; CHECK-O0-NEXT: movl %eax, %ecx
-; CHECK-O0-NEXT: movq %rcx, %rax
-; CHECK-O0-NEXT: movq %rcx, %rdx
-; CHECK-O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: # kill: def $rax killed $eax
+; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
 ; CHECK-O0-NEXT: lock cmpxchg16b (%rdi)
 ; CHECK-O0-NEXT: popq %rbx
@@ -326,14 +326,14 @@ define i256 @load_i256(i256* %ptr) {
 ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-O0-NEXT: callq __atomic_load
 ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rcx
 ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx
 ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi
-; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdi
-; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; CHECK-O0-NEXT: movq %rdi, 24(%r9)
-; CHECK-O0-NEXT: movq %rsi, 16(%r9)
-; CHECK-O0-NEXT: movq %rdx, 8(%r9)
-; CHECK-O0-NEXT: movq %rax, (%r9)
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; CHECK-O0-NEXT: movq %rsi, 24(%rdi)
+; CHECK-O0-NEXT: movq %rdx, 16(%rdi)
+; CHECK-O0-NEXT: movq %rcx, 8(%rdi)
+; CHECK-O0-NEXT: movq %rax, (%rdi)
 ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; CHECK-O0-NEXT: addq $56, %rsp
 ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
@@ -831,8 +831,8 @@ define i64 @load_fold_udiv1(i64* %p) {
 ; CHECK-O0-NEXT: movq (%rdi), %rax
 ; CHECK-O0-NEXT: xorl %ecx, %ecx
 ; CHECK-O0-NEXT: movl %ecx, %edx
-; CHECK-O0-NEXT: movl $15, %esi
-; CHECK-O0-NEXT: divq %rsi
+; CHECK-O0-NEXT: movl $15, %ecx
+; CHECK-O0-NEXT: divq %rcx
 ; CHECK-O0-NEXT: retq
 ;
 ; CHECK-O3-CUR-LABEL: load_fold_udiv1:
@@ -1024,8 +1024,8 @@ define i64 @load_fold_urem1(i64* %p) {
 ; CHECK-O0-NEXT: movq (%rdi), %rax
 ; CHECK-O0-NEXT: xorl %ecx, %ecx
 ; CHECK-O0-NEXT: movl %ecx, %edx
-; CHECK-O0-NEXT: movl $15, %esi
-; CHECK-O0-NEXT: divq %rsi
+; CHECK-O0-NEXT: movl $15, %ecx
+; CHECK-O0-NEXT: divq %rcx
 ; CHECK-O0-NEXT: movq %rdx, %rax
 ; CHECK-O0-NEXT: retq
 ;
@@ -1475,9 +1475,9 @@ define i1 @load_fold_icmp3(i64* %p1, i64* %p2) {
 ; CHECK-O0-NEXT: movq (%rdi), %rax
 ; CHECK-O0-NEXT: movq (%rsi), %rcx
 ; CHECK-O0-NEXT: subq %rcx, %rax
-; CHECK-O0-NEXT: sete %dl
+; CHECK-O0-NEXT: sete %cl
 ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-O0-NEXT: movb %dl, %al
+; CHECK-O0-NEXT: movb %cl, %al
 ; CHECK-O0-NEXT: retq
 ;
 ; CHECK-O3-CUR-LABEL: load_fold_icmp3:
@@ -2076,8 +2076,8 @@ define void @rmw_fold_and1(i64* %p, i64 %v) {
 ; CHECK-O0-NEXT: movq (%rdi), %rax
 ; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
 ; CHECK-O0-NEXT: andl $15, %eax
-; CHECK-O0-NEXT: movl %eax, %ecx
-; CHECK-O0-NEXT: movq %rcx, (%rdi)
+; CHECK-O0-NEXT: # kill: def $rax killed $eax
+; CHECK-O0-NEXT: movq %rax, (%rdi)
 ; CHECK-O0-NEXT: retq
 ;
 ; CHECK-O3-LABEL: rmw_fold_and1:
@@ -2541,9 +2541,8 @@ define i16 @load_i8_anyext_i16(i8* %ptr) {
 ; CHECK-O0-CUR-LABEL: load_i8_anyext_i16:
 ; CHECK-O0-CUR: # %bb.0:
 ; CHECK-O0-CUR-NEXT: movb (%rdi), %al
-; CHECK-O0-CUR-NEXT: movzbl %al, %ecx
-; CHECK-O0-CUR-NEXT: # kill: def $cx killed $cx killed $ecx
-; CHECK-O0-CUR-NEXT: movw %cx, %ax
+; CHECK-O0-CUR-NEXT: movzbl %al, %eax
+; CHECK-O0-CUR-NEXT: # kill: def $ax killed $ax killed $eax
 ; CHECK-O0-CUR-NEXT: retq
 ;
 ; CHECK-O3-CUR-LABEL: load_i8_anyext_i16:
@@ -2671,13 +2670,12 @@ define i16 @load_combine(i8* %p) {
 ; CHECK-O0: # %bb.0:
 ; CHECK-O0-NEXT: movb (%rdi), %al
 ; CHECK-O0-NEXT: movb 1(%rdi), %cl
-; CHECK-O0-NEXT: movzbl %al, %edx
-; CHECK-O0-NEXT: # kill: def $dx killed $dx killed $edx
-; CHECK-O0-NEXT: movzbl %cl, %esi
-; CHECK-O0-NEXT: # kill: def $si killed $si killed $esi
-; CHECK-O0-NEXT: shlw $8, %si
-; CHECK-O0-NEXT: orw %si, %dx
-; CHECK-O0-NEXT: movw %dx, %ax
+; CHECK-O0-NEXT: movzbl %al, %eax
+; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-O0-NEXT: movzbl %cl, %ecx
+; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx
+; CHECK-O0-NEXT: shlw $8, %cx
+; CHECK-O0-NEXT: orw %cx, %ax
 ; CHECK-O0-NEXT: retq
 ;
 ; CHECK-O3-LABEL: load_combine:
diff --git a/llvm/test/CodeGen/X86/atomic32.ll b/llvm/test/CodeGen/X86/atomic32.ll
index 4fb03356f99f4e..3fe5ef8311ce7f 100644
--- a/llvm/test/CodeGen/X86/atomic32.ll
+++ b/llvm/test/CodeGen/X86/atomic32.ll
@@ -70,8 +70,8 @@ define void @atomic_fetch_and32() nounwind {
 ; X64-NEXT: movl %eax, %ecx
 ; X64-NEXT: andl $5, %ecx
 ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
-; X64-NEXT: sete %dl
-; X64-NEXT: testb $1, %dl
+; X64-NEXT: sete %cl
+; X64-NEXT: testb $1, %cl
 ; X64-NEXT: movl %eax, %ecx
 ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
@@ -94,8 +94,8 @@ define void @atomic_fetch_and32() nounwind {
 ; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: andl $5, %ecx
 ; X86-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NEXT: sete %dl
-; X86-NEXT: testb $1, %dl
+; X86-NEXT: sete %cl
+; X86-NEXT: testb $1, %cl
 ; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
@@ -124,8 +124,8 @@ define void @atomic_fetch_or32() nounwind {
 ; X64-NEXT: movl %eax, %ecx
 ; X64-NEXT: orl $5, %ecx
 ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
-; X64-NEXT: sete %dl
-; X64-NEXT: testb $1, %dl
+; X64-NEXT: sete %cl
+; X64-NEXT: testb $1, %cl
 ; X64-NEXT: movl %eax, %ecx
 ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
@@ -148,8 +148,8 @@ define void @atomic_fetch_or32() nounwind {
 ; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: orl $5, %ecx
 ; X86-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NEXT: sete %dl
-; X86-NEXT: testb $1, %dl
+; X86-NEXT: sete %cl
+; X86-NEXT: testb $1, %cl
 ; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
@@ -178,8 +178,8 @@ define void @atomic_fetch_xor32() nounwind {
 ; X64-NEXT: movl %eax, %ecx
 ; X64-NEXT: xorl $5, %ecx
 ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
-; X64-NEXT: sete %dl
-; X64-NEXT: testb $1, %dl
+; X64-NEXT: sete %cl
+; X64-NEXT: testb $1, %cl
 ; X64-NEXT: movl %eax, %ecx
 ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
@@ -202,8 +202,8 @@ define void @atomic_fetch_xor32() nounwind {
 ; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: xorl $5, %ecx
 ; X86-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NEXT: sete %dl
-; X86-NEXT: testb $1, %dl
+; X86-NEXT: sete %cl
+; X86-NEXT: testb $1, %cl
 ; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
@@ -234,8 +234,8 @@ define void @atomic_fetch_nand32(i32 %x) nounwind {
 ; X64-NEXT: andl %edx, %ecx
 ; X64-NEXT: notl %ecx
 ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
-; X64-NEXT: sete %sil
-; X64-NEXT: testb $1, %sil
+; X64-NEXT: sete %cl
+; X64-NEXT: testb $1, %cl
 ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; X64-NEXT: jne .LBB5_2
 ; X64-NEXT: jmp .LBB5_1
@@ -244,7 +244,6 @@ define void @atomic_fetch_nand32(i32 %x) nounwind {
 ;
 ; X86-LABEL: atomic_fetch_nand32:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
 ; X86-NEXT: subl $8, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl sc32, %ecx
@@ -258,14 +257,13 @@ define void @atomic_fetch_nand32(i32 %x) nounwind {
 ; X86-NEXT: andl %edx, %ecx
 ; X86-NEXT: notl %ecx
 ; X86-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NEXT: sete %bl
-; X86-NEXT: testb $1, %bl
+; X86-NEXT: sete %cl
+; X86-NEXT: testb $1, %cl
 ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
 ; X86-NEXT: jne .LBB5_2
 ; X86-NEXT: jmp .LBB5_1
 ; X86-NEXT: .LBB5_2: # %atomicrmw.end
 ; X86-NEXT: addl $8, %esp
-; X86-NEXT: popl %ebx
 ; X86-NEXT: retl
 %t1 = atomicrmw nand i32* @sc32, i32 %x acquire
 ret void
@@ -285,8 +283,8 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
 ; X64-NEXT: subl %edx, %ecx
 ; X64-NEXT: cmovgel %eax, %edx
 ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
-; X64-NEXT: sete %sil
-; X64-NEXT: testb $1, %sil
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
 ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; X64-NEXT: jne .LBB6_2
@@ -296,7 +294,6 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
 ;
 ; X86-CMOV-LABEL: atomic_fetch_max32:
 ; X86-CMOV: # %bb.0:
-; X86-CMOV-NEXT: pushl %ebx
 ; X86-CMOV-NEXT: subl $12, %esp
 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-CMOV-NEXT: movl sc32, %ecx
@@ -310,20 +307,18 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
 ; X86-CMOV-NEXT: subl %edx, %ecx
 ; X86-CMOV-NEXT: cmovgel %eax, %edx
 ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
-; X86-CMOV-NEXT: sete %bl
-; X86-CMOV-NEXT: testb $1, %bl
+; X86-CMOV-NEXT: sete %dl
+; X86-CMOV-NEXT: testb $1, %dl
 ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
 ; X86-CMOV-NEXT: jne .LBB6_2
 ; X86-CMOV-NEXT: jmp .LBB6_1
 ; X86-CMOV-NEXT: .LBB6_2: # %atomicrmw.end
 ; X86-CMOV-NEXT: addl $12, %esp
-; X86-CMOV-NEXT: popl %ebx
 ; X86-CMOV-NEXT: retl
 ;
 ; X86-NOCMOV-LABEL: atomic_fetch_max32:
 ; X86-NOCMOV: # %bb.0:
-; X86-NOCMOV-NEXT: pushl %ebx
 ; X86-NOCMOV-NEXT: pushl %esi
 ; X86-NOCMOV-NEXT: subl $20, %esp
 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -352,20 +347,18 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
 ; X86-NOCMOV-NEXT: movl %ecx, %eax
 ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
 ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
-; X86-NOCMOV-NEXT: sete %bl
-; X86-NOCMOV-NEXT: testb $1, %bl
+; X86-NOCMOV-NEXT: sete %dl
+; X86-NOCMOV-NEXT: testb $1, %dl
 ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NOCMOV-NEXT: jne .LBB6_2
 ; X86-NOCMOV-NEXT: jmp .LBB6_1
 ; X86-NOCMOV-NEXT: .LBB6_2: # %atomicrmw.end
 ; X86-NOCMOV-NEXT: addl $20, %esp
 ; X86-NOCMOV-NEXT: popl %esi
-; X86-NOCMOV-NEXT: popl %ebx
 ; X86-NOCMOV-NEXT: retl
 ;
 ; X86-NOX87-LABEL: atomic_fetch_max32:
 ; X86-NOX87: # %bb.0:
-; X86-NOX87-NEXT: pushl %ebx
 ; X86-NOX87-NEXT: pushl %esi
 ; X86-NOX87-NEXT: subl $20, %esp
 ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -394,15 +387,14 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
 ; X86-NOX87-NEXT: movl %ecx, %eax
 ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload
 ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32
-; X86-NOX87-NEXT: sete %bl
-; X86-NOX87-NEXT: testb $1, %bl
+; X86-NOX87-NEXT: sete %dl
+; X86-NOX87-NEXT: testb $1, %dl
 ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NOX87-NEXT: jne .LBB6_2
 ; X86-NOX87-NEXT: jmp .LBB6_1
 ; X86-NOX87-NEXT: .LBB6_2: # %atomicrmw.end
 ; X86-NOX87-NEXT: addl $20, %esp
 ; X86-NOX87-NEXT: popl %esi
-; X86-NOX87-NEXT: popl %ebx
 ; X86-NOX87-NEXT: retl
 %t1 = atomicrmw max i32* @sc32, i32 %x acquire
 ret void
@@ -422,8 +414,8 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
 ; X64-NEXT: subl %edx, %ecx
 ; X64-NEXT: cmovlel %eax, %edx
 ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
-; X64-NEXT: sete %sil
-; X64-NEXT: testb $1, %sil
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
 ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; X64-NEXT: jne .LBB7_2
@@ -433,7 +425,6 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
 ;
 ; X86-CMOV-LABEL: atomic_fetch_min32:
 ; X86-CMOV: # %bb.0:
-; X86-CMOV-NEXT: pushl %ebx
 ; X86-CMOV-NEXT: subl $12, %esp
 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-CMOV-NEXT: movl sc32, %ecx
@@ -447,20 +438,18 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
 ; X86-CMOV-NEXT: subl %edx, %ecx
 ; X86-CMOV-NEXT: cmovlel %eax, %edx
 ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
-; X86-CMOV-NEXT: sete %bl
-; X86-CMOV-NEXT: testb $1, %bl
+; X86-CMOV-NEXT: sete %dl
+; X86-CMOV-NEXT: testb $1, %dl
 ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
 ; X86-CMOV-NEXT: jne .LBB7_2
 ; X86-CMOV-NEXT: jmp .LBB7_1
 ; X86-CMOV-NEXT: .LBB7_2: # %atomicrmw.end
 ; X86-CMOV-NEXT: addl $12, %esp
-; X86-CMOV-NEXT: popl %ebx
 ; X86-CMOV-NEXT: retl
 ;
 ; X86-NOCMOV-LABEL: atomic_fetch_min32:
 ; X86-NOCMOV: # %bb.0:
-; X86-NOCMOV-NEXT: pushl %ebx
 ; X86-NOCMOV-NEXT: pushl %esi
 ; X86-NOCMOV-NEXT: subl $20, %esp
 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -489,20 +478,18 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
 ; X86-NOCMOV-NEXT: movl %ecx, %eax
 ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
 ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
-; X86-NOCMOV-NEXT: sete %bl
-; X86-NOCMOV-NEXT: testb $1, %bl
+; X86-NOCMOV-NEXT: sete %dl
+; X86-NOCMOV-NEXT: testb $1, %dl
 ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NOCMOV-NEXT: jne .LBB7_2
 ; X86-NOCMOV-NEXT: jmp .LBB7_1
 ; X86-NOCMOV-NEXT: .LBB7_2: # %atomicrmw.end
 ; X86-NOCMOV-NEXT: addl $20, %esp
 ; X86-NOCMOV-NEXT: popl %esi
-; X86-NOCMOV-NEXT: popl %ebx
 ; X86-NOCMOV-NEXT: retl
 ;
 ; X86-NOX87-LABEL: atomic_fetch_min32:
 ; X86-NOX87: # %bb.0:
-; X86-NOX87-NEXT: pushl %ebx
 ; X86-NOX87-NEXT: pushl %esi
 ; X86-NOX87-NEXT: subl $20, %esp
 ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -531,15 +518,14 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
 ; X86-NOX87-NEXT: movl %ecx, %eax
 ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload
 ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32
-; X86-NOX87-NEXT: sete %bl
-; X86-NOX87-NEXT: testb $1, %bl
+; X86-NOX87-NEXT: sete %dl
+; X86-NOX87-NEXT: testb $1, %dl
 ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NOX87-NEXT: jne .LBB7_2
 ; X86-NOX87-NEXT: jmp .LBB7_1
 ; X86-NOX87-NEXT: .LBB7_2: # %atomicrmw.end
 ; X86-NOX87-NEXT: addl $20, %esp
 ; X86-NOX87-NEXT: popl %esi
-; X86-NOX87-NEXT: popl %ebx
 ; X86-NOX87-NEXT: retl
 %t1 = atomicrmw min i32* @sc32, i32 %x acquire
 ret void
@@ -559,8 +545,8 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
 ; X64-NEXT: subl %edx, %ecx
 ; X64-NEXT: cmoval %eax, %edx
 ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
-; X64-NEXT: sete %sil
-; X64-NEXT: testb $1, %sil
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
 ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; X64-NEXT: jne .LBB8_2
@@ -570,7 +556,6 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
 ;
 ; X86-CMOV-LABEL: atomic_fetch_umax32:
 ; X86-CMOV: # %bb.0:
-; X86-CMOV-NEXT: pushl %ebx
 ; X86-CMOV-NEXT: subl $12, %esp
 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-CMOV-NEXT: movl sc32, %ecx
@@ -584,20 +569,18 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
 ; X86-CMOV-NEXT: subl %edx, %ecx
 ; X86-CMOV-NEXT: cmoval %eax, %edx
 ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
-; X86-CMOV-NEXT: sete %bl
-; X86-CMOV-NEXT: testb $1, %bl
+; X86-CMOV-NEXT: sete %dl
+; X86-CMOV-NEXT: testb $1, %dl
 ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
 ; X86-CMOV-NEXT: jne .LBB8_2
 ; X86-CMOV-NEXT: jmp .LBB8_1
 ; X86-CMOV-NEXT: .LBB8_2: # %atomicrmw.end
 ; X86-CMOV-NEXT: addl $12, %esp
-; X86-CMOV-NEXT: popl %ebx
 ; X86-CMOV-NEXT: retl
 ;
 ; X86-NOCMOV-LABEL: atomic_fetch_umax32:
 ; X86-NOCMOV: # %bb.0:
-; X86-NOCMOV-NEXT: pushl %ebx
 ; X86-NOCMOV-NEXT: pushl %esi
 ; X86-NOCMOV-NEXT: subl $20, %esp
 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -626,20 +609,18 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
 ; X86-NOCMOV-NEXT: movl %ecx, %eax
 ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
 ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
-; X86-NOCMOV-NEXT: sete %bl
-; X86-NOCMOV-NEXT: testb $1, %bl
+; X86-NOCMOV-NEXT: sete %dl
+; X86-NOCMOV-NEXT: testb $1, %dl
 ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NOCMOV-NEXT: jne .LBB8_2
 ; X86-NOCMOV-NEXT: jmp .LBB8_1
 ; X86-NOCMOV-NEXT: .LBB8_2: # %atomicrmw.end
 ; X86-NOCMOV-NEXT: addl $20, %esp
 ; X86-NOCMOV-NEXT: popl %esi
-; X86-NOCMOV-NEXT: popl %ebx
 ; X86-NOCMOV-NEXT: retl
 ;
 ; X86-NOX87-LABEL: atomic_fetch_umax32:
 ; X86-NOX87: # %bb.0:
-; X86-NOX87-NEXT: pushl %ebx
 ; X86-NOX87-NEXT: pushl %esi
 ; X86-NOX87-NEXT: subl $20, %esp
 ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -668,15 +649,14 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
 ; X86-NOX87-NEXT: movl %ecx, %eax
 ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload
 ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32
-; X86-NOX87-NEXT: sete %bl
-; X86-NOX87-NEXT: testb $1, %bl
+; X86-NOX87-NEXT: sete %dl
+; X86-NOX87-NEXT: testb $1, %dl
 ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NOX87-NEXT: jne .LBB8_2
 ; X86-NOX87-NEXT: jmp .LBB8_1
 ; X86-NOX87-NEXT: .LBB8_2: # %atomicrmw.end
 ; X86-NOX87-NEXT: addl $20, %esp
 ; X86-NOX87-NEXT: popl %esi
-; X86-NOX87-NEXT: popl %ebx
 ; X86-NOX87-NEXT: retl
 %t1 = atomicrmw umax i32* @sc32, i32 %x acquire
 ret void
@@ -696,8 +676,8 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
 ; X64-NEXT: subl %edx, %ecx
 ; X64-NEXT: cmovbel %eax, %edx
 ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
-; X64-NEXT: sete %sil
-; X64-NEXT: testb $1, %sil
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
 ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; X64-NEXT: jne .LBB9_2
@@ -707,7 +687,6 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
 ;
 ; X86-CMOV-LABEL: atomic_fetch_umin32:
 ; X86-CMOV: # %bb.0:
-; X86-CMOV-NEXT: pushl %ebx
 ; X86-CMOV-NEXT: subl $12, %esp
 ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-CMOV-NEXT: movl sc32, %ecx
@@ -721,20 +700,18 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
 ; X86-CMOV-NEXT: subl %edx, %ecx
 ; X86-CMOV-NEXT: cmovbel %eax, %edx
 ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
-; X86-CMOV-NEXT: sete %bl
-; X86-CMOV-NEXT: testb $1, %bl
+; X86-CMOV-NEXT: sete %dl
+; X86-CMOV-NEXT: testb $1, %dl
 ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
 ; X86-CMOV-NEXT: jne .LBB9_2
 ; X86-CMOV-NEXT: jmp .LBB9_1
 ; X86-CMOV-NEXT: .LBB9_2: # %atomicrmw.end
 ; X86-CMOV-NEXT: addl $12, %esp
-; X86-CMOV-NEXT: popl %ebx
 ; X86-CMOV-NEXT: retl
 ;
 ; X86-NOCMOV-LABEL: atomic_fetch_umin32:
 ; X86-NOCMOV: # %bb.0:
-; X86-NOCMOV-NEXT: pushl %ebx
 ; X86-NOCMOV-NEXT: pushl %esi
 ; X86-NOCMOV-NEXT: subl $20, %esp
 ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -763,20 +740,18 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
 ; X86-NOCMOV-NEXT: movl %ecx, %eax
 ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
 ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
-; X86-NOCMOV-NEXT: sete %bl
-; X86-NOCMOV-NEXT: testb $1, %bl
+; X86-NOCMOV-NEXT: sete %dl
+; X86-NOCMOV-NEXT: testb $1, %dl
 ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NOCMOV-NEXT: jne .LBB9_2
 ; X86-NOCMOV-NEXT: jmp .LBB9_1
 ; X86-NOCMOV-NEXT: .LBB9_2: # %atomicrmw.end
 ; X86-NOCMOV-NEXT: addl $20, %esp
 ; X86-NOCMOV-NEXT: popl %esi
-; X86-NOCMOV-NEXT: popl %ebx
 ; X86-NOCMOV-NEXT: retl
 ;
 ; X86-NOX87-LABEL: atomic_fetch_umin32:
 ; X86-NOX87: # %bb.0:
-; X86-NOX87-NEXT: pushl %ebx
 ; X86-NOX87-NEXT: pushl %esi
 ; X86-NOX87-NEXT: subl $20, %esp
 ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -805,15 +780,14 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
 ; X86-NOX87-NEXT: movl %ecx, %eax
 ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload
 ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32
-; X86-NOX87-NEXT: sete %bl
-; X86-NOX87-NEXT: testb $1, %bl
+; X86-NOX87-NEXT: sete %dl
+; X86-NOX87-NEXT: testb $1, %dl
 ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NOX87-NEXT: jne .LBB9_2
 ; X86-NOX87-NEXT: jmp .LBB9_1
 ; X86-NOX87-NEXT: .LBB9_2: # %atomicrmw.end
 ; X86-NOX87-NEXT: addl $20, %esp
 ; X86-NOX87-NEXT: popl %esi
-; X86-NOX87-NEXT: popl %ebx
 ; X86-NOX87-NEXT: retl
 %t1 = atomicrmw umin i32* @sc32, i32 %x acquire
 ret void
diff --git a/llvm/test/CodeGen/X86/atomic64.ll b/llvm/test/CodeGen/X86/atomic64.ll
index 0149851ea4671a..fe7635bdc3ff56 100644
--- a/llvm/test/CodeGen/X86/atomic64.ll
+++ b/llvm/test/CodeGen/X86/atomic64.ll
@@ -137,12 +137,12 @@ define void @atomic_fetch_and64() nounwind {
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; X64-NEXT: movl %eax, %ecx
 ; X64-NEXT: andl $5, %ecx
-; X64-NEXT: movl %ecx, %edx
-; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip)
-; X64-NEXT: sete %sil
-; X64-NEXT: testb $1, %sil
-; X64-NEXT: movq %rax, %rdx
-; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: # kill: def $rcx killed $ecx
+; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
+; X64-NEXT: sete %cl
+; X64-NEXT: testb $1, %cl
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: jne .LBB2_2
 ; X64-NEXT: jmp .LBB2_1
@@ -202,8 +202,8 @@ define void @atomic_fetch_or64() nounwind {
 ; X64-NEXT: movq %rax, %rcx
 ; X64-NEXT: orq $5, %rcx
 ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
-; X64-NEXT: sete %dl
-; X64-NEXT: testb $1, %dl
+; X64-NEXT: sete %cl
+; X64-NEXT: testb $1, %cl
 ; X64-NEXT: movq %rax, %rcx
 ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -265,8 +265,8 @@ define void @atomic_fetch_xor64() nounwind {
 ; X64-NEXT: movq %rax, %rcx
 ; X64-NEXT: xorq $5, %rcx
 ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
-; X64-NEXT: sete %dl
-; X64-NEXT: testb $1, %dl
+; X64-NEXT: sete %cl
+; X64-NEXT: testb $1, %cl
 ; X64-NEXT: movq %rax, %rcx
 ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -330,8 +330,8 @@ define void @atomic_fetch_nand64(i64 %x) nounwind {
 ; X64-NEXT: andq %rdx, %rcx
 ; X64-NEXT: notq %rcx
 ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
-; X64-NEXT: sete %sil
-; X64-NEXT: testb $1, %sil
+; X64-NEXT: sete %cl
+; X64-NEXT: testb $1, %cl
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: jne .LBB5_2
 ; X64-NEXT: jmp .LBB5_1
@@ -373,8 +373,8 @@ define void @atomic_fetch_max64(i64 %x) nounwind {
 ; X64-NEXT: subq %rdx, %rcx
 ; X64-NEXT: cmovgeq %rax, %rdx
 ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip)
-; X64-NEXT: sete %sil
-; X64-NEXT: testb $1, %sil
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: jne .LBB6_2
@@ -473,8 +473,8 @@ define void @atomic_fetch_min64(i64 %x) nounwind {
 ; X64-NEXT: subq %rdx, %rcx
 ; X64-NEXT: cmovleq %rax, %rdx
 ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip)
-; X64-NEXT: sete %sil
-; X64-NEXT: testb $1, %sil
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: jne .LBB7_2
@@ -571,8 +571,8 @@ define void @atomic_fetch_umax64(i64 %x) nounwind {
 ; X64-NEXT: subq %rdx, %rcx
 ; X64-NEXT: cmovaq %rax, %rdx
 ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip)
-; X64-NEXT: sete %sil
-; X64-NEXT: testb $1, %sil
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: jne .LBB8_2
@@ -669,8 +669,8 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
 ; X64-NEXT: subq %rdx, %rcx
 ; X64-NEXT: cmovbeq %rax, %rdx
 ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip)
-; X64-NEXT: sete %sil
-; X64-NEXT: testb $1, %sil
+; X64-NEXT: sete %dl
+; X64-NEXT: testb $1, %dl
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: jne .LBB9_2
diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll
index 718449d7a771f1..f448bfec2ec99d 100644
--- a/llvm/test/CodeGen/X86/avx-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx-load-store.ll
@@ -175,8 +175,8 @@ define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp
 ; CHECK_O0: # %bb.0:
 ; CHECK_O0-NEXT: # implicit-def: $ymm2
 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
-; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
-; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
+; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0
+; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi)
 ; CHECK_O0-NEXT: vzeroupper
 ; CHECK_O0-NEXT: retq
 %Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32>
@@ -197,8 +197,8 @@ define void @double_save_volatile(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nou
 ; CHECK_O0: # %bb.0:
 ; CHECK_O0-NEXT: # implicit-def: $ymm2
 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
-; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
-; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
+; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0
+; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi)
 ; CHECK_O0-NEXT: vzeroupper
 ; CHECK_O0-NEXT: retq
 %Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32>
@@ -239,10 +239,10 @@ define void @f_f() nounwind {
 ; CHECK_O0-NEXT: .LBB9_3: # %cif_mixed_test_all
 ; CHECK_O0-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967295,0,0,0]
 ; CHECK_O0-NEXT: vmovdqa %xmm0, %xmm0
-; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1
+; CHECK_O0-NEXT: # kill: def $ymm0 killed $xmm0
 ; CHECK_O0-NEXT: # implicit-def: $rax
-; CHECK_O0-NEXT: # implicit-def: $ymm2
-; CHECK_O0-NEXT: vmaskmovps %ymm2, %ymm1, (%rax)
+; CHECK_O0-NEXT: # implicit-def: $ymm1
+; CHECK_O0-NEXT: vmaskmovps %ymm1, %ymm0, (%rax)
 ; CHECK_O0-NEXT: .LBB9_4: # %cif_mixed_test_any_check
 allocas:
 br i1 undef, label %cif_mask_all, label %cif_mask_mixed
@@ -276,8 +276,8 @@ define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind {
 ; CHECK_O0-NEXT: vmovdqu 16(%rsi), %xmm1
 ; CHECK_O0-NEXT: # implicit-def: $ymm2
 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
-; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
-; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
+; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0
+; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi)
 ; CHECK_O0-NEXT: vzeroupper
 ; CHECK_O0-NEXT: retq
 %b = load <8 x i32>, <8 x i32>* %bp, align 1
@@ -321,8 +321,8 @@ define void @add4i64a16(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
 ; CHECK_O0-NEXT: vmovdqa 16(%rsi), %xmm1
 ; CHECK_O0-NEXT: # implicit-def: $ymm2
 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
-; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
-; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
+; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0
+; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi)
 ; CHECK_O0-NEXT: vzeroupper
 ; CHECK_O0-NEXT: retq
 %b = load <4 x i64>, <4 x i64>* %bp, align 16
diff --git a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
index c4e009d54ec7a1..186370ca675c70 100755
--- a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
@@ -40,22 +40,20 @@ define void @test_xmm(i32 %shift, i32 %mulp, <2 x i64> %a,i8* %arraydecay,i8* %f
 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; CHECK-NEXT: vpmovd2m %xmm0, %k0
 ; CHECK-NEXT: kmovq %k0, %k1
-; CHECK-NEXT: kmovd %k0, %esi
-; CHECK-NEXT: ## kill: def $sil killed $sil killed $esi
-; CHECK-NEXT: movzbl %sil, %edi
-; CHECK-NEXT: ## kill: def $di killed $di killed $edi
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
-; CHECK-NEXT: movw %di, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; CHECK-NEXT: movq %rcx, %rdi
-; CHECK-NEXT: movl $4, %r8d
-; CHECK-NEXT: movl %r8d, %esi
-; CHECK-NEXT: movl %r8d, %edx
+; CHECK-NEXT: kmovd %k0, %ecx
+; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: ## kill: def $cx killed $cx killed $ecx
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
+; CHECK-NEXT: movl $4, %edx
+; CHECK-NEXT: movl %edx, %esi
 ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
 ; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; CHECK-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; CHECK-NEXT: callq _calc_expected_mask_val
 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $rax
-; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %r9w ## 2-byte Reload
-; CHECK-NEXT: movzwl %r9w, %edi
+; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx ## 2-byte Reload
+; CHECK-NEXT: movzwl %cx, %edi
 ; CHECK-NEXT: movzwl %ax, %esi
 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
diff --git a/llvm/test/CodeGen/X86/crash-O0.ll b/llvm/test/CodeGen/X86/crash-O0.ll
index a93d3dd267b52f..9f9e5584d6f21f 100644
--- a/llvm/test/CodeGen/X86/crash-O0.ll
+++ b/llvm/test/CodeGen/X86/crash-O0.ll
@@ -79,11 +79,12 @@ define i64 @addressModeWith32bitIndex(i32 %V) {
 ; CHECK-NEXT: movq %rsp, %rbp
 ; CHECK-NEXT: .cfi_def_cfa_register %rbp
 ; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: ## kill: def $rax killed $eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT: cqto
-; CHECK-NEXT: movslq %edi, %rsi
-; CHECK-NEXT: idivq (%rcx,%rsi,8)
+; CHECK-NEXT: movslq %edi, %rcx
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload
+; CHECK-NEXT: idivq (%rsi,%rcx,8)
 ; CHECK-NEXT: popq %rbp
 ; CHECK-NEXT: retq
 %gep = getelementptr i64, i64* null, i32 %V
diff --git a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll
index 7d05a869be893d..664d9ded1e0e18 100644
--- a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll
+++ b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll
@@ -7,8 +7,8 @@ define void @foo(i32* %p) !dbg !4 {
 bb:
 %tmp = load i32, i32* %p, align 4, !dbg !7
 ; CHECK: $eax = MOV32rm killed {{.*}} $rdi, {{.*}} debug-location !7 :: (load 4 from %ir.p)
- ; CHECK-NEXT: $ecx = MOV32rr killed $eax, implicit-def $rcx, debug-location !7
- ; CHECK-NEXT: $rdx = MOV64rr $rcx, debug-location !7
+ ; CHECK-NEXT: $rax = KILL killed renamable $eax, debug-location !7
+ ; CHECK-NEXT: $rcx = MOV64rr $rax, debug-location !7
 switch i32 %tmp, label %bb7 [
 i32 0, label %bb1
diff --git a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
index 5d7c83fa19d44a..7fffa21f0d24d3 100644
--- a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
@@ -1013,11 +1013,11 @@ define <16 x float> @test_load_nt16xfloat(<16 x float>* nocapture %ptr) {
 ; AVX1-NEXT: vmovaps %xmm0, %xmm1
 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm2, %xmm1
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm2
+; AVX1-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_load_nt16xfloat:
@@ -1067,11 +1067,11 @@ define <8 x double> @test_load_nt8xdouble(<8 x double>* nocapture %ptr) {
 ; AVX1-NEXT: vmovaps %xmm0, %xmm1
 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm2, %xmm1
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm2
+; AVX1-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_load_nt8xdouble:
@@ -1121,11 +1121,11 @@ define <64 x i8> @test_load_nt64xi8(<64 x i8>* nocapture %ptr) {
 ; AVX1-NEXT: vmovaps %xmm0, %xmm1
 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm2, %xmm1
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm2
+; AVX1-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_load_nt64xi8:
@@ -1175,11 +1175,11 @@ define <32 x i16> @test_load_nt32xi16(<32 x i16>* nocapture %ptr) {
 ; AVX1-NEXT: vmovaps %xmm0, %xmm1
 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm2, %xmm1
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm2
+; AVX1-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_load_nt32xi16:
@@ -1229,11 +1229,11 @@ define <16 x i32> @test_load_nt16xi32(<16 x i32>* nocapture %ptr) {
 ; AVX1-NEXT: vmovaps %xmm0, %xmm1
 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm2, %xmm1
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm2
+; AVX1-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_load_nt16xi32:
@@ -1283,11 +1283,11 @@ define <8 x i64> @test_load_nt8xi64(<8 x i64>* nocapture %ptr) {
 ; AVX1-NEXT: vmovaps %xmm0, %xmm1
 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm2, %xmm1
-; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm2
+; AVX1-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_load_nt8xi64:
diff --git a/llvm/test/CodeGen/X86/pr1489.ll b/llvm/test/CodeGen/X86/pr1489.ll
index 6226ea6caf90f5..d1148eecb0da9c 100644
--- a/llvm/test/CodeGen/X86/pr1489.ll
+++ b/llvm/test/CodeGen/X86/pr1489.ll
@@ -16,9 +16,9 @@ define i32 @quux() nounwind {
 ; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E
 ; CHECK-NEXT: calll _lrintf
 ; CHECK-NEXT: cmpl $1, %eax
-; CHECK-NEXT: setl %cl
-; CHECK-NEXT: andb $1, %cl
-; CHECK-NEXT: movzbl %cl, %eax
+; CHECK-NEXT: setl %al
+; CHECK-NEXT: andb $1, %al
+; CHECK-NEXT: movzbl %al, %eax
 ; CHECK-NEXT: addl $8, %esp
 ; CHECK-NEXT: popl %ebp
 ; CHECK-NEXT: retl
@@ -42,9 +42,9 @@ define i32 @foo() nounwind {
 ; CHECK-NEXT: movl $-1236950581, (%eax) ## imm = 0xB645A1CB
 ; CHECK-NEXT: calll _lrint
 ; CHECK-NEXT: cmpl $1, %eax
-; CHECK-NEXT: setl %cl
-; CHECK-NEXT: andb $1, %cl
-; CHECK-NEXT: movzbl %cl, %eax
+; CHECK-NEXT: setl %al
+; CHECK-NEXT: andb $1, %al
+; CHECK-NEXT: movzbl %al, %eax
 ; CHECK-NEXT: addl $8, %esp
 ; CHECK-NEXT: popl %ebp
 ; CHECK-NEXT: retl
@@ -67,9 +67,9 @@ define i32 @bar() nounwind {
 ; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E
 ; CHECK-NEXT: calll _lrintf
 ; CHECK-NEXT: cmpl $1, %eax
-; CHECK-NEXT: setl %cl
-; CHECK-NEXT: andb $1, %cl
-; CHECK-NEXT: movzbl %cl, %eax
+; CHECK-NEXT: setl %al
+; CHECK-NEXT: andb $1, %al
+; CHECK-NEXT: movzbl %al, %eax
 ; CHECK-NEXT: addl $8, %esp
 ; CHECK-NEXT: popl %ebp
 ; CHECK-NEXT: retl
@@ -90,9 +90,9 @@ define i32 @baz() nounwind {
 ; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E
 ; CHECK-NEXT: calll _lrintf
 ; CHECK-NEXT: cmpl $1, %eax
-; CHECK-NEXT: setl %cl
-; CHECK-NEXT: andb $1, %cl
-; CHECK-NEXT: movzbl %cl, %eax
+; CHECK-NEXT: setl %al
+; CHECK-NEXT: andb $1, %al
+; CHECK-NEXT: movzbl %al, %eax
 ; CHECK-NEXT: addl $8, %esp
 ; CHECK-NEXT: popl %ebp
 ; CHECK-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/pr27591.ll b/llvm/test/CodeGen/X86/pr27591.ll
index 97ad6814f19261..7455584ac698ae 100644
--- a/llvm/test/CodeGen/X86/pr27591.ll
+++ b/llvm/test/CodeGen/X86/pr27591.ll
@@ -9,9 +9,9 @@ define void @test1(i32 %x) #0 {
 ; CHECK-NEXT: pushq %rax
 ; CHECK-NEXT: cmpl $0, %edi
 ; CHECK-NEXT: setne %al
-; CHECK-NEXT: movzbl %al, %ecx
-; CHECK-NEXT: andl $1, %ecx
-; CHECK-NEXT: movl %ecx, %edi
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: movl %eax, %edi
 ; CHECK-NEXT: callq callee1
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
@@ -27,10 +27,10 @@ define void @test2(i32 %x) #0 {
 ; CHECK-NEXT: pushq %rax
 ; CHECK-NEXT: cmpl $0, %edi
 ; CHECK-NEXT: setne %al
-; CHECK-NEXT: movzbl %al, %ecx
-; CHECK-NEXT: andl $1, %ecx
-; CHECK-NEXT: negl %ecx
-; CHECK-NEXT: movl %ecx, %edi
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: negl %eax
+; CHECK-NEXT: movl %eax, %edi
 ; CHECK-NEXT: callq callee2
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/pr30430.ll b/llvm/test/CodeGen/X86/pr30430.ll
index 4d40aa09eeab1a..e524245daa1129 100644
--- a/llvm/test/CodeGen/X86/pr30430.ll
+++ b/llvm/test/CodeGen/X86/pr30430.ll
@@ -75,28 +75,28 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float
 ; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
 ; CHECK-NEXT: # implicit-def: $ymm2
 ; CHECK-NEXT: vmovaps %xmm1, %xmm2
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2
-; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
 ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
+; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[2,3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
 ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
 ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm3[0]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
 ; CHECK-NEXT: # implicit-def: $ymm3
-; CHECK-NEXT: vmovaps %xmm1, %xmm3
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm3
-; CHECK-NEXT: # implicit-def: $zmm24
-; CHECK-NEXT: vmovaps %zmm3, %zmm24
-; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm24, %zmm24
-; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; CHECK-NEXT: # implicit-def: $zmm2
+; CHECK-NEXT: vmovaps %ymm1, %ymm2
+; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0
 ; CHECK-NEXT: movq %rbp, %rsp
 ; CHECK-NEXT: popq %rbp
diff --git a/llvm/test/CodeGen/X86/pr30813.ll b/llvm/test/CodeGen/X86/pr30813.ll
index e3e096bda6c288..7266c5bd8d0154 100644
--- a/llvm/test/CodeGen/X86/pr30813.ll
+++ b/llvm/test/CodeGen/X86/pr30813.ll
@@ -1,9 +1,8 @@
 ; RUN: llc -mtriple=x86_64-linux-gnu -O0 %s -o - | FileCheck %s
 ; CHECK: patatino:
 ; CHECK: .cfi_startproc
-; CHECK: movzwl (%rax), [[REG0:%e[abcd]x]]
-; CHECK: movl [[REG0]], %e[[REG1C:[abcd]]]x
-; CHECK: movq %r[[REG1C]]x, ({{%r[abcd]x}})
+; CHECK: movzwl (%rax), %e[[REG0:[abcd]x]]
+; CHECK: movq %r[[REG0]], ({{%r[abcd]x}})
 ; CHECK: retq
 
 define void @patatino() {
diff --git a/llvm/test/CodeGen/X86/pr32241.ll b/llvm/test/CodeGen/X86/pr32241.ll
index 6d628e6962eda7..1f3d273dfc416f 100644
--- a/llvm/test/CodeGen/X86/pr32241.ll
+++ b/llvm/test/CodeGen/X86/pr32241.ll
@@ -23,14 +23,14 @@ define i32 @_Z3foov() {
 ; CHECK-NEXT: .LBB0_2: # %lor.end
 ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-NEXT: andb $1, %al
-; CHECK-NEXT: movzbl %al, %ecx
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; CHECK-NEXT: cmpl %ecx, %edx
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT: cmpl %eax, %ecx
 ; CHECK-NEXT: setl %al
 ; CHECK-NEXT: andb $1, %al
-; CHECK-NEXT: movzbl %al, %ecx
-; CHECK-NEXT: xorl $-1, %ecx
-; CHECK-NEXT: cmpl $0, %ecx
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: xorl $-1, %eax
+; CHECK-NEXT: cmpl $0, %eax
 ; CHECK-NEXT: movb $1, %al
 ; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; CHECK-NEXT: jne .LBB0_4
@@ -42,9 +42,9 @@ define i32 @_Z3foov() {
 ; CHECK-NEXT: .LBB0_4: # %lor.end5
 ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-NEXT: andb $1, %al
-; CHECK-NEXT: movzbl %al, %ecx
-; CHECK-NEXT: # kill: def $cx killed $cx killed $ecx
-; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
 ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: addl $16, %esp
 ; CHECK-NEXT: .cfi_def_cfa_offset 4
diff --git a/llvm/test/CodeGen/X86/pr32284.ll b/llvm/test/CodeGen/X86/pr32284.ll
index a1041ab889c23b..533473663d73ba 100644
--- a/llvm/test/CodeGen/X86/pr32284.ll
+++ b/llvm/test/CodeGen/X86/pr32284.ll
@@ -10,28 +10,28 @@ define void @foo() {
 ; X86-O0-LABEL: foo:
 ; X86-O0: # %bb.0: # %entry
 ; X86-O0-NEXT: xorl %eax, %eax
-; X86-O0-NEXT: movl %eax, %ecx
-; X86-O0-NEXT: xorl %eax, %eax
+; X86-O0-NEXT: # kill: def $rax killed $eax
+; X86-O0-NEXT: xorl %ecx, %ecx
 ; X86-O0-NEXT: movzbl c, %edx
-; X86-O0-NEXT: subl %edx, %eax
-; X86-O0-NEXT: movslq %eax, %rsi
-; X86-O0-NEXT: subq %rsi, %rcx
-; X86-O0-NEXT: # kill: def $cl killed $cl killed $rcx
-; X86-O0-NEXT: cmpb $0, %cl
-; X86-O0-NEXT: setne %cl
-; X86-O0-NEXT: andb $1, %cl
-; X86-O0-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
+; X86-O0-NEXT: subl %edx, %ecx
+; X86-O0-NEXT: movslq %ecx, %rcx
+; X86-O0-NEXT: subq %rcx, %rax
+; X86-O0-NEXT: # kill: def $al killed $al killed $rax
+; X86-O0-NEXT: cmpb $0, %al
+; X86-O0-NEXT: setne %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movb %al, -{{[0-9]+}}(%rsp)
 ; X86-O0-NEXT: cmpb $0, c
-; X86-O0-NEXT: setne %cl
-; X86-O0-NEXT: xorb $-1, %cl
-; X86-O0-NEXT: xorb $-1, %cl
-; X86-O0-NEXT: andb $1, %cl
-; X86-O0-NEXT: movzbl %cl, %eax
-; X86-O0-NEXT: movzbl c, %edx
-; X86-O0-NEXT: cmpl %edx, %eax
-; X86-O0-NEXT: setle %cl
-; X86-O0-NEXT: andb $1, %cl
-; X86-O0-NEXT: movzbl %cl, %eax
+; X86-O0-NEXT: setne %al
+; X86-O0-NEXT: xorb $-1, %al
+; X86-O0-NEXT: xorb $-1, %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movzbl %al, %eax
+; X86-O0-NEXT: movzbl c, %ecx
+; X86-O0-NEXT: cmpl %ecx, %eax
+; X86-O0-NEXT: setle %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movzbl %al, %eax
 ; X86-O0-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
 ; X86-O0-NEXT: retq
 ;
@@ -63,13 +63,13 @@ define void @foo() {
 ; 686-O0-NEXT: xorb $-1, %al
 ; 686-O0-NEXT: xorb $-1, %al
 ; 686-O0-NEXT: andb $1, %al
-; 686-O0-NEXT: movzbl %al, %ecx
-; 686-O0-NEXT: movzbl c, %edx
-; 686-O0-NEXT: cmpl %edx, %ecx
+; 686-O0-NEXT: movzbl %al, %eax
+; 686-O0-NEXT: movzbl c, %ecx
+; 686-O0-NEXT: cmpl %ecx, %eax
 ; 686-O0-NEXT: setle %al
 ; 686-O0-NEXT: andb $1, %al
-; 686-O0-NEXT: movzbl %al, %ecx
-; 686-O0-NEXT: movl %ecx, (%esp)
+; 686-O0-NEXT: movzbl %al, %eax
+; 686-O0-NEXT: movl %eax, (%esp)
 ; 686-O0-NEXT: addl $8, %esp
 ; 686-O0-NEXT: .cfi_def_cfa_offset 4
 ; 686-O0-NEXT: retl
@@ -126,33 +126,33 @@ define void @f1() {
 ; X86-O0-NEXT: movabsq $8381627093, %rcx # imm = 0x1F3957AD5
 ; X86-O0-NEXT: addq %rcx, %rax
 ; X86-O0-NEXT: cmpq $0, %rax
-; X86-O0-NEXT: setne %dl
-; X86-O0-NEXT: andb $1, %dl
-; X86-O0-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
-; X86-O0-NEXT: movl var_5, %esi
-; X86-O0-NEXT: xorl $-1, %esi
-; X86-O0-NEXT: cmpl $0, %esi
-; X86-O0-NEXT: setne %dl
-; X86-O0-NEXT: xorb $-1, %dl
-; X86-O0-NEXT: andb $1, %dl
-; X86-O0-NEXT: movzbl %dl, %esi
-; X86-O0-NEXT: movl %esi, %eax
+; X86-O0-NEXT: setne %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X86-O0-NEXT: movl var_5, %eax
+; X86-O0-NEXT: xorl $-1, %eax
+; X86-O0-NEXT: cmpl $0, %eax
+; X86-O0-NEXT: setne %al
+; X86-O0-NEXT: xorb $-1, %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movzbl %al, %eax
+; X86-O0-NEXT: # kill: def $rax killed $eax
 ; X86-O0-NEXT: movslq var_5, %rcx
 ; X86-O0-NEXT: addq $7093, %rcx # imm = 0x1BB5
 ; X86-O0-NEXT: cmpq %rcx, %rax
-; X86-O0-NEXT: setg %dl
-; X86-O0-NEXT: andb $1, %dl
-; X86-O0-NEXT: movzbl %dl, %esi
-; X86-O0-NEXT: movl %esi, %eax
+; X86-O0-NEXT: setg %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movzbl %al, %eax
+; X86-O0-NEXT: # kill: def $rax killed $eax
 ; X86-O0-NEXT: movq %rax, var_57
-; X86-O0-NEXT: movl var_5, %esi
-; X86-O0-NEXT: xorl $-1, %esi
-; X86-O0-NEXT: cmpl $0, %esi
-; X86-O0-NEXT: setne %dl
-; X86-O0-NEXT: xorb $-1, %dl
-; X86-O0-NEXT: andb $1, %dl
-; X86-O0-NEXT: movzbl %dl, %esi
-; X86-O0-NEXT: movl %esi, %eax
+; X86-O0-NEXT: movl var_5, %eax
+; X86-O0-NEXT: xorl $-1, %eax
+; X86-O0-NEXT: cmpl $0, %eax
+; X86-O0-NEXT: setne %al
+; X86-O0-NEXT: xorb $-1, %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movzbl %al, %eax
+; X86-O0-NEXT: # kill: def $rax killed $eax
 ; X86-O0-NEXT: movq %rax, _ZN8struct_210member_2_0E
 ; X86-O0-NEXT: retq
 ;
@@ -178,20 +178,17 @@ define void @f1() {
 ;
 ; 686-O0-LABEL: f1:
 ; 686-O0: # %bb.0: # %entry
-; 686-O0-NEXT: pushl %ebp
-; 686-O0-NEXT: .cfi_def_cfa_offset 8
 ; 686-O0-NEXT: pushl %ebx
-; 686-O0-NEXT: .cfi_def_cfa_offset 12
+; 686-O0-NEXT: .cfi_def_cfa_offset 8
 ; 686-O0-NEXT: pushl %edi
-; 686-O0-NEXT: .cfi_def_cfa_offset 16
+; 686-O0-NEXT: .cfi_def_cfa_offset 12
 ; 686-O0-NEXT: pushl %esi
-; 686-O0-NEXT: .cfi_def_cfa_offset 20
+; 686-O0-NEXT: .cfi_def_cfa_offset 16
 ; 686-O0-NEXT: subl $1, %esp
-; 686-O0-NEXT: .cfi_def_cfa_offset 21
-; 686-O0-NEXT: .cfi_offset %esi, -20
-; 686-O0-NEXT: .cfi_offset %edi, -16
-; 686-O0-NEXT: .cfi_offset %ebx, -12
-; 686-O0-NEXT: .cfi_offset %ebp, -8
+; 686-O0-NEXT: .cfi_def_cfa_offset 17
+; 686-O0-NEXT: .cfi_offset %esi, -16
+; 686-O0-NEXT: .cfi_offset %edi, -12
+; 686-O0-NEXT: .cfi_offset %ebx, -8
 ; 686-O0-NEXT: movl var_5, %eax
 ; 686-O0-NEXT: movl %eax, %ecx
 ; 686-O0-NEXT: sarl $31, %ecx
@@ -217,18 +214,16 @@ define void @f1() {
 ; 686-O0-NEXT: movl var_5, %edi
 ; 686-O0-NEXT: subl $-1, %edi
 ; 686-O0-NEXT: sete %bl
-; 686-O0-NEXT: movzbl %bl, %ebp
-; 686-O0-NEXT: movl %ebp, _ZN8struct_210member_2_0E
+; 686-O0-NEXT: movzbl %bl, %ebx
+; 686-O0-NEXT: movl %ebx, _ZN8struct_210member_2_0E
 ; 686-O0-NEXT: movl $0, _ZN8struct_210member_2_0E+4
 ; 686-O0-NEXT: addl $1, %esp
-; 686-O0-NEXT: .cfi_def_cfa_offset 20
-; 686-O0-NEXT: popl %esi
 ; 686-O0-NEXT: .cfi_def_cfa_offset 16
-; 686-O0-NEXT: popl %edi
+; 686-O0-NEXT: popl %esi
 ; 686-O0-NEXT: .cfi_def_cfa_offset 12
-; 686-O0-NEXT: popl %ebx
+; 686-O0-NEXT: popl %edi
 ; 686-O0-NEXT: .cfi_def_cfa_offset 8
-; 686-O0-NEXT: popl %ebp
+; 686-O0-NEXT: popl %ebx
 ; 686-O0-NEXT: .cfi_def_cfa_offset 4
 ; 686-O0-NEXT: retl
 ;
@@ -310,25 +305,25 @@ define void @f2() {
 ; X86-O0-NEXT: setne %cl
 ; X86-O0-NEXT: xorb $-1, %cl
 ; X86-O0-NEXT: andb $1, %cl
-; X86-O0-NEXT: movzbl %cl, %edx
-; X86-O0-NEXT: xorl %edx, %eax
+; X86-O0-NEXT: movzbl %cl, %ecx
+; X86-O0-NEXT: xorl %ecx, %eax
 ; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax
 ; X86-O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; X86-O0-NEXT: movzbl var_7, %edx
-; X86-O0-NEXT: # kill: def $dx killed $dx killed $edx
-; X86-O0-NEXT: cmpw $0, %dx
-; X86-O0-NEXT: setne %cl
-; X86-O0-NEXT: xorb $-1, %cl
-; X86-O0-NEXT: andb $1, %cl
-; X86-O0-NEXT: movzbl %cl, %esi
-; X86-O0-NEXT: movzbl var_7, %edi
-; X86-O0-NEXT: cmpl %edi, %esi
-; X86-O0-NEXT: sete %cl
-; X86-O0-NEXT: andb $1, %cl
-; X86-O0-NEXT: movzbl %cl, %esi
-; X86-O0-NEXT: # kill: def $si killed $si killed $esi
-; X86-O0-NEXT: # implicit-def: $r8
-; X86-O0-NEXT: movw %si, (%r8)
+; X86-O0-NEXT: movzbl var_7, %eax
+; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-O0-NEXT: cmpw $0, %ax
+; X86-O0-NEXT: setne %al
+; X86-O0-NEXT: xorb $-1, %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movzbl %al, %eax
+; X86-O0-NEXT: movzbl var_7, %ecx
+; X86-O0-NEXT: cmpl %ecx, %eax
+; X86-O0-NEXT: sete %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movzbl %al, %eax
+; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-O0-NEXT: # implicit-def: $rcx
+; X86-O0-NEXT: movw %ax, (%rcx)
 ; X86-O0-NEXT: retq
 ;
 ; X64-LABEL: f2:
@@ -350,43 +345,33 @@ define void @f2() {
 ;
 ; 686-O0-LABEL: f2:
 ; 686-O0: # %bb.0: # %entry
-; 686-O0-NEXT: pushl %edi
-; 686-O0-NEXT: .cfi_def_cfa_offset 8
-; 686-O0-NEXT: pushl %esi
-; 686-O0-NEXT: .cfi_def_cfa_offset 12
 ; 686-O0-NEXT: subl $2, %esp
-; 686-O0-NEXT: .cfi_def_cfa_offset 14
-; 686-O0-NEXT: .cfi_offset %esi, -12
-; 686-O0-NEXT: .cfi_offset %edi, -8
+; 686-O0-NEXT: .cfi_def_cfa_offset 6
 ; 686-O0-NEXT: movzbl var_7, %eax
 ; 686-O0-NEXT: cmpb $0, var_7
 ; 686-O0-NEXT: setne %cl
 ; 686-O0-NEXT: xorb $-1, %cl
 ; 686-O0-NEXT: andb $1, %cl
-; 686-O0-NEXT: movzbl %cl, %edx
-; 686-O0-NEXT: xorl %edx, %eax
+; 686-O0-NEXT: movzbl %cl, %ecx
+; 686-O0-NEXT: xorl %ecx, %eax
 ; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax
 ; 686-O0-NEXT: movw %ax, (%esp)
-; 686-O0-NEXT: movzbl var_7, %edx
-; 686-O0-NEXT: # kill: def $dx killed $dx killed $edx
-; 686-O0-NEXT: cmpw $0, %dx
-; 686-O0-NEXT: setne %cl
-; 686-O0-NEXT: xorb $-1, %cl
-; 686-O0-NEXT: andb $1, %cl
-; 686-O0-NEXT: movzbl %cl, %esi
-; 686-O0-NEXT: movzbl var_7, %edi
-; 686-O0-NEXT: cmpl %edi, %esi
-; 686-O0-NEXT: sete %cl
-; 686-O0-NEXT: andb $1, %cl
-; 686-O0-NEXT: movzbl %cl, %esi
-; 686-O0-NEXT: # kill: def $si killed $si killed $esi
-; 686-O0-NEXT: # implicit-def: $edi
-; 686-O0-NEXT: movw %si, (%edi)
+; 686-O0-NEXT: movzbl var_7, %eax
+; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; 686-O0-NEXT: cmpw $0, %ax
+; 686-O0-NEXT: setne %al
+; 686-O0-NEXT: xorb $-1, %al
+; 686-O0-NEXT: andb $1, %al
+; 686-O0-NEXT: movzbl %al, %eax
+; 686-O0-NEXT: movzbl var_7, %ecx
+; 686-O0-NEXT: cmpl %ecx, %eax
+; 686-O0-NEXT: sete %al
+; 686-O0-NEXT: andb $1, %al
+; 686-O0-NEXT: movzbl %al, %eax
+; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; 686-O0-NEXT: # implicit-def: $ecx
+; 686-O0-NEXT: movw %ax, (%ecx)
 ; 686-O0-NEXT: addl $2, %esp
-; 686-O0-NEXT: .cfi_def_cfa_offset 12
-; 686-O0-NEXT: popl %esi
-; 686-O0-NEXT: .cfi_def_cfa_offset 8
-; 686-O0-NEXT: popl %edi
 ; 686-O0-NEXT: .cfi_def_cfa_offset 4
 ; 686-O0-NEXT: retl
 ;
@@ -446,35 +431,35 @@ define void @f3() #0 {
 ; X86-O0-NEXT: movl var_13, %eax
 ; X86-O0-NEXT: xorl $-1, %eax
 ; X86-O0-NEXT: movl %eax, %eax
-; X86-O0-NEXT: movl %eax, %ecx
+; X86-O0-NEXT: # kill: def $rax killed $eax
 ; X86-O0-NEXT: cmpl $0, var_13
-; X86-O0-NEXT: setne %dl
-; X86-O0-NEXT: xorb $-1, %dl
-; X86-O0-NEXT: andb $1, %dl
-; X86-O0-NEXT: movzbl %dl, %eax
-; X86-O0-NEXT: movl %eax, %esi
-; X86-O0-NEXT: movl var_13, %eax
-; X86-O0-NEXT: xorl $-1, %eax
-; X86-O0-NEXT: xorl var_16, %eax
-; X86-O0-NEXT: movl %eax, %eax
-; X86-O0-NEXT: movl %eax, %edi
-; X86-O0-NEXT: andq %rdi, %rsi
-; X86-O0-NEXT: orq %rsi, %rcx
-; X86-O0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
+; X86-O0-NEXT: setne %cl
+; X86-O0-NEXT: xorb $-1, %cl
+; X86-O0-NEXT: andb $1, %cl
+; X86-O0-NEXT: movzbl %cl, %ecx
+; X86-O0-NEXT: # kill: def $rcx killed $ecx
+; X86-O0-NEXT: movl var_13, %edx
+; X86-O0-NEXT: xorl $-1, %edx
+; X86-O0-NEXT: xorl var_16, %edx
+; X86-O0-NEXT: movl %edx, %edx
+; X86-O0-NEXT: # kill: def $rdx killed $edx
+; X86-O0-NEXT: andq %rdx, %rcx
+; X86-O0-NEXT: orq %rcx, %rax
+; X86-O0-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
 ; X86-O0-NEXT: movl var_13, %eax
 ; X86-O0-NEXT: xorl $-1, %eax
 ; X86-O0-NEXT: movl %eax, %eax
-; X86-O0-NEXT: movl %eax, %ecx
+; X86-O0-NEXT: # kill: def $rax killed $eax
 ; X86-O0-NEXT: cmpl $0, var_13
-; X86-O0-NEXT: setne %dl
-; X86-O0-NEXT: xorb $-1, %dl
-; X86-O0-NEXT: andb $1, %dl
-; X86-O0-NEXT: movzbl %dl, %eax
-; X86-O0-NEXT: movl %eax, %esi
-; X86-O0-NEXT: andq $0, %rsi
-; X86-O0-NEXT: orq %rsi, %rcx
-; X86-O0-NEXT: # kill: def $ecx killed $ecx killed $rcx
-; X86-O0-NEXT: movl %ecx, var_46
+; X86-O0-NEXT: setne %cl
+; X86-O0-NEXT: xorb $-1, %cl
+; X86-O0-NEXT: andb $1, %cl
+; X86-O0-NEXT: movzbl %cl, %ecx
+; X86-O0-NEXT: # kill: def $rcx killed $ecx
+; X86-O0-NEXT: andq $0, %rcx
+; X86-O0-NEXT: orq %rcx, %rax
+; X86-O0-NEXT: # kill: def $eax killed $eax killed $rax
+; X86-O0-NEXT: movl %eax, var_46
 ; X86-O0-NEXT: retq
 ;
 ; X64-LABEL: f3:
@@ -499,31 +484,28 @@ define void @f3() #0 {
 ; 686-O0-NEXT: .cfi_offset %ebp, -8
 ; 686-O0-NEXT: movl %esp, %ebp
 ; 686-O0-NEXT: .cfi_def_cfa_register %ebp
-; 686-O0-NEXT: pushl %edi
 ; 686-O0-NEXT: pushl %esi
 ; 686-O0-NEXT: andl $-8, %esp
-; 686-O0-NEXT: subl $8, %esp
-; 686-O0-NEXT: .cfi_offset %esi, -16
-; 686-O0-NEXT: .cfi_offset %edi, -12
+; 686-O0-NEXT: subl $16, %esp
+; 686-O0-NEXT: .cfi_offset %esi, -12
 ; 686-O0-NEXT: movl var_13, %eax
 ; 686-O0-NEXT: movl %eax, %ecx
 ; 686-O0-NEXT: notl %ecx
 ; 686-O0-NEXT: testl %eax, %eax
-; 686-O0-NEXT: sete %dl
-; 686-O0-NEXT: movzbl %dl, %eax
-; 686-O0-NEXT: movl var_16, %esi
-; 686-O0-NEXT: movl %ecx, %edi
-; 686-O0-NEXT: xorl %esi, %edi
-; 686-O0-NEXT: andl %edi, %eax
+; 686-O0-NEXT: sete %al
+; 686-O0-NEXT: movzbl %al, %eax
+; 686-O0-NEXT: movl var_16, %edx
+; 686-O0-NEXT: movl %ecx, %esi
+; 686-O0-NEXT: xorl %edx, %esi
+; 686-O0-NEXT: andl %esi, %eax
 ; 686-O0-NEXT: orl %eax, %ecx
 ; 686-O0-NEXT: movl %ecx, (%esp)
 ; 686-O0-NEXT: movl $0, {{[0-9]+}}(%esp)
 ; 686-O0-NEXT: movl var_13, %eax
 ; 686-O0-NEXT: notl %eax
 ; 686-O0-NEXT: movl %eax, var_46
-; 686-O0-NEXT: leal -8(%ebp), %esp
+; 686-O0-NEXT: leal -4(%ebp), %esp
 ; 686-O0-NEXT: popl %esi
-; 686-O0-NEXT: popl %edi
 ; 686-O0-NEXT: popl %ebp
 ; 686-O0-NEXT: .cfi_def_cfa %esp, 4
 ; 686-O0-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/pr32340.ll b/llvm/test/CodeGen/X86/pr32340.ll
index 1e428ac7d83a6b..98685b959f642f 100644
--- a/llvm/test/CodeGen/X86/pr32340.ll
+++ b/llvm/test/CodeGen/X86/pr32340.ll
@@ -14,37 +14,37 @@ define void @foo() {
 ; X64-LABEL: foo:
 ; X64: # %bb.0: # %entry
 ; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: # kill: def $rax killed $eax
 ; X64-NEXT: movw $0, var_825
-; X64-NEXT: movzwl var_32, %eax
+; X64-NEXT: movzwl var_32, %ecx
 ; X64-NEXT: movzwl var_901, %edx
-; X64-NEXT: movl %eax, %esi
+; X64-NEXT: movl %ecx, %esi
 ; X64-NEXT: xorl %edx, %esi
-; X64-NEXT: movl %eax, %edx
+; X64-NEXT: movl %ecx, %edx
 ; X64-NEXT: xorl %esi, %edx
-; X64-NEXT: addl %eax, %edx
-; X64-NEXT: movslq %edx, %rdi
-; X64-NEXT: movq %rdi, var_826
-; X64-NEXT: movzwl var_32, %eax
-; X64-NEXT: movl %eax, %edi
-; X64-NEXT: movzwl var_901, %eax
-; X64-NEXT: xorl $51981, %eax # imm = 0xCB0D
-; X64-NEXT: movslq %eax, %r8
-; X64-NEXT: movabsq $-1142377792914660288, %r9 # imm = 0xF02575732E06E440
-; X64-NEXT: xorq %r9, %r8
-; X64-NEXT: movq %rdi, %r9
-; X64-NEXT: xorq %r8, %r9
-; X64-NEXT: xorq $-1, %r9
-; X64-NEXT: xorq %r9, %rdi
-; X64-NEXT: movq %rdi, %r8
-; X64-NEXT: orq var_57, %r8
-; X64-NEXT: orq %r8, %rdi
-; X64-NEXT: # kill: def $di killed $di killed $rdi
-; X64-NEXT: movw %di, var_900
-; X64-NEXT: cmpq var_28, %rcx
-; X64-NEXT: setne %r10b
-; X64-NEXT: andb $1, %r10b
-; X64-NEXT: movzbl %r10b, %eax
+; X64-NEXT: addl %ecx, %edx
+; X64-NEXT: movslq %edx, %rcx
+; X64-NEXT: movq %rcx, var_826
+; X64-NEXT: movzwl var_32, %ecx
+; X64-NEXT: # kill: def $rcx killed $ecx
+; X64-NEXT: movzwl var_901, %edx
+; X64-NEXT: xorl $51981, %edx # imm = 0xCB0D
+; X64-NEXT: movslq %edx, %rdx
+; X64-NEXT: movabsq $-1142377792914660288, %rsi # imm = 0xF02575732E06E440
+; X64-NEXT: xorq %rsi, %rdx
+; X64-NEXT: movq %rcx, %rsi
+; X64-NEXT: xorq %rdx, %rsi
+; X64-NEXT: xorq $-1, %rsi
+; X64-NEXT: xorq %rsi, %rcx
+; X64-NEXT: movq %rcx, %rdx
+; X64-NEXT: orq var_57, %rdx
+; X64-NEXT: orq %rdx, %rcx
+; X64-NEXT: # kill: def $cx killed $cx killed $rcx
+; X64-NEXT: movw %cx, var_900
+; X64-NEXT: cmpq var_28, %rax
+; X64-NEXT: setne %al
+; X64-NEXT: andb $1, %al
+; X64-NEXT: movzbl %al, %eax
 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
 ; X64-NEXT: movw %ax, var_827
 ; X64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/pr32345.ll b/llvm/test/CodeGen/X86/pr32345.ll
index d5f7fde77f6d2a..165e0292d46488 100644
--- a/llvm/test/CodeGen/X86/pr32345.ll
+++ b/llvm/test/CodeGen/X86/pr32345.ll
@@ -15,23 +15,23 @@ define void @foo() {
 ; X640-NEXT: xorl %ecx, %eax
 ; X640-NEXT: movzwl var_27, %ecx
 ; X640-NEXT: xorl %ecx, %eax
-; X640-NEXT: movslq %eax, %rdx
-; X640-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; X640-NEXT: cltq
+; X640-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
 ; X640-NEXT: movzwl var_22, %eax
 ; X640-NEXT: movzwl var_27, %ecx
 ; X640-NEXT: xorl %ecx, %eax
 ; X640-NEXT: movzwl var_27, %ecx
 ; X640-NEXT: xorl %ecx, %eax
-; X640-NEXT: movslq %eax, %rdx
-; X640-NEXT: movzwl var_27, %eax
-; X640-NEXT: subl $16610, %eax # imm = 0x40E2
-; X640-NEXT: movl %eax, %eax
-; X640-NEXT: movl %eax, %ecx
+; X640-NEXT: cltq
+; X640-NEXT: movzwl var_27, %ecx
+; X640-NEXT: subl $16610, %ecx # imm = 0x40E2
+; X640-NEXT: movl %ecx, %ecx
+; X640-NEXT: # kill: def $rcx killed $ecx
 ; X640-NEXT: # kill: def $cl killed $rcx
-; X640-NEXT: sarq %cl, %rdx
-; X640-NEXT: # kill: def $dl killed $dl killed $rdx
-; X640-NEXT: # implicit-def: $rsi
-; X640-NEXT: movb %dl, (%rsi)
+; X640-NEXT: sarq %cl, %rax
+; X640-NEXT: # kill: def $al killed $al killed $rax
+; X640-NEXT: # implicit-def: $rcx
+; X640-NEXT: movb %al, (%rcx)
 ; X640-NEXT: retq
 ;
 ; 6860-LABEL: foo:
@@ -41,43 +41,37 @@ define void @foo() {
 ; 6860-NEXT: .cfi_offset %ebp, -8
 ; 6860-NEXT: movl %esp, %ebp
 ; 6860-NEXT: .cfi_def_cfa_register %ebp
-; 6860-NEXT: pushl %ebx
-; 6860-NEXT: pushl %edi
-; 6860-NEXT: pushl %esi
 ; 6860-NEXT: andl $-8, %esp
-; 6860-NEXT: subl $32, %esp
-; 6860-NEXT: .cfi_offset %esi, -20
-; 6860-NEXT: .cfi_offset %edi, -16
-; 6860-NEXT: .cfi_offset %ebx, -12
+; 6860-NEXT: subl $24, %esp
 ; 6860-NEXT: movw var_22, %ax
 ; 6860-NEXT: movzwl var_27, %ecx
 ; 6860-NEXT: movw %cx, %dx
 ; 6860-NEXT: xorw %dx, %ax
-; 6860-NEXT: # implicit-def: $esi
-; 6860-NEXT: movw %ax, %si
-; 6860-NEXT: xorl %ecx, %esi
-; 6860-NEXT: # kill: def $si killed $si killed $esi
-; 6860-NEXT: movzwl %si, %ecx
-; 6860-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; 6860-NEXT: # implicit-def: $edx
+; 6860-NEXT: movw %ax, %dx
+; 6860-NEXT: xorl %ecx, %edx
+; 6860-NEXT: # kill: def $dx killed $dx killed $edx
+; 6860-NEXT: movzwl %dx, %eax
+; 6860-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; 6860-NEXT: movl $0, {{[0-9]+}}(%esp)
 ; 6860-NEXT: movw var_22, %ax
 ; 6860-NEXT: movzwl var_27, %ecx
 ; 6860-NEXT: movw %cx, %dx
 ; 6860-NEXT: xorw %dx, %ax
-; 6860-NEXT: # implicit-def: $edi
-; 6860-NEXT: movw %ax, %di
-; 6860-NEXT: xorl %ecx, %edi
-; 6860-NEXT: # kill: def $di killed $di killed $edi
-; 6860-NEXT: movzwl %di, %ebx
+; 6860-NEXT: # implicit-def: $edx
+; 6860-NEXT: movw %ax, %dx
+; 6860-NEXT: xorl %ecx, %edx
+; 6860-NEXT: # kill: def $dx killed $dx killed $edx
+; 6860-NEXT: movzwl %dx, %eax
 ; 6860-NEXT: # kill: def $cl killed $cl killed $ecx
 ; 6860-NEXT: addb $30, %cl
-; 6860-NEXT: xorl %eax, %eax
+; 6860-NEXT: xorl %edx, %edx
 ; 6860-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; 6860-NEXT: shrdl %cl, %eax, %ebx
+; 6860-NEXT: shrdl %cl, %edx, %eax
 ; 6860-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
 ; 6860-NEXT: testb $32, %cl
-; 6860-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; 6860-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; 6860-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; 6860-NEXT: jne .LBB0_2
 ; 6860-NEXT: # %bb.1: # %bb
 ; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -87,10 +81,7 @@ define void @foo() {
 ; 6860-NEXT: # kill: def $al killed $al killed $eax
 ; 6860-NEXT: # implicit-def: $ecx
 ; 6860-NEXT: movb %al, (%ecx)
-; 6860-NEXT: leal -12(%ebp), %esp
-; 6860-NEXT: popl %esi
-; 6860-NEXT: popl %edi
-; 6860-NEXT: popl %ebx
+; 6860-NEXT: movl %ebp, %esp
 ; 6860-NEXT: popl %ebp
 ; 6860-NEXT: .cfi_def_cfa %esp, 4
 ; 6860-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/pr32451.ll b/llvm/test/CodeGen/X86/pr32451.ll
index 4754d8e4cf6cb6..3b1997234ce555 100644
--- a/llvm/test/CodeGen/X86/pr32451.ll
+++ b/llvm/test/CodeGen/X86/pr32451.ll
@@ -9,29 +9,24 @@ target triple = "x86_64-unknown-linux-gnu"
 define i8** @japi1_convert_690(i8**, i8***, i32) {
 ; CHECK-LABEL: japi1_convert_690:
 ; CHECK: # %bb.0: # %top
-; CHECK-NEXT: pushl %ebx
-; CHECK-NEXT: .cfi_def_cfa_offset 8
 ; CHECK-NEXT: subl $16, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 24
-; CHECK-NEXT: .cfi_offset %ebx, -8
+; CHECK-NEXT: .cfi_def_cfa_offset 20
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; CHECK-NEXT: calll julia.gc_root_decl
-; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; CHECK-NEXT: calll jl_get_ptls_states
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; CHECK-NEXT: movl 4(%ecx), %edx
-; CHECK-NEXT: movb (%edx), %bl
-; CHECK-NEXT: andb $1, %bl
-; CHECK-NEXT: movzbl %bl, %edx
+; CHECK-NEXT: movb (%edx), %dl
+; CHECK-NEXT: andb $1, %dl
+; CHECK-NEXT: movzbl %dl, %edx
 ; CHECK-NEXT: movl %edx, (%esp)
-; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; CHECK-NEXT: calll jl_box_int32
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; CHECK-NEXT: movl %eax, (%ecx)
 ; CHECK-NEXT: addl $16, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: popl %ebx
 ; CHECK-NEXT: .cfi_def_cfa_offset 4
 ; CHECK-NEXT: retl
 top:
diff --git a/llvm/test/CodeGen/X86/pr34592.ll b/llvm/test/CodeGen/X86/pr34592.ll
index 0f73036a4c6c91..25b068c8fad6f7 100644
--- a/llvm/test/CodeGen/X86/pr34592.ll
+++ b/llvm/test/CodeGen/X86/pr34592.ll
@@ -10,7 +10,7 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1
 ; CHECK-NEXT: movq %rsp, %rbp
 ; CHECK-NEXT: .cfi_def_cfa_register %rbp
 ; CHECK-NEXT: andq $-32, %rsp
-; CHECK-NEXT: subq $192, %rsp
+; CHECK-NEXT: subq $160, %rsp
 ; CHECK-NEXT: vmovaps 240(%rbp), %ymm8
 ; CHECK-NEXT: vmovaps 208(%rbp), %ymm9
 ; CHECK-NEXT: vmovaps 176(%rbp), %ymm10
@@ -27,14 +27,14 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1
 ; CHECK-NEXT: vpalignr {{.*#+}} ymm2 = ymm2[8,9,10,11,12,13,14,15],ymm11[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm11[16,17,18,19,20,21,22,23]
 ; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,3,2,0]
 ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5],ymm2[6,7]
-; CHECK-NEXT: vmovaps %xmm7, %xmm9
-; CHECK-NEXT: vpslldq {{.*#+}} xmm9 = zero,zero,zero,zero,zero,zero,zero,zero,xmm9[0,1,2,3,4,5,6,7]
-; CHECK-NEXT: # implicit-def: $ymm2
-; CHECK-NEXT: vmovaps %xmm9, %xmm2
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Reload
-; CHECK-NEXT: vpalignr {{.*#+}} ymm9 = ymm11[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm11[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
-; CHECK-NEXT: vpermq {{.*#+}} ymm9 = ymm9[0,1,0,3]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm9[4,5,6,7]
+; CHECK-NEXT: vmovaps %xmm7, %xmm2
+; CHECK-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: # implicit-def: $ymm9
+; CHECK-NEXT: vmovaps %xmm2, %xmm9
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
+; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm2[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
+; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm11[4,5,6,7]
 ; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6,7]
 ; CHECK-NEXT: vpermq {{.*#+}} ymm8 = ymm8[2,1,1,3]
 ; CHECK-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5]
@@ -43,14 +43,11 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1
 ; CHECK-NEXT: vmovq {{.*#+}} xmm7 = xmm7[0],zero
 ; CHECK-NEXT: # implicit-def: $ymm8
 ; CHECK-NEXT: vmovaps %xmm7, %xmm8
-; CHECK-NEXT: vperm2i128 {{.*#+}} ymm6 = ymm8[0,1],ymm6[0,1]
+; CHECK-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm8[0,1],ymm6[0,1]
 ; CHECK-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
 ; CHECK-NEXT: vmovaps %ymm5, %ymm1
-; CHECK-NEXT: vmovaps %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm6, %ymm2
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm5 # 32-byte Reload
 ; CHECK-NEXT: vmovaps %ymm3, (%rsp) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm5, %ymm3
+; CHECK-NEXT: vmovaps %ymm9, %ymm3
 ; CHECK-NEXT: movq %rbp, %rsp
 ; CHECK-NEXT: popq %rbp
 ; CHECK-NEXT: .cfi_def_cfa %rsp, 8
diff --git a/llvm/test/CodeGen/X86/pr39733.ll b/llvm/test/CodeGen/X86/pr39733.ll
index 4c7153852d22cd..75f9dc51b85eb5 100644
--- a/llvm/test/CodeGen/X86/pr39733.ll
+++ b/llvm/test/CodeGen/X86/pr39733.ll
@@ -23,8 +23,8 @@ define void @test55() {
 ; CHECK-NEXT: vmovaps %xmm1, %xmm2
 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2
-; CHECK-NEXT: vmovdqa %ymm2, (%rsp)
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; CHECK-NEXT: vmovdqa %ymm0, (%rsp)
 ; CHECK-NEXT: movq %rbp, %rsp
 ; CHECK-NEXT: popq %rbp
 ; CHECK-NEXT: .cfi_def_cfa %rsp, 8
diff --git a/llvm/test/CodeGen/X86/pr44749.ll b/llvm/test/CodeGen/X86/pr44749.ll
index d465009c7c38ae..1012d8c723b135 100644
--- a/llvm/test/CodeGen/X86/pr44749.ll
+++ b/llvm/test/CodeGen/X86/pr44749.ll
@@ -14,20 +14,22 @@ define i32 @a() {
 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT: callq _b
 ; CHECK-NEXT: cvtsi2sd %eax, %xmm0
-; CHECK-NEXT: movq _calloc@{{.*}}(%rip), %rcx
-; CHECK-NEXT: subq $-1, %rcx
-; CHECK-NEXT: setne %dl
-; CHECK-NEXT: movzbl %dl, %eax
-; CHECK-NEXT: movl %eax, %esi
-; CHECK-NEXT: leaq {{.*}}(%rip), %rdi
+; CHECK-NEXT: movq _calloc@{{.*}}(%rip), %rax
+; CHECK-NEXT: subq $-1, %rax
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: ## kill: def $rcx killed $ecx
+; CHECK-NEXT: leaq {{.*}}(%rip), %rdx
 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
 ; CHECK-NEXT: ucomisd %xmm1, %xmm0
-; CHECK-NEXT: setae %dl
-; CHECK-NEXT: movzbl %dl, %eax
-; CHECK-NEXT: movl %eax, %esi
-; CHECK-NEXT: leaq {{.*}}(%rip), %rdi
+; CHECK-NEXT: setae %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: ## kill: def $rcx killed $ecx
+; CHECK-NEXT: leaq {{.*}}(%rip), %rdx
 ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: cvttsd2si %xmm0, %eax
+; CHECK-NEXT: cvttsd2si %xmm0, %ecx
+; CHECK-NEXT: movq %rax, (%rsp) ## 8-byte Spill
+; CHECK-NEXT: movl %ecx, %eax
 ; CHECK-NEXT: addq $24, %rsp
 ; CHECK-NEXT: retq
 entry:
diff --git a/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir b/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir
index 0fe9f60897fd14..2821f00940ecf6 100644
--- a/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir
+++ b/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir
@@ -23,15 +23,15 @@ body: |
 ; CHECK: successors: %bb.3(0x80000000)
 ; CHECK: $rax = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1)
 ; CHECK: renamable $ecx = MOV32r0 implicit-def $eflags
- ; CHECK: renamable $rdx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit
+ ; CHECK: renamable $rcx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit
 ; CHECK: MOV64mi32 killed renamable $rax, 1, $noreg, 0, $noreg, 0 :: (volatile store 8)
- ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rdx :: (store 8 into %stack.0)
+ ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rcx :: (store 8 into %stack.0)
 ; CHECK: bb.3:
 ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
 ; CHECK: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0)
 ; CHECK: renamable $ecx = MOV32r0 implicit-def dead $eflags
- ; CHECK: renamable $rdx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit
- ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed $rdx ::
(store 8 into %stack.1) + ; CHECK: renamable $rcx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit + ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed $rcx :: (store 8 into %stack.1) ; CHECK: JMP64r killed renamable $rax bb.0: liveins: $edi, $rsi diff --git a/llvm/test/CodeGen/X86/swift-return.ll b/llvm/test/CodeGen/X86/swift-return.ll index c62e92f2cac551..4934419055acd7 100644 --- a/llvm/test/CodeGen/X86/swift-return.ll +++ b/llvm/test/CodeGen/X86/swift-return.ll @@ -28,11 +28,10 @@ define i16 @test(i32 %key) { ; CHECK-O0-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-O0-NEXT: callq gen -; CHECK-O0-NEXT: movswl %ax, %ecx -; CHECK-O0-NEXT: movsbl %dl, %esi -; CHECK-O0-NEXT: addl %esi, %ecx -; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx -; CHECK-O0-NEXT: movw %cx, %ax +; CHECK-O0-NEXT: cwtl +; CHECK-O0-NEXT: movsbl %dl, %ecx +; CHECK-O0-NEXT: addl %ecx, %eax +; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-O0-NEXT: popq %rcx ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq @@ -80,16 +79,16 @@ define i32 @test2(i32 %key) #0 { ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-O0-NEXT: movq %rsp, %rax ; CHECK-O0-NEXT: callq gen2 +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edx -; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %esi -; CHECK-O0-NEXT: movl (%rsp), %edi -; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %r8d -; CHECK-O0-NEXT: addl %r8d, %edi -; CHECK-O0-NEXT: addl %esi, %edi -; CHECK-O0-NEXT: addl %edx, %edi -; CHECK-O0-NEXT: addl %ecx, %edi -; CHECK-O0-NEXT: movl %edi, %eax +; CHECK-O0-NEXT: movl (%rsp), %esi +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi +; CHECK-O0-NEXT: addl %edi, %esi +; CHECK-O0-NEXT: addl %edx, %esi +; CHECK-O0-NEXT: addl %ecx, %esi +; CHECK-O0-NEXT: addl %eax, %esi +; CHECK-O0-NEXT: movl %esi, %eax ; CHECK-O0-NEXT: addq $24, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq @@ -264,17 +263,17 @@ define void @consume_i1_ret() { ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O0-NEXT: callq produce_i1_ret ; CHECK-O0-NEXT: andb $1, %al -; CHECK-O0-NEXT: movzbl %al, %esi -; CHECK-O0-NEXT: movl %esi, var +; CHECK-O0-NEXT: movzbl %al, %eax +; CHECK-O0-NEXT: movl %eax, var ; CHECK-O0-NEXT: andb $1, %dl -; CHECK-O0-NEXT: movzbl %dl, %esi -; CHECK-O0-NEXT: movl %esi, var +; CHECK-O0-NEXT: movzbl %dl, %eax +; CHECK-O0-NEXT: movl %eax, var ; CHECK-O0-NEXT: andb $1, %cl -; CHECK-O0-NEXT: movzbl %cl, %esi -; CHECK-O0-NEXT: movl %esi, var +; CHECK-O0-NEXT: movzbl %cl, %eax +; CHECK-O0-NEXT: movl %eax, var ; CHECK-O0-NEXT: andb $1, %r8b -; CHECK-O0-NEXT: movzbl %r8b, %esi -; CHECK-O0-NEXT: movl %esi, var +; CHECK-O0-NEXT: movzbl %r8b, %eax +; CHECK-O0-NEXT: movl %eax, var ; CHECK-O0-NEXT: popq %rax ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq diff --git a/llvm/test/CodeGen/X86/swifterror.ll b/llvm/test/CodeGen/X86/swifterror.ll index 1388c61c18984a..1afae31b2b8d23 100644 --- a/llvm/test/CodeGen/X86/swifterror.ll +++ b/llvm/test/CodeGen/X86/swifterror.ll @@ -790,8 +790,8 @@ a: ; CHECK-O0-LABEL: testAssign4 ; CHECK-O0: callq _foo2 ; CHECK-O0: xorl %eax, %eax -; CHECK-O0: movl %eax, %ecx -; CHECK-O0: movq %rcx, [[SLOT:[-a-z0-9\(\)\%]*]] +; CHECK-O0: ## kill: def $rax killed $eax +; CHECK-O0: movq %rax, [[SLOT:[-a-z0-9\(\)\%]*]] ; CHECK-O0: movq [[SLOT]], %rax ; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]] ; CHECK-O0: movq [[SLOT2]], %r12 diff --git 
a/llvm/test/DebugInfo/X86/op_deref.ll b/llvm/test/DebugInfo/X86/op_deref.ll index 5de9976d6de2a2..1b49dc554f7efa 100644 --- a/llvm/test/DebugInfo/X86/op_deref.ll +++ b/llvm/test/DebugInfo/X86/op_deref.ll @@ -6,10 +6,10 @@ ; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=DWARF3 ; DWARF4: DW_AT_location [DW_FORM_sec_offset] (0x00000000 -; DWARF4-NEXT: {{.*}}: DW_OP_breg1 RDX+0, DW_OP_deref +; DWARF4-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref ; DWARF3: DW_AT_location [DW_FORM_data4] (0x00000000 -; DWARF3-NEXT: {{.*}}: DW_OP_breg1 RDX+0, DW_OP_deref +; DWARF3-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000067] = "vla") @@ -17,8 +17,8 @@ ; Check the DEBUG_VALUE comments for good measure. ; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o - -filetype=asm | FileCheck %s -check-prefix=ASM-CHECK ; vla should have a register-indirect address at one point. -; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rdx+0] -; ASM-CHECK: DW_OP_breg1 +; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rcx+0] +; ASM-CHECK: DW_OP_breg2 ; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s --check-prefix=PRETTY-PRINT ; PRETTY-PRINT: DIExpression(DW_OP_deref) diff --git a/llvm/utils/gn/build/toolchain/BUILD.gn b/llvm/utils/gn/build/toolchain/BUILD.gn index 453aab57252507..86e95d3de188c8 100644 --- a/llvm/utils/gn/build/toolchain/BUILD.gn +++ b/llvm/utils/gn/build/toolchain/BUILD.gn @@ -55,10 +55,10 @@ template("unix_toolchain") { tool("solink") { outfile = "{{output_dir}}/{{target_output_name}}{{output_extension}}" if (current_os == "mac") { - command = "$ld -shared {{ldflags}} -o $outfile {{libs}} {{inputs}}" + command = "$ld -shared {{ldflags}} -o $outfile {{inputs}} {{libs}}" default_output_extension = ".dylib" } else { - command = "$ld -shared {{ldflags}} -Wl,-z,defs -Wl,-soname,{{target_output_name}}{{output_extension}} -o $outfile {{libs}} {{inputs}}" + command = "$ld -shared {{ldflags}} -Wl,-z,defs -Wl,-soname,{{target_output_name}}{{output_extension}} -o $outfile {{inputs}} {{libs}}" default_output_extension = ".so" } description = "SOLINK $outfile" @@ -71,10 +71,10 @@ template("unix_toolchain") { tool("solink_module") { outfile = "{{output_dir}}/{{target_output_name}}{{output_extension}}" if (current_os == "mac") { - command = "$ld -shared {{ldflags}} -Wl,-flat_namespace -Wl,-undefined,suppress -o $outfile {{libs}} {{inputs}}" + command = "$ld -shared {{ldflags}} -Wl,-flat_namespace -Wl,-undefined,suppress -o $outfile {{inputs}} {{libs}}" default_output_extension = ".dylib" } else { - command = "$ld -shared {{ldflags}} -Wl,-soname,{{target_output_name}}{{output_extension}} -o $outfile {{libs}} {{inputs}}" + command = "$ld -shared {{ldflags}} -Wl,-soname,{{target_output_name}}{{output_extension}} -o $outfile {{inputs}} {{libs}}" default_output_extension = ".so" } description = "SOLINK $outfile" @@ -86,9 +86,9 @@ template("unix_toolchain") { tool("link") { outfile = "{{output_dir}}/{{target_output_name}}{{output_extension}}" if (current_os == "mac") { - command = "$ld {{ldflags}} -o $outfile {{libs}} {{inputs}}" + command = "$ld {{ldflags}} -o $outfile {{inputs}} {{libs}}" } else { - command = "$ld {{ldflags}} -o $outfile {{libs}} -Wl,--start-group {{inputs}} -Wl,--end-group" + command = "$ld {{ldflags}} -o $outfile -Wl,--start-group {{inputs}} -Wl,--end-group {{libs}}" } description = "LINK $outfile" outputs = [ outfile ] @@ -245,7 +245,7 @@ toolchain("win") { dllfile = "$outprefix{{output_extension}}" libfile = "$outprefix.lib" 
pdbfile = "$outprefix.pdb" - command = "$link /nologo /dll {{ldflags}} /out:$dllfile /implib:$libfile /pdb:$pdbfile {{libs}} {{inputs}}" + command = "$link /nologo /dll {{ldflags}} /out:$dllfile /implib:$libfile /pdb:$pdbfile {{inputs}} {{libs}} " description = "LINK $dllfile" link_output = libfile depend_output = libfile @@ -272,7 +272,7 @@ toolchain("win") { outprefix = "{{output_dir}}/{{target_output_name}}" dllfile = "$outprefix{{output_extension}}" pdbfile = "$outprefix.pdb" - command = "$link /nologo /dll {{ldflags}} /out:$dllfile /pdb:$pdbfile {{libs}} {{inputs}}" + command = "$link /nologo /dll {{ldflags}} /out:$dllfile /pdb:$pdbfile {{inputs}} {{libs}} " description = "LINK_MODULE $dllfile" outputs = [ dllfile ] lib_switch = "" @@ -286,7 +286,7 @@ toolchain("win") { outprefix = "{{output_dir}}/{{target_output_name}}" outfile = "$outprefix{{output_extension}}" pdbfile = "$outprefix.pdb" - command = "$link /nologo {{ldflags}} /out:$outfile /pdb:$pdbfile {{libs}} {{inputs}}" + command = "$link /nologo {{ldflags}} /out:$outfile /pdb:$pdbfile {{inputs}} {{libs}}" description = "LINK $outfile" outputs = [ outfile ] lib_switch = "" diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn index a145b523b8f415..0653dd6a4a6e6b 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn @@ -28,6 +28,7 @@ static_library("modernize") { "RawStringLiteralCheck.cpp", "RedundantVoidArgCheck.cpp", "ReplaceAutoPtrCheck.cpp", + "ReplaceDisallowCopyAndAssignMacroCheck.cpp", "ReplaceRandomShuffleCheck.cpp", "ReturnBracedInitListCheck.cpp", "ShrinkToFitCheck.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn index 30530ae57ad8b7..f3f1019ba46094 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn @@ -69,6 +69,7 @@ static_library("LLVMWebAssemblyCodeGen") { "WebAssemblyExceptionInfo.cpp", "WebAssemblyExplicitLocals.cpp", "WebAssemblyFastISel.cpp", + "WebAssemblyFixBrTableDefaults.cpp", "WebAssemblyFixFunctionBitcasts.cpp", "WebAssemblyFixIrreducibleControlFlow.cpp", "WebAssemblyFrameLowering.cpp", diff --git a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td index 460f5becc1f9e1..e38b8ba55b5df1 100644 --- a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td +++ b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td @@ -64,8 +64,7 @@ def Shape_AddOp : Shape_Op<"add", [SameOperandsAndResultType]> { let results = (outs Shape_SizeType:$result); } -def Shape_BroadcastOp : Shape_Op<"broadcast", - [DeclareOpInterfaceMethods]> { +def Shape_BroadcastOp : Shape_Op<"broadcast", []> { let summary = "Returns the broadcasted output shape of two inputs"; let description = [{ Computes the broadcasted output shape following: @@ -92,7 +91,7 @@ def Shape_BroadcastOp : Shape_Op<"broadcast", } def Shape_ConstShapeOp : Shape_Op<"const_shape", [ConstantLike, NoSideEffect]> { - let summary = "Creates a constant of !shape.shape type."; + let summary = "Creates a constant of !shape.shape type"; let description = [{ Creates a !shape.shape with rank given by the length of `shape` and with dimension sizes given by the values of `shape`. 
@@ -111,10 +110,7 @@ def Shape_ConstShapeOp : Shape_Op<"const_shape", [ConstantLike, NoSideEffect]> { let hasFolder = 1; } -def Shape_ConstSizeOp : Shape_Op<"const_size", - [ConstantLike, - NoSideEffect, - DeclareOpInterfaceMethods<InferTypeOpInterface>]> { +def Shape_ConstSizeOp : Shape_Op<"const_size", [ConstantLike, NoSideEffect]> { let summary = "Creates a constant of type `shape.size`"; let description = [{ Creates a `shape.size` type representing the constant size given by `value`. @@ -131,10 +127,7 @@ def Shape_ConstSizeOp : Shape_Op<"const_size", let hasFolder = 1; } -def Shape_FromExtentsOp : Shape_Op<"from_extents", [ - NoSideEffect, - DeclareOpInterfaceMethods<InferTypeOpInterface> - ]> { +def Shape_FromExtentsOp : Shape_Op<"from_extents", [NoSideEffect]> { let summary = "Creates a shape from extents"; let description = [{ Creates a shape from multiple SSA values representing the extents of @@ -188,8 +181,7 @@ def Shape_ToExtentTensorOp : Shape_Op<"to_extent_tensor", []> { let hasFolder = 1; } -def Shape_GetExtentOp : Shape_Op<"get_extent", - [NoSideEffect, DeclareOpInterfaceMethods<InferTypeOpInterface>]> { +def Shape_GetExtentOp : Shape_Op<"get_extent", [NoSideEffect]> { let summary = "Gets the specified extent from a shape"; let description = [{ Gets the extent indexed by `dim` from `shape`. @@ -214,14 +206,13 @@ def Shape_GetExtentOp : Shape_Op<"get_extent", let hasFolder = 1; } -def Shape_IndexToSizeOp : Shape_Op<"index_to_size", [ - NoSideEffect, - DeclareOpInterfaceMethods<InferTypeOpInterface>]> { +def Shape_IndexToSizeOp : Shape_Op<"index_to_size", [NoSideEffect]> { let summary = "Converts a standard index to a shape size"; let description = [{ - Converts a standard index to a `shape.size`. - This operation and its inverse, `size_to_index`, facilitate index conversion - between the standard and the shape dialect. + Converts a standard index to a `shape.size`. This operation and its + inverse, `size_to_index`, facilitate index conversion between the standard + and the shape dialect. + The behavior is undefined for negative indices. }]; @@ -278,10 +269,7 @@ def Shape_MulOp : Shape_Op<"mul", [SameOperandsAndResultType]> { let results = (outs Shape_SizeType:$result); } -def Shape_NumElementsOp : Shape_Op<"num_elements", [ - NoSideEffect, - DeclareOpInterfaceMethods<InferTypeOpInterface>]> { - +def Shape_NumElementsOp : Shape_Op<"num_elements", [NoSideEffect]> { let summary = "Returns the number of elements for a given shape"; let description = [{ Returns the number of elements for a given shape which is the product of its @@ -337,8 +325,7 @@ def Shape_ReduceOp : Shape_Op<"reduce", []> { let regions = (region SizedRegion<1>:$body); } -def Shape_ShapeOfOp : Shape_Op<"shape_of", - [NoSideEffect, DeclareOpInterfaceMethods<InferTypeOpInterface>]> { +def Shape_ShapeOfOp : Shape_Op<"shape_of", [NoSideEffect]> { let summary = "Returns shape of a value or shaped type operand"; let arguments = (ins AnyTypeOf<[AnyShaped, Shape_ValueShapeType]>:$arg); @@ -349,9 +336,7 @@ def Shape_ShapeOfOp : Shape_Op<"shape_of", let hasFolder = 1; } -def Shape_SizeToIndexOp : Shape_Op<"size_to_index", [ - NoSideEffect, - DeclareOpInterfaceMethods<InferTypeOpInterface>]> { +def Shape_SizeToIndexOp : Shape_Op<"size_to_index", [NoSideEffect]> { let summary = "Casts between index types of the shape and standard dialect"; let description = [{ Converts a `shape.size` to a standard index.
@@ -395,9 +380,8 @@ def Shape_DebugPrintOp : Shape_Op<"debug_print", []> { let results = (outs Shape_ShapeOrSizeType:$output); } -def Shape_SplitAtOp : Shape_Op<"split_at", - [DeclareOpInterfaceMethods<InferTypeOpInterface>]> { - let summary = "Splits a shape at a given index."; +def Shape_SplitAtOp : Shape_Op<"split_at", []> { + let summary = "Splits a shape at a given index"; let description = [{ Splits a shape at a given dimension `index`, returning two shapes. If `index` is negative, it is treated as indexing from the back of the @@ -425,9 +409,8 @@ def Shape_SplitAtOp : Shape_Op<"split_at", let hasFolder = 1; } -def Shape_ConcatOp : Shape_Op<"concat", - [DeclareOpInterfaceMethods<InferTypeOpInterface>]> { - let summary = "Concatenates two shapes."; +def Shape_ConcatOp : Shape_Op<"concat", []> { + let summary = "Concatenates two shapes"; let description = [{ Creates a shape whose dimensions consist of first the dimensions from `lhs` followed by the dimensions of `rhs`. @@ -449,9 +432,8 @@ def Shape_ConcatOp : Shape_Op<"concat", //===----------------------------------------------------------------------===// //TODO(tpopp): Move the code below and witnesses to a different file. -def Shape_AnyOp : Shape_Op<"any", - [NoSideEffect, DeclareOpInterfaceMethods<InferTypeOpInterface>]> { - let summary = "Return any combination of the input shapes."; +def Shape_AnyOp : Shape_Op<"any", [NoSideEffect]> { + let summary = "Return any combination of the input shapes"; let description = [{ This operation takes multiple input shapes and returns some combination of their dimensions. This can be best seen with examples below. @@ -473,7 +455,7 @@ def Shape_AnyOp : Shape_Op<"any", } def Shape_AssumingAllOp : Shape_Op<"assuming_all", [NoSideEffect]> { - let summary = "Return a logical AND of all witnesses."; + let summary = "Return a logical AND of all witnesses"; let description = [{ Used to simplify constraints as any single failing precondition is enough to prevent execution. @@ -502,7 +484,7 @@ def Shape_AssumingAllOp : Shape_Op<"assuming_all", [NoSideEffect]> { def Shape_AssumingOp : Shape_Op<"assuming", [SingleBlockImplicitTerminator<"AssumingYieldOp">, RecursiveSideEffects]> { - let summary = "Execute the region."; + let summary = "Execute the region"; let description = [{ Executes the region assuming all witnesses are true. @@ -540,7 +522,7 @@ def Shape_AssumingYieldOp : Shape_Op<"assuming_yield", } def Shape_CstrBroadcastableOp : Shape_Op<"cstr_broadcastable", []> { - let summary = "Determines if 2 shapes can be successfully broadcasted."; + let summary = "Determines if 2 shapes can be successfully broadcasted"; let description = [{ Given 2 input shapes, return a witness specifying if they are broadcastable. This broadcastable follows the same logic as what shape.broadcast documents. @@ -561,7 +543,7 @@ def Shape_CstrBroadcastableOp : Shape_Op<"cstr_broadcastable", []> { } def Shape_CstrEqOp : Shape_Op<"cstr_eq", []> { - let summary = "Determines if all input shapes are equal."; + let summary = "Determines if all input shapes are equal"; let description = [{ Given 1 or more input shapes, determine if all shapes are the exact same.
diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp index b0103e15fa35ad..5f7301f29dab40 100644 --- a/mlir/lib/Dialect/Shape/IR/Shape.cpp +++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp @@ -98,15 +98,6 @@ void ShapeDialect::printType(Type type, DialectAsmPrinter &os) const { // AnyOp //===----------------------------------------------------------------------===// -LogicalResult -AnyOp::inferReturnTypes(MLIRContext *context, Optional<Location> location, - ValueRange operands, DictionaryAttr attributes, - RegionRange regions, - SmallVectorImpl<Type> &inferredReturnTypes) { - inferredReturnTypes.push_back(ShapeType::get(context)); - return success(); -} - //===----------------------------------------------------------------------===// // AssumingOp //===----------------------------------------------------------------------===// @@ -155,15 +146,6 @@ static void print(OpAsmPrinter &p, AssumingOp op) { // BroadcastOp //===----------------------------------------------------------------------===// -LogicalResult -BroadcastOp::inferReturnTypes(MLIRContext *context, Optional<Location> location, - ValueRange operands, DictionaryAttr attributes, - RegionRange regions, - SmallVectorImpl<Type> &inferredReturnTypes) { - inferredReturnTypes.push_back(ShapeType::get(context)); - return success(); -} - OpFoldResult BroadcastOp::fold(ArrayRef<Attribute> operands) { if (!operands[0] || !operands[1]) return nullptr; @@ -184,16 +166,6 @@ OpFoldResult BroadcastOp::fold(ArrayRef<Attribute> operands) { // ConcatOp //===----------------------------------------------------------------------===// -LogicalResult -ConcatOp::inferReturnTypes(MLIRContext *context, Optional<Location> location, - ValueRange operands, DictionaryAttr attributes, - RegionRange regions, - SmallVectorImpl<Type> &inferredReturnTypes) { - auto shapeType = ShapeType::get(context); - inferredReturnTypes.push_back(shapeType); - return success(); -} - OpFoldResult ConcatOp::fold(ArrayRef<Attribute> operands) { if (!operands[0] || !operands[1]) return nullptr; @@ -255,15 +227,6 @@ OpFoldResult ConstShapeOp::fold(ArrayRef<Attribute>) { return shapeAttr(); } // ConstSizeOp //===----------------------------------------------------------------------===// -LogicalResult -ConstSizeOp::inferReturnTypes(MLIRContext *context, Optional<Location> location, - ValueRange operands, DictionaryAttr attributes, - RegionRange regions, - SmallVectorImpl<Type> &inferredReturnTypes) { - inferredReturnTypes.push_back(SizeType::get(context)); - return success(); -} - OpFoldResult ConstSizeOp::fold(ArrayRef<Attribute>) { return valueAttr(); } //===----------------------------------------------------------------------===// @@ -278,26 +241,10 @@ OpFoldResult IndexToSizeOp::fold(ArrayRef<Attribute> operands) { return {}; } -LogicalResult IndexToSizeOp::inferReturnTypes( - MLIRContext *context, Optional<Location> location, ValueRange operands, - DictionaryAttr attributes, RegionRange regions, - SmallVectorImpl<Type> &inferredReturnTypes) { - inferredReturnTypes.push_back(SizeType::get(context)); - return success(); -} - //===----------------------------------------------------------------------===// // FromExtentsOp //===----------------------------------------------------------------------===// -LogicalResult FromExtentsOp::inferReturnTypes( - MLIRContext *context, Optional<Location> location, ValueRange operands, - DictionaryAttr attributes, RegionRange regions, - SmallVectorImpl<Type> &inferredReturnTypes) { - inferredReturnTypes.push_back(ShapeType::get(context)); - return success(); -} - OpFoldResult FromExtentsOp::fold(ArrayRef<Attribute> operands) { if (llvm::any_of(operands, [](Attribute a) { return !a; }))
return nullptr; @@ -312,15 +259,6 @@ OpFoldResult FromExtentsOp::fold(ArrayRef<Attribute> operands) { // GetExtentOp //===----------------------------------------------------------------------===// -LogicalResult -GetExtentOp::inferReturnTypes(MLIRContext *context, Optional<Location> location, - ValueRange operands, DictionaryAttr attributes, - RegionRange regions, - SmallVectorImpl<Type> &inferredReturnTypes) { - inferredReturnTypes.push_back(SizeType::get(context)); - return success(); -} - OpFoldResult GetExtentOp::fold(ArrayRef<Attribute> operands) { auto elements = operands[0].dyn_cast_or_null<DenseIntElementsAttr>(); if (!elements) @@ -350,27 +288,10 @@ OpFoldResult NumElementsOp::fold(ArrayRef<Attribute> operands) { return builder.getIndexAttr(product.getLimitedValue()); } -LogicalResult NumElementsOp::inferReturnTypes( - MLIRContext *context, Optional<Location> location, ValueRange operands, - DictionaryAttr attributes, RegionRange regions, - SmallVectorImpl<Type> &inferredReturnTypes) { - inferredReturnTypes.push_back(SizeType::get(context)); - return success(); -} - //===----------------------------------------------------------------------===// // ShapeOfOp //===----------------------------------------------------------------------===// -LogicalResult -ShapeOfOp::inferReturnTypes(MLIRContext *context, Optional<Location> location, - ValueRange operands, DictionaryAttr attributes, - RegionRange regions, - SmallVectorImpl<Type> &inferredReturnTypes) { - inferredReturnTypes.push_back(ShapeType::get(context)); - return success(); -} - OpFoldResult ShapeOfOp::fold(ArrayRef<Attribute>) { auto type = getOperand().getType().dyn_cast<ShapedType>(); if (!type || !type.hasStaticShape()) @@ -391,29 +312,10 @@ OpFoldResult SizeToIndexOp::fold(ArrayRef<Attribute> operands) { return {}; } -LogicalResult SizeToIndexOp::inferReturnTypes( - MLIRContext *context, Optional<Location> location, ValueRange operands, - DictionaryAttr attributes, RegionRange regions, - SmallVectorImpl<Type> &inferredReturnTypes) { - inferredReturnTypes.push_back(IndexType::get(context)); - return success(); -} - //===----------------------------------------------------------------------===// // SplitAtOp //===----------------------------------------------------------------------===// -LogicalResult -SplitAtOp::inferReturnTypes(MLIRContext *context, Optional<Location> location, - ValueRange operands, DictionaryAttr attributes, - RegionRange regions, - SmallVectorImpl<Type> &inferredReturnTypes) { - auto shapeType = ShapeType::get(context); - inferredReturnTypes.push_back(shapeType); - inferredReturnTypes.push_back(shapeType); - return success(); -} - LogicalResult SplitAtOp::fold(ArrayRef<Attribute> operands, SmallVectorImpl<OpFoldResult> &results) { if (!operands[0] || !operands[1])