diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-infinite-loop.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-infinite-loop.cpp index 427b5f0272b94a..8bd4df7cd84450 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-infinite-loop.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-infinite-loop.cpp @@ -70,11 +70,25 @@ void simple_not_infinite1() { i++; } + while ((Limit)--) { + // Not an error since 'Limit' is updated. + i++; + } + + while ((Limit) -= 1) { + // Not an error since 'Limit' is updated. + } + while (int k = Limit) { // Not an error since 'Limit' is updated. Limit--; } + while (int k = Limit) { + // Not an error since 'Limit' is updated + (Limit)--; + } + while (int k = Limit--) { // Not an error since 'Limit' is updated. i++; @@ -86,6 +100,15 @@ void simple_not_infinite1() { for (i = 0; i < Limit; Limit--) { } + + for (i = 0; i < Limit; (Limit) = Limit - 1) { + } + + for (i = 0; i < Limit; (Limit) -= 1) { + } + + for (i = 0; i < Limit; --(Limit)) { + } } void simple_not_infinite2() { diff --git a/clang/include/clang/Basic/X86Target.def b/clang/include/clang/Basic/X86Target.def index ba4e5981e7dcca..70f3879f33a140 100644 --- a/clang/include/clang/Basic/X86Target.def +++ b/clang/include/clang/Basic/X86Target.def @@ -11,19 +11,6 @@ // //===----------------------------------------------------------------------===// -#ifndef PROC_WITH_FEAT -#define PROC_WITH_FEAT(ENUM, STRING, IS64BIT, KEYFEATURE) \ - PROC(ENUM, STRING, IS64BIT) -#endif - -#ifndef PROC -#define PROC(ENUM, STRING, IS64BIT) -#endif - -#ifndef PROC_ALIAS -#define PROC_ALIAS(ENUM, ALIAS) -#endif - #ifndef FEATURE #define FEATURE(ENUM) #endif @@ -36,230 +23,6 @@ #define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) #endif -#define PROC_64_BIT true -#define PROC_32_BIT false - -/// \name i386 -/// i386-generation processors. -//@{ -PROC(i386, "i386", PROC_32_BIT) -//@} - -/// \name i486 -/// i486-generation processors. 
-//@{ -PROC(i486, "i486", PROC_32_BIT) -PROC(WinChipC6, "winchip-c6", PROC_32_BIT) -PROC(WinChip2, "winchip2", PROC_32_BIT) -PROC(C3, "c3", PROC_32_BIT) -//@} - -/// \name i586 -/// i586-generation processors, P5 microarchitecture based. -//@{ -PROC(i586, "i586", PROC_32_BIT) -PROC(Pentium, "pentium", PROC_32_BIT) -PROC(PentiumMMX, "pentium-mmx", PROC_32_BIT) -//@} - -/// \name i686 -/// i686-generation processors, P6 / Pentium M microarchitecture based. -//@{ -PROC(PentiumPro, "pentiumpro", PROC_32_BIT) -PROC(i686, "i686", PROC_32_BIT) -PROC(Pentium2, "pentium2", PROC_32_BIT) -PROC(Pentium3, "pentium3", PROC_32_BIT) -PROC_ALIAS(Pentium3, "pentium3m") -PROC(PentiumM, "pentium-m", PROC_32_BIT) -PROC(C3_2, "c3-2", PROC_32_BIT) - -/// This enumerator is a bit odd, as GCC no longer accepts -march=yonah. -/// Clang however has some logic to support this. -// FIXME: Warn, deprecate, and potentially remove this. -PROC(Yonah, "yonah", PROC_32_BIT) -//@} - -/// \name Netburst -/// Netburst microarchitecture based processors. -//@{ -PROC(Pentium4, "pentium4", PROC_32_BIT) -PROC_ALIAS(Pentium4, "pentium4m") - -PROC(Prescott, "prescott", PROC_32_BIT) -PROC(Nocona, "nocona", PROC_64_BIT) -//@} - -/// \name Core -/// Core microarchitecture based processors. -//@{ -PROC_WITH_FEAT(Core2, "core2", PROC_64_BIT, FEATURE_SSSE3) - -/// This enumerator, like Yonah, is a bit odd. It is another -/// codename which GCC no longer accepts as an option to -march, but Clang -/// has some logic for recognizing it. -// FIXME: Warn, deprecate, and potentially remove this. 
-PROC(Penryn, "penryn", PROC_64_BIT) -//@} - -/// \name Atom -/// Atom processors -//@{ -PROC_WITH_FEAT(Bonnell, "bonnell", PROC_64_BIT, FEATURE_SSSE3) -PROC_ALIAS(Bonnell, "atom") - -PROC_WITH_FEAT(Silvermont, "silvermont", PROC_64_BIT, FEATURE_SSE4_2) -PROC_ALIAS(Silvermont, "slm") - -PROC(Goldmont, "goldmont", PROC_64_BIT) -PROC(GoldmontPlus, "goldmont-plus", PROC_64_BIT) - -PROC(Tremont, "tremont", PROC_64_BIT) -//@} - -/// \name Nehalem -/// Nehalem microarchitecture based processors. -PROC_WITH_FEAT(Nehalem, "nehalem", PROC_64_BIT, FEATURE_SSE4_2) -PROC_ALIAS(Nehalem, "corei7") - -/// \name Westmere -/// Westmere microarchitecture based processors. -PROC_WITH_FEAT(Westmere, "westmere", PROC_64_BIT, FEATURE_PCLMUL) - -/// \name Sandy Bridge -/// Sandy Bridge microarchitecture based processors. -PROC_WITH_FEAT(SandyBridge, "sandybridge", PROC_64_BIT, FEATURE_AVX) -PROC_ALIAS(SandyBridge, "corei7-avx") - -/// \name Ivy Bridge -/// Ivy Bridge microarchitecture based processors. -PROC_WITH_FEAT(IvyBridge, "ivybridge", PROC_64_BIT, FEATURE_AVX) -PROC_ALIAS(IvyBridge, "core-avx-i") - -/// \name Haswell -/// Haswell microarchitecture based processors. -PROC_WITH_FEAT(Haswell, "haswell", PROC_64_BIT, FEATURE_AVX2) -PROC_ALIAS(Haswell, "core-avx2") - -/// \name Broadwell -/// Broadwell microarchitecture based processors. -PROC_WITH_FEAT(Broadwell, "broadwell", PROC_64_BIT, FEATURE_AVX2) - -/// \name Skylake Client -/// Skylake client microarchitecture based processors. -PROC_WITH_FEAT(SkylakeClient, "skylake", PROC_64_BIT, FEATURE_AVX2) - -/// \name Skylake Server -/// Skylake server microarchitecture based processors. -PROC_WITH_FEAT(SkylakeServer, "skylake-avx512", PROC_64_BIT, FEATURE_AVX512F) -PROC_ALIAS(SkylakeServer, "skx") - -/// \name Cascadelake Server -/// Cascadelake Server microarchitecture based processors. 
-PROC_WITH_FEAT(Cascadelake, "cascadelake", PROC_64_BIT, FEATURE_AVX512VNNI) - -/// \name Cooperlake Server -/// Cooperlake Server microarchitecture based processors. -PROC_WITH_FEAT(Cooperlake, "cooperlake", PROC_64_BIT, FEATURE_AVX512BF16) - -/// \name Cannonlake Client -/// Cannonlake client microarchitecture based processors. -PROC_WITH_FEAT(Cannonlake, "cannonlake", PROC_64_BIT, FEATURE_AVX512VBMI) - -/// \name Icelake Client -/// Icelake client microarchitecture based processors. -PROC(IcelakeClient, "icelake-client", PROC_64_BIT) - -/// \name Icelake Server -/// Icelake server microarchitecture based processors. -PROC(IcelakeServer, "icelake-server", PROC_64_BIT) - -/// \name Tigerlake -/// Tigerlake microarchitecture based processors. -PROC(Tigerlake, "tigerlake", PROC_64_BIT) - -/// \name Knights Landing -/// Knights Landing processor. -PROC_WITH_FEAT(KNL, "knl", PROC_64_BIT, FEATURE_AVX512F) - -/// \name Knights Mill -/// Knights Mill processor. -PROC_WITH_FEAT(KNM, "knm", PROC_64_BIT, FEATURE_AVX5124FMAPS) - -/// \name Lakemont -/// Lakemont microarchitecture based processors. -PROC(Lakemont, "lakemont", PROC_32_BIT) - -/// \name K6 -/// K6 architecture processors. -//@{ -PROC(K6, "k6", PROC_32_BIT) -PROC(K6_2, "k6-2", PROC_32_BIT) -PROC(K6_3, "k6-3", PROC_32_BIT) -//@} - -/// \name K7 -/// K7 architecture processors. -//@{ -PROC(Athlon, "athlon", PROC_32_BIT) -PROC_ALIAS(Athlon, "athlon-tbird") - -PROC(AthlonXP, "athlon-xp", PROC_32_BIT) -PROC_ALIAS(AthlonXP, "athlon-mp") -PROC_ALIAS(AthlonXP, "athlon-4") -//@} - -/// \name K8 -/// K8 architecture processors. -//@{ -PROC(K8, "k8", PROC_64_BIT) -PROC_ALIAS(K8, "athlon64") -PROC_ALIAS(K8, "athlon-fx") -PROC_ALIAS(K8, "opteron") - -PROC(K8SSE3, "k8-sse3", PROC_64_BIT) -PROC_ALIAS(K8SSE3, "athlon64-sse3") -PROC_ALIAS(K8SSE3, "opteron-sse3") - -PROC_WITH_FEAT(AMDFAM10, "amdfam10", PROC_64_BIT, FEATURE_SSE4_A) -PROC_ALIAS(AMDFAM10, "barcelona") -//@} - -/// \name Bobcat -/// Bobcat architecture processors. 
-//@{ -PROC_WITH_FEAT(BTVER1, "btver1", PROC_64_BIT, FEATURE_SSE4_A) -PROC_WITH_FEAT(BTVER2, "btver2", PROC_64_BIT, FEATURE_BMI) -//@} - -/// \name Bulldozer -/// Bulldozer architecture processors. -//@{ -PROC_WITH_FEAT(BDVER1, "bdver1", PROC_64_BIT, FEATURE_XOP) -PROC_WITH_FEAT(BDVER2, "bdver2", PROC_64_BIT, FEATURE_FMA) -PROC_WITH_FEAT(BDVER3, "bdver3", PROC_64_BIT, FEATURE_FMA) -PROC_WITH_FEAT(BDVER4, "bdver4", PROC_64_BIT, FEATURE_AVX2) -//@} - -/// \name zen -/// Zen architecture processors. -//@{ -PROC_WITH_FEAT(ZNVER1, "znver1", PROC_64_BIT, FEATURE_AVX2) -PROC_WITH_FEAT(ZNVER2, "znver2", PROC_64_BIT, FEATURE_AVX2) -//@} - -/// This specification is deprecated and will be removed in the future. -/// Users should prefer K8. -// FIXME: Warn on this when the CPU is set to it. -//@{ -PROC(x86_64, "x86-64", PROC_64_BIT) -//@} - -/// \name Geode -/// Geode processors. -//@{ -PROC(Geode, "geode", PROC_32_BIT) -//@} - // List of CPU Supports features in order. These need to remain in the order // required by attribute 'target' checking. 
Note that not all are supported/ // prioritized by GCC, so synchronization with GCC's implementation may require @@ -345,6 +108,3 @@ CPU_SPECIFIC("knm", 'j', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+mo #undef PROC_64_BIT #undef PROC_32_BIT #undef FEATURE -#undef PROC -#undef PROC_ALIAS -#undef PROC_WITH_FEAT diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index b6b161e482ac15..1ae219781c696c 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -2333,7 +2333,7 @@ class Parser : public CodeCompletionHandler { AccessSpecifier AS, DeclSpecContext DSC); void ParseEnumBody(SourceLocation StartLoc, Decl *TagDecl); void ParseStructUnionBody(SourceLocation StartLoc, DeclSpec::TST TagType, - Decl *TagDecl); + RecordDecl *TagDecl); void ParseStructDeclaration( ParsingDeclSpec &DS, diff --git a/clang/lib/Analysis/ExprMutationAnalyzer.cpp b/clang/lib/Analysis/ExprMutationAnalyzer.cpp index cb5cabfd3089c2..2f80285f17b4da 100644 --- a/clang/lib/Analysis/ExprMutationAnalyzer.cpp +++ b/clang/lib/Analysis/ExprMutationAnalyzer.cpp @@ -201,14 +201,15 @@ const Stmt *ExprMutationAnalyzer::findDeclPointeeMutation( const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { // LHS of any assignment operators. - const auto AsAssignmentLhs = - binaryOperator(isAssignmentOperator(), - hasLHS(maybeEvalCommaExpr(equalsNode(Exp)))); + const auto AsAssignmentLhs = binaryOperator( + isAssignmentOperator(), + hasLHS(maybeEvalCommaExpr(ignoringParenImpCasts(equalsNode(Exp))))); // Operand of increment/decrement operators. const auto AsIncDecOperand = unaryOperator(anyOf(hasOperatorName("++"), hasOperatorName("--")), - hasUnaryOperand(maybeEvalCommaExpr(equalsNode(Exp)))); + hasUnaryOperand(maybeEvalCommaExpr( + ignoringParenImpCasts(equalsNode(Exp))))); // Invoking non-const member function. // A member function is assumed to be non-const when it is unresolved. 
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index b87490a6a85898..05c6ec22af3a89 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -109,7 +109,8 @@ bool X86TargetInfo::initFeatureMap( if (getTriple().getArch() == llvm::Triple::x86_64) setFeatureEnabledImpl(Features, "sse2", true); - const CPUKind Kind = getCPUKind(CPU); + using namespace llvm::X86; + const enum CPUKind Kind = parseArchX86(CPU); // Enable X87 for all X86 processors but Lakemont. if (Kind != CK_Lakemont) @@ -117,11 +118,11 @@ bool X86TargetInfo::initFeatureMap( // Enable cmpxchg8 for i586 and greater CPUs. Include generic for backwards // compatibility. - if (Kind >= CK_i586 || Kind == CK_Generic) + if (Kind >= CK_i586 || Kind == CK_None) setFeatureEnabledImpl(Features, "cx8", true); switch (Kind) { - case CK_Generic: + case CK_None: case CK_i386: case CK_i486: case CK_i586: @@ -936,8 +937,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, // Subtarget options. // FIXME: We are hard-coding the tune parameters based on the CPU, but they // truly should be based on -mtune options. + using namespace llvm::X86; switch (CPU) { - case CK_Generic: + case CK_None: break; case CK_i386: // The rest are coming from the i386 define above. @@ -1324,7 +1326,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, break; } - if (CPU >= CK_i486 || CPU == CK_Generic) { + if (CPU >= CK_i486 || CPU == CK_None) { Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); @@ -1548,8 +1550,9 @@ static unsigned getFeaturePriority(llvm::X86::ProcessorFeatures Feat) { unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const { // Valid CPUs have a 'key feature' that compares just better than its key // feature. 
- CPUKind Kind = getCPUKind(Name); - if (Kind != CK_Generic) { + using namespace llvm::X86; + CPUKind Kind = parseArchX86(Name); + if (Kind != CK_None) { switch (Kind) { default: llvm_unreachable( @@ -1557,7 +1560,7 @@ unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const { #define PROC_WITH_FEAT(ENUM, STR, IS64, KEY_FEAT) \ case CK_##ENUM: \ return (getFeaturePriority(llvm::X86::KEY_FEAT) << 1) + 1; -#include "clang/Basic/X86Target.def" +#include "llvm/Support/X86TargetParser.def" } } @@ -1761,6 +1764,7 @@ bool X86TargetInfo::validateAsmConstraint( // | Knights Mill | 64 | https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf?countrylabel=Colombia "2.5.5.2 L1 DCache " | // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ Optional X86TargetInfo::getCPUCacheLineSize() const { + using namespace llvm::X86; switch (CPU) { // i386 case CK_i386: @@ -1846,7 +1850,7 @@ Optional X86TargetInfo::getCPUCacheLineSize() const { // The following currently have unknown cache line sizes (but they are probably all 64): // Core - case CK_Generic: + case CK_None: return None; } llvm_unreachable("Unknown CPU kind"); @@ -1977,38 +1981,9 @@ std::string X86TargetInfo::convertConstraint(const char *&Constraint) const { } } -bool X86TargetInfo::checkCPUKind(CPUKind Kind) const { - // Perform any per-CPU checks necessary to determine if this CPU is - // acceptable. - switch (Kind) { - case CK_Generic: - // No processor selected! 
- return false; -#define PROC(ENUM, STRING, IS64BIT) \ - case CK_##ENUM: \ - return IS64BIT || getTriple().getArch() == llvm::Triple::x86; -#include "clang/Basic/X86Target.def" - } - llvm_unreachable("Unhandled CPU kind"); -} - void X86TargetInfo::fillValidCPUList(SmallVectorImpl &Values) const { -#define PROC(ENUM, STRING, IS64BIT) \ - if (IS64BIT || getTriple().getArch() == llvm::Triple::x86) \ - Values.emplace_back(STRING); - // For aliases we need to lookup the CPUKind to check get the 64-bit ness. -#define PROC_ALIAS(ENUM, ALIAS) \ - if (checkCPUKind(CK_##ENUM)) \ - Values.emplace_back(ALIAS); -#include "clang/Basic/X86Target.def" -} - -X86TargetInfo::CPUKind X86TargetInfo::getCPUKind(StringRef CPU) const { - return llvm::StringSwitch(CPU) -#define PROC(ENUM, STRING, IS64BIT) .Case(STRING, CK_##ENUM) -#define PROC_ALIAS(ENUM, ALIAS) .Case(ALIAS, CK_##ENUM) -#include "clang/Basic/X86Target.def" - .Default(CK_Generic); + bool Only64Bit = getTriple().getArch() != llvm::Triple::x86; + llvm::X86::fillValidCPUArchList(Values, Only64Bit); } ArrayRef X86TargetInfo::getGCCRegNames() const { diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 39ccac96a49d8a..c33c608e27c843 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -18,6 +18,7 @@ #include "clang/Basic/TargetOptions.h" #include "llvm/ADT/Triple.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/X86TargetParser.h" namespace clang { namespace targets { @@ -128,19 +129,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasTSXLDTRK = false; protected: - /// Enumeration of all of the X86 CPUs supported by Clang. - /// - /// Each enumeration represents a particular CPU supported by Clang. These - /// loosely correspond to the options passed to '-march' or '-mtune' flags. 
- enum CPUKind { - CK_Generic, -#define PROC(ENUM, STRING, IS64BIT) CK_##ENUM, -#include "clang/Basic/X86Target.def" - } CPU = CK_Generic; - - bool checkCPUKind(CPUKind Kind) const; - - CPUKind getCPUKind(StringRef CPU) const; + llvm::X86::CPUKind CPU = llvm::X86::CK_None; enum FPMathKind { FP_Default, FP_SSE, FP_387 } FPMath = FP_Default; @@ -313,13 +302,16 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { } bool isValidCPUName(StringRef Name) const override { - return checkCPUKind(getCPUKind(Name)); + bool Only64Bit = getTriple().getArch() != llvm::Triple::x86; + return llvm::X86::parseArchX86(Name, Only64Bit) != llvm::X86::CK_None; } void fillValidCPUList(SmallVectorImpl &Values) const override; bool setCPU(const std::string &Name) override { - return checkCPUKind(CPU = getCPUKind(Name)); + bool Only64Bit = getTriple().getArch() != llvm::Triple::x86; + CPU = llvm::X86::parseArchX86(Name, Only64Bit); + return CPU != llvm::X86::CK_None; } unsigned multiVersionSortPriority(StringRef Name) const override; diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 1737154d179a68..65d513c8cf0581 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -2149,6 +2149,9 @@ llvm::DIType *CGDebugInfo::getOrCreateStandaloneType(QualType D, void CGDebugInfo::addHeapAllocSiteMetadata(llvm::CallBase *CI, QualType AllocatedTy, SourceLocation Loc) { + if (CGM.getCodeGenOpts().getDebugInfo() <= + codegenoptions::DebugLineTablesOnly) + return; llvm::MDNode *node; if (AllocatedTy->isVoidType()) node = llvm::MDNode::get(CGM.getLLVMContext(), None); diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index d018443858bd5b..d59aa6ce0fb94f 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -1639,8 +1639,7 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { EmitNewDeleteCall(*this, allocator, allocatorType, allocatorArgs); // 
Set !heapallocsite metadata on the call to operator new. - if (CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo && - getDebugInfo()) + if (getDebugInfo()) if (auto *newCall = dyn_cast(RV.getScalarVal())) getDebugInfo()->addHeapAllocSiteMetadata(newCall, allocType, E->getExprLoc()); diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 7e761978455731..79a3b19bac5766 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -4249,7 +4249,7 @@ void Parser::ParseStructDeclaration( /// [OBC] '@' 'defs' '(' class-name ')' /// void Parser::ParseStructUnionBody(SourceLocation RecordLoc, - DeclSpec::TST TagType, Decl *TagDecl) { + DeclSpec::TST TagType, RecordDecl *TagDecl) { PrettyDeclStackTraceEntry CrashInfo(Actions.Context, TagDecl, RecordLoc, "parsing struct/union body"); assert(!getLangOpts().CPlusPlus && "C++ declarations not supported"); @@ -4261,8 +4261,6 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, ParseScope StructScope(this, Scope::ClassScope|Scope::DeclScope); Actions.ActOnTagStartDefinition(getCurScope(), TagDecl); - SmallVector FieldDecls; - // While we still have something to read, read the declarations in the struct. 
while (!tryParseMisplacedModuleImport() && Tok.isNot(tok::r_brace) && Tok.isNot(tok::eof)) { @@ -4314,7 +4312,6 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, Actions.ActOnField(getCurScope(), TagDecl, FD.D.getDeclSpec().getSourceRange().getBegin(), FD.D, FD.BitfieldSize); - FieldDecls.push_back(Field); FD.complete(Field); }; @@ -4338,7 +4335,6 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, SmallVector Fields; Actions.ActOnDefs(getCurScope(), TagDecl, Tok.getLocation(), Tok.getIdentifierInfo(), Fields); - FieldDecls.insert(FieldDecls.end(), Fields.begin(), Fields.end()); ConsumeToken(); ExpectAndConsume(tok::r_paren); } @@ -4364,6 +4360,9 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, // If attributes exist after struct contents, parse them. MaybeParseGNUAttributes(attrs); + SmallVector FieldDecls(TagDecl->field_begin(), + TagDecl->field_end()); + Actions.ActOnFields(getCurScope(), RecordLoc, TagDecl, FieldDecls, T.getOpenLocation(), T.getCloseLocation(), attrs); StructScope.Exit(); diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index 1a82475117baac..8753c929287512 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -1964,7 +1964,7 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, Decl *D = SkipBody.CheckSameAsPrevious ? SkipBody.New : TagOrTempResult.get(); // Parse the definition body. 
- ParseStructUnionBody(StartLoc, TagType, D); + ParseStructUnionBody(StartLoc, TagType, cast(D)); if (SkipBody.CheckSameAsPrevious && !Actions.ActOnDuplicateDefinition(DS, TagOrTempResult.get(), SkipBody)) { diff --git a/clang/test/CodeGenObjC/externally-retained.m b/clang/test/CodeGenObjC/externally-retained.m index f68696879768fc..b842b8c4c68ce8 100644 --- a/clang/test/CodeGenObjC/externally-retained.m +++ b/clang/test/CodeGenObjC/externally-retained.m @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fobjc-arc -fblocks -Wno-objc-root-class -O0 %s -S -emit-llvm -o - | FileCheck %s --dump-input-on-failure -// RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fobjc-arc -fblocks -Wno-objc-root-class -O0 -xobjective-c++ -std=c++11 %s -S -emit-llvm -o - | FileCheck %s --check-prefix CHECKXX --dump-input-on-failure +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fobjc-arc -fblocks -Wno-objc-root-class -O0 %s -S -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fobjc-arc -fblocks -Wno-objc-root-class -O0 -xobjective-c++ -std=c++11 %s -S -emit-llvm -o - | FileCheck %s --check-prefix CHECKXX #define EXT_RET __attribute__((objc_externally_retained)) diff --git a/clang/test/Driver/rocm-device-libs.cl b/clang/test/Driver/rocm-device-libs.cl index cdb4716bde9a83..7f45116d363065 100644 --- a/clang/test/Driver/rocm-device-libs.cl +++ b/clang/test/Driver/rocm-device-libs.cl @@ -8,7 +8,7 @@ // RUN: -x cl -mcpu=gfx900 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s @@ -17,7 +17,7 @@ // RUN: -x cl -mcpu=gfx803 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s +// RUN: 2>&1 | 
FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s @@ -26,7 +26,7 @@ // RUN: -x cl -mcpu=fiji \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s @@ -35,7 +35,7 @@ // RUN: -cl-denorms-are-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DAZ,GFX900,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DAZ,GFX900,WAVE64 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ @@ -43,7 +43,7 @@ // RUN: -cl-denorms-are-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DAZ,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DAZ,GFX803,WAVE64 %s @@ -52,7 +52,7 @@ // RUN: -cl-finite-math-only \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-FINITE-ONLY,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-FINITE-ONLY,GFX803,WAVE64 %s @@ -61,7 +61,7 @@ // RUN: -cl-fp32-correctly-rounded-divide-sqrt \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-CORRECT-SQRT,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-CORRECT-SQRT,GFX803,WAVE64 %s @@ -70,7 +70,7 @@ // RUN: -cl-fast-relaxed-math \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-FAST-RELAXED,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-FAST-RELAXED,GFX803,WAVE64 %s @@ -79,45 +79,45 @@ // RUN: -cl-unsafe-math-optimizations \ // RUN: --rocm-path=%S/Inputs/rocm \ 
// RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-UNSAFE,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-UNSAFE,GFX803,WAVE64 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx1010 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1010,WAVE32 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX1010,WAVE32 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx1011 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1011,WAVE32 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX1011,WAVE32 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx1012 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1012,WAVE32 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX1012,WAVE32 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx1010 -mwavefrontsize64 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1010,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX1010,WAVE64 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx1010 -mwavefrontsize64 -mno-wavefrontsize64 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1010,WAVE32 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX1010,WAVE32 %s // Ignore -mno-wavefrontsize64 without wave32 support // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx803 -mno-wavefrontsize64 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX803,WAVE64 %s +// 
RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX803,WAVE64 %s @@ -126,13 +126,13 @@ // RUN: -x cl -mcpu=gfx900 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode \ // RUN: %S/opencl.cl \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s // Test environment variable HIP_DEVICE_LIB_PATH // RUN: env HIP_DEVICE_LIB_PATH=%S/Inputs/rocm/amdgcn/bitcode %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx900 \ // RUN: %S/opencl.cl \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s diff --git a/clang/test/Sema/struct-decl.c b/clang/test/Sema/struct-decl.c index 80cac0e0d145cf..ee3e79182eaa7d 100644 --- a/clang/test/Sema/struct-decl.c +++ b/clang/test/Sema/struct-decl.c @@ -69,3 +69,44 @@ void test_hiding() { struct PreserveAttributes {}; typedef struct __attribute__((noreturn)) PreserveAttributes PreserveAttributes_t; // expected-warning {{'noreturn' attribute only applies to functions and methods}} + +// PR46255 +struct FlexibleArrayMem { + int a; + int b[]; +}; + +struct FollowedByNamed { + struct FlexibleArrayMem a; // expected-warning {{field 'a' with variable sized type 'struct FlexibleArrayMem' not at the end of a struct or class is a GNU extension}} + int i; +}; + +struct FollowedByUnNamed { + struct FlexibleArrayMem a; // expected-warning {{field 'a' with variable sized type 'struct FlexibleArrayMem' not at the end of a struct or class is a GNU extension}} + struct { + int i; + }; +}; + +struct InAnonymous { + struct { // expected-warning-re {{field '' with variable sized type 'struct InAnonymous::(anonymous at {{.+}})' not at the end of a struct or class is a GNU extension}} + + struct FlexibleArrayMem a; + }; + 
int i; +}; +struct InAnonymousFollowedByAnon { + struct { // expected-warning-re {{field '' with variable sized type 'struct InAnonymousFollowedByAnon::(anonymous at {{.+}})' not at the end of a struct or class is a GNU extension}} + + struct FlexibleArrayMem a; + }; + struct { + int i; + }; +}; + +// This is the behavior in C++ as well, so making sure we reproduce it here. +struct InAnonymousFollowedByEmpty { + struct FlexibleArrayMem a; // expected-warning {{field 'a' with variable sized type 'struct FlexibleArrayMem' not at the end of a struct or class is a GNU extension}} + struct {}; +}; diff --git a/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp b/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp index 9b0a3dbda81e41..9d26eeb6af7347 100644 --- a/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp +++ b/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp @@ -112,11 +112,21 @@ TEST(ExprMutationAnalyzerTest, Trivial) { class AssignmentTest : public ::testing::TestWithParam {}; TEST_P(AssignmentTest, AssignmentModifies) { - const std::string ModExpr = "x " + GetParam() + " 10"; - const auto AST = buildASTFromCode("void f() { int x; " + ModExpr + "; }"); - const auto Results = - match(withEnclosingCompound(declRefTo("x")), AST->getASTContext()); - EXPECT_THAT(mutatedBy(Results, AST.get()), ElementsAre(ModExpr)); + { + const std::string ModExpr = "x " + GetParam() + " 10"; + const auto AST = buildASTFromCode("void f() { int x; " + ModExpr + "; }"); + const auto Results = + match(withEnclosingCompound(declRefTo("x")), AST->getASTContext()); + EXPECT_THAT(mutatedBy(Results, AST.get()), ElementsAre(ModExpr)); + } + + { + const std::string ModExpr = "(x) " + GetParam() + " 10"; + const auto AST = buildASTFromCode("void f() { int x; " + ModExpr + "; }"); + const auto Results = + match(withEnclosingCompound(declRefTo("x")), AST->getASTContext()); + EXPECT_THAT(mutatedBy(Results, AST.get()), ElementsAre(ModExpr)); + } } 
INSTANTIATE_TEST_CASE_P(AllAssignmentOperators, AssignmentTest, @@ -134,7 +144,8 @@ TEST_P(IncDecTest, IncDecModifies) { } INSTANTIATE_TEST_CASE_P(AllIncDecOperators, IncDecTest, - Values("++x", "--x", "x++", "x--"), ); + Values("++x", "--x", "x++", "x--", "++(x)", "--(x)", + "(x)++", "(x)--"), ); TEST(ExprMutationAnalyzerTest, NonConstMemberFunc) { const auto AST = buildASTFromCode( diff --git a/compiler-rt/test/fuzzer/fork.test b/compiler-rt/test/fuzzer/fork.test index e0f348b2bff1b4..6e76fe7f2b06a5 100644 --- a/compiler-rt/test/fuzzer/fork.test +++ b/compiler-rt/test/fuzzer/fork.test @@ -18,4 +18,4 @@ RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -rss_limit_mb=128 2>&1 | FileCheck MAX_TOTAL_TIME: INFO: fuzzed for {{.*}} seconds, wrapping up soon MAX_TOTAL_TIME: INFO: exiting: {{.*}} time: -RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -rss_limit_mb=128 -ignore_crashes=1 -max_total_time=10 2>&1 | FileCheck %s --dump-input-on-failure --check-prefix=MAX_TOTAL_TIME +RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -rss_limit_mb=128 -ignore_crashes=1 -max_total_time=10 2>&1 | FileCheck %s --check-prefix=MAX_TOTAL_TIME diff --git a/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb b/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb index cdd3388d6d2e23..6ae1c7016b680e 100644 --- a/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb +++ b/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb @@ -1,4 +1,4 @@ -# RUN: gdb -q -batch -n -iex 'source %llvm_src_root/utils/gdb-scripts/prettyprinters.py' -x %s %llvm_tools_dir/check-gdb-llvm-support | FileCheck %s --dump-input-on-failure +# RUN: gdb -q -batch -n -iex 'source %llvm_src_root/utils/gdb-scripts/prettyprinters.py' -x %s %llvm_tools_dir/check-gdb-llvm-support | FileCheck %s # REQUIRES: debug-info break main diff --git a/libcxx/cmake/caches/Generic-32bits.cmake b/libcxx/cmake/caches/Generic-32bits.cmake new file mode 100644 index 00000000000000..ae7b2ac3e8d83a --- /dev/null +++ 
b/libcxx/cmake/caches/Generic-32bits.cmake @@ -0,0 +1 @@ +set(LLVM_BUILD_32_BITS ON CACHE BOOL "") diff --git a/libcxx/cmake/caches/Generic-asan.cmake b/libcxx/cmake/caches/Generic-asan.cmake new file mode 100644 index 00000000000000..cf919765c3a29c --- /dev/null +++ b/libcxx/cmake/caches/Generic-asan.cmake @@ -0,0 +1 @@ +set(LLVM_USE_SANITIZER "Address" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-cxx03.cmake b/libcxx/cmake/caches/Generic-cxx03.cmake new file mode 100644 index 00000000000000..d1d67d86d74a9c --- /dev/null +++ b/libcxx/cmake/caches/Generic-cxx03.cmake @@ -0,0 +1 @@ +set(LLVM_LIT_ARGS "--param std=c++03" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-cxx11.cmake b/libcxx/cmake/caches/Generic-cxx11.cmake new file mode 100644 index 00000000000000..e203c6aeaf29fc --- /dev/null +++ b/libcxx/cmake/caches/Generic-cxx11.cmake @@ -0,0 +1 @@ +set(LLVM_LIT_ARGS "--param std=c++11" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-cxx14.cmake b/libcxx/cmake/caches/Generic-cxx14.cmake new file mode 100644 index 00000000000000..b1bf1244b510ab --- /dev/null +++ b/libcxx/cmake/caches/Generic-cxx14.cmake @@ -0,0 +1 @@ +set(LLVM_LIT_ARGS "--param std=c++14" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-cxx17.cmake b/libcxx/cmake/caches/Generic-cxx17.cmake new file mode 100644 index 00000000000000..b23204729ced20 --- /dev/null +++ b/libcxx/cmake/caches/Generic-cxx17.cmake @@ -0,0 +1 @@ +set(LLVM_LIT_ARGS "--param std=c++17" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-cxx2a.cmake b/libcxx/cmake/caches/Generic-cxx2a.cmake new file mode 100644 index 00000000000000..31f1b76ab91f99 --- /dev/null +++ b/libcxx/cmake/caches/Generic-cxx2a.cmake @@ -0,0 +1 @@ +set(LLVM_LIT_ARGS "--param std=c++2a" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-msan.cmake b/libcxx/cmake/caches/Generic-msan.cmake new file mode 100644 index 00000000000000..7c948f51642dd4 --- /dev/null +++ b/libcxx/cmake/caches/Generic-msan.cmake 
@@ -0,0 +1 @@ +set(LLVM_USE_SANITIZER "MemoryWithOrigins" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-noexceptions.cmake b/libcxx/cmake/caches/Generic-noexceptions.cmake new file mode 100644 index 00000000000000..f0dffef60dba08 --- /dev/null +++ b/libcxx/cmake/caches/Generic-noexceptions.cmake @@ -0,0 +1,2 @@ +set(LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "") +set(LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "") diff --git a/libcxx/cmake/caches/Generic-singlethreaded.cmake b/libcxx/cmake/caches/Generic-singlethreaded.cmake new file mode 100644 index 00000000000000..616baef1be7bef --- /dev/null +++ b/libcxx/cmake/caches/Generic-singlethreaded.cmake @@ -0,0 +1,3 @@ +set(LIBCXX_ENABLE_THREADS OFF CACHE BOOL "") +set(LIBCXXABI_ENABLE_THREADS OFF CACHE BOOL "") +set(LIBCXX_ENABLE_MONOTONIC_CLOCK OFF CACHE BOOL "") diff --git a/libcxx/cmake/caches/Generic-tsan.cmake b/libcxx/cmake/caches/Generic-tsan.cmake new file mode 100644 index 00000000000000..a4b599e3e5094b --- /dev/null +++ b/libcxx/cmake/caches/Generic-tsan.cmake @@ -0,0 +1 @@ +set(LLVM_USE_SANITIZER "Thread" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-ubsan.cmake b/libcxx/cmake/caches/Generic-ubsan.cmake new file mode 100644 index 00000000000000..7ad891e4aed9e8 --- /dev/null +++ b/libcxx/cmake/caches/Generic-ubsan.cmake @@ -0,0 +1,2 @@ +set(LLVM_USE_SANITIZER "Undefined" CACHE STRING "") +set(LIBCXX_ABI_UNSTABLE ON CACHE BOOL "") diff --git a/libcxx/cmake/caches/README.md b/libcxx/cmake/caches/README.md new file mode 100644 index 00000000000000..60837ee2940177 --- /dev/null +++ b/libcxx/cmake/caches/README.md @@ -0,0 +1,13 @@ +# libc++ / libc++abi configuration caches + +This directory contains CMake caches for the supported configurations of libc++. +Some of the configurations are specific to a vendor, others are generic and not +tied to any vendor. + +While we won't explicitly work to break configurations not listed here, any +configuration not listed here is not explicitly supported. 
If you use or ship +libc++ under a configuration not listed here, you should work with the libc++ +maintainers to make it into a supported configuration and add it here. + +Similarly, adding any new configuration that's not already covered must be +discussed with the libc++ maintainers as it entails a maintenance burden. diff --git a/libcxx/include/__config b/libcxx/include/__config index cf596a7872abd5..26fadcff7ced42 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -1392,10 +1392,10 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( __attribute__((availability(tvos,strict,introduced=10.0))) \ __attribute__((availability(watchos,strict,introduced=3.0))) # define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS \ - __attribute__((availability(macosx,strict,introduced=10.14))) \ - __attribute__((availability(ios,strict,introduced=12.0))) \ - __attribute__((availability(tvos,strict,introduced=12.0))) \ - __attribute__((availability(watchos,strict,introduced=5.0))) + __attribute__((availability(macosx,strict,introduced=10.13))) \ + __attribute__((availability(ios,strict,introduced=11.0))) \ + __attribute__((availability(tvos,strict,introduced=11.0))) \ + __attribute__((availability(watchos,strict,introduced=4.0))) # define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS \ _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS # define _LIBCPP_AVAILABILITY_BAD_ANY_CAST \ diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 22ec6c457e5694..35dac0df56824d 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -857,8 +857,8 @@ def configure_deployment(self): self.config.available_features.add('dylib-has-no-shared_mutex') self.lit_config.note("shared_mutex is not supported by the deployment target") # Throwing bad_optional_access, bad_variant_access and bad_any_cast is - # supported starting in macosx10.14. 
- if name == 'macosx' and version in ('10.%s' % v for v in range(9, 14)): + # supported starting in macosx10.13. + if name == 'macosx' and version in ('10.%s' % v for v in range(9, 13)): self.config.available_features.add('dylib-has-no-bad_optional_access') self.lit_config.note("throwing bad_optional_access is not supported by the deployment target") diff --git a/libcxxabi/test/libcxxabi/test/config.py b/libcxxabi/test/libcxxabi/test/config.py index fe76d193e79ed2..f1eb453e09f313 100644 --- a/libcxxabi/test/libcxxabi/test/config.py +++ b/libcxxabi/test/libcxxabi/test/config.py @@ -38,8 +38,6 @@ def has_cpp_feature(self, feature, required_value): def configure_features(self): super(Configuration, self).configure_features() - if not self.get_lit_bool('enable_exceptions', True): - self.config.available_features.add('no-exceptions') if not self.has_cpp_feature('noexcept_function_type', 201510): self.config.available_features.add('libcxxabi-no-noexcept-function-type') if not self.get_lit_bool('llvm_unwinder', False): diff --git a/libunwind/test/CMakeLists.txt b/libunwind/test/CMakeLists.txt index 40d4acd4e8c2a9..e608c1708b8abb 100644 --- a/libunwind/test/CMakeLists.txt +++ b/libunwind/test/CMakeLists.txt @@ -15,7 +15,6 @@ pythonize_bool(LIBUNWIND_BUILD_32_BITS) pythonize_bool(LIBCXX_ENABLE_SHARED) pythonize_bool(LIBUNWIND_ENABLE_SHARED) pythonize_bool(LIBUNWIND_ENABLE_THREADS) -pythonize_bool(LIBUNWIND_ENABLE_EXCEPTIONS) pythonize_bool(LIBUNWIND_USES_ARM_EHABI) pythonize_bool(LIBUNWIND_USE_COMPILER_RT) pythonize_bool(LIBUNWIND_BUILD_EXTERNAL_THREAD_LIBRARY) diff --git a/libunwind/test/libunwind/test/config.py b/libunwind/test/libunwind/test/config.py index 36501f230272cd..7e4f230d821c75 100644 --- a/libunwind/test/libunwind/test/config.py +++ b/libunwind/test/libunwind/test/config.py @@ -35,15 +35,11 @@ def has_cpp_feature(self, feature, required_value): def configure_features(self): super(Configuration, self).configure_features() - if not 
self.get_lit_bool('enable_exceptions', True): - self.config.available_features.add('no-exceptions') if self.get_lit_bool('arm_ehabi', False): self.config.available_features.add('libunwind-arm-ehabi') def configure_compile_flags(self): self.cxx.compile_flags += ['-DLIBUNWIND_NO_TIMER'] - if not self.get_lit_bool('enable_exceptions', True): - self.cxx.compile_flags += ['-fno-exceptions', '-DLIBUNWIND_HAS_NO_EXCEPTIONS'] # Stack unwinding tests need unwinding tables and these are not # generated by default on all Targets. self.cxx.compile_flags += ['-funwind-tables'] diff --git a/libunwind/test/lit.cfg b/libunwind/test/lit.cfg index 262f25af0d70a4..7f74bd6e4afb4c 100644 --- a/libunwind/test/lit.cfg +++ b/libunwind/test/lit.cfg @@ -23,9 +23,6 @@ config.suffixes = ['.cpp', '.s'] # test_source_root: The root path where tests are located. config.test_source_root = os.path.dirname(__file__) -# needed to test libunwind with code that throws exceptions -config.enable_exceptions = True - # Infer the libcxx_test_source_root for configuration import. # If libcxx_source_root isn't specified in the config, assume that the libcxx # and libunwind source directories are sibling directories. 
diff --git a/libunwind/test/lit.site.cfg.in b/libunwind/test/lit.site.cfg.in index 37f90a90efdb43..809ad1009f4bdc 100644 --- a/libunwind/test/lit.site.cfg.in +++ b/libunwind/test/lit.site.cfg.in @@ -18,7 +18,6 @@ config.test_compiler_flags = "@LIBUNWIND_TEST_COMPILER_FLAGS@" config.executor = "@LIBUNWIND_EXECUTOR@" config.libunwind_shared = @LIBUNWIND_ENABLE_SHARED@ config.enable_shared = @LIBCXX_ENABLE_SHARED@ -config.enable_exceptions = @LIBUNWIND_ENABLE_EXCEPTIONS@ config.arm_ehabi = @LIBUNWIND_USES_ARM_EHABI@ config.host_triple = "@LLVM_HOST_TRIPLE@" config.target_triple = "@TARGET_TRIPLE@" diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index c451aee1f921a9..5bbd6f0df7e9ab 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1117,8 +1117,20 @@ template void ObjFile::initializeSymbols() { // COMDAT member sections, and if a comdat group is discarded, some // defined symbol in a .eh_frame becomes dangling symbols. if (sec == &InputSection::discarded) { - this->symbols[i]->resolve( - Undefined{this, name, binding, stOther, type, secIdx}); + Undefined und{this, name, binding, stOther, type, secIdx}; + Symbol *sym = this->symbols[i]; + // !ArchiveFile::parsed or LazyObjFile::fetched means that the file + // containing this object has not finished processing, i.e. this symbol is + // a result of a lazy symbol fetch. We should demote the lazy symbol to an + // Undefined so that any relocations outside of the group to it will + // trigger a discarded section error. 
+ if ((sym->symbolKind == Symbol::LazyArchiveKind && + !cast(sym->file)->parsed) || + (sym->symbolKind == Symbol::LazyObjectKind && + cast(sym->file)->fetched)) + sym->replace(und); + else + sym->resolve(und); continue; } @@ -1141,6 +1153,10 @@ ArchiveFile::ArchiveFile(std::unique_ptr &&file) void ArchiveFile::parse() { for (const Archive::Symbol &sym : file->symbols()) symtab->addSymbol(LazyArchive{*this, sym}); + + // Inform a future invocation of ObjFile::initializeSymbols() that this + // archive has been processed. + parsed = true; } // Returns a buffer pointing to a member file containing a given symbol. @@ -1615,14 +1631,13 @@ InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, } void LazyObjFile::fetch() { - if (mb.getBuffer().empty()) + if (fetched) return; + fetched = true; InputFile *file = createObjectFile(mb, archiveName, offsetInArchive); file->groupId = groupId; - mb = {}; - // Copy symbol vector so that the new InputFile doesn't have to // insert the same defined symbols to the symbol table again. file->symbols = std::move(symbols); diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 51882e0c964719..7af85e417ca583 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -307,6 +307,8 @@ class LazyObjFile : public InputFile { template void parse(); void fetch(); + bool fetched = false; + private: uint64_t offsetInArchive; }; @@ -327,6 +329,8 @@ class ArchiveFile : public InputFile { size_t getMemberCount() const; size_t getFetchedMemberCount() const { return seen.size(); } + bool parsed = false; + private: std::unique_ptr file; llvm::DenseSet seen; diff --git a/lld/test/ELF/comdat-discarded-lazy.s b/lld/test/ELF/comdat-discarded-lazy.s new file mode 100644 index 00000000000000..8ee15158f6b3ae --- /dev/null +++ b/lld/test/ELF/comdat-discarded-lazy.s @@ -0,0 +1,60 @@ +# REQUIRES: x86 +## Test that lazy symbols in a section group can be demoted to Undefined, +## so that we can report a "discarded section" error. 
+ +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: echo '.globl f1, foo; f1: call foo; \ +# RUN: .section .text.foo,"axG",@progbits,foo,comdat; foo:' | \ +# RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o + +## Test the case when the symbol causing a "discarded section" is ordered +## *before* the symbol fetching the lazy object. +## The test relies on the symbol table order of llvm-mc (lexical), which will +## need adjustment if llvm-mc changes its behavior. +# RUN: echo '.globl f2, aa; f2: call aa; \ +# RUN: .section .text.foo,"axG",@progbits,foo,comdat; aa:' | \ +# RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %taa.o +# RUN: llvm-nm -p %taa.o | FileCheck --check-prefix=AA-NM %s +# RUN: not ld.lld %t.o --start-lib %t1.o %taa.o --end-lib -o /dev/null 2>&1 | FileCheck --check-prefix=AA %s +# RUN: rm -f %taa.a && llvm-ar rc %taa.a %taa.o +# RUN: not ld.lld %t.o --start-lib %t1.o --end-lib %taa.a -o /dev/null 2>&1 | FileCheck --check-prefix=AA %s + +# AA-NM: aa +# AA-NM: f2 + +# AA: error: relocation refers to a symbol in a discarded section: aa +# AA-NEXT: >>> defined in {{.*}}aa.o +# AA-NEXT: >>> section group signature: foo +# AA-NEXT: >>> prevailing definition is in {{.*}}1.o +# AA-NEXT: >>> referenced by {{.*}}aa.o:(.text+0x1) + +## Test the case when the symbol causing a "discarded section" is ordered +## *after* the symbol fetching the lazy object. 
+# RUN: echo '.globl f2, zz; f2: call zz; \ +# RUN: .section .text.foo,"axG",@progbits,foo,comdat; zz:' | \ +# RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %tzz.o +# RUN: llvm-nm -p %tzz.o | FileCheck --check-prefix=ZZ-NM %s +# RUN: not ld.lld %t.o --start-lib %t1.o %tzz.o --end-lib -o /dev/null 2>&1 | FileCheck --check-prefix=ZZ %s +# RUN: rm -f %tzz.a && llvm-ar rc %tzz.a %tzz.o +# RUN: not ld.lld %t.o --start-lib %t1.o --end-lib %tzz.a -o /dev/null 2>&1 | FileCheck --check-prefix=ZZ %s + +# ZZ-NM: f2 +# ZZ-NM: zz + +# ZZ: error: relocation refers to a symbol in a discarded section: zz +# ZZ-NEXT: >>> defined in {{.*}}zz.o +# ZZ-NEXT: >>> section group signature: foo +# ZZ-NEXT: >>> prevailing definition is in {{.*}}1.o +# ZZ-NEXT: >>> referenced by {{.*}}zz.o:(.text+0x1) + +## Don't error if the symbol which would cause "discarded section" +## was inserted before %tzz.o +# RUN: echo '.globl zz; zz:' | llvm-mc -filetype=obj -triple=x86_64 - -o %tdef.o +# RUN: ld.lld %t.o --start-lib %t1.o %tdef.o %tzz.o --end-lib -o /dev/null +# RUN: rm -f %tdef.a && llvm-ar rc %tdef.a %tdef.o +# RUN: ld.lld %t.o --start-lib %t1.o %tdef.a %tzz.o --end-lib -o /dev/null + +.globl _start +_start: + call f1 + call f2 diff --git a/lld/test/ELF/i386-linkonce.s b/lld/test/ELF/i386-linkonce.s index c06b042c7638e4..f7da0aed4af585 100644 --- a/lld/test/ELF/i386-linkonce.s +++ b/lld/test/ELF/i386-linkonce.s @@ -2,7 +2,9 @@ // RUN: llvm-mc -filetype=obj -triple=i386-linux-gnu %s -o %t.o // RUN: llvm-mc -filetype=obj -triple=i386-linux-gnu %p/Inputs/i386-linkonce.s -o %t2.o // RUN: llvm-ar rcs %t2.a %t2.o -// RUN: ld.lld %t.o %t2.a -o %t +// RUN: not ld.lld %t.o %t2.a -o /dev/null 2>&1 | FileCheck %s + +// CHECK: error: relocation refers to a symbol in a discarded section: __i686.get_pc_thunk.bx .globl _start _start: diff --git a/lld/test/ELF/start-lib-comdat.s b/lld/test/ELF/start-lib-comdat.s index 34c9934803f092..996ddb485bab3c 100644 --- a/lld/test/ELF/start-lib-comdat.s +++ 
b/lld/test/ELF/start-lib-comdat.s @@ -6,7 +6,7 @@ // RUN: ld.lld -shared -o %t3 %t1.o --start-lib %t2.o --end-lib // RUN: llvm-readobj --symbols %t3 | FileCheck %s // RUN: ld.lld -shared -o %t3 --start-lib %t2.o --end-lib %t1.o -// RUN: llvm-readobj --symbols %t3 | FileCheck %s +// RUN: llvm-readobj --symbols %t3 | FileCheck /dev/null --implicit-check-not='Name: zed' // CHECK: Name: zed // CHECK-NEXT: Value: diff --git a/lldb/source/Host/posix/FileSystemPosix.cpp b/lldb/source/Host/posix/FileSystemPosix.cpp index 3660f67895a4f7..0aa34bc5943596 100644 --- a/lldb/source/Host/posix/FileSystemPosix.cpp +++ b/lldb/source/Host/posix/FileSystemPosix.cpp @@ -72,9 +72,11 @@ Status FileSystem::ResolveSymbolicLink(const FileSpec &src, FileSpec &dst) { } FILE *FileSystem::Fopen(const char *path, const char *mode) { + Collect(path); return llvm::sys::RetryAfterSignal(nullptr, ::fopen, path, mode); } int FileSystem::Open(const char *path, int flags, int mode) { + Collect(path); return llvm::sys::RetryAfterSignal(-1, ::open, path, flags, mode); } diff --git a/lldb/source/Host/windows/FileSystem.cpp b/lldb/source/Host/windows/FileSystem.cpp index cbd1915bdb448c..94872c99b15ecb 100644 --- a/lldb/source/Host/windows/FileSystem.cpp +++ b/lldb/source/Host/windows/FileSystem.cpp @@ -86,6 +86,7 @@ Status FileSystem::ResolveSymbolicLink(const FileSpec &src, FileSpec &dst) { } FILE *FileSystem::Fopen(const char *path, const char *mode) { + Collect(path); std::wstring wpath, wmode; if (!llvm::ConvertUTF8toWide(path, wpath)) return nullptr; @@ -98,6 +99,7 @@ FILE *FileSystem::Fopen(const char *path, const char *mode) { } int FileSystem::Open(const char *path, int flags, int mode) { + Collect(path); std::wstring wpath; if (!llvm::ConvertUTF8toWide(path, wpath)) return -1; diff --git a/llvm/docs/CommandGuide/FileCheck.rst b/llvm/docs/CommandGuide/FileCheck.rst index 0512133f2e995c..b2e3dfcf01ad64 100644 --- a/llvm/docs/CommandGuide/FileCheck.rst +++ b/llvm/docs/CommandGuide/FileCheck.rst @@ 
-106,13 +106,8 @@ and from the command line. .. option:: --dump-input Dump input to stderr, adding annotations representing currently enabled - diagnostics. Do this either 'always', on 'fail', or 'never'. Specify 'help' - to explain the dump format and quit. - -.. option:: --dump-input-on-failure - - When the check fails, dump all of the original input. This option is - deprecated in favor of `--dump-input=fail`. + diagnostics. Do this either 'always', on 'fail' (default), or 'never'. + Specify 'help' to explain the dump format and quit. .. option:: --enable-var-scope diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 118a177f88dfb6..d6498345f25c85 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -35,23 +35,23 @@ class GISelChangeObserver; /// to transfer BuilderState between different kinds of MachineIRBuilders. struct MachineIRBuilderState { /// MachineFunction under construction. - MachineFunction *MF; + MachineFunction *MF = nullptr; /// Information used to access the description of the opcodes. - const TargetInstrInfo *TII; + const TargetInstrInfo *TII = nullptr; /// Information used to verify types are consistent and to create virtual registers. - MachineRegisterInfo *MRI; + MachineRegisterInfo *MRI = nullptr; /// Debug location to be set to any instruction we create. DebugLoc DL; /// \name Fields describing the insertion point. 
/// @{ - MachineBasicBlock *MBB; + MachineBasicBlock *MBB = nullptr; MachineBasicBlock::iterator II; /// @} - GISelChangeObserver *Observer; + GISelChangeObserver *Observer = nullptr; - GISelCSEInfo *CSEInfo; + GISelCSEInfo *CSEInfo = nullptr; }; class DstOp { @@ -228,14 +228,26 @@ class MachineIRBuilder { void validateSelectOp(const LLT ResTy, const LLT TstTy, const LLT Op0Ty, const LLT Op1Ty); - void recordInsertion(MachineInstr *MI) const; + + void recordInsertion(MachineInstr *InsertedInstr) const { + if (State.Observer) + State.Observer->createdInstr(*InsertedInstr); + } public: /// Some constructors for easy use. MachineIRBuilder() = default; MachineIRBuilder(MachineFunction &MF) { setMF(MF); } - MachineIRBuilder(MachineInstr &MI) : MachineIRBuilder(*MI.getMF()) { + + MachineIRBuilder(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt) { + setMF(*MBB.getParent()); + setInsertPt(MBB, InsPt); + } + + MachineIRBuilder(MachineInstr &MI) : + MachineIRBuilder(*MI.getParent(), MI.getIterator()) { setInstr(MI); + setDebugLoc(MI.getDebugLoc()); } virtual ~MachineIRBuilder() = default; @@ -292,10 +304,16 @@ class MachineIRBuilder { /// Set the insertion point before the specified position. /// \pre MBB must be in getMF(). /// \pre II must be a valid iterator in MBB. - void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II); + void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II) { + assert(MBB.getParent() == &getMF() && + "Basic block is in a different function"); + State.MBB = &MBB; + State.II = II; + } + /// @} - void setCSEInfo(GISelCSEInfo *Info); + void setCSEInfo(GISelCSEInfo *Info) { State.CSEInfo = Info; } /// \name Setters for the insertion point. /// @{ @@ -304,11 +322,20 @@ class MachineIRBuilder { /// Set the insertion point to the end of \p MBB. /// \pre \p MBB must be contained by getMF(). 
- void setMBB(MachineBasicBlock &MBB); + void setMBB(MachineBasicBlock &MBB) { + State.MBB = &MBB; + State.II = MBB.end(); + assert(&getMF() == MBB.getParent() && + "Basic block is in a different function"); + } /// Set the insertion point to before MI. /// \pre MI must be in getMF(). - void setInstr(MachineInstr &MI); + void setInstr(MachineInstr &MI) { + assert(MI.getParent() && "Instruction is not part of a basic block"); + setMBB(*MI.getParent()); + State.II = MI.getIterator(); + } /// @} /// Set the insertion point to before MI, and set the debug loc to MI's loc. @@ -318,8 +345,11 @@ class MachineIRBuilder { setDebugLoc(MI.getDebugLoc()); } - void setChangeObserver(GISelChangeObserver &Observer); - void stopObservingChanges(); + void setChangeObserver(GISelChangeObserver &Observer) { + State.Observer = &Observer; + } + + void stopObservingChanges() { State.Observer = nullptr; } /// @} /// Set the debug location to \p DL for all the next build instructions. @@ -335,7 +365,9 @@ class MachineIRBuilder { /// \pre setBasicBlock or setMI must have been called. /// /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildInstr(unsigned Opcode); + MachineInstrBuilder buildInstr(unsigned Opcode) { + return insertInstr(buildInstrNoInsert(Opcode)); + } /// Build but don't insert = \p Opcode . 
/// diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def index aef189a562a5b0..4d2b615e9d3de3 100644 --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -177,3 +177,245 @@ X86_FEATURE (67, FEATURE_CLFLUSHOPT) X86_FEATURE (68, FEATURE_SHA) #undef X86_FEATURE_COMPAT #undef X86_FEATURE + + +#ifndef PROC_WITH_FEAT +#define PROC_WITH_FEAT(ENUM, STRING, IS64BIT, KEYFEATURE) \ + PROC(ENUM, STRING, IS64BIT) +#endif + +#ifndef PROC +#define PROC(ENUM, STRING, IS64BIT) +#endif + +#ifndef PROC_ALIAS +#define PROC_ALIAS(ENUM, ALIAS) +#endif + +#define PROC_64_BIT true +#define PROC_32_BIT false + +/// \name i386 +/// i386-generation processors. +//@{ +PROC(i386, "i386", PROC_32_BIT) +//@} + +/// \name i486 +/// i486-generation processors. +//@{ +PROC(i486, "i486", PROC_32_BIT) +PROC(WinChipC6, "winchip-c6", PROC_32_BIT) +PROC(WinChip2, "winchip2", PROC_32_BIT) +PROC(C3, "c3", PROC_32_BIT) +//@} + +/// \name i586 +/// i586-generation processors, P5 microarchitecture based. +//@{ +PROC(i586, "i586", PROC_32_BIT) +PROC(Pentium, "pentium", PROC_32_BIT) +PROC(PentiumMMX, "pentium-mmx", PROC_32_BIT) +//@} + +/// \name i686 +/// i686-generation processors, P6 / Pentium M microarchitecture based. +//@{ +PROC(PentiumPro, "pentiumpro", PROC_32_BIT) +PROC(i686, "i686", PROC_32_BIT) +PROC(Pentium2, "pentium2", PROC_32_BIT) +PROC(Pentium3, "pentium3", PROC_32_BIT) +PROC_ALIAS(Pentium3, "pentium3m") +PROC(PentiumM, "pentium-m", PROC_32_BIT) +PROC(C3_2, "c3-2", PROC_32_BIT) + +/// This enumerator is a bit odd, as GCC no longer accepts -march=yonah. +/// Clang however has some logic to support this. +// FIXME: Warn, deprecate, and potentially remove this. +PROC(Yonah, "yonah", PROC_32_BIT) +//@} + +/// \name Netburst +/// Netburst microarchitecture based processors. 
+//@{ +PROC(Pentium4, "pentium4", PROC_32_BIT) +PROC_ALIAS(Pentium4, "pentium4m") + +PROC(Prescott, "prescott", PROC_32_BIT) +PROC(Nocona, "nocona", PROC_64_BIT) +//@} + +/// \name Core +/// Core microarchitecture based processors. +//@{ +PROC_WITH_FEAT(Core2, "core2", PROC_64_BIT, FEATURE_SSSE3) + +/// This enumerator, like Yonah, is a bit odd. It is another +/// codename which GCC no longer accepts as an option to -march, but Clang +/// has some logic for recognizing it. +// FIXME: Warn, deprecate, and potentially remove this. +PROC(Penryn, "penryn", PROC_64_BIT) +//@} + +/// \name Atom +/// Atom processors +//@{ +PROC_WITH_FEAT(Bonnell, "bonnell", PROC_64_BIT, FEATURE_SSSE3) +PROC_ALIAS(Bonnell, "atom") + +PROC_WITH_FEAT(Silvermont, "silvermont", PROC_64_BIT, FEATURE_SSE4_2) +PROC_ALIAS(Silvermont, "slm") + +PROC(Goldmont, "goldmont", PROC_64_BIT) +PROC(GoldmontPlus, "goldmont-plus", PROC_64_BIT) + +PROC(Tremont, "tremont", PROC_64_BIT) +//@} + +/// \name Nehalem +/// Nehalem microarchitecture based processors. +PROC_WITH_FEAT(Nehalem, "nehalem", PROC_64_BIT, FEATURE_SSE4_2) +PROC_ALIAS(Nehalem, "corei7") + +/// \name Westmere +/// Westmere microarchitecture based processors. +PROC_WITH_FEAT(Westmere, "westmere", PROC_64_BIT, FEATURE_PCLMUL) + +/// \name Sandy Bridge +/// Sandy Bridge microarchitecture based processors. +PROC_WITH_FEAT(SandyBridge, "sandybridge", PROC_64_BIT, FEATURE_AVX) +PROC_ALIAS(SandyBridge, "corei7-avx") + +/// \name Ivy Bridge +/// Ivy Bridge microarchitecture based processors. +PROC_WITH_FEAT(IvyBridge, "ivybridge", PROC_64_BIT, FEATURE_AVX) +PROC_ALIAS(IvyBridge, "core-avx-i") + +/// \name Haswell +/// Haswell microarchitecture based processors. +PROC_WITH_FEAT(Haswell, "haswell", PROC_64_BIT, FEATURE_AVX2) +PROC_ALIAS(Haswell, "core-avx2") + +/// \name Broadwell +/// Broadwell microarchitecture based processors. 
+PROC_WITH_FEAT(Broadwell, "broadwell", PROC_64_BIT, FEATURE_AVX2) + +/// \name Skylake Client +/// Skylake client microarchitecture based processors. +PROC_WITH_FEAT(SkylakeClient, "skylake", PROC_64_BIT, FEATURE_AVX2) + +/// \name Skylake Server +/// Skylake server microarchitecture based processors. +PROC_WITH_FEAT(SkylakeServer, "skylake-avx512", PROC_64_BIT, FEATURE_AVX512F) +PROC_ALIAS(SkylakeServer, "skx") + +/// \name Cascadelake Server +/// Cascadelake Server microarchitecture based processors. +PROC_WITH_FEAT(Cascadelake, "cascadelake", PROC_64_BIT, FEATURE_AVX512VNNI) + +/// \name Cooperlake Server +/// Cooperlake Server microarchitecture based processors. +PROC_WITH_FEAT(Cooperlake, "cooperlake", PROC_64_BIT, FEATURE_AVX512BF16) + +/// \name Cannonlake Client +/// Cannonlake client microarchitecture based processors. +PROC_WITH_FEAT(Cannonlake, "cannonlake", PROC_64_BIT, FEATURE_AVX512VBMI) + +/// \name Icelake Client +/// Icelake client microarchitecture based processors. +PROC(IcelakeClient, "icelake-client", PROC_64_BIT) + +/// \name Icelake Server +/// Icelake server microarchitecture based processors. +PROC(IcelakeServer, "icelake-server", PROC_64_BIT) + +/// \name Tigerlake +/// Tigerlake microarchitecture based processors. +PROC(Tigerlake, "tigerlake", PROC_64_BIT) + +/// \name Knights Landing +/// Knights Landing processor. +PROC_WITH_FEAT(KNL, "knl", PROC_64_BIT, FEATURE_AVX512F) + +/// \name Knights Mill +/// Knights Mill processor. +PROC_WITH_FEAT(KNM, "knm", PROC_64_BIT, FEATURE_AVX5124FMAPS) + +/// \name Lakemont +/// Lakemont microarchitecture based processors. +PROC(Lakemont, "lakemont", PROC_32_BIT) + +/// \name K6 +/// K6 architecture processors. +//@{ +PROC(K6, "k6", PROC_32_BIT) +PROC(K6_2, "k6-2", PROC_32_BIT) +PROC(K6_3, "k6-3", PROC_32_BIT) +//@} + +/// \name K7 +/// K7 architecture processors. 
+//@{ +PROC(Athlon, "athlon", PROC_32_BIT) +PROC_ALIAS(Athlon, "athlon-tbird") + +PROC(AthlonXP, "athlon-xp", PROC_32_BIT) +PROC_ALIAS(AthlonXP, "athlon-mp") +PROC_ALIAS(AthlonXP, "athlon-4") +//@} + +/// \name K8 +/// K8 architecture processors. +//@{ +PROC(K8, "k8", PROC_64_BIT) +PROC_ALIAS(K8, "athlon64") +PROC_ALIAS(K8, "athlon-fx") +PROC_ALIAS(K8, "opteron") + +PROC(K8SSE3, "k8-sse3", PROC_64_BIT) +PROC_ALIAS(K8SSE3, "athlon64-sse3") +PROC_ALIAS(K8SSE3, "opteron-sse3") + +PROC_WITH_FEAT(AMDFAM10, "amdfam10", PROC_64_BIT, FEATURE_SSE4_A) +PROC_ALIAS(AMDFAM10, "barcelona") +//@} + +/// \name Bobcat +/// Bobcat architecture processors. +//@{ +PROC_WITH_FEAT(BTVER1, "btver1", PROC_64_BIT, FEATURE_SSE4_A) +PROC_WITH_FEAT(BTVER2, "btver2", PROC_64_BIT, FEATURE_BMI) +//@} + +/// \name Bulldozer +/// Bulldozer architecture processors. +//@{ +PROC_WITH_FEAT(BDVER1, "bdver1", PROC_64_BIT, FEATURE_XOP) +PROC_WITH_FEAT(BDVER2, "bdver2", PROC_64_BIT, FEATURE_FMA) +PROC_WITH_FEAT(BDVER3, "bdver3", PROC_64_BIT, FEATURE_FMA) +PROC_WITH_FEAT(BDVER4, "bdver4", PROC_64_BIT, FEATURE_AVX2) +//@} + +/// \name zen +/// Zen architecture processors. +//@{ +PROC_WITH_FEAT(ZNVER1, "znver1", PROC_64_BIT, FEATURE_AVX2) +PROC_WITH_FEAT(ZNVER2, "znver2", PROC_64_BIT, FEATURE_AVX2) +//@} + +/// This specification is deprecated and will be removed in the future. +/// Users should prefer K8. +// FIXME: Warn on this when the CPU is set to it. +//@{ +PROC(x86_64, "x86-64", PROC_64_BIT) +//@} + +/// \name Geode +/// Geode processors. 
+//@{ +PROC(Geode, "geode", PROC_32_BIT) +//@} + +#undef PROC +#undef PROC_ALIAS +#undef PROC_WITH_FEAT diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h new file mode 100644 index 00000000000000..1c9ad03cde8135 --- /dev/null +++ b/llvm/include/llvm/Support/X86TargetParser.h @@ -0,0 +1,41 @@ +//===-- X86TargetParser - Parser for X86 features ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise X86 hardware features. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_X86TARGETPARSERCOMMON_H +#define LLVM_SUPPORT_X86TARGETPARSERCOMMON_H + +#include "llvm/ADT/SmallVector.h" + +namespace llvm { +class StringRef; + +namespace X86 { + +enum CPUKind { + CK_None, +#define PROC(ENUM, STRING, IS64BIT) CK_##ENUM, +#include "llvm/Support/X86TargetParser.def" +}; + +/// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if +/// \p Only64Bit is true. +CPUKind parseArchX86(StringRef CPU, bool Only64Bit = false); + +/// Provide a list of valid CPU names. If \p Only64Bit is true, the list will +/// only contain 64-bit capable CPUs. 
+void fillValidCPUArchList(SmallVectorImpl &Values, + bool ArchIs32Bit); + +} // namespace X86 +} // namespace llvm + +#endif diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 0a2d71c275d582..1d7be54de3b045 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -133,7 +133,6 @@ class LegalizerWorkListManager : public GISelChangeObserver { } void createdInstr(MachineInstr &MI) override { - LLVM_DEBUG(dbgs() << ".. .. New MI: " << MI); LLVM_DEBUG(NewMIs.push_back(&MI)); createdOrChangedInstr(MI); } @@ -170,6 +169,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, ArrayRef AuxObservers, LostDebugLocObserver &LocObserver, MachineIRBuilder &MIRBuilder) { + MIRBuilder.setMF(MF); MachineRegisterInfo &MRI = MF.getRegInfo(); // Populate worklists. diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index c38d08f41e685d..6c6ef78816b8d9 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -87,7 +87,6 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF, MachineIRBuilder &Builder) : MIRBuilder(Builder), MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) { - MIRBuilder.setMF(MF); MIRBuilder.setChangeObserver(Observer); } @@ -95,13 +94,14 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI, GISelChangeObserver &Observer, MachineIRBuilder &B) : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) { - MIRBuilder.setMF(MF); MIRBuilder.setChangeObserver(Observer); } LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { LLVM_DEBUG(dbgs() << "Legalizing: " << MI); + MIRBuilder.setInstrAndDebugLoc(MI); + if (MI.getOpcode() == TargetOpcode::G_INTRINSIC || MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) 
return LI.legalizeIntrinsic(MI, MIRBuilder, Observer) ? Legalized @@ -636,8 +636,6 @@ LegalizerHelper::libcall(MachineInstr &MI) { unsigned Size = LLTy.getSizeInBits(); auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); - MIRBuilder.setInstrAndDebugLoc(MI); - switch (MI.getOpcode()) { default: return UnableToLegalize; @@ -733,8 +731,6 @@ LegalizerHelper::libcall(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { - MIRBuilder.setInstrAndDebugLoc(MI); - uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); uint64_t NarrowSize = NarrowTy.getSizeInBits(); @@ -1646,8 +1642,6 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { - MIRBuilder.setInstrAndDebugLoc(MI); - switch (MI.getOpcode()) { default: return UnableToLegalize; @@ -2197,8 +2191,6 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { - MIRBuilder.setInstr(MI); - switch (MI.getOpcode()) { case TargetOpcode::G_LOAD: { if (TypeIdx != 0) @@ -2253,7 +2245,6 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { LegalizerHelper::LegalizeResult LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { using namespace TargetOpcode; - MIRBuilder.setInstrAndDebugLoc(MI); switch(MI.getOpcode()) { default: @@ -3327,7 +3318,6 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { using namespace TargetOpcode; - MIRBuilder.setInstrAndDebugLoc(MI); switch (MI.getOpcode()) { case G_IMPLICIT_DEF: return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); @@ -3650,7 +3640,6 @@ LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult 
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy) { - MIRBuilder.setInstrAndDebugLoc(MI); unsigned Opc = MI.getOpcode(); switch (Opc) { case TargetOpcode::G_IMPLICIT_DEF: diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 4236fdd8208425..ea98233beb0eca 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -33,48 +33,10 @@ void MachineIRBuilder::setMF(MachineFunction &MF) { State.Observer = nullptr; } -void MachineIRBuilder::setMBB(MachineBasicBlock &MBB) { - State.MBB = &MBB; - State.II = MBB.end(); - assert(&getMF() == MBB.getParent() && - "Basic block is in a different function"); -} - -void MachineIRBuilder::setInstr(MachineInstr &MI) { - assert(MI.getParent() && "Instruction is not part of a basic block"); - setMBB(*MI.getParent()); - State.II = MI.getIterator(); -} - -void MachineIRBuilder::setCSEInfo(GISelCSEInfo *Info) { State.CSEInfo = Info; } - -void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB, - MachineBasicBlock::iterator II) { - assert(MBB.getParent() == &getMF() && - "Basic block is in a different function"); - State.MBB = &MBB; - State.II = II; -} - -void MachineIRBuilder::recordInsertion(MachineInstr *InsertedInstr) const { - if (State.Observer) - State.Observer->createdInstr(*InsertedInstr); -} - -void MachineIRBuilder::setChangeObserver(GISelChangeObserver &Observer) { - State.Observer = &Observer; -} - -void MachineIRBuilder::stopObservingChanges() { State.Observer = nullptr; } - //------------------------------------------------------------------------------ // Build instruction variants. 
//------------------------------------------------------------------------------ -MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) { - return insertInstr(buildInstrNoInsert(Opcode)); -} - MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) { MachineInstrBuilder MIB = BuildMI(getMF(), getDL(), getTII().get(Opcode)); return MIB; diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index a6889919137495..0f21c97a30f685 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2424,16 +2424,14 @@ SUnit *SchedBoundary::pickOnlyChoice() { if (CheckPending) releasePending(); - if (CurrMOps > 0) { - // Defer any ready instrs that now have a hazard. - for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { - if (checkHazard(*I)) { - Pending.push(*I); - I = Available.remove(I); - continue; - } - ++I; + // Defer any ready instrs that now have a hazard. + for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { + if (checkHazard(*I)) { + Pending.push(*I); + I = Available.remove(I); + continue; } + ++I; } for (unsigned i = 0; Available.empty(); ++i) { // FIXME: Re-enable assert once PR20057 is resolved. 
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index df23ccf4e195c4..c477626172450a 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -915,9 +915,6 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { switch (MI->getOpcode()) { case TargetOpcode::G_CONSTANT: case TargetOpcode::G_FCONSTANT: { - if (MI->getNumOperands() < MCID.getNumOperands()) - break; - LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); if (DstTy.isVector()) report("Instruction cannot use a vector result type", MI); diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 6a3448dc3f8566..17bef023078976 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -158,6 +158,7 @@ add_llvm_component_library(LLVMSupport VersionTuple.cpp VirtualFileSystem.cpp WithColor.cpp + X86TargetParser.cpp YAMLParser.cpp YAMLTraits.cpp raw_os_ostream.cpp diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp new file mode 100644 index 00000000000000..ba85ed7ee6260c --- /dev/null +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -0,0 +1,58 @@ +//===-- X86TargetParser - Parser for X86 features ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise X86 hardware features. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/X86TargetParser.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" + +using namespace llvm; + +bool checkCPUKind(llvm::X86::CPUKind Kind, bool Only64Bit) { + using namespace X86; + // Perform any per-CPU checks necessary to determine if this CPU is + // acceptable. + switch (Kind) { + case CK_None: + // No processor selected! + return false; +#define PROC(ENUM, STRING, IS64BIT) \ + case CK_##ENUM: \ + return IS64BIT || !Only64Bit; +#include "llvm/Support/X86TargetParser.def" + } + llvm_unreachable("Unhandled CPU kind"); +} + +X86::CPUKind llvm::X86::parseArchX86(StringRef CPU, bool Only64Bit) { + X86::CPUKind Kind = llvm::StringSwitch<X86::CPUKind>(CPU) +#define PROC(ENUM, STRING, IS64BIT) .Case(STRING, CK_##ENUM) +#define PROC_ALIAS(ENUM, ALIAS) .Case(ALIAS, CK_##ENUM) +#include "llvm/Support/X86TargetParser.def" + .Default(CK_None); + + if (!checkCPUKind(Kind, Only64Bit)) + Kind = CK_None; + + return Kind; +} + +void llvm::X86::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values, + bool Only64Bit) { +#define PROC(ENUM, STRING, IS64BIT) \ + if (IS64BIT || !Only64Bit) \ + Values.emplace_back(STRING); + // For aliases we need to lookup the CPUKind to get the 64-bit ness. 
+#define PROC_ALIAS(ENUM, ALIAS) \ + if (checkCPUKind(CK_##ENUM, Only64Bit)) \ + Values.emplace_back(ALIAS); +#include "llvm/Support/X86TargetParser.def" +} diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 26d5dde4679e33..f45a3b560cf449 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -56,9 +56,16 @@ def dup: GICombineRule < (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }]) >; +def trn : GICombineRule< + (defs root:$root, shuffle_matchdata:$matchinfo), + (match (wip_match_opcode G_SHUFFLE_VECTOR):$root, + [{ return matchTRN(*${root}, MRI, ${matchinfo}); }]), + (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }]) +>; + // Combines which replace a G_SHUFFLE_VECTOR with a target-specific pseudo // instruction. -def shuffle_vector_pseudos : GICombineGroup<[dup, rev, zip, uzp]>; +def shuffle_vector_pseudos : GICombineGroup<[dup, rev, zip, uzp, trn]>; def AArch64PostLegalizerCombinerHelper : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper", diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 8c95515fa390ee..0bd8a206705d30 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -80,6 +80,19 @@ def G_DUP: AArch64GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$lane); } +// Represents a trn1 instruction. Produced post-legalization from +// G_SHUFFLE_VECTORs with appropriate masks. +def G_TRN1 : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$v1, type0:$v2); +} + +// Represents a trn2 instruction. Produced post-legalization from +// G_SHUFFLE_VECTORs with appropriate masks. 
+def G_TRN2 : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$v1, type0:$v2); +} def : GINodeEquiv<G_REV16, AArch64rev16>; def : GINodeEquiv<G_REV32, AArch64rev32>; @@ -89,3 +102,5 @@ def : GINodeEquiv<G_ZIP2, AArch64zip2>; def : GINodeEquiv<G_DUP, AArch64dup>; def : GINodeEquiv<G_UZP1, AArch64uzp1>; def : GINodeEquiv<G_UZP2, AArch64uzp2>; +def : GINodeEquiv<G_TRN1, AArch64trn1>; +def : GINodeEquiv<G_TRN2, AArch64trn2>; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index d642640c37520e..5aa73760f77049 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -726,6 +726,12 @@ def : InstAlias<"sevl", (HINT 0b101)>; def : InstAlias<"dgh", (HINT 0b110)>; def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>; def : InstAlias<"csdb", (HINT 20)>; +// In order to be able to write readable assembly, LLVM should accept assembly +// inputs that use Branch Target Identification mnemonics, even with BTI disabled. +// However, in order to be compatible with other assemblers (e.g. GAS), LLVM +// should not emit these mnemonics unless BTI is enabled. +def : InstAlias<"bti", (HINT 32), 0>; +def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>; def : InstAlias<"bti", (HINT 32)>, Requires<[HasBTI]>; def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 26dcde47ccfe39..c02f8dd0bb62e3 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -661,7 +661,6 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(MachineInstr &MI, if (GV->isThreadLocal()) return true; // Don't want to modify TLS vars. 
- MIRBuilder.setInstrAndDebugLoc(MI); auto &TM = ST->getTargetLowering()->getTargetMachine(); unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM); @@ -717,7 +716,6 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr( if (Amount > 31) return true; // This will have to remain a register variant. assert(MRI.getType(AmtReg).getSizeInBits() == 32); - MIRBuilder.setInstrAndDebugLoc(MI); auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg); MI.getOperand(2).setReg(ExtCst.getReg(0)); return true; @@ -746,7 +744,6 @@ bool AArch64LegalizerInfo::legalizeLoadStore( return false; } - MIRBuilder.setInstrAndDebugLoc(MI); unsigned PtrSize = ValTy.getElementType().getSizeInBits(); const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize); auto &MMO = **MI.memoperands_begin(); @@ -764,7 +761,6 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const { - MIRBuilder.setInstrAndDebugLoc(MI); MachineFunction &MF = MIRBuilder.getMF(); Align Alignment(MI.getOperand(2).getImm()); Register Dst = MI.getOperand(0).getReg(); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index bee187202a5756..1ce69a8900eba4 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -95,6 +95,22 @@ static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts, return true; } +/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts. +/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult. +static bool isTRNMask(ArrayRef<int> M, unsigned NumElts, + unsigned &WhichResult) { + if (NumElts % 2 != 0) + return false; + WhichResult = (M[0] == 0 ? 
0 : 1); + for (unsigned i = 0; i < NumElts; i += 2) { + if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) || + (M[i + 1] >= 0 && + static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult)) + return false; + } + return true; +} + /// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts. /// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult. static bool isUZPMask(ArrayRef<int> M, unsigned NumElts, @@ -158,6 +174,24 @@ static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI, return false; } +/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with +/// a G_TRN1 or G_TRN2 instruction. +static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + unsigned WhichResult; + ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); + Register Dst = MI.getOperand(0).getReg(); + unsigned NumElts = MRI.getType(Dst).getNumElements(); + if (!isTRNMask(ShuffleMask, NumElts, WhichResult)) + return false; + unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2; + Register V1 = MI.getOperand(1).getReg(); + Register V2 = MI.getOperand(2).getReg(); + MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); + return true; +} + /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with /// a G_UZP1 or G_UZP2 instruction. 
/// diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index b0982afedb488f..a1581c7f0415b8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1571,8 +1571,6 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( MachineIRBuilder &B) const { MachineFunction &MF = B.getMF(); - B.setInstr(MI); - const LLT S32 = LLT::scalar(32); Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -1668,8 +1666,6 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( bool AMDGPULegalizerInfo::legalizeFrint( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); - Register Src = MI.getOperand(1).getReg(); LLT Ty = MRI.getType(Src); assert(Ty.isScalar() && Ty.getSizeInBits() == 64); @@ -1695,7 +1691,6 @@ bool AMDGPULegalizerInfo::legalizeFrint( bool AMDGPULegalizerInfo::legalizeFceil( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); const LLT S1 = LLT::scalar(1); const LLT S64 = LLT::scalar(64); @@ -1740,8 +1735,6 @@ static MachineInstrBuilder extractF64Exponent(unsigned Hi, bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); - const LLT S1 = LLT::scalar(1); const LLT S32 = LLT::scalar(32); const LLT S64 = LLT::scalar(64); @@ -1786,7 +1779,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc( bool AMDGPULegalizerInfo::legalizeITOFP( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, bool Signed) const { - B.setInstr(MI); Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -1820,7 +1812,6 @@ bool AMDGPULegalizerInfo::legalizeITOFP( bool AMDGPULegalizerInfo::legalizeFPTOI( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, bool Signed) const { - B.setInstr(MI); Register Dst = MI.getOperand(0).getReg(); Register 
Src = MI.getOperand(1).getReg(); @@ -1871,7 +1862,6 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum( MachineIRBuilder HelperBuilder(MI); GISelObserverWrapper DummyObserver; LegalizerHelper Helper(MF, DummyObserver, HelperBuilder); - HelperBuilder.setInstr(MI); return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized; } @@ -1897,8 +1887,6 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt( LLT EltTy = VecTy.getElementType(); assert(EltTy == MRI.getType(Dst)); - B.setInstr(MI); - if (IdxVal->Value < VecTy.getNumElements()) B.buildExtract(Dst, Vec, IdxVal->Value * EltTy.getSizeInBits()); else @@ -1931,8 +1919,6 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt( LLT EltTy = VecTy.getElementType(); assert(EltTy == MRI.getType(Ins)); - B.setInstr(MI); - if (IdxVal->Value < VecTy.getNumElements()) B.buildInsert(Dst, Vec, Ins, IdxVal->Value * EltTy.getSizeInBits()); else @@ -1959,14 +1945,12 @@ bool AMDGPULegalizerInfo::legalizeShuffleVector( MachineIRBuilder HelperBuilder(MI); GISelObserverWrapper DummyObserver; LegalizerHelper Helper(B.getMF(), DummyObserver, HelperBuilder); - HelperBuilder.setInstr(MI); return Helper.lowerShuffleVector(MI) == LegalizerHelper::Legalized; } bool AMDGPULegalizerInfo::legalizeSinCos( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); @@ -2058,7 +2042,6 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue( const GlobalValue *GV = MI.getOperand(1).getGlobal(); MachineFunction &MF = B.getMF(); SIMachineFunctionInfo *MFI = MF.getInfo(); - B.setInstr(MI); if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) { if (!MFI->isEntryFunction()) { @@ -2138,7 +2121,6 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue( bool AMDGPULegalizerInfo::legalizeLoad( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, GISelChangeObserver &Observer) const { - B.setInstr(MI); LLT ConstPtr = 
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); auto Cast = B.buildAddrSpaceCast(ConstPtr, MI.getOperand(1).getReg()); Observer.changingInstr(MI); @@ -2166,7 +2148,6 @@ bool AMDGPULegalizerInfo::legalizeFMad( MachineIRBuilder HelperBuilder(MI); GISelObserverWrapper DummyObserver; LegalizerHelper Helper(MF, DummyObserver, HelperBuilder); - HelperBuilder.setInstr(MI); return Helper.lowerFMad(MI) == LegalizerHelper::Legalized; } @@ -2184,7 +2165,6 @@ bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg( LLT ValTy = MRI.getType(CmpVal); LLT VecTy = LLT::vector(2, ValTy); - B.setInstr(MI); Register PackedVal = B.buildBuildVector(VecTy, { NewVal, CmpVal }).getReg(0); B.buildInstr(AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG) @@ -2203,7 +2183,6 @@ bool AMDGPULegalizerInfo::legalizeFlog( Register Src = MI.getOperand(1).getReg(); LLT Ty = B.getMRI()->getType(Dst); unsigned Flags = MI.getFlags(); - B.setInstr(MI); auto Log2Operand = B.buildFLog2(Ty, Src, Flags); auto Log2BaseInvertedOperand = B.buildFConstant(Ty, Log2BaseInverted); @@ -2219,7 +2198,6 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, Register Src = MI.getOperand(1).getReg(); unsigned Flags = MI.getFlags(); LLT Ty = B.getMRI()->getType(Dst); - B.setInstr(MI); auto K = B.buildFConstant(Ty, numbers::log2e); auto Mul = B.buildFMul(Ty, Src, K, Flags); @@ -2235,7 +2213,6 @@ bool AMDGPULegalizerInfo::legalizeFPow(MachineInstr &MI, Register Src1 = MI.getOperand(2).getReg(); unsigned Flags = MI.getFlags(); LLT Ty = B.getMRI()->getType(Dst); - B.setInstr(MI); const LLT S16 = LLT::scalar(16); const LLT S32 = LLT::scalar(32); @@ -2279,7 +2256,6 @@ static Register stripAnySourceMods(Register OrigSrc, MachineRegisterInfo &MRI) { bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); const LLT S1 = LLT::scalar(1); const LLT S64 = LLT::scalar(64); @@ -2345,7 +2321,6 @@ bool AMDGPULegalizerInfo::legalizeBuildVector( Register Src1 = MI.getOperand(2).getReg(); 
assert(MRI.getType(Src0) == LLT::scalar(16)); - B.setInstr(MI); auto Merge = B.buildMerge(S32, {Src0, Src1}); B.buildBitcast(Dst, Merge); @@ -2483,7 +2458,6 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B, bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const { - B.setInstr(MI); const ArgDescriptor *Arg = getArgDescriptor(B, ArgType); if (!Arg) @@ -2499,7 +2473,6 @@ bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin( bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register Dst = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(Dst); LLT S16 = LLT::scalar(16); @@ -2622,7 +2595,6 @@ void AMDGPULegalizerInfo::legalizeUDIV_UREM32Impl(MachineIRBuilder &B, bool AMDGPULegalizerInfo::legalizeUDIV_UREM32(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); const bool IsRem = MI.getOpcode() == AMDGPU::G_UREM; Register DstReg = MI.getOperand(0).getReg(); Register Num = MI.getOperand(1).getReg(); @@ -2678,8 +2650,6 @@ static std::pair emitReciprocalU64(MachineIRBuilder &B, bool AMDGPULegalizerInfo::legalizeUDIV_UREM64(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); - const bool IsDiv = MI.getOpcode() == TargetOpcode::G_UDIV; const LLT S32 = LLT::scalar(32); const LLT S64 = LLT::scalar(64); @@ -2808,7 +2778,6 @@ bool AMDGPULegalizerInfo::legalizeUDIV_UREM(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeSDIV_SREM32(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); const LLT S32 = LLT::scalar(32); const bool IsRem = MI.getOpcode() == AMDGPU::G_SREM; @@ -2915,7 +2884,6 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI, MachineRegisterInfo &MRI, 
MachineIRBuilder &B) const { - B.setInstr(MI); Register Res = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); @@ -2978,7 +2946,6 @@ static void toggleSPDenormMode(bool Enable, bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register Res = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); @@ -3045,7 +3012,6 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register Res = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); @@ -3124,7 +3090,6 @@ bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register Res = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(2).getReg(); Register RHS = MI.getOperand(3).getReg(); @@ -3166,8 +3131,6 @@ bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI, AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR); } - B.setInstr(MI); - uint64_t Offset = ST.getTargetLowering()->getImplicitParameterOffset( B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT); @@ -3195,7 +3158,6 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, unsigned AddrSpace) const { - B.setInstr(MI); Register ApertureReg = getSegmentAperture(AddrSpace, MRI, B); auto Hi32 = B.buildExtract(LLT::scalar(32), MI.getOperand(2).getReg(), 32); B.buildICmp(ICmpInst::ICMP_EQ, MI.getOperand(0), Hi32, ApertureReg); @@ -3303,8 +3265,6 @@ bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI, MachineIRBuilder &B, bool IsTyped, bool IsFormat) const { - 
B.setInstr(MI); - Register VData = MI.getOperand(1).getReg(); LLT Ty = MRI.getType(VData); LLT EltTy = Ty.getScalarType(); @@ -3395,8 +3355,6 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI, MachineIRBuilder &B, bool IsFormat, bool IsTyped) const { - B.setInstr(MI); - // FIXME: Verifier should enforce 1 MMO for these intrinsics. MachineMemOperand *MMO = *MI.memoperands_begin(); const int MemSize = MMO->getSize(); @@ -3515,7 +3473,6 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeAtomicIncDec(MachineInstr &MI, MachineIRBuilder &B, bool IsInc) const { - B.setInstr(MI); unsigned Opc = IsInc ? AMDGPU::G_AMDGPU_ATOMIC_INC : AMDGPU::G_AMDGPU_ATOMIC_DEC; B.buildInstr(Opc) @@ -3576,8 +3533,6 @@ static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) { bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B, Intrinsic::ID IID) const { - B.setInstr(MI); - const bool IsCmpSwap = IID == Intrinsic::amdgcn_raw_buffer_atomic_cmpswap || IID == Intrinsic::amdgcn_struct_buffer_atomic_cmpswap; @@ -3733,7 +3688,6 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( MachineInstr &MI, MachineIRBuilder &B, GISelChangeObserver &Observer, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) const { - B.setInstr(MI); const int NumDefs = MI.getNumExplicitDefs(); bool IsTFE = NumDefs == 2; @@ -3913,8 +3867,6 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( if (!Ty.isVector() || Ty.getElementType() != S16) return true; - B.setInstr(MI); - Register RepackedReg = handleD16VData(B, *MRI, VData); if (RepackedReg != VData) { MI.getOperand(1).setReg(RepackedReg); @@ -4118,7 +4070,6 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad( // out this needs to be converted to a vector load during RegBankSelect. 
if (!isPowerOf2_32(Size)) { LegalizerHelper Helper(MF, *this, Observer, B); - B.setInstr(MI); if (Ty.isVector()) Helper.moreElementsVectorDst(MI, getPow2VectorType(Ty), 0); @@ -4133,8 +4084,6 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad( bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); - // Is non-HSA path or trap-handler disabled? then, insert s_endpgm instruction if (ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa || !ST.isTrapHandlerEnabled()) { @@ -4165,8 +4114,6 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeDebugTrapIntrinsic( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); - // Is non-HSA path or trap-handler disabled? then, report a warning // accordingly if (ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa || @@ -4201,7 +4148,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo()); - B.setInstr(*BrCond); Register Def = MI.getOperand(1).getReg(); Register Use = MI.getOperand(3).getReg(); @@ -4244,8 +4190,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo()); - B.setInstr(*BrCond); - MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB(); Register Reg = MI.getOperand(2).getReg(); B.buildInstr(AMDGPU::SI_LOOP) @@ -4267,7 +4211,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, } case Intrinsic::amdgcn_kernarg_segment_ptr: if (!AMDGPU::isKernel(B.getMF().getFunction().getCallingConv())) { - B.setInstr(MI); // This only makes sense to call in a kernel, so just lower to null. 
B.buildConstant(MI.getOperand(0).getReg(), 0); MI.eraseFromParent(); @@ -4315,7 +4258,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, case Intrinsic::amdgcn_is_private: return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::PRIVATE_ADDRESS); case Intrinsic::amdgcn_wavefrontsize: { - B.setInstr(MI); B.buildConstant(MI.getOperand(0), ST.getWavefrontSize()); MI.eraseFromParent(); return true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index e476fea9e14efa..7315fa08d2616f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2209,7 +2209,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl( break; const LLT S32 = LLT::scalar(32); - MachineFunction *MF = MI.getParent()->getParent(); + MachineBasicBlock *MBB = MI.getParent(); + MachineFunction *MF = MBB->getParent(); MachineIRBuilder B(MI); ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank); GISelObserverWrapper Observer(&ApplySALU); @@ -2234,9 +2235,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl( if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized) llvm_unreachable("widen scalar should have succeeded"); - // FIXME: s16 shift amounts should be lgeal. + // FIXME: s16 shift amounts should be legal. 
if (Opc == AMDGPU::G_SHL || Opc == AMDGPU::G_LSHR || Opc == AMDGPU::G_ASHR) { + B.setInsertPt(*MBB, MI.getIterator()); if (Helper.widenScalar(MI, 1, S32) != LegalizerHelper::Legalized) llvm_unreachable("widen scalar should have succeeded"); } diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp index 6388e8d2d65771..0afc152744136e 100644 --- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp +++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -333,7 +333,6 @@ bool MipsLegalizerInfo::legalizeCustom(MachineInstr &MI, using namespace TargetOpcode; - MIRBuilder.setInstr(MI); const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); @@ -507,7 +506,6 @@ bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI, const MipsInstrInfo &TII = *ST.getInstrInfo(); const MipsRegisterInfo &TRI = *ST.getRegisterInfo(); const RegisterBankInfo &RBI = *ST.getRegBankInfo(); - MIRBuilder.setInstr(MI); switch (MI.getIntrinsicID()) { case Intrinsic::memcpy: diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 2171be293914e5..fadcb173cd4b9a 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -364,9 +364,10 @@ namespace { if (User->getNumOperands() != 2) continue; - // If this can match to INC/DEC, don't count it as a use. - if (User->getOpcode() == ISD::ADD && - (isOneConstant(SDValue(N, 0)) || isAllOnesConstant(SDValue(N, 0)))) + // If this is a sign-extended 8-bit integer immediate used in an ALU + // instruction, there is probably an opcode encoding to save space. 
+ auto *C = dyn_cast<ConstantSDNode>(N); + if (C && isInt<8>(C->getSExtValue())) continue; // Immediates that are used for offsets as part of stack diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index c03e70fdcef55d..b68182e6098dba 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -313,23 +313,48 @@ static bool foldBitcastShuf(Instruction &I, const TargetTransformInfo &TTI) { /// Match a vector binop instruction with inserted scalar operands and convert /// to scalar binop followed by insertelement. static bool scalarizeBinop(Instruction &I, const TargetTransformInfo &TTI) { - Instruction *Ins0, *Ins1; - if (!match(&I, m_BinOp(m_Instruction(Ins0), m_Instruction(Ins1)))) + Value *Ins0, *Ins1; + if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1)))) return false; + // Match against one or both scalar values being inserted into constant + // vectors: + // vec_bo VecC0, (inselt VecC1, V1, Index) + // vec_bo (inselt VecC0, V0, Index), VecC1 + // vec_bo (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) // TODO: Deal with mismatched index constants and variable indexes? 
- Constant *VecC0, *VecC1; - Value *V0, *V1; - uint64_t Index; + Constant *VecC0 = nullptr, *VecC1 = nullptr; + Value *V0 = nullptr, *V1 = nullptr; + uint64_t Index0 = 0, Index1 = 0; if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0), - m_ConstantInt(Index))) || - !match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1), - m_SpecificInt(Index)))) + m_ConstantInt(Index0))) && + !match(Ins0, m_Constant(VecC0))) + return false; + if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1), + m_ConstantInt(Index1))) && + !match(Ins1, m_Constant(VecC1))) + return false; + + bool IsConst0 = !V0; + bool IsConst1 = !V1; + if (IsConst0 && IsConst1) + return false; + if (!IsConst0 && !IsConst1 && Index0 != Index1) return false; - Type *ScalarTy = V0->getType(); + // Bail for single insertion if it is a load. + // TODO: Handle this once getVectorInstrCost can cost for load/stores. + auto *I0 = dyn_cast_or_null(V0); + auto *I1 = dyn_cast_or_null(V1); + if ((IsConst0 && I1 && I1->mayReadFromMemory()) || + (IsConst1 && I0 && I0->mayReadFromMemory())) + return false; + + uint64_t Index = IsConst0 ? Index1 : Index0; + Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType(); Type *VecTy = I.getType(); - assert(VecTy->isVectorTy() && ScalarTy == V1->getType() && + assert(VecTy->isVectorTy() && + (IsConst0 || IsConst1 || V0->getType() == V1->getType()) && (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy()) && "Unexpected types for insert into binop"); @@ -341,10 +366,11 @@ static bool scalarizeBinop(Instruction &I, const TargetTransformInfo &TTI) { // both sequences. int InsertCost = TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index); - int OldCost = InsertCost + InsertCost + VectorOpCost; + int OldCost = (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + + VectorOpCost; int NewCost = ScalarOpCost + InsertCost + - !Ins0->hasOneUse() * InsertCost + - !Ins1->hasOneUse() * InsertCost; + (IsConst0 ? 
0 : !Ins0->hasOneUse() * InsertCost) + + (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost); // We want to scalarize unless the vector variant actually has lower cost. if (OldCost < NewCost) @@ -354,6 +380,13 @@ static bool scalarizeBinop(Instruction &I, const TargetTransformInfo &TTI) { // inselt NewVecC, (scalar_bo V0, V1), Index ++NumScalarBO; IRBuilder<> Builder(&I); + + // For constant cases, extract the scalar element, this should constant fold. + if (IsConst0) + V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index)); + if (IsConst1) + V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index)); + Value *Scalar = Builder.CreateBinOp(Opcode, V0, V1, I.getName() + ".scalar"); // All IR flags are safe to back-propagate. There is no potential for extra diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-trn.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-trn.mir new file mode 100644 index 00000000000000..037177a78c5df5 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-trn.mir @@ -0,0 +1,234 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +# +# Check that we produce G_TRN1 or G_TRN2 when we have an appropriate shuffle +# mask. +# + +... 
+--- +name: trn1_v8s8 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; CHECK-LABEL: name: trn1_v8s8 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[TRN1_:%[0-9]+]]:_(<8 x s8>) = G_TRN1 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[TRN1_]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(0, 8, 2, 10, 4, 12, 6, 14) + $d0 = COPY %2(<8 x s8>) + RET_ReallyLR implicit $q0 + +... +--- +name: trn2_v8s8 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; CHECK-LABEL: name: trn2_v8s8 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[TRN2_:%[0-9]+]]:_(<8 x s8>) = G_TRN2 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[TRN2_]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(1, 9, 3, 11, 5, 13, 7, 15) + $d0 = COPY %2(<8 x s8>) + RET_ReallyLR implicit $q0 + +... +--- +name: trn1_v16s8 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + ; CHECK-LABEL: name: trn1_v16s8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 + ; CHECK: [[TRN1_:%[0-9]+]]:_(<16 x s8>) = G_TRN1 [[COPY]], [[COPY1]] + ; CHECK: $q0 = COPY [[TRN1_]](<16 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<16 x s8>) = COPY $q0 + %1:_(<16 x s8>) = COPY $q1 + %2:_(<16 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %1, shufflemask(0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30) + $q0 = COPY %2(<16 x s8>) + RET_ReallyLR implicit $q0 + +... 
+--- +name: trn2_v16s8 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + ; CHECK-LABEL: name: trn2_v16s8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 + ; CHECK: [[TRN2_:%[0-9]+]]:_(<16 x s8>) = G_TRN2 [[COPY]], [[COPY1]] + ; CHECK: $q0 = COPY [[TRN2_]](<16 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<16 x s8>) = COPY $q0 + %1:_(<16 x s8>) = COPY $q1 + %2:_(<16 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %1, shufflemask(1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31) + $q0 = COPY %2(<16 x s8>) + RET_ReallyLR implicit $q0 + +... +--- +name: trn1_v4s32 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + ; CHECK-LABEL: name: trn1_v4s32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK: [[TRN1_:%[0-9]+]]:_(<4 x s32>) = G_TRN1 [[COPY]], [[COPY1]] + ; CHECK: $q0 = COPY [[TRN1_]](<4 x s32>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 4, 2, 6) + $q0 = COPY %2(<4 x s32>) + RET_ReallyLR implicit $q0 + +... +--- +name: trn2_v4s32 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + ; CHECK-LABEL: name: trn2_v4s32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK: [[TRN2_:%[0-9]+]]:_(<4 x s32>) = G_TRN2 [[COPY]], [[COPY1]] + ; CHECK: $q0 = COPY [[TRN2_]](<4 x s32>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 5, 3, 7) + $q0 = COPY %2(<4 x s32>) + RET_ReallyLR implicit $q0 + +... 
+--- +name: redundant_with_zip1 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; 2 x s32 TRN is redundant with ZIP. Make sure we prioritize ZIP. + ; + ; CHECK-LABEL: name: redundant_with_zip1 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK: [[ZIP1_:%[0-9]+]]:_(<2 x s32>) = G_ZIP1 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[ZIP1_]](<2 x s32>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = COPY $d1 + %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(0, 2) + $d0 = COPY %2(<2 x s32>) + RET_ReallyLR implicit $d0 + +... +--- +name: redundant_with_zip2 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; 2 x s32 TRN is redundant with ZIP. Make sure we prioritize ZIP. + ; + ; CHECK-LABEL: name: redundant_with_zip2 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK: [[ZIP2_:%[0-9]+]]:_(<2 x s32>) = G_ZIP2 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[ZIP2_]](<2 x s32>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = COPY $d1 + %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(1, 3) + $d0 = COPY %2(<2 x s32>) + RET_ReallyLR implicit $d0 + +... +--- +name: trn1_undef +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; Undef shuffle indices should not prevent matching to G_TRN1. 
+ ; + ; CHECK-LABEL: name: trn1_undef + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[TRN1_:%[0-9]+]]:_(<8 x s8>) = G_TRN1 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[TRN1_]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(0, 8, -1, -1, 4, 12, 6, 14) + $d0 = COPY %2(<8 x s8>) + RET_ReallyLR implicit $d0 + +... +--- +name: trn2_undef +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; Undef shuffle indices should not prevent matching to G_TRN2. + ; + ; CHECK-LABEL: name: trn2_undef + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[TRN2_:%[0-9]+]]:_(<8 x s8>) = G_TRN2 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[TRN2_]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(1, -1, 3, 11, 5, 13, -1, -1) + $d0 = COPY %2(<8 x s8>) + RET_ReallyLR implicit $d0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-trn.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-trn.mir new file mode 100644 index 00000000000000..738aacf2c372cd --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-trn.mir @@ -0,0 +1,300 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +# +# Test that we can select G_TRN1 and G_TRN2. +# +# Each testcase is named based off of the instruction which should be selected. + +... 
+--- +name: TRN1v2i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN1v2i32 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN1v2i32_:%[0-9]+]]:fpr64 = TRN1v2i32 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<2 x s32>) = COPY $d0 + %1:fpr(<2 x s32>) = COPY $d1 + %2:fpr(<2 x s32>) = G_TRN1 %0, %1 + RET_ReallyLR + +... +--- +name: TRN1v2i64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN1v2i64 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN1v2i64_:%[0-9]+]]:fpr128 = TRN1v2i64 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<2 x s64>) = COPY $q0 + %1:fpr(<2 x s64>) = COPY $q1 + %2:fpr(<2 x s64>) = G_TRN1 %0, %1 + RET_ReallyLR + +... +--- +name: TRN1v4i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN1v4i16 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN1v4i16_:%[0-9]+]]:fpr64 = TRN1v4i16 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<4 x s16>) = COPY $d1 + %2:fpr(<4 x s16>) = G_TRN1 %0, %1 + RET_ReallyLR + +... 
+--- +name: TRN1v4i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN1v4i32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN1v4i32_:%[0-9]+]]:fpr128 = TRN1v4i32 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<4 x s32>) = COPY $q0 + %1:fpr(<4 x s32>) = COPY $q1 + %2:fpr(<4 x s32>) = G_TRN1 %0, %1 + RET_ReallyLR + +... +--- +name: TRN1v8i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN1v8i8 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN1v8i8_:%[0-9]+]]:fpr64 = TRN1v8i8 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<8 x s8>) = COPY $d0 + %1:fpr(<8 x s8>) = COPY $d1 + %2:fpr(<8 x s8>) = G_TRN1 %0, %1 + RET_ReallyLR + +... +--- +name: TRN1v8i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN1v8i16 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN1v8i16_:%[0-9]+]]:fpr128 = TRN1v8i16 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<8 x s16>) = COPY $q0 + %1:fpr(<8 x s16>) = COPY $q1 + %2:fpr(<8 x s16>) = G_TRN1 %0, %1 + RET_ReallyLR + +... 
+--- +name: TRN1v16i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN1v16i8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN1v16i8_:%[0-9]+]]:fpr128 = TRN1v16i8 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<16 x s8>) = COPY $q0 + %1:fpr(<16 x s8>) = COPY $q1 + %2:fpr(<16 x s8>) = G_TRN1 %0, %1 + RET_ReallyLR + +... +--- +name: TRN2v2i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN2v2i32 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN2v2i32_:%[0-9]+]]:fpr64 = TRN2v2i32 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<2 x s32>) = COPY $d0 + %1:fpr(<2 x s32>) = COPY $d1 + %2:fpr(<2 x s32>) = G_TRN2 %0, %1 + RET_ReallyLR + +... +--- +name: TRN2v2i64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN2v2i64 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN2v2i64_:%[0-9]+]]:fpr128 = TRN2v2i64 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<2 x s64>) = COPY $q0 + %1:fpr(<2 x s64>) = COPY $q1 + %2:fpr(<2 x s64>) = G_TRN2 %0, %1 + RET_ReallyLR + +... 
+--- +name: TRN2v4i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN2v4i16 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN2v4i16_:%[0-9]+]]:fpr64 = TRN2v4i16 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<4 x s16>) = COPY $d1 + %2:fpr(<4 x s16>) = G_TRN2 %0, %1 + RET_ReallyLR + +... +--- +name: TRN2v4i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN2v4i32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN2v4i32_:%[0-9]+]]:fpr128 = TRN2v4i32 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<4 x s32>) = COPY $q0 + %1:fpr(<4 x s32>) = COPY $q1 + %2:fpr(<4 x s32>) = G_TRN2 %0, %1 + RET_ReallyLR + +... +--- +name: TRN2v8i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN2v8i8 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN2v8i8_:%[0-9]+]]:fpr64 = TRN2v8i8 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<8 x s8>) = COPY $d0 + %1:fpr(<8 x s8>) = COPY $d1 + %2:fpr(<8 x s8>) = G_TRN2 %0, %1 + RET_ReallyLR + +... 
+--- +name: TRN2v8i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN2v8i16 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN2v8i16_:%[0-9]+]]:fpr128 = TRN2v8i16 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<8 x s16>) = COPY $q0 + %1:fpr(<8 x s16>) = COPY $q1 + %2:fpr(<8 x s16>) = G_TRN2 %0, %1 + RET_ReallyLR + +... +--- +name: TRN2v16i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN2v16i8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN2v16i8_:%[0-9]+]]:fpr128 = TRN2v16i8 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<16 x s8>) = COPY $q0 + %1:fpr(<16 x s8>) = COPY $q1 + %2:fpr(<16 x s8>) = G_TRN2 %0, %1 + RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll index 70038e934c9f74..95a419bd7398a0 100644 --- a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll +++ b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll @@ -79,7 +79,7 @@ define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, ; CHECK-LABEL: aesea: ; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}} -; CHECK-NEXT: aesmc [[VA]], [[VA]] +; CHECK: aesmc [[VA]], [[VA]] ; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}} ; CHECK-NEXT: aesmc [[VB]], [[VB]] ; CHECK: aese [[VC:v[0-7].16b]], {{v[0-7].16b}} @@ -163,7 +163,7 @@ define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, ; CHECK-LABEL: aesda: ; CHECK: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}} -; CHECK-NEXT: aesimc [[VA]], [[VA]] +; CHECK: aesimc [[VA]], [[VA]] ; CHECK: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}} ; CHECK-NEXT: aesimc [[VB]], [[VB]] ; CHECK: aesd [[VC:v[0-7].16b]], 
{{v[0-7].16b}} diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll b/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll index 72f3170fb09c89..0f16235d7c69e0 100644 --- a/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll +++ b/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll @@ -1,5 +1,5 @@ -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH declare i64 @g(i64, i64) local_unnamed_addr define i64 @f_using_reserved_reg_x16(i64 %a, i64 %b) local_unnamed_addr SLHATTR { diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll b/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll index c5aae051430074..58690052183545 100644 --- a/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll +++ b/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --dump-input-on-failure +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s define i128 @ldp_single_csdb(i128* %p) speculative_load_hardening { entry: diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening.ll b/llvm/test/CodeGen/AArch64/speculation-hardening.ll index 23b87563013fd9..d298efa94dc596 100644 --- a/llvm/test/CodeGen/AArch64/speculation-hardening.ll +++ b/llvm/test/CodeGen/AArch64/speculation-hardening.ll @@ -1,9 +1,9 @@ -; RUN: sed -e 
's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s 
--check-prefixes=CHECK,NOSLH define i32 @f(i8* nocapture readonly %p, i32 %i, i32 %N) local_unnamed_addr SLHATTR { ; CHECK-LABEL: f diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening.mir b/llvm/test/CodeGen/AArch64/speculation-hardening.mir index 5991c4df0407f7..0073bedf8ffad2 100644 --- a/llvm/test/CodeGen/AArch64/speculation-hardening.mir +++ b/llvm/test/CodeGen/AArch64/speculation-hardening.mir @@ -1,6 +1,6 @@ # RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu \ # RUN: -start-before aarch64-speculation-hardening -o - %s \ -# RUN: | FileCheck %s --dump-input-on-failure +# RUN: | FileCheck %s # Check that the speculation hardening pass generates code as expected for # basic blocks ending with a variety of branch patterns: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir index 4cff1a1d1a2f61..98183b01ce364d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s --- name: test_bitcast_s32_to_v2s16 @@ -283,6 +283,36 @@ body: | $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 ... 
+--- +name: test_bitcast_v32s32_to_v16s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + + ; CHECK-LABEL: name: test_bitcast_v32s32_to_v16s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s64>) = G_BITCAST [[COPY]](<32 x s32>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<16 x s64>) + %0:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %1:_(<16 x s64>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... 
+ +--- +name: test_bitcast_v16s64_to_v32s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + + ; CHECK-LABEL: name: test_bitcast_v16s64_to_v32s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<32 x s32>) = G_BITCAST [[COPY]](<16 x s64>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<32 x s32>) + %0:_(<16 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %1:_(<32 x s32>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... + --- name: test_bitcast_s24_to_v3s8 body: | @@ -481,3 +511,1009 @@ body: | %3:_(s32) = G_ANYEXT %2 $vgpr0 = COPY %3 ... 
+ +--- + +name: test_bitcast_v2s16_to_v4s8 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_bitcast_v2s16_to_v4s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s8>) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<4 x s8>) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<4 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<4 x s8>) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT8]](<4 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<4 x s8>) = G_BITCAST %0 + %2:_(<4 x s8>) = G_ADD %1, %1 + %3:_(<4 x s32>) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... 
+ +--- +name: test_bitcast_v4s8_to_v2s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_bitcast_v4s8_to_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[COPY]](<4 x s32>) + ; CHECK: [[ADD:%[0-9]+]]:_(<4 x s8>) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ADD]](<4 x s8>) + ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s8>) = G_TRUNC %0 + %2:_(<4 x s8>) = G_ADD %1, %1 + %3:_(<2 x s16>) = G_BITCAST %2 + $vgpr0 = COPY %3 +... + +--- +name: test_bitcast_v2s16_to_v8s4 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_bitcast_v2s16_to_v8s4 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s4>) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(s4), [[UV1:%[0-9]+]]:_(s4), [[UV2:%[0-9]+]]:_(s4), [[UV3:%[0-9]+]]:_(s4), [[UV4:%[0-9]+]]:_(s4), [[UV5:%[0-9]+]]:_(s4), [[UV6:%[0-9]+]]:_(s4), [[UV7:%[0-9]+]]:_(s4) = G_UNMERGE_VALUES [[BITCAST]](<8 x s4>) + ; CHECK: [[UV8:%[0-9]+]]:_(s4), [[UV9:%[0-9]+]]:_(s4), [[UV10:%[0-9]+]]:_(s4), [[UV11:%[0-9]+]]:_(s4), [[UV12:%[0-9]+]]:_(s4), [[UV13:%[0-9]+]]:_(s4), [[UV14:%[0-9]+]]:_(s4), [[UV15:%[0-9]+]]:_(s4) = G_UNMERGE_VALUES [[BITCAST]](<8 x s4>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s4) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s4) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s4) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s4) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s4) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s4) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s4) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s4) 
+ ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s4) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s4) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s4) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s4) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s4) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s4) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s4) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s4) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s4) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s4) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s4) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s4) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s4) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s4) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s4) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s4) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s4>) = G_BUILD_VECTOR [[TRUNC]](s4), [[TRUNC1]](s4), [[TRUNC2]](s4), [[TRUNC3]](s4), [[TRUNC4]](s4), [[TRUNC5]](s4), [[TRUNC6]](s4), [[TRUNC7]](s4) + ; CHECK: [[ANYEXT16:%[0-9]+]]:_(<8 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s4>) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[ANYEXT16]](<8 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<8 x s4>) = G_BITCAST %0 + %2:_(<8 x s4>) = G_ADD %1, %1 + %3:_(<8 x s32>) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 +... 
+ +--- +name: test_bitcast_v8s4_to_v2s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_bitcast_v8s4_to_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[COPY]](<8 x s32>) + ; CHECK: [[ADD:%[0-9]+]]:_(<8 x s4>) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ADD]](<8 x s4>) + ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x s4>) = G_TRUNC %0 + %2:_(<8 x s4>) = G_ADD %1, %1 + %3:_(<2 x s16>) = G_BITCAST %2 + $vgpr0 = COPY %3 +... + +--- +name: test_bitcast_v4s16_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_bitcast_v4s16_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s32>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... 
+ +--- +name: test_bitcast_v2s32_to_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_bitcast_v2s32_to_v4s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s16>) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>) + ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC4]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC5]] + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC6]] + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC7]] + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; 
CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = G_BITCAST %0 + %2:_(<4 x s16>) = G_ADD %1, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: test_bitcast_v2s32_to_v8s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_bitcast_v2s32_to_v8s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<8 x s8>) + ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<8 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + 
; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) + ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<8 x s8>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<8 x s8>) = G_BITCAST %0 + %2:_(<8 x s8>) = G_ADD %1, %1 + S_ENDPGM 0, implicit %2 + +... 
+ +--- +name: test_bitcast_v8s8_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: test_bitcast_v8s8_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[COPY]](<8 x s32>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[TRUNC]](<8 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s32>) + %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x s8>) = G_TRUNC %0 + %2:_(<2 x s32>) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: test_bitcast_v8s8_to_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: test_bitcast_v8s8_to_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK: 
[[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] + ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) + ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) + ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CHECK: S_ENDPGM 0, implicit [[MV]](s64) + %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x s8>) = G_TRUNC %0 + %2:_(s64) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: test_bitcast_v2s32_to_v16s4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_bitcast_v2s32_to_v16s4 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s4>) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s4), [[UV1:%[0-9]+]]:_(s4), [[UV2:%[0-9]+]]:_(s4), [[UV3:%[0-9]+]]:_(s4), [[UV4:%[0-9]+]]:_(s4), [[UV5:%[0-9]+]]:_(s4), [[UV6:%[0-9]+]]:_(s4), [[UV7:%[0-9]+]]:_(s4), [[UV8:%[0-9]+]]:_(s4), [[UV9:%[0-9]+]]:_(s4), [[UV10:%[0-9]+]]:_(s4), [[UV11:%[0-9]+]]:_(s4), [[UV12:%[0-9]+]]:_(s4), [[UV13:%[0-9]+]]:_(s4), [[UV14:%[0-9]+]]:_(s4), [[UV15:%[0-9]+]]:_(s4) = G_UNMERGE_VALUES [[BITCAST]](<16 x s4>) + ; CHECK: [[UV16:%[0-9]+]]:_(s4), [[UV17:%[0-9]+]]:_(s4), [[UV18:%[0-9]+]]:_(s4), [[UV19:%[0-9]+]]:_(s4), [[UV20:%[0-9]+]]:_(s4), [[UV21:%[0-9]+]]:_(s4), [[UV22:%[0-9]+]]:_(s4), [[UV23:%[0-9]+]]:_(s4), [[UV24:%[0-9]+]]:_(s4), [[UV25:%[0-9]+]]:_(s4), [[UV26:%[0-9]+]]:_(s4), [[UV27:%[0-9]+]]:_(s4), [[UV28:%[0-9]+]]:_(s4), [[UV29:%[0-9]+]]:_(s4), [[UV30:%[0-9]+]]:_(s4), [[UV31:%[0-9]+]]:_(s4) = G_UNMERGE_VALUES [[BITCAST]](<16 x s4>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s4) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV16]](s4) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s4) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s4) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV17]](s4) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s4) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s4) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV18]](s4) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s4) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s4) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT 
[[UV19]](s4) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s4) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s4) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV20]](s4) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s4) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s4) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV21]](s4) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s4) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s4) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV22]](s4) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s4) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s4) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV23]](s4) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s4) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s4) + ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s16) = G_ANYEXT [[UV24]](s4) + ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT16]], [[ANYEXT17]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s4) = G_TRUNC [[ADD8]](s16) + ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s4) + ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s16) = G_ANYEXT [[UV25]](s4) + ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT18]], [[ANYEXT19]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s4) = G_TRUNC [[ADD9]](s16) + ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s4) + ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s16) = G_ANYEXT [[UV26]](s4) + ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT20]], [[ANYEXT21]] + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s4) = G_TRUNC [[ADD10]](s16) + ; CHECK: [[ANYEXT22:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s4) + ; CHECK: 
[[ANYEXT23:%[0-9]+]]:_(s16) = G_ANYEXT [[UV27]](s4) + ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT22]], [[ANYEXT23]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s4) = G_TRUNC [[ADD11]](s16) + ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s4) + ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s16) = G_ANYEXT [[UV28]](s4) + ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT24]], [[ANYEXT25]] + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s4) = G_TRUNC [[ADD12]](s16) + ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s4) + ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s16) = G_ANYEXT [[UV29]](s4) + ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT26]], [[ANYEXT27]] + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s4) = G_TRUNC [[ADD13]](s16) + ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s4) + ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s16) = G_ANYEXT [[UV30]](s4) + ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT28]], [[ANYEXT29]] + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s4) = G_TRUNC [[ADD14]](s16) + ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s4) + ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s16) = G_ANYEXT [[UV31]](s4) + ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT30]], [[ANYEXT31]] + ; CHECK: [[TRUNC15:%[0-9]+]]:_(s4) = G_TRUNC [[ADD15]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s4>) = G_BUILD_VECTOR [[TRUNC]](s4), [[TRUNC1]](s4), [[TRUNC2]](s4), [[TRUNC3]](s4), [[TRUNC4]](s4), [[TRUNC5]](s4), [[TRUNC6]](s4), [[TRUNC7]](s4), [[TRUNC8]](s4), [[TRUNC9]](s4), [[TRUNC10]](s4), [[TRUNC11]](s4), [[TRUNC12]](s4), [[TRUNC13]](s4), [[TRUNC14]](s4), [[TRUNC15]](s4) + ; CHECK: [[ANYEXT32:%[0-9]+]]:_(<16 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<16 x s4>) + ; CHECK: S_ENDPGM 0, implicit [[ANYEXT32]](<16 x s16>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<16 x s4>) = G_BITCAST %0 + %2:_(<16 x s4>) = G_ADD %1, %1 + %3:_(<16 x s16>) = G_ANYEXT %2 + S_ENDPGM 0, implicit %3 + +... 
+ +--- +name: test_bitcast_v16s4_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: test_bitcast_v16s4_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s4>) = G_TRUNC [[COPY]](<16 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[TRUNC]](<16 x s4>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s32>) + %0:_(<16 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<16 x s4>) = G_TRUNC %0 + %2:_(<2 x s32>) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: test_bitcast_s64_to_v8s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_bitcast_s64_to_v8s8 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) + ; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) + ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[COPY2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[LSHR2]](s16) + ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[LSHR2]](s16) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = 
G_ADD [[COPY3]], [[COPY4]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[COPY6]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[LSHR3]](s16) + ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[LSHR3]](s16) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY7]], [[COPY8]] + ; CHECK: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY9]], [[COPY10]] + ; CHECK: [[COPY11:%[0-9]+]]:_(s16) = COPY [[LSHR4]](s16) + ; CHECK: [[COPY12:%[0-9]+]]:_(s16) = COPY [[LSHR4]](s16) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY11]], [[COPY12]] + ; CHECK: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[COPY13]], [[COPY14]] + ; CHECK: [[COPY15:%[0-9]+]]:_(s16) = COPY [[LSHR5]](s16) + ; CHECK: [[COPY16:%[0-9]+]]:_(s16) = COPY [[LSHR5]](s16) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[COPY15]], [[COPY16]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC4]](<8 x s8>) + %0:_(s64) = COPY 
$vgpr0_vgpr1 + %1:_(<8 x s8>) = G_BITCAST %0 + %2:_(<8 x s8>) = G_ADD %1, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: test_bitcast_v3s32_to_v12s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; CHECK-LABEL: name: test_bitcast_v3s32_to_v12s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<12 x s8>) = G_BITCAST [[COPY]](<3 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<12 x s8>) + ; CHECK: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8), [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<12 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], 
[[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV16]](s8) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV17]](s8) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV18]](s8) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV19]](s8) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s16) = G_ANYEXT [[UV20]](s8) + ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT16]], [[ANYEXT17]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[ADD8]](s16) + ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s16) = G_ANYEXT [[UV21]](s8) + ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT18]], [[ANYEXT19]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[ADD9]](s16) + ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s16) = G_ANYEXT [[UV22]](s8) + ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT20]], [[ANYEXT21]] + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[ADD10]](s16) + ; CHECK: [[ANYEXT22:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s16) = G_ANYEXT [[UV23]](s8) + ; CHECK: 
[[ADD11:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT22]], [[ANYEXT23]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[ADD11]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<12 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) + ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<12 x s8>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<12 x s8>) = G_BITCAST %0 + %2:_(<12 x s8>) = G_ADD %1, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: test_bitcast_v12s8_to_v3s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 + + ; CHECK-LABEL: name: test_bitcast_v12s8_to_v3s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>), [[COPY2]](<4 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<12 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<12 x s32>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[TRUNC]](<12 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s32>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<12 x s32>) = G_CONCAT_VECTORS %0, %1, %2 + %4:_(<12 x s8>) = G_TRUNC %3 + %5:_(<3 x s32>) = G_BITCAST %4 + S_ENDPGM 0, implicit %5 +... 
+ +--- +name: test_bitcast_v6s8_to_v3s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v6s8_to_v3s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s8>) = G_TRUNC [[COPY]](<6 x s32>) + ; CHECK: [[ADD:%[0-9]+]]:_(<6 x s8>) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[ADD]](<6 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s16>) + %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<6 x s8>) = G_TRUNC %0 + %2:_(<6 x s8>) = G_ADD %1, %1 + %3:_(<3 x s16>) = G_BITCAST %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: test_bitcast_v3s16_to_v6s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; CHECK-LABEL: name: test_bitcast_v3s16_to_v6s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[COPY]](<3 x s32>) + ; CHECK: [[ADD:%[0-9]+]]:_(<3 x s16>) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<6 x s8>) = G_BITCAST [[ADD]](<3 x s16>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<6 x s8>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>) = G_TRUNC %0 + %2:_(<3 x s16>) = G_ADD %1, %1 + %3:_(<6 x s8>) = G_BITCAST %2 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: test_bitcast_v2s64_to_v16s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_bitcast_v2s64_to_v16s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8), [[UV24:%[0-9]+]]:_(s8), [[UV25:%[0-9]+]]:_(s8), [[UV26:%[0-9]+]]:_(s8), [[UV27:%[0-9]+]]:_(s8), [[UV28:%[0-9]+]]:_(s8), [[UV29:%[0-9]+]]:_(s8), [[UV30:%[0-9]+]]:_(s8), [[UV31:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV16]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV17]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV18]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: 
[[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV19]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV20]](s8) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV21]](s8) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV22]](s8) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV23]](s8) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s16) = G_ANYEXT [[UV24]](s8) + ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT16]], [[ANYEXT17]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[ADD8]](s16) + ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s16) = G_ANYEXT [[UV25]](s8) + ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT18]], [[ANYEXT19]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[ADD9]](s16) + ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s16) = G_ANYEXT [[UV26]](s8) + ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT20]], [[ANYEXT21]] + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[ADD10]](s16) + ; CHECK: 
[[ANYEXT22:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s16) = G_ANYEXT [[UV27]](s8) + ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT22]], [[ANYEXT23]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[ADD11]](s16) + ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s16) = G_ANYEXT [[UV28]](s8) + ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT24]], [[ANYEXT25]] + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[ADD12]](s16) + ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s16) = G_ANYEXT [[UV29]](s8) + ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT26]], [[ANYEXT27]] + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[ADD13]](s16) + ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s16) = G_ANYEXT [[UV30]](s8) + ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT28]], [[ANYEXT29]] + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[ADD14]](s16) + ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s16) = G_ANYEXT [[UV31]](s8) + ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT30]], [[ANYEXT31]] + ; CHECK: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[ADD15]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8), [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; CHECK: [[ANYEXT32:%[0-9]+]]:_(<16 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[ANYEXT32]](<16 x s32>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<16 x s8>) = G_BITCAST %0 + %2:_(<16 x s8>) = G_ADD %1, %1 + %3:_(<16 x s32>) = G_ANYEXT %2 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: test_bitcast_v16s8_to_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + + ; CHECK-LABEL: name: test_bitcast_v16s8_to_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[COPY]](<16 x s32>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[TRUNC]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s64>) + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x s8>) = G_TRUNC %0 + %2:_(<2 x s64>) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: test_bitcast_v4s32_to_v16s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-LABEL: name: test_bitcast_v4s32_to_v16s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<4 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8), [[UV24:%[0-9]+]]:_(s8), [[UV25:%[0-9]+]]:_(s8), [[UV26:%[0-9]+]]:_(s8), [[UV27:%[0-9]+]]:_(s8), [[UV28:%[0-9]+]]:_(s8), [[UV29:%[0-9]+]]:_(s8), [[UV30:%[0-9]+]]:_(s8), [[UV31:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) 
= G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV16]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV17]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV18]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV19]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV20]](s8) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV21]](s8) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV22]](s8) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV23]](s8) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[ADD7]](s16) + ; CHECK: 
[[ANYEXT16:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s16) = G_ANYEXT [[UV24]](s8) + ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT16]], [[ANYEXT17]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[ADD8]](s16) + ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s16) = G_ANYEXT [[UV25]](s8) + ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT18]], [[ANYEXT19]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[ADD9]](s16) + ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s16) = G_ANYEXT [[UV26]](s8) + ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT20]], [[ANYEXT21]] + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[ADD10]](s16) + ; CHECK: [[ANYEXT22:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s16) = G_ANYEXT [[UV27]](s8) + ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT22]], [[ANYEXT23]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[ADD11]](s16) + ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s16) = G_ANYEXT [[UV28]](s8) + ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT24]], [[ANYEXT25]] + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[ADD12]](s16) + ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s16) = G_ANYEXT [[UV29]](s8) + ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT26]], [[ANYEXT27]] + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[ADD13]](s16) + ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s16) = G_ANYEXT [[UV30]](s8) + ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT28]], [[ANYEXT29]] + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[ADD14]](s16) + ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s16) = G_ANYEXT [[UV31]](s8) + ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT30]], [[ANYEXT31]] + ; 
CHECK: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[ADD15]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8), [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; CHECK: [[ANYEXT32:%[0-9]+]]:_(<16 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[ANYEXT32]](<16 x s32>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<16 x s8>) = G_BITCAST %0 + %2:_(<16 x s8>) = G_ADD %1, %1 + %3:_(<16 x s32>) = G_ANYEXT %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: test_bitcast_v16s8_to_v4s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + + ; CHECK-LABEL: name: test_bitcast_v16s8_to_v4s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[COPY]](<16 x s32>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[TRUNC]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<4 x s32>) + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x s8>) = G_TRUNC %0 + %2:_(<4 x s32>) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: test_bitcast_v8s16_to_v16s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_bitcast_v8s16_to_v16s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<8 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8), [[UV24:%[0-9]+]]:_(s8), [[UV25:%[0-9]+]]:_(s8), [[UV26:%[0-9]+]]:_(s8), [[UV27:%[0-9]+]]:_(s8), [[UV28:%[0-9]+]]:_(s8), [[UV29:%[0-9]+]]:_(s8), [[UV30:%[0-9]+]]:_(s8), [[UV31:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV16]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV17]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV18]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: 
[[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV19]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV20]](s8) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV21]](s8) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV22]](s8) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV23]](s8) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s16) = G_ANYEXT [[UV24]](s8) + ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT16]], [[ANYEXT17]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[ADD8]](s16) + ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s16) = G_ANYEXT [[UV25]](s8) + ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT18]], [[ANYEXT19]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[ADD9]](s16) + ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s16) = G_ANYEXT [[UV26]](s8) + ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT20]], [[ANYEXT21]] + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[ADD10]](s16) + ; CHECK: 
[[ANYEXT22:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s16) = G_ANYEXT [[UV27]](s8) + ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT22]], [[ANYEXT23]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[ADD11]](s16) + ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s16) = G_ANYEXT [[UV28]](s8) + ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT24]], [[ANYEXT25]] + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[ADD12]](s16) + ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s16) = G_ANYEXT [[UV29]](s8) + ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT26]], [[ANYEXT27]] + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[ADD13]](s16) + ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s16) = G_ANYEXT [[UV30]](s8) + ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT28]], [[ANYEXT29]] + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[ADD14]](s16) + ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s16) = G_ANYEXT [[UV31]](s8) + ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT30]], [[ANYEXT31]] + ; CHECK: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[ADD15]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8), [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; CHECK: [[ANYEXT32:%[0-9]+]]:_(<16 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[ANYEXT32]](<16 x s32>) + %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<16 x s8>) = G_BITCAST %0 + %2:_(<16 x s8>) = G_ADD %1, %1 + %3:_(<16 x s32>) = G_ANYEXT %2 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: test_bitcast_v16s8_to_v8s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + + ; CHECK-LABEL: name: test_bitcast_v16s8_to_v8s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[COPY]](<16 x s32>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[TRUNC]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<8 x s16>) + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x s8>) = G_TRUNC %0 + %2:_(<8 x s16>) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: test_bitcast_v3s64_to_v6s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v3s64_to_v6s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[COPY]](<3 x s64>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<6 x s32>) + %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<6 x s32>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: test_bitcast_v6s32_to_v3s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v6s32_to_v3s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<6 x s32>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) + %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<3 x s64>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... 
+ +--- +name: test_bitcast_v3s64_to_v12s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v3s64_to_v12s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<12 x s16>) = G_BITCAST [[COPY]](<3 x s64>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<12 x s16>) + %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<12 x s16>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: test_bitcast_v12s16_to_v3s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v12s16_to_v3s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<12 x s16>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) + %0:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<3 x s64>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: test_bitcast_v3s64_to_v24s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v3s64_to_v24s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<24 x s8>) = G_BITCAST [[COPY]](<3 x s64>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<24 x s8>) + %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<24 x s8>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... 
+ +--- +name: test_bitcast_v24s8_to_v3s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v24s8_to_v3s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<24 x s8>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<24 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) + %0:_(<24 x s8>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<3 x s64>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... diff --git a/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll b/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll index d216cf59bde263..9af68e7d801279 100644 --- a/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll +++ b/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll @@ -2,7 +2,7 @@ define i32 @foo() nounwind { entry: -; CHECK: cntlzw 3, 4 +; CHECK: cntlzw 3, 3 %retval = alloca i32, align 4 ; [#uses=2] %temp = alloca i32, align 4 ; [#uses=2] %ctz_x = alloca i32, align 4 ; [#uses=3] diff --git a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll index 7897d1c6b8a5af..028904fc3200a9 100644 --- a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll +++ b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll @@ -9,29 +9,29 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: stwu 1, -464(1) ; CHECK-NEXT: mfcr 12 ; CHECK-NEXT: stw 29, 412(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 30, 416(1) # 4-byte Folded Spill ; CHECK-NEXT: lis 3, .LCPI0_0@ha +; CHECK-NEXT: stw 30, 416(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 12, 408(1) ; CHECK-NEXT: stfd 2, 376(1) -; CHECK-NEXT: stfd 27, 424(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 1, 384(1) -; CHECK-NEXT: stfd 28, 432(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 29, 440(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 30, 448(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 31, 456(1) # 8-byte Folded Spill ; CHECK-NEXT: lwz 4, 380(1) -; CHECK-NEXT: lfs 27, .LCPI0_0@l(3) 
-; CHECK-NEXT: lwz 3, 384(1) +; CHECK-NEXT: stfd 27, 424(1) # 8-byte Folded Spill ; CHECK-NEXT: stw 4, 396(1) -; CHECK-NEXT: fcmpu 0, 2, 27 ; CHECK-NEXT: lwz 4, 376(1) +; CHECK-NEXT: lfs 27, .LCPI0_0@l(3) +; CHECK-NEXT: stfd 1, 384(1) +; CHECK-NEXT: stw 4, 392(1) +; CHECK-NEXT: fcmpu 0, 2, 27 +; CHECK-NEXT: lwz 4, 388(1) ; CHECK-NEXT: fcmpu 1, 1, 27 +; CHECK-NEXT: lwz 3, 384(1) ; CHECK-NEXT: crand 20, 6, 0 ; CHECK-NEXT: cror 20, 4, 20 -; CHECK-NEXT: stw 4, 392(1) -; CHECK-NEXT: stw 3, 400(1) -; CHECK-NEXT: lwz 4, 388(1) +; CHECK-NEXT: stfd 28, 432(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 29, 440(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 30, 448(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 31, 456(1) # 8-byte Folded Spill ; CHECK-NEXT: stw 4, 404(1) +; CHECK-NEXT: stw 3, 400(1) ; CHECK-NEXT: bc 4, 20, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %bb5 ; CHECK-NEXT: li 3, 0 @@ -41,54 +41,53 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfd 0, 400(1) ; CHECK-NEXT: lis 3, 15856 ; CHECK-NEXT: stw 3, 336(1) -; CHECK-NEXT: lfd 1, 392(1) ; CHECK-NEXT: li 29, 0 ; CHECK-NEXT: stfd 0, 304(1) -; CHECK-NEXT: stw 29, 340(1) -; CHECK-NEXT: stw 29, 332(1) -; CHECK-NEXT: stw 29, 328(1) ; CHECK-NEXT: lwz 3, 308(1) -; CHECK-NEXT: stfd 1, 296(1) -; CHECK-NEXT: lfd 3, 336(1) -; CHECK-NEXT: lfd 4, 328(1) +; CHECK-NEXT: lfd 1, 392(1) ; CHECK-NEXT: stw 3, 324(1) ; CHECK-NEXT: lwz 3, 304(1) +; CHECK-NEXT: stfd 1, 296(1) ; CHECK-NEXT: stw 3, 320(1) ; CHECK-NEXT: lwz 3, 300(1) -; CHECK-NEXT: lfd 31, 320(1) +; CHECK-NEXT: stw 29, 340(1) ; CHECK-NEXT: stw 3, 316(1) -; CHECK-NEXT: fmr 1, 31 ; CHECK-NEXT: lwz 3, 296(1) +; CHECK-NEXT: stw 29, 332(1) ; CHECK-NEXT: stw 3, 312(1) +; CHECK-NEXT: stw 29, 328(1) +; CHECK-NEXT: lfd 31, 320(1) ; CHECK-NEXT: lfd 30, 312(1) +; CHECK-NEXT: lfd 3, 336(1) +; CHECK-NEXT: fmr 1, 31 +; CHECK-NEXT: lfd 4, 328(1) ; CHECK-NEXT: fmr 2, 30 ; CHECK-NEXT: bl __gcc_qmul ; CHECK-NEXT: lis 3, 16864 ; CHECK-NEXT: stfd 1, 280(1) -; CHECK-NEXT: stw 3, 
368(1) -; CHECK-NEXT: stfd 2, 288(1) -; CHECK-NEXT: stw 29, 372(1) -; CHECK-NEXT: stw 29, 364(1) -; CHECK-NEXT: stw 29, 360(1) ; CHECK-NEXT: fmr 29, 1 -; CHECK-NEXT: lwz 3, 284(1) +; CHECK-NEXT: stw 3, 368(1) ; CHECK-NEXT: fmr 28, 2 -; CHECK-NEXT: lfd 3, 368(1) -; CHECK-NEXT: lfd 4, 360(1) +; CHECK-NEXT: lwz 3, 284(1) +; CHECK-NEXT: stfd 2, 288(1) ; CHECK-NEXT: stw 3, 356(1) ; CHECK-NEXT: lwz 3, 280(1) +; CHECK-NEXT: stw 29, 372(1) ; CHECK-NEXT: stw 3, 352(1) ; CHECK-NEXT: lwz 3, 292(1) -; CHECK-NEXT: lfd 1, 352(1) +; CHECK-NEXT: stw 29, 364(1) ; CHECK-NEXT: stw 3, 348(1) ; CHECK-NEXT: lwz 3, 288(1) +; CHECK-NEXT: stw 29, 360(1) ; CHECK-NEXT: stw 3, 344(1) +; CHECK-NEXT: lfd 3, 368(1) +; CHECK-NEXT: lfd 4, 360(1) +; CHECK-NEXT: lfd 1, 352(1) ; CHECK-NEXT: lfd 2, 344(1) ; CHECK-NEXT: bl __gcc_qsub ; CHECK-NEXT: mffs 0 ; CHECK-NEXT: mtfsb1 31 ; CHECK-NEXT: lis 3, .LCPI0_1@ha -; CHECK-NEXT: fcmpu 0, 28, 27 ; CHECK-NEXT: mtfsb0 30 ; CHECK-NEXT: fadd 1, 2, 1 ; CHECK-NEXT: mtfsf 1, 0 @@ -102,6 +101,7 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfs 1, .LCPI0_1@l(3) ; CHECK-NEXT: fctiwz 0, 0 ; CHECK-NEXT: stfd 0, 152(1) +; CHECK-NEXT: fcmpu 0, 28, 27 ; CHECK-NEXT: lwz 3, 164(1) ; CHECK-NEXT: fcmpu 1, 29, 1 ; CHECK-NEXT: lwz 4, 156(1) @@ -120,25 +120,25 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: bl __floatditf ; CHECK-NEXT: lis 3, 17392 ; CHECK-NEXT: stfd 1, 208(1) -; CHECK-NEXT: stw 3, 240(1) -; CHECK-NEXT: stfd 2, 200(1) -; CHECK-NEXT: stw 29, 244(1) -; CHECK-NEXT: stw 29, 236(1) -; CHECK-NEXT: stw 29, 232(1) ; CHECK-NEXT: fmr 29, 1 -; CHECK-NEXT: lwz 3, 212(1) +; CHECK-NEXT: stw 3, 240(1) ; CHECK-NEXT: fmr 28, 2 -; CHECK-NEXT: lfd 3, 240(1) -; CHECK-NEXT: lfd 4, 232(1) +; CHECK-NEXT: lwz 3, 212(1) ; CHECK-NEXT: cmpwi 2, 30, 0 +; CHECK-NEXT: stfd 2, 200(1) ; CHECK-NEXT: stw 3, 228(1) ; CHECK-NEXT: lwz 3, 208(1) +; CHECK-NEXT: stw 29, 244(1) ; CHECK-NEXT: stw 3, 224(1) ; CHECK-NEXT: lwz 3, 204(1) -; 
CHECK-NEXT: lfd 1, 224(1) +; CHECK-NEXT: stw 29, 236(1) ; CHECK-NEXT: stw 3, 220(1) ; CHECK-NEXT: lwz 3, 200(1) +; CHECK-NEXT: stw 29, 232(1) ; CHECK-NEXT: stw 3, 216(1) +; CHECK-NEXT: lfd 3, 240(1) +; CHECK-NEXT: lfd 4, 232(1) +; CHECK-NEXT: lfd 1, 224(1) ; CHECK-NEXT: lfd 2, 216(1) ; CHECK-NEXT: bl __gcc_qadd ; CHECK-NEXT: blt 2, .LBB0_7 @@ -150,9 +150,9 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: fmr 1, 29 ; CHECK-NEXT: .LBB0_9: # %bb1 ; CHECK-NEXT: stfd 1, 184(1) -; CHECK-NEXT: stfd 2, 192(1) ; CHECK-NEXT: fmr 1, 31 ; CHECK-NEXT: lwz 3, 188(1) +; CHECK-NEXT: stfd 2, 192(1) ; CHECK-NEXT: fmr 2, 30 ; CHECK-NEXT: stw 3, 260(1) ; CHECK-NEXT: lwz 3, 184(1) @@ -165,10 +165,10 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfd 4, 248(1) ; CHECK-NEXT: bl __gcc_qsub ; CHECK-NEXT: stfd 2, 176(1) -; CHECK-NEXT: stfd 1, 168(1) ; CHECK-NEXT: fcmpu 1, 2, 27 ; CHECK-NEXT: lwz 3, 180(1) ; CHECK-NEXT: fcmpu 0, 1, 27 +; CHECK-NEXT: stfd 1, 168(1) ; CHECK-NEXT: crandc 20, 2, 4 ; CHECK-NEXT: stw 3, 268(1) ; CHECK-NEXT: lwz 3, 176(1) @@ -184,27 +184,27 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: cror 20, 1, 3 ; CHECK-NEXT: bc 12, 20, .LBB0_14 ; CHECK-NEXT: # %bb.11: # %bb2 -; CHECK-NEXT: fneg 28, 31 -; CHECK-NEXT: stfd 28, 48(1) +; CHECK-NEXT: fneg 29, 31 +; CHECK-NEXT: stfd 29, 48(1) ; CHECK-NEXT: lis 3, 16864 ; CHECK-NEXT: stw 3, 80(1) -; CHECK-NEXT: fneg 29, 30 +; CHECK-NEXT: fneg 28, 30 ; CHECK-NEXT: lwz 3, 52(1) -; CHECK-NEXT: stfd 29, 40(1) ; CHECK-NEXT: li 29, 0 -; CHECK-NEXT: stw 29, 84(1) -; CHECK-NEXT: stw 29, 76(1) -; CHECK-NEXT: stw 29, 72(1) +; CHECK-NEXT: stfd 28, 40(1) ; CHECK-NEXT: stw 3, 68(1) -; CHECK-NEXT: lfd 3, 80(1) -; CHECK-NEXT: lfd 4, 72(1) ; CHECK-NEXT: lwz 3, 48(1) +; CHECK-NEXT: stw 29, 84(1) ; CHECK-NEXT: stw 3, 64(1) ; CHECK-NEXT: lwz 3, 44(1) -; CHECK-NEXT: lfd 1, 64(1) +; CHECK-NEXT: stw 29, 76(1) ; CHECK-NEXT: stw 3, 60(1) ; CHECK-NEXT: lwz 3, 40(1) +; 
CHECK-NEXT: stw 29, 72(1) ; CHECK-NEXT: stw 3, 56(1) +; CHECK-NEXT: lfd 3, 80(1) +; CHECK-NEXT: lfd 4, 72(1) +; CHECK-NEXT: lfd 1, 64(1) ; CHECK-NEXT: lfd 2, 56(1) ; CHECK-NEXT: bl __gcc_qsub ; CHECK-NEXT: mffs 0 @@ -220,12 +220,12 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfs 0, .LCPI0_2@l(3) ; CHECK-NEXT: lis 3, .LCPI0_3@ha ; CHECK-NEXT: mtfsb0 30 -; CHECK-NEXT: fadd 2, 29, 28 +; CHECK-NEXT: fadd 2, 28, 29 ; CHECK-NEXT: mtfsf 1, 1 ; CHECK-NEXT: lfs 1, .LCPI0_3@l(3) -; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: fctiwz 2, 2 ; CHECK-NEXT: stfd 2, 24(1) +; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: lwz 3, 36(1) ; CHECK-NEXT: fcmpu 1, 31, 1 ; CHECK-NEXT: lwz 4, 28(1) @@ -244,22 +244,22 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: stfd 31, 112(1) ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: stw 3, 148(1) +; CHECK-NEXT: lis 4, 16864 ; CHECK-NEXT: stw 3, 140(1) ; CHECK-NEXT: stw 3, 136(1) -; CHECK-NEXT: stfd 30, 104(1) -; CHECK-NEXT: lis 4, 16864 ; CHECK-NEXT: lwz 3, 116(1) -; CHECK-NEXT: stw 4, 144(1) -; CHECK-NEXT: lfd 4, 136(1) +; CHECK-NEXT: stfd 30, 104(1) ; CHECK-NEXT: stw 3, 132(1) -; CHECK-NEXT: lfd 3, 144(1) ; CHECK-NEXT: lwz 3, 112(1) +; CHECK-NEXT: stw 4, 144(1) ; CHECK-NEXT: stw 3, 128(1) ; CHECK-NEXT: lwz 3, 108(1) -; CHECK-NEXT: lfd 1, 128(1) +; CHECK-NEXT: lfd 3, 144(1) ; CHECK-NEXT: stw 3, 124(1) ; CHECK-NEXT: lwz 3, 104(1) +; CHECK-NEXT: lfd 4, 136(1) ; CHECK-NEXT: stw 3, 120(1) +; CHECK-NEXT: lfd 1, 128(1) ; CHECK-NEXT: lfd 2, 120(1) ; CHECK-NEXT: bl __gcc_qsub ; CHECK-NEXT: mffs 0 @@ -278,9 +278,9 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: fadd 2, 30, 31 ; CHECK-NEXT: mtfsf 1, 1 ; CHECK-NEXT: lfs 1, .LCPI0_1@l(3) -; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: fctiwz 2, 2 ; CHECK-NEXT: stfd 2, 88(1) +; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: lwz 3, 100(1) ; CHECK-NEXT: fcmpu 1, 31, 1 ; CHECK-NEXT: lwz 4, 92(1) @@ -300,8 +300,8 @@ define i64 @__fixunstfdi(ppc_fp128 %a) 
nounwind readnone { ; CHECK-NEXT: lfd 28, 432(1) # 8-byte Folded Reload ; CHECK-NEXT: lwz 12, 408(1) ; CHECK-NEXT: lfd 27, 424(1) # 8-byte Folded Reload -; CHECK-NEXT: lwz 30, 416(1) # 4-byte Folded Reload ; CHECK-NEXT: mtcrf 32, 12 # cr2 +; CHECK-NEXT: lwz 30, 416(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 29, 412(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 0, 468(1) ; CHECK-NEXT: addi 1, 1, 464 diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll index d155a78812257a..52070aa9063d6c 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -704,8 +704,8 @@ declare void @test_vararg(i32, ...) ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) ; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) ; 32BIT-NEXT: renamable $r6 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) @@ -773,8 +773,8 @@ entry: ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) ; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r4 = LWZ 0, 
%stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) ; 32BIT-NEXT: renamable $r7 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) @@ -844,8 +844,8 @@ entry: ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) ; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) ; 32BIT-NEXT: renamable $r8 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) diff --git a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll index 9f521788a3fccf..c276d4ccc39523 100644 --- a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll +++ b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll @@ -68,15 +68,15 @@ ; 32BIT-DAG: STW killed renamable $r8, 16, %fixed-stack.0 :: (store 4) ; 32BIT-DAG: STW killed renamable $r9, 20, %fixed-stack.0 :: (store 4) ; 32BIT-DAG: STW killed renamable $r10, 24, %fixed-stack.0 :: (store 4) -; 32BIT-DAG: STW killed renamable $r5, 0, %stack.1.arg2 :: (store 4 into %ir.arg2) -; 32BIT-DAG: renamable $r5 = ADDI %fixed-stack.0, 4 -; 32BIT-DAG: STW killed 
renamable $r4, 0, %stack.1.arg2 :: (store 4 into %ir.1) -; 32BIT-DAG: renamable $r4 = ADDI %fixed-stack.0, 0 -; 32BIT-DAG: STW renamable $r4, 0, %stack.0.arg1 :: (store 4 into %ir.0) -; 32BIT-DAG: STW renamable $r5, 0, %stack.0.arg1 :: (store 4 into %ir.arg1) -; 32BIT-DAG: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.2) -; 32BIT-DAG: renamable $r5 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.4) -; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r4, killed renamable $r3 +; 32BIT-DAG: STW killed renamable $r4, 0, %stack.1.arg2 :: (store 4 into %ir.arg2) +; 32BIT-DAG: renamable $r4 = ADDI %fixed-stack.0, 4 +; 32BIT-DAG: STW killed renamable $r11, 0, %stack.1.arg2 :: (store 4 into %ir.1) +; 32BIT-DAG: renamable $r11 = ADDI %fixed-stack.0, 0 +; 32BIT-DAG: STW renamable $r11, 0, %stack.0.arg1 :: (store 4 into %ir.0) +; 32BIT-DAG: STW renamable $r4, 0, %stack.0.arg1 :: (store 4 into %ir.arg1) +; 32BIT-DAG: renamable $r5 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.2) +; 32BIT-DAG: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.4) +; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r5, killed renamable $r3 ; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4 ; 32BIT-DAG: BLR implicit $lr, implicit $rm, implicit $r3 diff --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll index 54ceccd9c59ab1..fa57f50cb43df2 100644 --- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll +++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll @@ -28,9 +28,9 @@ entry: ; PPC32-DAG: stfd 2, 16(1) ; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1) ; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1) +; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI0]], 0, 0, 0 ; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1) ; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1) -; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI0]], 0, 0, 0 ; PPC32-DAG: xor [[HI0]], [[HI0]], [[FLIP_BIT]] ; PPC32-DAG: xor [[LO0]], [[LO0]], 
[[FLIP_BIT]] ; PPC32: blr @@ -68,9 +68,9 @@ entry: ; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1) ; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1) ; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1) -; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1) ; PPC32-NOT: BARRIER ; PPC32-DAG: xoris [[HI0]], [[HI0]], 32768 +; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1) ; PPC32-DAG: xoris [[LO0]], [[LO0]], 32768 ; PPC32: blr %0 = fsub ppc_fp128 0xM80000000000000000000000000000000, %x diff --git a/llvm/test/CodeGen/PowerPC/inc-of-add.ll b/llvm/test/CodeGen/PowerPC/inc-of-add.ll index fa03379a3c3076..90004143326fbc 100644 --- a/llvm/test/CodeGen/PowerPC/inc-of-add.ll +++ b/llvm/test/CodeGen/PowerPC/inc-of-add.ll @@ -65,88 +65,88 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC32: # %bb.0: ; PPC32-NEXT: stwu 1, -64(1) ; PPC32-NEXT: stw 21, 20(1) # 4-byte Folded Spill -; PPC32-NEXT: lbz 4, 119(1) -; PPC32-NEXT: lbz 11, 115(1) -; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill -; PPC32-NEXT: add 4, 4, 6 ; PPC32-NEXT: lbz 21, 123(1) -; PPC32-NEXT: lbz 6, 131(1) -; PPC32-NEXT: add 5, 11, 5 -; PPC32-NEXT: lbz 11, 127(1) +; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill ; PPC32-NEXT: add 7, 21, 7 +; PPC32-NEXT: lbz 23, 115(1) +; PPC32-NEXT: lbz 22, 119(1) ; PPC32-NEXT: lbz 21, 135(1) -; PPC32-NEXT: lbz 24, 83(1) -; PPC32-NEXT: lbz 23, 79(1) -; PPC32-NEXT: add 6, 6, 9 +; PPC32-NEXT: add 5, 23, 5 +; PPC32-NEXT: lbz 23, 127(1) +; PPC32-NEXT: add 6, 22, 6 +; PPC32-NEXT: lbz 22, 131(1) ; PPC32-NEXT: add 10, 21, 10 -; PPC32-NEXT: lbz 21, 147(1) -; PPC32-NEXT: lbz 9, 143(1) -; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill -; PPC32-NEXT: add 8, 11, 8 -; PPC32-NEXT: lbz 22, 75(1) -; PPC32-NEXT: lbz 11, 139(1) -; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill -; PPC32-NEXT: add 24, 21, 24 -; PPC32-NEXT: lbz 27, 95(1) -; PPC32-NEXT: lbz 21, 159(1) ; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill -; 
PPC32-NEXT: add 9, 9, 23 -; PPC32-NEXT: lbz 26, 91(1) -; PPC32-NEXT: lbz 23, 155(1) +; PPC32-NEXT: add 8, 23, 8 +; PPC32-NEXT: lbz 26, 83(1) +; PPC32-NEXT: add 9, 22, 9 +; PPC32-NEXT: lbz 21, 147(1) +; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill -; PPC32-NEXT: add 11, 11, 22 -; PPC32-NEXT: lbz 25, 87(1) -; PPC32-NEXT: lbz 22, 151(1) -; PPC32-NEXT: lbz 12, 111(1) -; PPC32-NEXT: add 27, 21, 27 -; PPC32-NEXT: lbz 21, 175(1) -; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill -; PPC32-NEXT: lbz 0, 107(1) +; PPC32-NEXT: add 26, 21, 26 +; PPC32-NEXT: lbz 25, 79(1) +; PPC32-NEXT: lbz 24, 75(1) +; PPC32-NEXT: lbz 23, 139(1) +; PPC32-NEXT: lbz 22, 143(1) ; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill -; PPC32-NEXT: add 26, 23, 26 -; PPC32-NEXT: lbz 30, 171(1) -; PPC32-NEXT: lbz 29, 103(1) -; PPC32-NEXT: lbz 23, 167(1) -; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: add 24, 23, 24 +; PPC32-NEXT: lbz 29, 95(1) ; PPC32-NEXT: add 25, 22, 25 -; PPC32-NEXT: lbz 28, 99(1) -; PPC32-NEXT: lbz 22, 163(1) -; PPC32-NEXT: add 12, 21, 12 -; PPC32-NEXT: add 30, 30, 0 -; PPC32-NEXT: addi 12, 12, 1 -; PPC32-NEXT: add 29, 23, 29 -; PPC32-NEXT: stb 12, 15(3) -; PPC32-NEXT: addi 12, 30, 1 +; PPC32-NEXT: lbz 21, 159(1) +; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: add 29, 21, 29 +; PPC32-NEXT: lbz 28, 91(1) +; PPC32-NEXT: lbz 27, 87(1) +; PPC32-NEXT: lbz 23, 151(1) +; PPC32-NEXT: lbz 22, 155(1) +; PPC32-NEXT: lbz 4, 111(1) +; PPC32-NEXT: add 27, 23, 27 +; PPC32-NEXT: lbz 21, 175(1) ; PPC32-NEXT: add 28, 22, 28 -; PPC32-NEXT: stb 12, 14(3) -; PPC32-NEXT: addi 12, 29, 1 -; PPC32-NEXT: stb 12, 13(3) -; PPC32-NEXT: addi 12, 28, 1 -; PPC32-NEXT: stb 12, 12(3) -; PPC32-NEXT: addi 12, 27, 1 -; PPC32-NEXT: stb 12, 11(3) -; PPC32-NEXT: addi 12, 26, 1 -; PPC32-NEXT: addi 9, 9, 1 -; PPC32-NEXT: addi 6, 6, 1 -; PPC32-NEXT: stb 12, 10(3) -; PPC32-NEXT: addi 12, 25, 
1 -; PPC32-NEXT: stb 9, 7(3) -; PPC32-NEXT: addi 9, 11, 1 -; PPC32-NEXT: stb 6, 4(3) -; PPC32-NEXT: addi 6, 8, 1 +; PPC32-NEXT: lbz 11, 107(1) +; PPC32-NEXT: lbz 12, 171(1) +; PPC32-NEXT: add 4, 21, 4 +; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill ; PPC32-NEXT: addi 4, 4, 1 -; PPC32-NEXT: stb 12, 9(3) -; PPC32-NEXT: addi 12, 24, 1 -; PPC32-NEXT: stb 9, 6(3) -; PPC32-NEXT: addi 9, 10, 1 -; PPC32-NEXT: stb 6, 3(3) -; PPC32-NEXT: addi 6, 7, 1 +; PPC32-NEXT: lbz 0, 103(1) +; PPC32-NEXT: add 11, 12, 11 +; PPC32-NEXT: lbz 30, 99(1) +; PPC32-NEXT: lbz 23, 163(1) +; PPC32-NEXT: lbz 22, 167(1) +; PPC32-NEXT: add 30, 23, 30 +; PPC32-NEXT: stb 4, 15(3) +; PPC32-NEXT: add 23, 22, 0 +; PPC32-NEXT: addi 4, 11, 1 +; PPC32-NEXT: stb 4, 14(3) +; PPC32-NEXT: addi 4, 23, 1 +; PPC32-NEXT: stb 4, 13(3) +; PPC32-NEXT: addi 4, 30, 1 +; PPC32-NEXT: stb 4, 12(3) +; PPC32-NEXT: addi 4, 29, 1 +; PPC32-NEXT: stb 4, 11(3) +; PPC32-NEXT: addi 4, 28, 1 +; PPC32-NEXT: stb 4, 10(3) +; PPC32-NEXT: addi 4, 27, 1 +; PPC32-NEXT: stb 4, 9(3) +; PPC32-NEXT: addi 4, 26, 1 +; PPC32-NEXT: stb 4, 8(3) +; PPC32-NEXT: addi 4, 25, 1 +; PPC32-NEXT: stb 4, 7(3) +; PPC32-NEXT: addi 4, 24, 1 +; PPC32-NEXT: stb 4, 6(3) +; PPC32-NEXT: addi 4, 10, 1 +; PPC32-NEXT: stb 4, 5(3) +; PPC32-NEXT: addi 4, 9, 1 +; PPC32-NEXT: stb 4, 4(3) +; PPC32-NEXT: addi 4, 8, 1 +; PPC32-NEXT: stb 4, 3(3) +; PPC32-NEXT: addi 4, 7, 1 +; PPC32-NEXT: stb 4, 2(3) +; PPC32-NEXT: addi 4, 6, 1 ; PPC32-NEXT: stb 4, 1(3) ; PPC32-NEXT: addi 4, 5, 1 -; PPC32-NEXT: stb 12, 8(3) -; PPC32-NEXT: stb 9, 5(3) -; PPC32-NEXT: stb 6, 2(3) ; PPC32-NEXT: stb 4, 0(3) ; PPC32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload ; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload @@ -165,79 +165,79 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC64BE: # %bb.0: ; PPC64BE-NEXT: std 21, -88(1) # 8-byte Folded Spill ; PPC64BE-NEXT: lbz 21, 207(1) -; PPC64BE-NEXT: lbz 11, 199(1) -; PPC64BE-NEXT: lbz 12, 191(1) -; PPC64BE-NEXT: std 23, -72(1) # 
8-byte Folded Spill ; PPC64BE-NEXT: std 22, -80(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 24, -64(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lbz 0, 183(1) +; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PPC64BE-NEXT: lbz 22, 199(1) +; PPC64BE-NEXT: lbz 23, 191(1) ; PPC64BE-NEXT: add 6, 21, 6 ; PPC64BE-NEXT: lbz 21, 231(1) -; PPC64BE-NEXT: add 5, 11, 5 -; PPC64BE-NEXT: lbz 11, 223(1) -; PPC64BE-NEXT: add 4, 12, 4 -; PPC64BE-NEXT: lbz 12, 215(1) -; PPC64BE-NEXT: lbz 23, 127(1) +; PPC64BE-NEXT: add 5, 22, 5 +; PPC64BE-NEXT: lbz 22, 223(1) +; PPC64BE-NEXT: add 4, 23, 4 +; PPC64BE-NEXT: lbz 23, 215(1) ; PPC64BE-NEXT: add 9, 21, 9 +; PPC64BE-NEXT: lbz 25, 127(1) +; PPC64BE-NEXT: add 8, 22, 8 ; PPC64BE-NEXT: lbz 21, 255(1) -; PPC64BE-NEXT: lbz 22, 119(1) -; PPC64BE-NEXT: add 8, 11, 8 -; PPC64BE-NEXT: lbz 11, 247(1) -; PPC64BE-NEXT: add 7, 12, 7 -; PPC64BE-NEXT: lbz 12, 239(1) -; PPC64BE-NEXT: lbz 26, 151(1) -; PPC64BE-NEXT: add 23, 21, 23 -; PPC64BE-NEXT: lbz 21, 279(1) -; PPC64BE-NEXT: lbz 25, 143(1) -; PPC64BE-NEXT: add 11, 11, 22 -; PPC64BE-NEXT: lbz 22, 271(1) -; PPC64BE-NEXT: lbz 24, 135(1) -; PPC64BE-NEXT: add 10, 12, 10 -; PPC64BE-NEXT: lbz 12, 263(1) -; PPC64BE-NEXT: lbz 30, 175(1) -; PPC64BE-NEXT: lbz 29, 303(1) -; PPC64BE-NEXT: add 26, 21, 26 -; PPC64BE-NEXT: lbz 21, 311(1) -; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; PPC64BE-NEXT: add 25, 22, 25 -; PPC64BE-NEXT: lbz 28, 167(1) -; PPC64BE-NEXT: lbz 22, 295(1) -; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; PPC64BE-NEXT: add 12, 12, 24 -; PPC64BE-NEXT: lbz 27, 
159(1) -; PPC64BE-NEXT: lbz 24, 287(1) -; PPC64BE-NEXT: add 30, 29, 30 -; PPC64BE-NEXT: add 29, 21, 0 -; PPC64BE-NEXT: addi 0, 29, 1 -; PPC64BE-NEXT: add 28, 22, 28 -; PPC64BE-NEXT: stb 0, 15(3) -; PPC64BE-NEXT: addi 0, 30, 1 -; PPC64BE-NEXT: add 27, 24, 27 -; PPC64BE-NEXT: stb 0, 14(3) -; PPC64BE-NEXT: addi 0, 28, 1 -; PPC64BE-NEXT: stb 0, 13(3) -; PPC64BE-NEXT: addi 0, 27, 1 -; PPC64BE-NEXT: stb 0, 12(3) -; PPC64BE-NEXT: addi 0, 26, 1 -; PPC64BE-NEXT: addi 12, 12, 1 -; PPC64BE-NEXT: stb 0, 11(3) -; PPC64BE-NEXT: addi 0, 25, 1 -; PPC64BE-NEXT: stb 12, 9(3) -; PPC64BE-NEXT: addi 12, 23, 1 -; PPC64BE-NEXT: addi 11, 11, 1 -; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: add 7, 23, 7 +; PPC64BE-NEXT: lbz 24, 119(1) ; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: lbz 22, 247(1) +; PPC64BE-NEXT: add 25, 21, 25 +; PPC64BE-NEXT: lbz 23, 239(1) ; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: lbz 28, 151(1) +; PPC64BE-NEXT: add 24, 22, 24 +; PPC64BE-NEXT: lbz 21, 279(1) +; PPC64BE-NEXT: add 10, 23, 10 +; PPC64BE-NEXT: lbz 27, 143(1) +; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: lbz 22, 271(1) +; PPC64BE-NEXT: add 28, 21, 28 +; PPC64BE-NEXT: lbz 26, 135(1) ; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: lbz 23, 263(1) +; PPC64BE-NEXT: add 27, 22, 27 +; PPC64BE-NEXT: lbz 11, 183(1) ; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: lbz 21, 311(1) +; PPC64BE-NEXT: add 26, 23, 26 +; PPC64BE-NEXT: lbz 12, 175(1) ; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: lbz 0, 303(1) +; PPC64BE-NEXT: add 11, 21, 11 +; PPC64BE-NEXT: lbz 30, 167(1) +; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: lbz 22, 295(1) +; PPC64BE-NEXT: add 12, 0, 12 +; PPC64BE-NEXT: lbz 29, 159(1) ; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: stb 0, 10(3) -; PPC64BE-NEXT: stb 12, 8(3) +; PPC64BE-NEXT: lbz 23, 287(1) +; PPC64BE-NEXT: add 30, 22, 30 +; PPC64BE-NEXT: stb 11, 15(3) +; PPC64BE-NEXT: addi 11, 12, 1 +; PPC64BE-NEXT: add 29, 23, 29 +; PPC64BE-NEXT: stb 11, 14(3) +; PPC64BE-NEXT: addi 11, 30, 1 +; PPC64BE-NEXT: stb 
11, 13(3) +; PPC64BE-NEXT: addi 11, 29, 1 +; PPC64BE-NEXT: stb 11, 12(3) +; PPC64BE-NEXT: addi 11, 28, 1 +; PPC64BE-NEXT: stb 11, 11(3) +; PPC64BE-NEXT: addi 11, 27, 1 +; PPC64BE-NEXT: stb 11, 10(3) +; PPC64BE-NEXT: addi 11, 26, 1 +; PPC64BE-NEXT: stb 11, 9(3) +; PPC64BE-NEXT: addi 11, 25, 1 +; PPC64BE-NEXT: stb 11, 8(3) +; PPC64BE-NEXT: addi 11, 24, 1 ; PPC64BE-NEXT: stb 11, 7(3) ; PPC64BE-NEXT: stb 10, 6(3) ; PPC64BE-NEXT: stb 9, 5(3) @@ -277,23 +277,23 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC32-NEXT: stw 28, 16(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 29, 20(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; PPC32-NEXT: lhz 11, 50(1) -; PPC32-NEXT: lhz 12, 46(1) -; PPC32-NEXT: lhz 0, 42(1) -; PPC32-NEXT: lhz 30, 70(1) -; PPC32-NEXT: lhz 29, 66(1) -; PPC32-NEXT: lhz 28, 62(1) -; PPC32-NEXT: lhz 27, 58(1) +; PPC32-NEXT: lhz 11, 70(1) +; PPC32-NEXT: lhz 12, 66(1) +; PPC32-NEXT: lhz 0, 62(1) +; PPC32-NEXT: add 10, 11, 10 +; PPC32-NEXT: lhz 30, 58(1) +; PPC32-NEXT: add 9, 12, 9 +; PPC32-NEXT: lhz 29, 50(1) +; PPC32-NEXT: add 8, 0, 8 +; PPC32-NEXT: lhz 28, 42(1) +; PPC32-NEXT: add 7, 30, 7 +; PPC32-NEXT: lhz 27, 46(1) +; PPC32-NEXT: add 5, 29, 5 ; PPC32-NEXT: lhz 26, 54(1) -; PPC32-NEXT: add 3, 0, 3 -; PPC32-NEXT: add 4, 12, 4 -; PPC32-NEXT: add 5, 11, 5 -; PPC32-NEXT: add 6, 26, 6 -; PPC32-NEXT: add 7, 27, 7 -; PPC32-NEXT: add 8, 28, 8 -; PPC32-NEXT: add 9, 29, 9 -; PPC32-NEXT: add 10, 30, 10 +; PPC32-NEXT: add 3, 28, 3 +; PPC32-NEXT: add 4, 27, 4 ; PPC32-NEXT: addi 3, 3, 1 +; PPC32-NEXT: add 6, 26, 6 ; PPC32-NEXT: addi 4, 4, 1 ; PPC32-NEXT: addi 5, 5, 1 ; PPC32-NEXT: addi 6, 6, 1 @@ -317,31 +317,31 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lhz 11, 142(1) -; PPC64BE-NEXT: lhz 12, 134(1) -; 
PPC64BE-NEXT: lhz 0, 126(1) -; PPC64BE-NEXT: lhz 30, 118(1) -; PPC64BE-NEXT: lhz 29, 182(1) -; PPC64BE-NEXT: lhz 28, 174(1) -; PPC64BE-NEXT: lhz 27, 166(1) -; PPC64BE-NEXT: lhz 26, 158(1) +; PPC64BE-NEXT: lhz 11, 118(1) +; PPC64BE-NEXT: lhz 12, 182(1) +; PPC64BE-NEXT: lhz 0, 174(1) +; PPC64BE-NEXT: lhz 30, 166(1) +; PPC64BE-NEXT: add 11, 12, 11 +; PPC64BE-NEXT: lhz 29, 158(1) +; PPC64BE-NEXT: add 10, 0, 10 +; PPC64BE-NEXT: lhz 28, 142(1) +; PPC64BE-NEXT: add 9, 30, 9 +; PPC64BE-NEXT: lhz 27, 126(1) +; PPC64BE-NEXT: add 8, 29, 8 +; PPC64BE-NEXT: lhz 26, 134(1) +; PPC64BE-NEXT: add 6, 28, 6 ; PPC64BE-NEXT: lhz 25, 150(1) -; PPC64BE-NEXT: add 4, 0, 4 -; PPC64BE-NEXT: add 5, 12, 5 -; PPC64BE-NEXT: add 6, 11, 6 +; PPC64BE-NEXT: add 4, 27, 4 +; PPC64BE-NEXT: add 5, 26, 5 +; PPC64BE-NEXT: addi 11, 11, 1 ; PPC64BE-NEXT: add 7, 25, 7 -; PPC64BE-NEXT: add 8, 26, 8 -; PPC64BE-NEXT: add 9, 27, 9 -; PPC64BE-NEXT: add 10, 28, 10 -; PPC64BE-NEXT: add 11, 29, 30 -; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: addi 5, 5, 1 -; PPC64BE-NEXT: addi 6, 6, 1 -; PPC64BE-NEXT: addi 7, 7, 1 -; PPC64BE-NEXT: addi 8, 8, 1 -; PPC64BE-NEXT: addi 9, 9, 1 ; PPC64BE-NEXT: addi 10, 10, 1 -; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: addi 4, 4, 1 ; PPC64BE-NEXT: sth 11, 14(3) ; PPC64BE-NEXT: sth 10, 12(3) ; PPC64BE-NEXT: sth 9, 10(3) diff --git a/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll b/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll index 42cbb30318bceb..5fae34f212cccc 100644 --- a/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll +++ b/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll @@ -17,9 +17,9 @@ entry: ; argument put on stack. ; CHECK-NOT: mr 8, 4 ; CHECK: stw 6, 16(1) +; CHECK: stw 7, 20(1) ; CHECK: stw 5, 12(1) ; CHECK: stw 4, 8(1) -; CHECK: stw 7, 20(1) declare i32 @printf(i8* nocapture readonly, ...) 
diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll index c9d9cf870e49fc..b87f1a682e25aa 100644 --- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -1442,19 +1442,19 @@ define void @test_constrained_libcall_multichain(float* %firstptr, ppc_fp128* %r ; PC64-NEXT: mr 29, 3 ; PC64-NEXT: li 3, 0 ; PC64-NEXT: stfd 31, 168(1) # 8-byte Folded Spill -; PC64-NEXT: stfd 30, 160(1) # 8-byte Folded Spill ; PC64-NEXT: std 30, 128(1) # 8-byte Folded Spill -; PC64-NEXT: stfd 28, 144(1) # 8-byte Folded Spill -; PC64-NEXT: stfd 29, 152(1) # 8-byte Folded Spill ; PC64-NEXT: mr 30, 4 ; PC64-NEXT: lfs 31, 0(29) ; PC64-NEXT: std 3, 8(4) ; PC64-NEXT: addis 3, 2, .LCPI32_0@toc@ha +; PC64-NEXT: stfd 30, 160(1) # 8-byte Folded Spill ; PC64-NEXT: lfs 30, .LCPI32_0@toc@l(3) ; PC64-NEXT: fmr 1, 31 ; PC64-NEXT: fmr 3, 31 +; PC64-NEXT: stfd 28, 144(1) # 8-byte Folded Spill ; PC64-NEXT: fmr 2, 30 ; PC64-NEXT: fmr 4, 30 +; PC64-NEXT: stfd 29, 152(1) # 8-byte Folded Spill ; PC64-NEXT: stfd 31, 0(4) ; PC64-NEXT: bl __gcc_qadd ; PC64-NEXT: nop @@ -1475,14 +1475,14 @@ define void @test_constrained_libcall_multichain(float* %firstptr, ppc_fp128* %r ; PC64-NEXT: nop ; PC64-NEXT: frsp 0, 1 ; PC64-NEXT: stfs 0, 0(29) -; PC64-NEXT: lfd 31, 168(1) # 8-byte Folded Reload -; PC64-NEXT: lfd 30, 160(1) # 8-byte Folded Reload -; PC64-NEXT: lfd 29, 152(1) # 8-byte Folded Reload -; PC64-NEXT: lfd 28, 144(1) # 8-byte Folded Reload ; PC64-NEXT: ld 29, 120(1) # 8-byte Folded Reload ; PC64-NEXT: stfd 1, -16(30) ; PC64-NEXT: stfd 2, -8(30) ; PC64-NEXT: ld 30, 128(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 31, 168(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 30, 160(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 29, 152(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 28, 144(1) # 8-byte Folded Reload ; PC64-NEXT: addi 1, 1, 176 ; PC64-NEXT: ld 0, 
16(1) ; PC64-NEXT: mtlr 0 diff --git a/llvm/test/CodeGen/PowerPC/pr43976.ll b/llvm/test/CodeGen/PowerPC/pr43976.ll index 91722283f4ae01..9dc1a52c567f57 100644 --- a/llvm/test/CodeGen/PowerPC/pr43976.ll +++ b/llvm/test/CodeGen/PowerPC/pr43976.ll @@ -10,11 +10,11 @@ define dso_local signext i32 @b() local_unnamed_addr #0 { ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -144(r1) ; CHECK-NEXT: addis r3, r2, a@toc@ha -; CHECK-NEXT: addis r4, r2, .LCPI0_0@toc@ha -; CHECK-NEXT: lfd f0, a@toc@l(r3) -; CHECK-NEXT: lfs f1, .LCPI0_0@toc@l(r4) ; CHECK-NEXT: li r4, 1 +; CHECK-NEXT: lfd f0, a@toc@l(r3) +; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; CHECK-NEXT: sldi r4, r4, 63 +; CHECK-NEXT: lfs f1, .LCPI0_0@toc@l(r3) ; CHECK-NEXT: fsub f2, f0, f1 ; CHECK-NEXT: fctidz f2, f2 ; CHECK-NEXT: stfd f2, 128(r1) diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll index d2400be43cb490..1c4c7a33981770 100644 --- a/llvm/test/CodeGen/PowerPC/spe.ll +++ b/llvm/test/CodeGen/PowerPC/spe.ll @@ -1297,6 +1297,8 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* ; CHECK-NEXT: evlddx 31, 1, 5 # 8-byte Folded Reload ; CHECK-NEXT: li 5, 256 ; CHECK-NEXT: evlddx 30, 1, 5 # 8-byte Folded Reload +; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 +; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 ; CHECK-NEXT: evldd 29, 248(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 28, 240(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 27, 232(1) # 8-byte Folded Reload @@ -1313,8 +1315,6 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* ; CHECK-NEXT: evldd 16, 144(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 15, 136(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 14, 128(1) # 8-byte Folded Reload -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 ; CHECK-NEXT: lwz 31, 348(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 30, 344(1) # 4-byte Folded Reload ; CHECK-NEXT: 
lwz 29, 340(1) # 4-byte Folded Reload @@ -1392,8 +1392,8 @@ define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 { ; CHECK-NEXT: # implicit-def: $r5 ; CHECK-NEXT: .LBB57_4: # %for.cond.cleanup ; CHECK-NEXT: evldd 30, 16(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload ; CHECK-NEXT: mr 3, 5 +; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload ; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 0, 52(1) diff --git a/llvm/test/CodeGen/PowerPC/sub-of-not.ll b/llvm/test/CodeGen/PowerPC/sub-of-not.ll index db92a3eb1bee95..d2b55aaf7ac839 100644 --- a/llvm/test/CodeGen/PowerPC/sub-of-not.ll +++ b/llvm/test/CodeGen/PowerPC/sub-of-not.ll @@ -65,88 +65,88 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC32: # %bb.0: ; PPC32-NEXT: stwu 1, -64(1) ; PPC32-NEXT: stw 21, 20(1) # 4-byte Folded Spill -; PPC32-NEXT: lbz 4, 119(1) -; PPC32-NEXT: lbz 11, 115(1) -; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill -; PPC32-NEXT: add 4, 4, 6 ; PPC32-NEXT: lbz 21, 123(1) -; PPC32-NEXT: lbz 6, 131(1) -; PPC32-NEXT: add 5, 11, 5 -; PPC32-NEXT: lbz 11, 127(1) +; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill ; PPC32-NEXT: add 7, 21, 7 +; PPC32-NEXT: lbz 23, 115(1) +; PPC32-NEXT: lbz 22, 119(1) ; PPC32-NEXT: lbz 21, 135(1) -; PPC32-NEXT: lbz 24, 83(1) -; PPC32-NEXT: lbz 23, 79(1) -; PPC32-NEXT: add 6, 6, 9 +; PPC32-NEXT: add 5, 23, 5 +; PPC32-NEXT: lbz 23, 127(1) +; PPC32-NEXT: add 6, 22, 6 +; PPC32-NEXT: lbz 22, 131(1) ; PPC32-NEXT: add 10, 21, 10 -; PPC32-NEXT: lbz 21, 147(1) -; PPC32-NEXT: lbz 9, 143(1) -; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill -; PPC32-NEXT: add 8, 11, 8 -; PPC32-NEXT: lbz 22, 75(1) -; PPC32-NEXT: lbz 11, 139(1) -; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill -; PPC32-NEXT: add 24, 21, 24 -; PPC32-NEXT: lbz 27, 95(1) -; 
PPC32-NEXT: lbz 21, 159(1) ; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill -; PPC32-NEXT: add 9, 9, 23 -; PPC32-NEXT: lbz 26, 91(1) -; PPC32-NEXT: lbz 23, 155(1) +; PPC32-NEXT: add 8, 23, 8 +; PPC32-NEXT: lbz 26, 83(1) +; PPC32-NEXT: add 9, 22, 9 +; PPC32-NEXT: lbz 21, 147(1) +; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill -; PPC32-NEXT: add 11, 11, 22 -; PPC32-NEXT: lbz 25, 87(1) -; PPC32-NEXT: lbz 22, 151(1) -; PPC32-NEXT: lbz 12, 111(1) -; PPC32-NEXT: add 27, 21, 27 -; PPC32-NEXT: lbz 21, 175(1) -; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill -; PPC32-NEXT: lbz 0, 107(1) +; PPC32-NEXT: add 26, 21, 26 +; PPC32-NEXT: lbz 25, 79(1) +; PPC32-NEXT: lbz 24, 75(1) +; PPC32-NEXT: lbz 23, 139(1) +; PPC32-NEXT: lbz 22, 143(1) ; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill -; PPC32-NEXT: add 26, 23, 26 -; PPC32-NEXT: lbz 30, 171(1) -; PPC32-NEXT: lbz 29, 103(1) -; PPC32-NEXT: lbz 23, 167(1) -; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: add 24, 23, 24 +; PPC32-NEXT: lbz 29, 95(1) ; PPC32-NEXT: add 25, 22, 25 -; PPC32-NEXT: lbz 28, 99(1) -; PPC32-NEXT: lbz 22, 163(1) -; PPC32-NEXT: add 12, 21, 12 -; PPC32-NEXT: add 30, 30, 0 -; PPC32-NEXT: addi 12, 12, 1 -; PPC32-NEXT: add 29, 23, 29 -; PPC32-NEXT: stb 12, 15(3) -; PPC32-NEXT: addi 12, 30, 1 +; PPC32-NEXT: lbz 21, 159(1) +; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: add 29, 21, 29 +; PPC32-NEXT: lbz 28, 91(1) +; PPC32-NEXT: lbz 27, 87(1) +; PPC32-NEXT: lbz 23, 151(1) +; PPC32-NEXT: lbz 22, 155(1) +; PPC32-NEXT: lbz 4, 111(1) +; PPC32-NEXT: add 27, 23, 27 +; PPC32-NEXT: lbz 21, 175(1) ; PPC32-NEXT: add 28, 22, 28 -; PPC32-NEXT: stb 12, 14(3) -; PPC32-NEXT: addi 12, 29, 1 -; PPC32-NEXT: stb 12, 13(3) -; PPC32-NEXT: addi 12, 28, 1 -; PPC32-NEXT: stb 12, 12(3) -; PPC32-NEXT: addi 12, 27, 1 -; PPC32-NEXT: stb 12, 11(3) -; PPC32-NEXT: addi 12, 26, 1 -; PPC32-NEXT: addi 9, 9, 1 -; 
PPC32-NEXT: addi 6, 6, 1 -; PPC32-NEXT: stb 12, 10(3) -; PPC32-NEXT: addi 12, 25, 1 -; PPC32-NEXT: stb 9, 7(3) -; PPC32-NEXT: addi 9, 11, 1 -; PPC32-NEXT: stb 6, 4(3) -; PPC32-NEXT: addi 6, 8, 1 +; PPC32-NEXT: lbz 11, 107(1) +; PPC32-NEXT: lbz 12, 171(1) +; PPC32-NEXT: add 4, 21, 4 +; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill ; PPC32-NEXT: addi 4, 4, 1 -; PPC32-NEXT: stb 12, 9(3) -; PPC32-NEXT: addi 12, 24, 1 -; PPC32-NEXT: stb 9, 6(3) -; PPC32-NEXT: addi 9, 10, 1 -; PPC32-NEXT: stb 6, 3(3) -; PPC32-NEXT: addi 6, 7, 1 +; PPC32-NEXT: lbz 0, 103(1) +; PPC32-NEXT: add 11, 12, 11 +; PPC32-NEXT: lbz 30, 99(1) +; PPC32-NEXT: lbz 23, 163(1) +; PPC32-NEXT: lbz 22, 167(1) +; PPC32-NEXT: add 30, 23, 30 +; PPC32-NEXT: stb 4, 15(3) +; PPC32-NEXT: add 23, 22, 0 +; PPC32-NEXT: addi 4, 11, 1 +; PPC32-NEXT: stb 4, 14(3) +; PPC32-NEXT: addi 4, 23, 1 +; PPC32-NEXT: stb 4, 13(3) +; PPC32-NEXT: addi 4, 30, 1 +; PPC32-NEXT: stb 4, 12(3) +; PPC32-NEXT: addi 4, 29, 1 +; PPC32-NEXT: stb 4, 11(3) +; PPC32-NEXT: addi 4, 28, 1 +; PPC32-NEXT: stb 4, 10(3) +; PPC32-NEXT: addi 4, 27, 1 +; PPC32-NEXT: stb 4, 9(3) +; PPC32-NEXT: addi 4, 26, 1 +; PPC32-NEXT: stb 4, 8(3) +; PPC32-NEXT: addi 4, 25, 1 +; PPC32-NEXT: stb 4, 7(3) +; PPC32-NEXT: addi 4, 24, 1 +; PPC32-NEXT: stb 4, 6(3) +; PPC32-NEXT: addi 4, 10, 1 +; PPC32-NEXT: stb 4, 5(3) +; PPC32-NEXT: addi 4, 9, 1 +; PPC32-NEXT: stb 4, 4(3) +; PPC32-NEXT: addi 4, 8, 1 +; PPC32-NEXT: stb 4, 3(3) +; PPC32-NEXT: addi 4, 7, 1 +; PPC32-NEXT: stb 4, 2(3) +; PPC32-NEXT: addi 4, 6, 1 ; PPC32-NEXT: stb 4, 1(3) ; PPC32-NEXT: addi 4, 5, 1 -; PPC32-NEXT: stb 12, 8(3) -; PPC32-NEXT: stb 9, 5(3) -; PPC32-NEXT: stb 6, 2(3) ; PPC32-NEXT: stb 4, 0(3) ; PPC32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload ; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload @@ -165,79 +165,79 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC64BE: # %bb.0: ; PPC64BE-NEXT: std 21, -88(1) # 8-byte Folded Spill ; PPC64BE-NEXT: lbz 21, 207(1) -; 
PPC64BE-NEXT: lbz 11, 199(1) -; PPC64BE-NEXT: lbz 12, 191(1) -; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 22, -80(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 24, -64(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lbz 0, 183(1) +; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PPC64BE-NEXT: lbz 22, 199(1) +; PPC64BE-NEXT: lbz 23, 191(1) ; PPC64BE-NEXT: add 6, 21, 6 ; PPC64BE-NEXT: lbz 21, 231(1) -; PPC64BE-NEXT: add 5, 11, 5 -; PPC64BE-NEXT: lbz 11, 223(1) -; PPC64BE-NEXT: add 4, 12, 4 -; PPC64BE-NEXT: lbz 12, 215(1) -; PPC64BE-NEXT: lbz 23, 127(1) +; PPC64BE-NEXT: add 5, 22, 5 +; PPC64BE-NEXT: lbz 22, 223(1) +; PPC64BE-NEXT: add 4, 23, 4 +; PPC64BE-NEXT: lbz 23, 215(1) ; PPC64BE-NEXT: add 9, 21, 9 +; PPC64BE-NEXT: lbz 25, 127(1) +; PPC64BE-NEXT: add 8, 22, 8 ; PPC64BE-NEXT: lbz 21, 255(1) -; PPC64BE-NEXT: lbz 22, 119(1) -; PPC64BE-NEXT: add 8, 11, 8 -; PPC64BE-NEXT: lbz 11, 247(1) -; PPC64BE-NEXT: add 7, 12, 7 -; PPC64BE-NEXT: lbz 12, 239(1) -; PPC64BE-NEXT: lbz 26, 151(1) -; PPC64BE-NEXT: add 23, 21, 23 -; PPC64BE-NEXT: lbz 21, 279(1) -; PPC64BE-NEXT: lbz 25, 143(1) -; PPC64BE-NEXT: add 11, 11, 22 -; PPC64BE-NEXT: lbz 22, 271(1) -; PPC64BE-NEXT: lbz 24, 135(1) -; PPC64BE-NEXT: add 10, 12, 10 -; PPC64BE-NEXT: lbz 12, 263(1) -; PPC64BE-NEXT: lbz 30, 175(1) -; PPC64BE-NEXT: lbz 29, 303(1) -; PPC64BE-NEXT: add 26, 21, 26 -; PPC64BE-NEXT: lbz 21, 311(1) -; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; PPC64BE-NEXT: add 25, 22, 25 -; PPC64BE-NEXT: lbz 28, 167(1) -; PPC64BE-NEXT: lbz 22, 295(1) -; 
PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; PPC64BE-NEXT: add 12, 12, 24 -; PPC64BE-NEXT: lbz 27, 159(1) -; PPC64BE-NEXT: lbz 24, 287(1) -; PPC64BE-NEXT: add 30, 29, 30 -; PPC64BE-NEXT: add 29, 21, 0 -; PPC64BE-NEXT: addi 0, 29, 1 -; PPC64BE-NEXT: add 28, 22, 28 -; PPC64BE-NEXT: stb 0, 15(3) -; PPC64BE-NEXT: addi 0, 30, 1 -; PPC64BE-NEXT: add 27, 24, 27 -; PPC64BE-NEXT: stb 0, 14(3) -; PPC64BE-NEXT: addi 0, 28, 1 -; PPC64BE-NEXT: stb 0, 13(3) -; PPC64BE-NEXT: addi 0, 27, 1 -; PPC64BE-NEXT: stb 0, 12(3) -; PPC64BE-NEXT: addi 0, 26, 1 -; PPC64BE-NEXT: addi 12, 12, 1 -; PPC64BE-NEXT: stb 0, 11(3) -; PPC64BE-NEXT: addi 0, 25, 1 -; PPC64BE-NEXT: stb 12, 9(3) -; PPC64BE-NEXT: addi 12, 23, 1 -; PPC64BE-NEXT: addi 11, 11, 1 -; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: add 7, 23, 7 +; PPC64BE-NEXT: lbz 24, 119(1) ; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: lbz 22, 247(1) +; PPC64BE-NEXT: add 25, 21, 25 +; PPC64BE-NEXT: lbz 23, 239(1) ; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: lbz 28, 151(1) +; PPC64BE-NEXT: add 24, 22, 24 +; PPC64BE-NEXT: lbz 21, 279(1) +; PPC64BE-NEXT: add 10, 23, 10 +; PPC64BE-NEXT: lbz 27, 143(1) +; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: lbz 22, 271(1) +; PPC64BE-NEXT: add 28, 21, 28 +; PPC64BE-NEXT: lbz 26, 135(1) ; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: lbz 23, 263(1) +; PPC64BE-NEXT: add 27, 22, 27 +; PPC64BE-NEXT: lbz 11, 183(1) ; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: lbz 21, 311(1) +; PPC64BE-NEXT: add 26, 23, 26 +; PPC64BE-NEXT: lbz 12, 175(1) ; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: lbz 0, 303(1) +; PPC64BE-NEXT: add 11, 21, 11 +; PPC64BE-NEXT: lbz 30, 167(1) +; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: lbz 22, 295(1) +; PPC64BE-NEXT: add 12, 0, 12 +; PPC64BE-NEXT: lbz 29, 159(1) ; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: stb 0, 10(3) -; PPC64BE-NEXT: stb 12, 8(3) +; PPC64BE-NEXT: lbz 23, 287(1) +; PPC64BE-NEXT: add 30, 22, 30 +; PPC64BE-NEXT: stb 11, 15(3) +; PPC64BE-NEXT: addi 11, 12, 1 +; 
PPC64BE-NEXT: add 29, 23, 29 +; PPC64BE-NEXT: stb 11, 14(3) +; PPC64BE-NEXT: addi 11, 30, 1 +; PPC64BE-NEXT: stb 11, 13(3) +; PPC64BE-NEXT: addi 11, 29, 1 +; PPC64BE-NEXT: stb 11, 12(3) +; PPC64BE-NEXT: addi 11, 28, 1 +; PPC64BE-NEXT: stb 11, 11(3) +; PPC64BE-NEXT: addi 11, 27, 1 +; PPC64BE-NEXT: stb 11, 10(3) +; PPC64BE-NEXT: addi 11, 26, 1 +; PPC64BE-NEXT: stb 11, 9(3) +; PPC64BE-NEXT: addi 11, 25, 1 +; PPC64BE-NEXT: stb 11, 8(3) +; PPC64BE-NEXT: addi 11, 24, 1 ; PPC64BE-NEXT: stb 11, 7(3) ; PPC64BE-NEXT: stb 10, 6(3) ; PPC64BE-NEXT: stb 9, 5(3) @@ -277,23 +277,23 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC32-NEXT: stw 28, 16(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 29, 20(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; PPC32-NEXT: lhz 11, 50(1) -; PPC32-NEXT: lhz 12, 46(1) -; PPC32-NEXT: lhz 0, 42(1) -; PPC32-NEXT: lhz 30, 70(1) -; PPC32-NEXT: lhz 29, 66(1) -; PPC32-NEXT: lhz 28, 62(1) -; PPC32-NEXT: lhz 27, 58(1) +; PPC32-NEXT: lhz 11, 70(1) +; PPC32-NEXT: lhz 12, 66(1) +; PPC32-NEXT: lhz 0, 62(1) +; PPC32-NEXT: add 10, 11, 10 +; PPC32-NEXT: lhz 30, 58(1) +; PPC32-NEXT: add 9, 12, 9 +; PPC32-NEXT: lhz 29, 50(1) +; PPC32-NEXT: add 8, 0, 8 +; PPC32-NEXT: lhz 28, 42(1) +; PPC32-NEXT: add 7, 30, 7 +; PPC32-NEXT: lhz 27, 46(1) +; PPC32-NEXT: add 5, 29, 5 ; PPC32-NEXT: lhz 26, 54(1) -; PPC32-NEXT: add 3, 0, 3 -; PPC32-NEXT: add 4, 12, 4 -; PPC32-NEXT: add 5, 11, 5 -; PPC32-NEXT: add 6, 26, 6 -; PPC32-NEXT: add 7, 27, 7 -; PPC32-NEXT: add 8, 28, 8 -; PPC32-NEXT: add 9, 29, 9 -; PPC32-NEXT: add 10, 30, 10 +; PPC32-NEXT: add 3, 28, 3 +; PPC32-NEXT: add 4, 27, 4 ; PPC32-NEXT: addi 3, 3, 1 +; PPC32-NEXT: add 6, 26, 6 ; PPC32-NEXT: addi 4, 4, 1 ; PPC32-NEXT: addi 5, 5, 1 ; PPC32-NEXT: addi 6, 6, 1 @@ -317,31 +317,31 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill ; 
PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lhz 11, 142(1) -; PPC64BE-NEXT: lhz 12, 134(1) -; PPC64BE-NEXT: lhz 0, 126(1) -; PPC64BE-NEXT: lhz 30, 118(1) -; PPC64BE-NEXT: lhz 29, 182(1) -; PPC64BE-NEXT: lhz 28, 174(1) -; PPC64BE-NEXT: lhz 27, 166(1) -; PPC64BE-NEXT: lhz 26, 158(1) +; PPC64BE-NEXT: lhz 11, 118(1) +; PPC64BE-NEXT: lhz 12, 182(1) +; PPC64BE-NEXT: lhz 0, 174(1) +; PPC64BE-NEXT: lhz 30, 166(1) +; PPC64BE-NEXT: add 11, 12, 11 +; PPC64BE-NEXT: lhz 29, 158(1) +; PPC64BE-NEXT: add 10, 0, 10 +; PPC64BE-NEXT: lhz 28, 142(1) +; PPC64BE-NEXT: add 9, 30, 9 +; PPC64BE-NEXT: lhz 27, 126(1) +; PPC64BE-NEXT: add 8, 29, 8 +; PPC64BE-NEXT: lhz 26, 134(1) +; PPC64BE-NEXT: add 6, 28, 6 ; PPC64BE-NEXT: lhz 25, 150(1) -; PPC64BE-NEXT: add 4, 0, 4 -; PPC64BE-NEXT: add 5, 12, 5 -; PPC64BE-NEXT: add 6, 11, 6 +; PPC64BE-NEXT: add 4, 27, 4 +; PPC64BE-NEXT: add 5, 26, 5 +; PPC64BE-NEXT: addi 11, 11, 1 ; PPC64BE-NEXT: add 7, 25, 7 -; PPC64BE-NEXT: add 8, 26, 8 -; PPC64BE-NEXT: add 9, 27, 9 -; PPC64BE-NEXT: add 10, 28, 10 -; PPC64BE-NEXT: add 11, 29, 30 -; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: addi 5, 5, 1 -; PPC64BE-NEXT: addi 6, 6, 1 -; PPC64BE-NEXT: addi 7, 7, 1 -; PPC64BE-NEXT: addi 8, 8, 1 -; PPC64BE-NEXT: addi 9, 9, 1 ; PPC64BE-NEXT: addi 10, 10, 1 -; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: addi 4, 4, 1 ; PPC64BE-NEXT: sth 11, 14(3) ; PPC64BE-NEXT: sth 10, 12(3) ; PPC64BE-NEXT: sth 9, 10(3) diff --git a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll index c0a8a76c7f1af2..815d5b7443e430 100644 --- a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll @@ -5,23 +5,23 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; 
PPC64-LABEL: muloti_test: ; PPC64: # %bb.0: # %start +; PPC64-NEXT: mulhdu. 8, 3, 6 +; PPC64-NEXT: mcrf 1, 0 ; PPC64-NEXT: mulld 8, 5, 4 -; PPC64-NEXT: cmpdi 5, 3, 0 -; PPC64-NEXT: mulhdu. 9, 3, 6 +; PPC64-NEXT: cmpdi 3, 0 ; PPC64-NEXT: mulld 3, 3, 6 -; PPC64-NEXT: mcrf 1, 0 +; PPC64-NEXT: cmpdi 5, 5, 0 ; PPC64-NEXT: add 3, 3, 8 -; PPC64-NEXT: cmpdi 5, 0 -; PPC64-NEXT: crnor 20, 2, 22 -; PPC64-NEXT: cmpldi 3, 0 +; PPC64-NEXT: crnor 20, 22, 2 ; PPC64-NEXT: mulhdu 8, 4, 6 +; PPC64-NEXT: cmpldi 3, 0 ; PPC64-NEXT: add 3, 8, 3 ; PPC64-NEXT: cmpld 6, 3, 8 ; PPC64-NEXT: crandc 21, 24, 2 ; PPC64-NEXT: crorc 20, 20, 6 -; PPC64-NEXT: li 7, 1 ; PPC64-NEXT: mulhdu. 5, 5, 4 ; PPC64-NEXT: crorc 20, 20, 2 +; PPC64-NEXT: li 7, 1 ; PPC64-NEXT: crnor 20, 20, 21 ; PPC64-NEXT: mulld 4, 4, 6 ; PPC64-NEXT: bc 12, 20, .LBB0_2 @@ -38,13 +38,13 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; PPC32-NEXT: stw 0, 4(1) ; PPC32-NEXT: stwu 1, -80(1) ; PPC32-NEXT: stw 26, 56(1) # 4-byte Folded Spill +; PPC32-NEXT: mfcr 12 ; PPC32-NEXT: stw 27, 60(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 27, 4 ; PPC32-NEXT: stw 29, 68(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 29, 7 ; PPC32-NEXT: stw 30, 72(1) # 4-byte Folded Spill -; PPC32-NEXT: mfcr 12 ; PPC32-NEXT: mr 30, 8 -; PPC32-NEXT: mr 29, 7 -; PPC32-NEXT: mr 27, 4 ; PPC32-NEXT: mr 26, 3 ; PPC32-NEXT: li 3, 0 ; PPC32-NEXT: li 4, 0 @@ -54,30 +54,36 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; PPC32-NEXT: stw 21, 36(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 22, 40(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 23, 44(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 23, 6 ; PPC32-NEXT: stw 24, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 24, 5 ; PPC32-NEXT: stw 25, 52(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 28, 64(1) # 4-byte Folded Spill ; PPC32-NEXT: mr 25, 10 -; PPC32-NEXT: stw 12, 28(1) +; PPC32-NEXT: stw 28, 64(1) # 4-byte Folded Spill ; PPC32-NEXT: mr 28, 9 -; PPC32-NEXT: mr 23, 6 -; PPC32-NEXT: mr 
24, 5 +; PPC32-NEXT: stw 12, 28(1) ; PPC32-NEXT: bl __multi3 ; PPC32-NEXT: mr 7, 4 ; PPC32-NEXT: mullw 4, 24, 30 +; PPC32-NEXT: cmpwi 5, 24, 0 +; PPC32-NEXT: cmpwi 6, 26, 0 +; PPC32-NEXT: cmpwi 7, 28, 0 +; PPC32-NEXT: crnor 9, 30, 26 ; PPC32-NEXT: mullw 8, 29, 23 -; PPC32-NEXT: mullw 10, 28, 27 -; PPC32-NEXT: mullw 11, 26, 25 +; PPC32-NEXT: add 21, 8, 4 +; PPC32-NEXT: mullw 11, 28, 27 +; PPC32-NEXT: mullw 12, 26, 25 +; PPC32-NEXT: add 11, 12, 11 +; PPC32-NEXT: cmplwi 7, 11, 0 ; PPC32-NEXT: mulhwu 9, 30, 23 -; PPC32-NEXT: mulhwu 12, 27, 25 +; PPC32-NEXT: add 12, 9, 21 +; PPC32-NEXT: cmplw 6, 12, 9 +; PPC32-NEXT: mulhwu 10, 27, 25 ; PPC32-NEXT: mullw 0, 30, 23 ; PPC32-NEXT: mullw 22, 27, 25 -; PPC32-NEXT: add 21, 8, 4 -; PPC32-NEXT: add 10, 11, 10 ; PPC32-NEXT: addc 4, 22, 0 -; PPC32-NEXT: add 11, 9, 21 -; PPC32-NEXT: add 0, 12, 10 -; PPC32-NEXT: adde 8, 0, 11 +; PPC32-NEXT: add 0, 10, 11 +; PPC32-NEXT: adde 8, 0, 12 ; PPC32-NEXT: addc 4, 7, 4 ; PPC32-NEXT: adde 8, 3, 8 ; PPC32-NEXT: xor 22, 4, 7 @@ -85,21 +91,15 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; PPC32-NEXT: or. 22, 22, 20 ; PPC32-NEXT: mcrf 1, 0 ; PPC32-NEXT: cmpwi 29, 0 -; PPC32-NEXT: cmpwi 5, 24, 0 -; PPC32-NEXT: cmpwi 6, 26, 0 -; PPC32-NEXT: cmpwi 7, 28, 0 ; PPC32-NEXT: crnor 8, 22, 2 ; PPC32-NEXT: mulhwu. 23, 29, 23 -; PPC32-NEXT: crnor 9, 30, 26 ; PPC32-NEXT: mcrf 5, 0 ; PPC32-NEXT: cmplwi 21, 0 -; PPC32-NEXT: cmplw 6, 11, 9 -; PPC32-NEXT: cmplwi 7, 10, 0 ; PPC32-NEXT: crandc 10, 24, 2 -; PPC32-NEXT: cmplw 3, 0, 12 +; PPC32-NEXT: cmplw 3, 0, 10 +; PPC32-NEXT: crandc 11, 12, 30 ; PPC32-NEXT: mulhwu. 
9, 24, 30 ; PPC32-NEXT: mcrf 6, 0 -; PPC32-NEXT: crandc 11, 12, 30 ; PPC32-NEXT: cmplw 4, 7 ; PPC32-NEXT: cmplw 7, 8, 3 ; PPC32-NEXT: crand 12, 30, 0 diff --git a/llvm/test/CodeGen/PowerPC/vec_splat.ll b/llvm/test/CodeGen/PowerPC/vec_splat.ll index 7c048ff3710858..0e6626bbce2315 100644 --- a/llvm/test/CodeGen/PowerPC/vec_splat.ll +++ b/llvm/test/CodeGen/PowerPC/vec_splat.ll @@ -10,17 +10,17 @@ define void @splat(%f4* %P, %f4* %Q, float %X) nounwind { ; G3-LABEL: splat: ; G3: # %bb.0: -; G3-NEXT: lfs 0, 0(4) +; G3-NEXT: lfs 0, 12(4) ; G3-NEXT: lfs 2, 8(4) ; G3-NEXT: lfs 3, 4(4) -; G3-NEXT: lfs 4, 12(4) ; G3-NEXT: fadds 0, 0, 1 -; G3-NEXT: fadds 2, 2, 1 -; G3-NEXT: fadds 3, 3, 1 -; G3-NEXT: fadds 1, 4, 1 -; G3-NEXT: stfs 1, 12(3) -; G3-NEXT: stfs 2, 8(3) -; G3-NEXT: stfs 3, 4(3) +; G3-NEXT: lfs 4, 0(4) +; G3-NEXT: stfs 0, 12(3) +; G3-NEXT: fadds 0, 2, 1 +; G3-NEXT: stfs 0, 8(3) +; G3-NEXT: fadds 0, 3, 1 +; G3-NEXT: stfs 0, 4(3) +; G3-NEXT: fadds 0, 4, 1 ; G3-NEXT: stfs 0, 0(3) ; G3-NEXT: blr ; @@ -49,18 +49,18 @@ define void @splat(%f4* %P, %f4* %Q, float %X) nounwind { define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) nounwind { ; G3-LABEL: splat_i4: ; G3: # %bb.0: -; G3-NEXT: lwz 6, 0(4) +; G3-NEXT: lwz 6, 12(4) ; G3-NEXT: lwz 7, 8(4) ; G3-NEXT: lwz 8, 4(4) -; G3-NEXT: lwz 4, 12(4) ; G3-NEXT: add 6, 6, 5 -; G3-NEXT: add 8, 8, 5 -; G3-NEXT: add 7, 7, 5 +; G3-NEXT: lwz 4, 0(4) +; G3-NEXT: stw 6, 12(3) +; G3-NEXT: add 6, 7, 5 +; G3-NEXT: stw 6, 8(3) +; G3-NEXT: add 6, 8, 5 ; G3-NEXT: add 4, 4, 5 -; G3-NEXT: stw 4, 12(3) -; G3-NEXT: stw 7, 8(3) -; G3-NEXT: stw 8, 4(3) -; G3-NEXT: stw 6, 0(3) +; G3-NEXT: stw 6, 4(3) +; G3-NEXT: stw 4, 0(3) ; G3-NEXT: blr ; ; G5-LABEL: splat_i4: @@ -88,18 +88,18 @@ define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) nounwind { define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) nounwind { ; G3-LABEL: splat_imm_i32: ; G3: # %bb.0: -; G3-NEXT: lwz 5, 0(4) +; G3-NEXT: lwz 5, 12(4) ; G3-NEXT: lwz 6, 8(4) ; G3-NEXT: lwz 7, 4(4) -; G3-NEXT: lwz 4, 
12(4) ; G3-NEXT: addi 5, 5, -1 -; G3-NEXT: addi 7, 7, -1 -; G3-NEXT: addi 6, 6, -1 +; G3-NEXT: lwz 4, 0(4) +; G3-NEXT: stw 5, 12(3) +; G3-NEXT: addi 5, 6, -1 +; G3-NEXT: stw 5, 8(3) +; G3-NEXT: addi 5, 7, -1 ; G3-NEXT: addi 4, 4, -1 -; G3-NEXT: stw 4, 12(3) -; G3-NEXT: stw 6, 8(3) -; G3-NEXT: stw 7, 4(3) -; G3-NEXT: stw 5, 0(3) +; G3-NEXT: stw 5, 4(3) +; G3-NEXT: stw 4, 0(3) ; G3-NEXT: blr ; ; G5-LABEL: splat_imm_i32: @@ -118,22 +118,22 @@ define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) nounwind { define void @splat_imm_i16(%i4* %P, %i4* %Q, i32 %X) nounwind { ; G3-LABEL: splat_imm_i16: ; G3: # %bb.0: -; G3-NEXT: lwz 5, 0(4) -; G3-NEXT: lwz 6, 8(4) +; G3-NEXT: lwz 5, 8(4) +; G3-NEXT: lwz 6, 0(4) ; G3-NEXT: lwz 7, 4(4) -; G3-NEXT: lwz 4, 12(4) ; G3-NEXT: addi 5, 5, 1 -; G3-NEXT: addi 7, 7, 1 +; G3-NEXT: lwz 4, 12(4) ; G3-NEXT: addi 6, 6, 1 +; G3-NEXT: addi 7, 7, 1 ; G3-NEXT: addi 4, 4, 1 -; G3-NEXT: addis 5, 5, 1 -; G3-NEXT: addis 7, 7, 1 -; G3-NEXT: addis 6, 6, 1 ; G3-NEXT: addis 4, 4, 1 ; G3-NEXT: stw 4, 12(3) -; G3-NEXT: stw 6, 8(3) -; G3-NEXT: stw 7, 4(3) -; G3-NEXT: stw 5, 0(3) +; G3-NEXT: addis 4, 5, 1 +; G3-NEXT: stw 4, 8(3) +; G3-NEXT: addis 4, 7, 1 +; G3-NEXT: stw 4, 4(3) +; G3-NEXT: addis 4, 6, 1 +; G3-NEXT: stw 4, 0(3) ; G3-NEXT: blr ; ; G5-LABEL: splat_imm_i16: @@ -189,58 +189,60 @@ define void @spltish(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; G3-LABEL: spltish: ; G3: # %bb.0: ; G3-NEXT: stwu 1, -48(1) +; G3-NEXT: stw 30, 40(1) # 4-byte Folded Spill +; G3-NEXT: lbz 5, 0(4) +; G3-NEXT: lbz 30, 15(4) +; G3-NEXT: stw 29, 36(1) # 4-byte Folded Spill +; G3-NEXT: lbz 29, 13(4) +; G3-NEXT: stw 28, 32(1) # 4-byte Folded Spill +; G3-NEXT: lbz 28, 11(4) +; G3-NEXT: stw 27, 28(1) # 4-byte Folded Spill +; G3-NEXT: lbz 27, 9(4) +; G3-NEXT: stw 24, 16(1) # 4-byte Folded Spill ; G3-NEXT: stw 25, 20(1) # 4-byte Folded Spill ; G3-NEXT: stw 26, 24(1) # 4-byte Folded Spill -; G3-NEXT: stw 27, 28(1) # 4-byte Folded Spill -; G3-NEXT: stw 28, 32(1) # 4-byte Folded 
Spill -; G3-NEXT: stw 29, 36(1) # 4-byte Folded Spill -; G3-NEXT: stw 30, 40(1) # 4-byte Folded Spill -; G3-NEXT: lbz 5, 5(4) -; G3-NEXT: lbz 6, 3(4) -; G3-NEXT: lbz 7, 1(4) -; G3-NEXT: lbz 8, 0(4) -; G3-NEXT: lbz 9, 2(4) -; G3-NEXT: lbz 10, 4(4) -; G3-NEXT: lbz 11, 6(4) -; G3-NEXT: lbz 12, 8(4) -; G3-NEXT: lbz 0, 10(4) -; G3-NEXT: addi 7, 7, -15 -; G3-NEXT: lbz 30, 12(4) -; G3-NEXT: lbz 29, 14(4) -; G3-NEXT: lbz 28, 15(4) -; G3-NEXT: lbz 27, 13(4) -; G3-NEXT: lbz 26, 11(4) -; G3-NEXT: lbz 25, 9(4) -; G3-NEXT: addi 6, 6, -15 -; G3-NEXT: lbz 4, 7(4) -; G3-NEXT: addi 5, 5, -15 -; G3-NEXT: addi 25, 25, -15 -; G3-NEXT: addi 26, 26, -15 -; G3-NEXT: addi 4, 4, -15 -; G3-NEXT: addi 27, 27, -15 -; G3-NEXT: addi 28, 28, -15 -; G3-NEXT: stb 29, 14(3) -; G3-NEXT: stb 30, 12(3) -; G3-NEXT: stb 0, 10(3) -; G3-NEXT: stb 12, 8(3) -; G3-NEXT: stb 11, 6(3) -; G3-NEXT: stb 10, 4(3) -; G3-NEXT: stb 9, 2(3) -; G3-NEXT: stb 8, 0(3) -; G3-NEXT: stb 28, 15(3) -; G3-NEXT: stb 27, 13(3) -; G3-NEXT: stb 26, 11(3) -; G3-NEXT: stb 25, 9(3) -; G3-NEXT: stb 4, 7(3) +; G3-NEXT: lbz 6, 2(4) +; G3-NEXT: lbz 7, 4(4) +; G3-NEXT: lbz 8, 6(4) +; G3-NEXT: lbz 9, 8(4) +; G3-NEXT: lbz 10, 10(4) +; G3-NEXT: lbz 11, 12(4) +; G3-NEXT: lbz 12, 14(4) +; G3-NEXT: lbz 26, 7(4) +; G3-NEXT: lbz 25, 5(4) +; G3-NEXT: lbz 24, 3(4) +; G3-NEXT: lbz 4, 1(4) +; G3-NEXT: stb 5, 0(3) +; G3-NEXT: addi 5, 30, -15 +; G3-NEXT: stb 5, 15(3) +; G3-NEXT: addi 5, 29, -15 +; G3-NEXT: stb 5, 13(3) +; G3-NEXT: addi 5, 28, -15 +; G3-NEXT: stb 5, 11(3) +; G3-NEXT: addi 5, 27, -15 +; G3-NEXT: stb 5, 9(3) +; G3-NEXT: addi 5, 26, -15 +; G3-NEXT: stb 5, 7(3) +; G3-NEXT: addi 5, 25, -15 ; G3-NEXT: stb 5, 5(3) -; G3-NEXT: stb 6, 3(3) -; G3-NEXT: stb 7, 1(3) +; G3-NEXT: addi 5, 24, -15 +; G3-NEXT: addi 4, 4, -15 +; G3-NEXT: stb 12, 14(3) +; G3-NEXT: stb 11, 12(3) +; G3-NEXT: stb 10, 10(3) +; G3-NEXT: stb 9, 8(3) +; G3-NEXT: stb 8, 6(3) +; G3-NEXT: stb 7, 4(3) +; G3-NEXT: stb 6, 2(3) +; G3-NEXT: stb 5, 3(3) +; G3-NEXT: stb 4, 1(3) ; G3-NEXT: 
lwz 30, 40(1) # 4-byte Folded Reload ; G3-NEXT: lwz 29, 36(1) # 4-byte Folded Reload ; G3-NEXT: lwz 28, 32(1) # 4-byte Folded Reload ; G3-NEXT: lwz 27, 28(1) # 4-byte Folded Reload ; G3-NEXT: lwz 26, 24(1) # 4-byte Folded Reload ; G3-NEXT: lwz 25, 20(1) # 4-byte Folded Reload +; G3-NEXT: lwz 24, 16(1) # 4-byte Folded Reload ; G3-NEXT: addi 1, 1, 48 ; G3-NEXT: blr ; diff --git a/llvm/test/CodeGen/X86/immediate_merging.ll b/llvm/test/CodeGen/X86/immediate_merging.ll index 1bed1014f94e39..038c56f6dd5dd5 100644 --- a/llvm/test/CodeGen/X86/immediate_merging.ll +++ b/llvm/test/CodeGen/X86/immediate_merging.ll @@ -12,16 +12,16 @@ @i = common global i32 0, align 4 ; Test -Os to make sure immediates with multiple users don't get pulled in to -; instructions. +; instructions (8-bit immediates are exceptions). + define i32 @foo() optsize { ; X86-LABEL: foo: ; X86: # %bb.0: # %entry ; X86-NEXT: movl $1234, %eax # imm = 0x4D2 ; X86-NEXT: movl %eax, a ; X86-NEXT: movl %eax, b -; X86-NEXT: movl $12, %eax -; X86-NEXT: movl %eax, c -; X86-NEXT: cmpl %eax, e +; X86-NEXT: movl $12, c +; X86-NEXT: cmpl $12, e ; X86-NEXT: jne .LBB0_2 ; X86-NEXT: # %bb.1: # %if.then ; X86-NEXT: movl $1, x @@ -38,9 +38,8 @@ define i32 @foo() optsize { ; X64-NEXT: movl $1234, %eax # imm = 0x4D2 ; X64-NEXT: movl %eax, {{.*}}(%rip) ; X64-NEXT: movl %eax, {{.*}}(%rip) -; X64-NEXT: movl $12, %eax -; X64-NEXT: movl %eax, {{.*}}(%rip) -; X64-NEXT: cmpl %eax, {{.*}}(%rip) +; X64-NEXT: movl $12, {{.*}}(%rip) +; X64-NEXT: cmpl $12, {{.*}}(%rip) ; X64-NEXT: jne .LBB0_2 ; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl $1, {{.*}}(%rip) @@ -74,16 +73,16 @@ if.end: ; preds = %if.then, %entry } ; Test PGSO to make sure immediates with multiple users don't get pulled in to -; instructions. +; instructions (8-bit immediates are exceptions). 
+ define i32 @foo_pgso() !prof !14 { ; X86-LABEL: foo_pgso: ; X86: # %bb.0: # %entry ; X86-NEXT: movl $1234, %eax # imm = 0x4D2 ; X86-NEXT: movl %eax, a ; X86-NEXT: movl %eax, b -; X86-NEXT: movl $12, %eax -; X86-NEXT: movl %eax, c -; X86-NEXT: cmpl %eax, e +; X86-NEXT: movl $12, c +; X86-NEXT: cmpl $12, e ; X86-NEXT: jne .LBB1_2 ; X86-NEXT: # %bb.1: # %if.then ; X86-NEXT: movl $1, x @@ -100,9 +99,8 @@ define i32 @foo_pgso() !prof !14 { ; X64-NEXT: movl $1234, %eax # imm = 0x4D2 ; X64-NEXT: movl %eax, {{.*}}(%rip) ; X64-NEXT: movl %eax, {{.*}}(%rip) -; X64-NEXT: movl $12, %eax -; X64-NEXT: movl %eax, {{.*}}(%rip) -; X64-NEXT: cmpl %eax, {{.*}}(%rip) +; X64-NEXT: movl $12, {{.*}}(%rip) +; X64-NEXT: cmpl $12, {{.*}}(%rip) ; X64-NEXT: jne .LBB1_2 ; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl $1, {{.*}}(%rip) diff --git a/llvm/test/CodeGen/X86/immediate_merging64.ll b/llvm/test/CodeGen/X86/immediate_merging64.ll index a807a119e89353..d355bea1603a5e 100644 --- a/llvm/test/CodeGen/X86/immediate_merging64.ll +++ b/llvm/test/CodeGen/X86/immediate_merging64.ll @@ -5,13 +5,13 @@ ; 32-bit immediates are merged for code size savings. ; Immediates with multiple users should not be pulled into instructions when -; optimizing for code size. +; optimizing for code size (but 8-bit immediates are exceptions). 
+ define i1 @imm_multiple_users(i64 %a, i64* %b) optsize { ; CHECK-LABEL: imm_multiple_users: ; CHECK: # %bb.0: -; CHECK-NEXT: movq $-1, %rax -; CHECK-NEXT: movq %rax, (%rsi) -; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: movq $-1, (%rsi) +; CHECK-NEXT: cmpq $-1, %rdi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq store i64 -1, i64* %b, align 8 @@ -22,9 +22,8 @@ define i1 @imm_multiple_users(i64 %a, i64* %b) optsize { define i1 @imm_multiple_users_pgso(i64 %a, i64* %b) !prof !14 { ; CHECK-LABEL: imm_multiple_users_pgso: ; CHECK: # %bb.0: -; CHECK-NEXT: movq $-1, %rax -; CHECK-NEXT: movq %rax, (%rsi) -; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: movq $-1, (%rsi) +; CHECK-NEXT: cmpq $-1, %rdi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq store i64 -1, i64* %b, align 8 diff --git a/llvm/test/CodeGen/X86/pr27202.ll b/llvm/test/CodeGen/X86/pr27202.ll index ea5781ed8c5fcd..bb6be1d1685da5 100644 --- a/llvm/test/CodeGen/X86/pr27202.ll +++ b/llvm/test/CodeGen/X86/pr27202.ll @@ -14,12 +14,14 @@ define i1 @foo(i32 %i) optsize { ret i1 %cmp } +; 8-bit ALU immediates probably have small encodings. +; We do not want to hoist the constant into a register here. + define zeroext i1 @g(i32 %x) optsize { ; CHECK-LABEL: g: ; CHECK: # %bb.0: -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: orl $1, %edi +; CHECK-NEXT: cmpl $1, %edi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq %t0 = or i32 %x, 1 @@ -27,7 +29,7 @@ define zeroext i1 @g(i32 %x) optsize { ret i1 %t1 } -; 8-bit immediates probably have small encodings. +; 8-bit ALU immediates probably have small encodings. ; We do not want to hoist the constant into a register here. 
define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize { @@ -36,9 +38,8 @@ define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize { ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: shll $6, %eax ; CHECK-NEXT: movzbl %al, %ecx -; CHECK-NEXT: movl $7, %eax -; CHECK-NEXT: andq %rax, %rsi -; CHECK-NEXT: andq %rax, %rdx +; CHECK-NEXT: andl $7, %esi +; CHECK-NEXT: andl $7, %edx ; CHECK-NEXT: leaq (%rdx,%rsi,8), %rax ; CHECK-NEXT: orq %rcx, %rax ; CHECK-NEXT: retq diff --git a/llvm/test/FileCheck/comment/after-words.txt b/llvm/test/FileCheck/comment/after-words.txt index 46eeb657f0157d..3650f959be3fa8 100644 --- a/llvm/test/FileCheck/comment/after-words.txt +++ b/llvm/test/FileCheck/comment/after-words.txt @@ -8,7 +8,7 @@ RUN: echo 'FOO-COM: CHECK: foo' > %t.chk RUN: echo 'RUN_COM: CHECK: bar' >> %t.chk RUN: echo 'RUN3COM: CHECK: foo' >> %t.chk RUN: echo ' COMRUN: CHECK: bar' >> %t.chk -RUN: %ProtectFileCheckOutput FileCheck -vv %t.chk < %t.in 2>&1 | FileCheck %s +RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -vv %t.chk < %t.in 2>&1 | FileCheck %s CHECK: .chk:1:17: remark: CHECK: expected string found in input CHECK: .chk:2:17: remark: CHECK: expected string found in input diff --git a/llvm/test/FileCheck/comment/blank-comments.txt b/llvm/test/FileCheck/comment/blank-comments.txt index b035ddd750d09e..1bad6d2daada51 100644 --- a/llvm/test/FileCheck/comment/blank-comments.txt +++ b/llvm/test/FileCheck/comment/blank-comments.txt @@ -4,6 +4,6 @@ RUN: echo 'foo' > %t.in RUN: echo 'COM:' > %t.chk RUN: echo 'CHECK: foo' >> %t.chk RUN: echo ' COM: ' >> %t.chk -RUN: %ProtectFileCheckOutput FileCheck -vv %t.chk < %t.in 2>&1 | FileCheck %s +RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -vv %t.chk < %t.in 2>&1 | FileCheck %s CHECK: .chk:2:8: remark: CHECK: expected string found in input diff --git a/llvm/test/FileCheck/comment/suffixes.txt b/llvm/test/FileCheck/comment/suffixes.txt index 47805b46d0c94e..85b05fb5778cf3 100644 --- a/llvm/test/FileCheck/comment/suffixes.txt 
+++ b/llvm/test/FileCheck/comment/suffixes.txt @@ -6,7 +6,7 @@ RUN: echo bar >> %t.in RUN: echo 'COM-NEXT: CHECK: foo' > %t.chk RUN: echo 'RUN-NOT: CHECK: bar' >> %t.chk -RUN: %ProtectFileCheckOutput FileCheck -vv %t.chk < %t.in 2>&1 | \ +RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -vv %t.chk < %t.in 2>&1 | \ RUN: FileCheck -check-prefix=CHECK1 %s CHECK1: .chk:1:18: remark: CHECK: expected string found in input @@ -15,7 +15,7 @@ CHECK1: .chk:2:17: remark: CHECK: expected string found in input # But we can define them as comment prefixes. RUN: %ProtectFileCheckOutput \ -RUN: FileCheck -vv -comment-prefixes=COM,RUN,RUN-NOT %t.chk < %t.in 2>&1 | \ +RUN: FileCheck -dump-input=never -vv -comment-prefixes=COM,RUN,RUN-NOT %t.chk < %t.in 2>&1 | \ RUN: FileCheck -check-prefix=CHECK2 %s CHECK2: .chk:1:18: remark: CHECK: expected string found in input diff --git a/llvm/test/FileCheck/comment/suppresses-checks.txt b/llvm/test/FileCheck/comment/suppresses-checks.txt index 98f01811f53f4d..a58a040b5d39a7 100644 --- a/llvm/test/FileCheck/comment/suppresses-checks.txt +++ b/llvm/test/FileCheck/comment/suppresses-checks.txt @@ -7,7 +7,7 @@ RUN: echo 'foo' > %t-1.in RUN: echo 'COM: CHECK: bar' > %t-1.chk RUN: echo 'CHECK: foo' >> %t-1.chk RUN: echo 'RUN: echo "CHECK: baz"' >> %t-1.chk -RUN: %ProtectFileCheckOutput FileCheck -vv %t-1.chk < %t-1.in 2>&1 | \ +RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -vv %t-1.chk < %t-1.in 2>&1 | \ RUN: FileCheck -DCHECK_LINE=2 %s # Check the case of one user-specified comment prefix. @@ -16,7 +16,7 @@ RUN: echo 'foo' > %t-2.in RUN: echo 'CHECK: foo' > %t-2.chk RUN: echo 'letters then space MY-PREFIX: CHECK: bar' >> %t-2.chk RUN: %ProtectFileCheckOutput \ -RUN: FileCheck -vv %t-2.chk -comment-prefixes=MY-PREFIX < %t-2.in 2>&1 | \ +RUN: FileCheck -dump-input=never -vv %t-2.chk -comment-prefixes=MY-PREFIX < %t-2.in 2>&1 | \ RUN: FileCheck -DCHECK_LINE=1 %s # Check the case of multiple user-specified comment prefixes. 
@@ -26,7 +26,7 @@ RUN: echo 'CHECK: foo' >> %t-3.chk RUN: echo 'Foo_1: CHECK: Foo' >> %t-3.chk RUN: echo 'Baz_3: CHECK: Baz' >> %t-3.chk RUN: %ProtectFileCheckOutput \ -RUN: FileCheck -vv %t-3.chk -comment-prefixes=Foo_1,Bar_2 \ +RUN: FileCheck -dump-input=never -vv %t-3.chk -comment-prefixes=Foo_1,Bar_2 \ RUN: -comment-prefixes=Baz_3 < %t-3.in 2>&1 | \ RUN: FileCheck -DCHECK_LINE=2 %s diff --git a/llvm/test/FileCheck/comment/unused-comment-prefixes.txt b/llvm/test/FileCheck/comment/unused-comment-prefixes.txt index 29212ecb6aadce..5dadc8f3569d5c 100644 --- a/llvm/test/FileCheck/comment/unused-comment-prefixes.txt +++ b/llvm/test/FileCheck/comment/unused-comment-prefixes.txt @@ -5,12 +5,12 @@ RUN: echo 'CHECK: foo' > %t.chk # Check the case of default comment prefixes. RUN: %ProtectFileCheckOutput \ -RUN: FileCheck -vv %t.chk < %t.in 2>&1 | FileCheck %s +RUN: FileCheck -dump-input=never -vv %t.chk < %t.in 2>&1 | FileCheck %s # Specifying non-default comment prefixes doesn't mean you have to use them. # For example, they might be applied to an entire test suite via # FILECHECK_OPTS or via a wrapper command or substitution. RUN: %ProtectFileCheckOutput \ -RUN: FileCheck -vv -comment-prefixes=FOO %t.chk < %t.in 2>&1 | FileCheck %s +RUN: FileCheck -dump-input=never -vv -comment-prefixes=FOO %t.chk < %t.in 2>&1 | FileCheck %s CHECK: .chk:1:8: remark: CHECK: expected string found in input diff --git a/llvm/test/FileCheck/dump-input-enable.txt b/llvm/test/FileCheck/dump-input-enable.txt index cf47f03dfa835b..48a6eef417154e 100644 --- a/llvm/test/FileCheck/dump-input-enable.txt +++ b/llvm/test/FileCheck/dump-input-enable.txt @@ -74,20 +74,22 @@ BADVAL: {{F|f}}ile{{C|c}}heck{{.*}}: for the --dump-input option: Cannot find op ; RUN: | FileCheck %s -match-full-lines -check-prefixes=TRACE,ERR,NODUMP ;-------------------------------------------------- -; Check no -dump-input, which defaults to never. +; Check no -dump-input, which defaults to fail. 
;-------------------------------------------------- -; FileCheck success, -v => no dump, trace. +; FileCheck success, -v => no dump, no trace. ; RUN: %ProtectFileCheckOutput \ ; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ ; RUN: -match-full-lines -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines -check-prefixes=TRACE,NODUMP +; RUN: | FileCheck %s -match-full-lines -allow-empty \ +; RUN: -check-prefixes=NOTRACE,NODUMP -; FileCheck fail, -v => no dump, trace. +; FileCheck fail, -v => dump, no trace. ; RUN: %ProtectFileCheckOutput \ ; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \ ; RUN: -match-full-lines -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines -check-prefixes=TRACE,ERR,NODUMP +; RUN: | FileCheck %s -match-full-lines \ +; RUN: -check-prefixes=NOTRACE,ERR,DUMP-ERR,DUMP-ERR-V ;-------------------------------------------------- ; Check -dump-input=fail. @@ -122,42 +124,6 @@ BADVAL: {{F|f}}ile{{C|c}}heck{{.*}}: for the --dump-input option: Cannot find op ; RUN: | FileCheck %s -match-full-lines \ ; RUN: -check-prefixes=NOTRACE,ERR,DUMP-ERR,DUMP-ERR-V -;-------------------------------------------------- -; Check -dump-input-on-failure. -;-------------------------------------------------- - -; Command-line option. - -; FileCheck success, -v => no dump, no trace. -; RUN: %ProtectFileCheckOutput \ -; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ -; RUN: -match-full-lines -dump-input-on-failure -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines -allow-empty \ -; RUN: -check-prefixes=NOTRACE,NODUMP - -; FileCheck fail, -v => dump, no trace. -; RUN: %ProtectFileCheckOutput \ -; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \ -; RUN: -match-full-lines -dump-input-on-failure -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines \ -; RUN: -check-prefixes=NOTRACE,ERR,DUMP-ERR,DUMP-ERR-V - -; FILECHECK_DUMP_INPUT_ON_FAILURE=1. - -; FileCheck success, -v => no dump, no trace. 
-; RUN: %ProtectFileCheckOutput FILECHECK_DUMP_INPUT_ON_FAILURE=1 \ -; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ -; RUN: -match-full-lines -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines -allow-empty \ -; RUN: -check-prefixes=NOTRACE,NODUMP - -; FileCheck fail, -v => dump, no trace. -; RUN: %ProtectFileCheckOutput FILECHECK_DUMP_INPUT_ON_FAILURE=1 \ -; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \ -; RUN: -match-full-lines -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines \ -; RUN: -check-prefixes=NOTRACE,ERR,DUMP-ERR,DUMP-ERR-V - ;-------------------------------------------------- ; Check -dump-input=always. ;-------------------------------------------------- diff --git a/llvm/test/FileCheck/envvar-opts.txt b/llvm/test/FileCheck/envvar-opts.txt index c1a9b2e1b243e6..da2b9f919a0d89 100644 --- a/llvm/test/FileCheck/envvar-opts.txt +++ b/llvm/test/FileCheck/envvar-opts.txt @@ -4,15 +4,15 @@ ; CHECK: bar ; RUN: %ProtectFileCheckOutput \ -; RUN: not FileCheck %s -input-file %t.in 2>&1 \ +; RUN: not FileCheck %s -dump-input=never -input-file %t.in 2>&1 \ ; RUN: | FileCheck -check-prefix QUIET %s ; RUN: %ProtectFileCheckOutput FILECHECK_OPTS= \ -; RUN: not FileCheck %s -input-file %t.in 2>&1 \ +; RUN: not FileCheck %s -dump-input=never -input-file %t.in 2>&1 \ ; RUN: | FileCheck -check-prefix QUIET %s ; RUN: %ProtectFileCheckOutput FILECHECK_OPTS=-v \ -; RUN: not FileCheck %s -input-file %t.in 2>&1 \ +; RUN: not FileCheck %s -dump-input=never -input-file %t.in 2>&1 \ ; RUN: | FileCheck -check-prefix VERB %s ; QUIET-NOT: remark: {{CHECK}}: expected string found in input diff --git a/llvm/test/FileCheck/lit.local.cfg b/llvm/test/FileCheck/lit.local.cfg index 65aba149e22d7c..9164f683fc1be3 100644 --- a/llvm/test/FileCheck/lit.local.cfg +++ b/llvm/test/FileCheck/lit.local.cfg @@ -39,7 +39,7 @@ config.test_format = lit.formats.ShTest(execute_external=False) # ; FILECHECK_OPTS beforehand. 
# ; # ; RUN: %ProtectFileCheckOutput FILECHECK_OPTS=-v \ -# ; RUN: FileCheck -input-file %s %s 2>&1 \ +# ; RUN: FileCheck -dump-input=never -input-file %s %s 2>&1 \ # ; RUN: | FileCheck -check-prefix TRACE %s # ; # ; CHECK: {{[0-9]+\.0}} @@ -53,4 +53,4 @@ config.test_format = lit.formats.ShTest(execute_external=False) # status (e.g., FILECHECK_OPTS=-strict-whitespace), he shouldn't be surprised # that test results throughout all test suites are affected. config.substitutions.append(('%ProtectFileCheckOutput', - 'env -u FILECHECK_OPTS -u FILECHECK_DUMP_INPUT_ON_FAILURE')) + 'env -u FILECHECK_OPTS')) diff --git a/llvm/test/FileCheck/match-full-lines.txt b/llvm/test/FileCheck/match-full-lines.txt index 114f628d8bc927..d69ebbc4c5a9b9 100644 --- a/llvm/test/FileCheck/match-full-lines.txt +++ b/llvm/test/FileCheck/match-full-lines.txt @@ -1,8 +1,8 @@ // RUN: %ProtectFileCheckOutput \ -// RUN: not FileCheck -match-full-lines -input-file %s %s 2>&1 \ +// RUN: not FileCheck -match-full-lines -dump-input=never -input-file %s %s 2>&1 \ // RUN: | FileCheck --check-prefix=ERROR --implicit-check-not=error: %s // RUN: %ProtectFileCheckOutput \ -// RUN: not FileCheck -match-full-lines -strict-whitespace -input-file %s %s \ +// RUN: not FileCheck -match-full-lines -strict-whitespace -dump-input=never -input-file %s %s \ // RUN: 2>&1 | FileCheck --check-prefix=ERROR-STRICT --check-prefix=ERROR \ // RUN: --implicit-check-not=error: %s diff --git a/llvm/test/FileCheck/verbose.txt b/llvm/test/FileCheck/verbose.txt index 66c4b1efbe06b5..f852702a9b1f8b 100644 --- a/llvm/test/FileCheck/verbose.txt +++ b/llvm/test/FileCheck/verbose.txt @@ -1,8 +1,8 @@ -; RUN: %ProtectFileCheckOutput FileCheck -input-file %s %s 2>&1 \ +; RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -input-file %s %s 2>&1 \ ; RUN: | FileCheck -check-prefix QUIET --allow-empty %s -; RUN: %ProtectFileCheckOutput FileCheck -v -input-file %s %s 2>&1 \ +; RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -v 
-input-file %s %s 2>&1 \ ; RUN: | FileCheck --strict-whitespace -check-prefix V %s -; RUN: %ProtectFileCheckOutput FileCheck -vv -input-file %s %s 2>&1 \ +; RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -vv -input-file %s %s 2>&1 \ ; RUN: | FileCheck --strict-whitespace -check-prefixes V,VV %s foo diff --git a/llvm/test/MC/AArch64/armv8.5a-bti.s b/llvm/test/MC/AArch64/armv8.5a-bti.s index ca55516890c42c..e0585f7613fcc8 100644 --- a/llvm/test/MC/AArch64/armv8.5a-bti.s +++ b/llvm/test/MC/AArch64/armv8.5a-bti.s @@ -1,6 +1,6 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+bti < %s | FileCheck %s -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.5a < %s | FileCheck %s -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-bti < %s 2>&1 | FileCheck %s --check-prefix=NOBTI +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+bti < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.5a < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-bti < %s | FileCheck %s --check-prefix=NOBTI bti bti c @@ -12,14 +12,10 @@ bti jc // CHECK: bti j // encoding: [0x9f,0x24,0x03,0xd5] // CHECK: bti jc // encoding: [0xdf,0x24,0x03,0xd5] -// NOBTI: instruction requires: bti -// NOBTI-NEXT: bti -// NOBTI: instruction requires: bti -// NOBTI-NEXT: bti -// NOBTI: instruction requires: bti -// NOBTI-NEXT: bti -// NOBTI: instruction requires: bti -// NOBTI-NEXT: bti +// NOBTI: hint #32 // encoding: [0x1f,0x24,0x03,0xd5] +// NOBTI: hint #34 // encoding: [0x5f,0x24,0x03,0xd5] +// NOBTI: hint #36 // encoding: [0x9f,0x24,0x03,0xd5] +// NOBTI: hint #38 // encoding: [0xdf,0x24,0x03,0xd5] hint #32 hint #34 diff --git a/llvm/test/Transforms/GCOVProfiling/global-ctor.ll b/llvm/test/Transforms/GCOVProfiling/global-ctor.ll index 596a2ad77635d1..e90385c7c42e00 100644 --- a/llvm/test/Transforms/GCOVProfiling/global-ctor.ll +++ 
b/llvm/test/Transforms/GCOVProfiling/global-ctor.ll @@ -1,5 +1,6 @@ ;; For a global constructor, _GLOBAL__sub_I_ only has artificial lines. ;; Test that we don't instrument those functions. +; RUN: mkdir -p %t && cd %t ; RUN: opt -S -insert-gcov-profiling < %s | FileCheck %s ; RUN: opt -S -passes=insert-gcov-profiling < %s | FileCheck %s diff --git a/llvm/test/Transforms/InstCombine/fortify-folding.ll b/llvm/test/Transforms/InstCombine/fortify-folding.ll index b2171a44f57ef8..2602640595e65e 100644 --- a/llvm/test/Transforms/InstCombine/fortify-folding.ll +++ b/llvm/test/Transforms/InstCombine/fortify-folding.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -instcombine -S | FileCheck %s --dump-input-on-failure +; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll new file mode 100644 index 00000000000000..5eac0e09414ca5 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>) +declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>) +declare void @use_f32(float) + +define float @diff_of_sums_v4f32(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { +; CHECK-LABEL: @diff_of_sums_v4f32( +; CHECK-NEXT: [[R0:%.*]] = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) +; CHECK-NEXT: [[R:%.*]] = fsub reassoc nsz float [[R0]], [[R1]] +; CHECK-NEXT: ret float [[R]] +; + %r0 = call 
float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a1, <4 x float> %v1) + %r = fsub reassoc nsz float %r0, %r1 + ret float %r +} + +define float @diff_of_sums_v4f32_fmf(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { +; CHECK-LABEL: @diff_of_sums_v4f32_fmf( +; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) +; CHECK-NEXT: [[R:%.*]] = fsub nnan ninf nsz float [[R0]], [[R1]] +; CHECK-NEXT: ret float [[R]] +; + %r0 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a1, <4 x float> %v1) + %r = fsub ninf nnan nsz float %r0, %r1 + ret float %r +} + +define float @diff_of_sums_extra_use1(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { +; CHECK-LABEL: @diff_of_sums_extra_use1( +; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: call void @use_f32(float [[R0]]) +; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) +; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]] +; CHECK-NEXT: ret float [[R]] +; + %r0 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) + call void @use_f32(float %r0) + %r1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a1, <4 x float> %v1) + %r = fsub fast float %r0, %r1 + ret float %r +} + +define float @diff_of_sums_extra_use2(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { +; CHECK-LABEL: 
@diff_of_sums_extra_use2( +; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) +; CHECK-NEXT: call void @use_f32(float [[R1]]) +; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]] +; CHECK-NEXT: ret float [[R]] +; + %r0 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a1, <4 x float> %v1) + call void @use_f32(float %r1) + %r = fsub fast float %r0, %r1 + ret float %r +} + +define float @diff_of_sums_type_mismatch(float %a0, <4 x float> %v0, float %a1, <8 x float> %v1) { +; CHECK-LABEL: @diff_of_sums_type_mismatch( +; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float [[A1:%.*]], <8 x float> [[V1:%.*]]) +; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]] +; CHECK-NEXT: ret float [[R]] +; + %r0 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float %a1, <8 x float> %v1) + %r = fsub fast float %r0, %r1 + ret float %r +} diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll index 835a6dad32061f..d77abbd39f0d28 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s ; 
RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S | FileCheck %s -check-prefix=VF2UF2 +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -S | FileCheck %s -check-prefix=VF1UF4 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" @@ -132,6 +133,63 @@ define void @pr45679(i32* %A) optsize { ; VF2UF2: exit: ; VF2UF2-NEXT: ret void ; +; VF1UF4-LABEL: @pr45679( +; VF1UF4-NEXT: entry: +; VF1UF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF1UF4: vector.ph: +; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]] +; VF1UF4: vector.body: +; VF1UF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ] +; VF1UF4-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0 +; VF1UF4-NEXT: [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1 +; VF1UF4-NEXT: [[INDUCTION2:%.*]] = add i32 [[INDEX]], 2 +; VF1UF4-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 3 +; VF1UF4-NEXT: [[TMP0:%.*]] = icmp ule i32 [[INDUCTION]], 13 +; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i32 [[INDUCTION1]], 13 +; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i32 [[INDUCTION2]], 13 +; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i32 [[INDUCTION3]], 13 +; VF1UF4-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; VF1UF4: pred.store.if: +; VF1UF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDUCTION]] +; VF1UF4-NEXT: store i32 13, i32* [[TMP4]], align 1 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]] +; VF1UF4: pred.store.continue: +; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] +; VF1UF4: pred.store.if4: +; VF1UF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION1]] +; VF1UF4-NEXT: store i32 13, i32* [[TMP5]], align 1 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE5]] +; VF1UF4: pred.store.continue5: +; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF6:%.*]], label 
[[PRED_STORE_CONTINUE7:%.*]] +; VF1UF4: pred.store.if6: +; VF1UF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION2]] +; VF1UF4-NEXT: store i32 13, i32* [[TMP6]], align 1 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE7]] +; VF1UF4: pred.store.continue7: +; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] +; VF1UF4: pred.store.if8: +; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION3]] +; VF1UF4-NEXT: store i32 13, i32* [[TMP7]], align 1 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE9]] +; VF1UF4: pred.store.continue9: +; VF1UF4-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; VF1UF4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 +; VF1UF4-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]] +; VF1UF4: middle.block: +; VF1UF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; VF1UF4: scalar.ph: +; VF1UF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VF1UF4-NEXT: br label [[LOOP:%.*]] +; VF1UF4: loop: +; VF1UF4-NEXT: [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] +; VF1UF4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[RIV]] +; VF1UF4-NEXT: store i32 13, i32* [[ARRAYIDX]], align 1 +; VF1UF4-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 +; VF1UF4-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14 +; VF1UF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]] +; VF1UF4: exit: +; VF1UF4-NEXT: ret void +; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll index 2973a4425a5d4f..973d7013837a6e 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have 
been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -loop-vectorize -force-vector-interleave=4 -pass-remarks='loop-vectorize' -disable-output -S 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS ; RUN: opt < %s -loop-vectorize -force-vector-interleave=4 -S | FileCheck %s -; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -S | FileCheck %s --check-prefix=CHECK-VF1 ; These tests are to check that fold-tail procedure produces correct scalar code when ; loop-vectorization is only unrolling but not vectorizing. @@ -110,80 +109,3 @@ for.body: %cond = icmp eq double* %ptr, %ptr2 br i1 %cond, label %for.cond.cleanup, label %for.body } - -; The following testcase is extended from the test of https://reviews.llvm.org/D80085 -; Similar to two tests above, it is to check that fold-tail procedure produces correct scalar code when -; loop-vectorization is only unrolling but not vectorizing. - -define void @pr45679(i32* %A) optsize { -; CHECK-VF1-LABEL: @pr45679 -; CHECK-VF1-NEXT: entry: -; CHECK-VF1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK-VF1: vector.ph: -; CHECK-VF1-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK-VF1: vector.body: -; CHECK-VF1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ] -; CHECK-VF1-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0 -; CHECK-VF1-NEXT: [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1 -; CHECK-VF1-NEXT: [[INDUCTION2:%.*]] = add i32 [[INDEX]], 2 -; CHECK-VF1-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 3 -; CHECK-VF1-NEXT: [[TMP0:%.*]] = icmp ule i32 [[INDUCTION]], 13 -; CHECK-VF1-NEXT: [[TMP1:%.*]] = icmp ule i32 [[INDUCTION1]], 13 -; CHECK-VF1-NEXT: [[TMP2:%.*]] = icmp ule i32 [[INDUCTION2]], 13 -; CHECK-VF1-NEXT: [[TMP3:%.*]] = icmp ule i32 [[INDUCTION3]], 13 -; CHECK-VF1-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; CHECK-VF1: pred.store.if: -; CHECK-VF1-NEXT: [[TMP4:%.*]] = 
getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDUCTION]] -; CHECK-VF1-NEXT: store i32 13, i32* [[TMP4]], align 1 -; CHECK-VF1-NEXT: br label [[PRED_STORE_CONTINUE]] -; CHECK-VF1: pred.store.continue: -; CHECK-VF1-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] -; CHECK-VF1: pred.store.if4: -; CHECK-VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION1]] -; CHECK-VF1-NEXT: store i32 13, i32* [[TMP5]], align 1 -; CHECK-VF1-NEXT: br label [[PRED_STORE_CONTINUE5]] -; CHECK-VF1: pred.store.continue5: -; CHECK-VF1-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] -; CHECK-VF1: pred.store.if6: -; CHECK-VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION2]] -; CHECK-VF1-NEXT: store i32 13, i32* [[TMP6]], align 1 -; CHECK-VF1-NEXT: br label [[PRED_STORE_CONTINUE7]] -; CHECK-VF1: pred.store.continue7: -; CHECK-VF1-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] -; CHECK-VF1: pred.store.if8: -; CHECK-VF1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION3]] -; CHECK-VF1-NEXT: store i32 13, i32* [[TMP7]], align 1 -; CHECK-VF1-NEXT: br label [[PRED_STORE_CONTINUE9]] -; CHECK-VF1: pred.store.continue9: -; CHECK-VF1-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-VF1-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; CHECK-VF1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]] -; CHECK-VF1: middle.block: -; CHECK-VF1-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK-VF1: scalar.ph: -; CHECK-VF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-VF1-NEXT: br label [[LOOP:%.*]] -; CHECK-VF1: loop: -; CHECK-VF1-NEXT: [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] -; CHECK-VF1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[RIV]] -; 
CHECK-VF1-NEXT: store i32 13, i32* [[ARRAYIDX]], align 1 -; CHECK-VF1-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 -; CHECK-VF1-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14 -; CHECK-VF1-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]] -; CHECK-VF1: exit: -; CHECK-VF1-NEXT: ret void -; -entry: - br label %loop - -loop: - %riv = phi i32 [ 0, %entry ], [ %rivPlus1, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %A, i32 %riv - store i32 13, i32* %arrayidx, align 1 - %rivPlus1 = add nuw nsw i32 %riv, 1 - %cond = icmp eq i32 %rivPlus1, 14 - br i1 %cond, label %exit, label %loop - -exit: - ret void -} diff --git a/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll b/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll index 3b341f6a5b7a55..0d99654be52893 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll @@ -12,31 +12,24 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define <4 x i32> @square(<4 x i32> %num, i32 %y, i32 %x, i32 %h, i32 %k, i32 %w, i32 %p, i32 %j, i32 %u) { ; CHECK-LABEL: @square( ; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[K:%.*]], 2 -; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[DIV]], i32 0 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[P:%.*]], 6234 -; CHECK-NEXT: [[SPLATINSERT2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL]], i32 0 ; CHECK-NEXT: [[MUL5:%.*]] = mul nsw i32 [[H:%.*]], 75 -; CHECK-NEXT: [[SPLATINSERT6:%.*]] = insertelement <4 x i32> undef, i32 [[MUL5]], i32 0 ; CHECK-NEXT: [[DIV9:%.*]] = sdiv i32 [[J:%.*]], 3452 -; CHECK-NEXT: [[SPLATINSERT10:%.*]] = insertelement <4 x i32> undef, i32 [[DIV9]], i32 0 ; CHECK-NEXT: [[MUL13:%.*]] = mul nsw i32 [[W:%.*]], 53 -; CHECK-NEXT: [[SPLATINSERT14:%.*]] = insertelement <4 x i32> undef, i32 [[MUL13]], i32 0 ; CHECK-NEXT: [[DIV17:%.*]] = sdiv i32 [[X:%.*]], 820 -; CHECK-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> undef, i32 [[DIV17]], i32 0 ; 
CHECK-NEXT: [[MUL21:%.*]] = shl nsw i32 [[U:%.*]], 2 -; CHECK-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> undef, i32 [[MUL21]], i32 0 -; CHECK-NEXT: [[SPLATINSERT25:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SPLATINSERT25]], -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[SPLATINSERT18]] -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[SPLATINSERT6]] -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[SPLATINSERT]] -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[SPLATINSERT14]] -; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[SPLATINSERT2]] -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], [[SPLATINSERT10]] -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[SPLATINSERT22]] -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[ADD29:%.*]] = add <4 x i32> [[TMP10]], [[NUM:%.*]] +; CHECK-NEXT: [[DOTSCALAR:%.*]] = add i32 [[Y:%.*]], 1 +; CHECK-NEXT: [[DOTSCALAR1:%.*]] = add i32 [[DOTSCALAR]], [[DIV17]] +; CHECK-NEXT: [[DOTSCALAR2:%.*]] = add i32 [[DOTSCALAR1]], [[MUL5]] +; CHECK-NEXT: [[DOTSCALAR3:%.*]] = add i32 [[DOTSCALAR2]], [[DIV]] +; CHECK-NEXT: [[DOTSCALAR4:%.*]] = add i32 [[DOTSCALAR3]], [[MUL13]] +; CHECK-NEXT: [[DOTSCALAR5:%.*]] = add i32 [[DOTSCALAR4]], [[MUL]] +; CHECK-NEXT: [[DOTSCALAR6:%.*]] = add i32 [[DOTSCALAR5]], [[DIV9]] +; CHECK-NEXT: [[DOTSCALAR7:%.*]] = add i32 [[DOTSCALAR6]], [[MUL21]] +; CHECK-NEXT: [[DOTSCALAR8:%.*]] = add i32 [[DOTSCALAR7]], 317425 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[DOTSCALAR8]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[ADD29:%.*]] = add <4 x i32> [[TMP2]], [[NUM:%.*]] ; CHECK-NEXT: ret <4 x i32> [[ADD29]] ; %add = add <4 x i32> %num, diff --git 
a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll index e0d4623f505f70..a400e8f42907cd 100644 --- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll +++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll @@ -4,8 +4,8 @@ define <2 x i64> @add_constant(i64 %x) { ; CHECK-LABEL: @add_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -15,8 +15,8 @@ define <2 x i64> @add_constant(i64 %x) { define <2 x i64> @add_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @add_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -41,8 +41,8 @@ define <2 x i64> @add_constant_load(i64* %p) { define <4 x i32> @sub_constant_op0(i32 %x) { ; CHECK-LABEL: @sub_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = sub nuw nsw <4 x i32> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sub nuw nsw i32 -42, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <4 x i32> undef, i32 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <4 x i32> [[BO]] ; %ins = insertelement <4 x i32> undef, i32 %x, i32 1 @@ -52,8 +52,8 @@ define <4 x i32> @sub_constant_op0(i32 %x) { define <4 x i32> @sub_constant_op0_not_undef_lane(i32 %x) { ; CHECK-LABEL: 
@sub_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = sub nuw <4 x i32> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sub nuw i32 42, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <4 x i32> undef, i32 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <4 x i32> [[BO]] ; %ins = insertelement <4 x i32> undef, i32 %x, i32 1 @@ -63,8 +63,8 @@ define <4 x i32> @sub_constant_op0_not_undef_lane(i32 %x) { define <8 x i16> @sub_constant_op1(i16 %x) { ; CHECK-LABEL: @sub_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <8 x i16> undef, i16 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = sub nuw <8 x i16> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sub nuw i16 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <8 x i16> undef, i16 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <8 x i16> [[BO]] ; %ins = insertelement <8 x i16> undef, i16 %x, i32 0 @@ -74,8 +74,8 @@ define <8 x i16> @sub_constant_op1(i16 %x) { define <8 x i16> @sub_constant_op1_not_undef_lane(i16 %x) { ; CHECK-LABEL: @sub_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <8 x i16> undef, i16 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = sub nuw <8 x i16> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sub nuw i16 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <8 x i16> undef, i16 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <8 x i16> [[BO]] ; %ins = insertelement <8 x i16> undef, i16 %x, i32 0 @@ -85,8 +85,8 @@ define <8 x i16> @sub_constant_op1_not_undef_lane(i16 %x) { define <16 x i8> @mul_constant(i8 %x) { ; CHECK-LABEL: @mul_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <16 x i8> undef, i8 [[X:%.*]], i32 2 -; CHECK-NEXT: [[BO:%.*]] = mul <16 x i8> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = mul i8 [[X:%.*]], -42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <16 x i8> , i8 [[BO_SCALAR]], i64 2 ; CHECK-NEXT: ret <16 x i8> [[BO]] ; %ins = insertelement <16 x i8> undef, i8 %x, i32 2 @@ -96,8 +96,8 @@ 
define <16 x i8> @mul_constant(i8 %x) { define <3 x i64> @mul_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @mul_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <3 x i64> undef, i64 [[X:%.*]], i32 2 -; CHECK-NEXT: [[BO:%.*]] = mul <3 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = mul i64 [[X:%.*]], -42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <3 x i64> , i64 [[BO_SCALAR]], i64 2 ; CHECK-NEXT: ret <3 x i64> [[BO]] ; %ins = insertelement <3 x i64> undef, i64 %x, i32 2 @@ -106,12 +106,20 @@ define <3 x i64> @mul_constant_not_undef_lane(i64 %x) { } define <16 x i8> @mul_constant_multiuse(i8 %a0, <16 x i8> %a1) { -; CHECK-LABEL: @mul_constant_multiuse( -; CHECK-NEXT: [[INS:%.*]] = insertelement <16 x i8> , i8 [[A0:%.*]], i32 0 -; CHECK-NEXT: [[MUL:%.*]] = mul <16 x i8> [[INS]], -; CHECK-NEXT: [[AND:%.*]] = and <16 x i8> [[INS]], [[A1:%.*]] -; CHECK-NEXT: [[XOR:%.*]] = xor <16 x i8> [[AND]], [[MUL]] -; CHECK-NEXT: ret <16 x i8> [[XOR]] +; SSE-LABEL: @mul_constant_multiuse( +; SSE-NEXT: [[INS:%.*]] = insertelement <16 x i8> , i8 [[A0:%.*]], i32 0 +; SSE-NEXT: [[MUL:%.*]] = mul <16 x i8> [[INS]], +; SSE-NEXT: [[AND:%.*]] = and <16 x i8> [[INS]], [[A1:%.*]] +; SSE-NEXT: [[XOR:%.*]] = xor <16 x i8> [[AND]], [[MUL]] +; SSE-NEXT: ret <16 x i8> [[XOR]] +; +; AVX-LABEL: @mul_constant_multiuse( +; AVX-NEXT: [[INS:%.*]] = insertelement <16 x i8> , i8 [[A0:%.*]], i32 0 +; AVX-NEXT: [[MUL_SCALAR:%.*]] = mul i8 [[A0]], 3 +; AVX-NEXT: [[MUL:%.*]] = insertelement <16 x i8> , i8 [[MUL_SCALAR]], i64 0 +; AVX-NEXT: [[AND:%.*]] = and <16 x i8> [[INS]], [[A1:%.*]] +; AVX-NEXT: [[XOR:%.*]] = xor <16 x i8> [[AND]], [[MUL]] +; AVX-NEXT: ret <16 x i8> [[XOR]] ; %ins = insertelement <16 x i8> , i8 %a0, i32 0 %mul = mul <16 x i8> %ins, @@ -122,8 +130,8 @@ define <16 x i8> @mul_constant_multiuse(i8 %a0, <16 x i8> %a1) { define <2 x i64> @shl_constant_op0(i64 %x) { ; CHECK-LABEL: @shl_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 
[[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -133,8 +141,8 @@ define <2 x i64> @shl_constant_op0(i64 %x) { define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @shl_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -156,12 +164,20 @@ define <2 x i64> @shl_constant_op0_load(i64* %p) { } define <4 x i32> @shl_constant_op0_multiuse(i32 %a0, <4 x i32> %a1) { -; CHECK-LABEL: @shl_constant_op0_multiuse( -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i32> , i32 [[A0:%.*]], i32 0 -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INS]], -; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[INS]], [[A1:%.*]] -; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i32> [[AND]], [[MUL]] -; CHECK-NEXT: ret <4 x i32> [[XOR]] +; SSE-LABEL: @shl_constant_op0_multiuse( +; SSE-NEXT: [[INS:%.*]] = insertelement <4 x i32> , i32 [[A0:%.*]], i32 0 +; SSE-NEXT: [[MUL_SCALAR:%.*]] = shl i32 [[A0]], 3 +; SSE-NEXT: [[MUL:%.*]] = insertelement <4 x i32> , i32 [[MUL_SCALAR]], i64 0 +; SSE-NEXT: [[AND:%.*]] = and <4 x i32> [[INS]], [[A1:%.*]] +; SSE-NEXT: [[XOR:%.*]] = xor <4 x i32> [[AND]], [[MUL]] +; SSE-NEXT: ret <4 x i32> [[XOR]] +; +; AVX-LABEL: @shl_constant_op0_multiuse( +; AVX-NEXT: [[INS:%.*]] = insertelement <4 x i32> , i32 [[A0:%.*]], i32 0 +; AVX-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INS]], +; AVX-NEXT: [[AND:%.*]] = and <4 x i32> [[INS]], [[A1:%.*]] +; AVX-NEXT: [[XOR:%.*]] = xor <4 x i32> [[AND]], 
[[MUL]] +; AVX-NEXT: ret <4 x i32> [[XOR]] ; %ins = insertelement <4 x i32> , i32 %a0, i32 0 %mul = shl <4 x i32> %ins, @@ -172,8 +188,8 @@ define <4 x i32> @shl_constant_op0_multiuse(i32 %a0, <4 x i32> %a1) { define <2 x i64> @shl_constant_op1(i64 %x) { ; CHECK-LABEL: @shl_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[X:%.*]], 5 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -183,8 +199,8 @@ define <2 x i64> @shl_constant_op1(i64 %x) { define <2 x i64> @shl_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @shl_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[X:%.*]], 5 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -207,8 +223,8 @@ define <2 x i64> @shl_constant_op1_load(i64* %p) { define <2 x i64> @ashr_constant_op0(i64 %x) { ; CHECK-LABEL: @ashr_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = ashr exact <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr exact i64 2, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -218,8 +234,8 @@ define <2 x i64> @ashr_constant_op0(i64 %x) { define <2 x i64> @ashr_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @ashr_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; 
CHECK-NEXT: [[BO:%.*]] = ashr exact <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr exact i64 2, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -229,8 +245,8 @@ define <2 x i64> @ashr_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @ashr_constant_op1(i64 %x) { ; CHECK-LABEL: @ashr_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = ashr <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr i64 [[X:%.*]], 5 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -240,8 +256,8 @@ define <2 x i64> @ashr_constant_op1(i64 %x) { define <2 x i64> @ashr_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @ashr_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = ashr <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr i64 [[X:%.*]], 5 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -251,8 +267,8 @@ define <2 x i64> @ashr_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @lshr_constant_op0(i64 %x) { ; CHECK-LABEL: @lshr_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = lshr <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -262,8 +278,8 @@ define <2 x i64> @lshr_constant_op0(i64 %x) { define <2 x i64> 
@lshr_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @lshr_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = lshr <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -273,8 +289,8 @@ define <2 x i64> @lshr_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @lshr_constant_op1(i64 %x) { ; CHECK-LABEL: @lshr_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = lshr exact <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr exact i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -284,8 +300,8 @@ define <2 x i64> @lshr_constant_op1(i64 %x) { define <2 x i64> @lshr_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @lshr_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = lshr exact <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr exact i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -295,8 +311,8 @@ define <2 x i64> @lshr_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @urem_constant_op0(i64 %x) { ; CHECK-LABEL: @urem_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = urem <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; 
CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -306,8 +322,8 @@ define <2 x i64> @urem_constant_op0(i64 %x) { define <2 x i64> @urem_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @urem_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = urem <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -317,8 +333,8 @@ define <2 x i64> @urem_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @urem_constant_op1(i64 %x) { ; CHECK-LABEL: @urem_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = urem <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -328,8 +344,8 @@ define <2 x i64> @urem_constant_op1(i64 %x) { define <2 x i64> @urem_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @urem_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = urem <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -339,8 +355,8 @@ define <2 x i64> @urem_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @srem_constant_op0(i64 %x) { ; CHECK-LABEL: @srem_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = srem <2 x i64> , [[INS]] +; 
CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -350,8 +366,8 @@ define <2 x i64> @srem_constant_op0(i64 %x) { define <2 x i64> @srem_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @srem_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = srem <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -361,8 +377,8 @@ define <2 x i64> @srem_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @srem_constant_op1(i64 %x) { ; CHECK-LABEL: @srem_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = srem <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -372,8 +388,8 @@ define <2 x i64> @srem_constant_op1(i64 %x) { define <2 x i64> @srem_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @srem_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = srem <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -383,8 +399,8 @@ define <2 x i64> @srem_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @udiv_constant_op0(i64 %x) { ; CHECK-LABEL: @udiv_constant_op0( 
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = udiv exact <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv exact i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -394,8 +410,8 @@ define <2 x i64> @udiv_constant_op0(i64 %x) { define <2 x i64> @udiv_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @udiv_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = udiv exact <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv exact i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -405,8 +421,8 @@ define <2 x i64> @udiv_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @udiv_constant_op1(i64 %x) { ; CHECK-LABEL: @udiv_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -416,8 +432,8 @@ define <2 x i64> @udiv_constant_op1(i64 %x) { define <2 x i64> @udiv_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @udiv_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 
@@ -427,8 +443,8 @@ define <2 x i64> @udiv_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @sdiv_constant_op0(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = sdiv <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -438,8 +454,8 @@ define <2 x i64> @sdiv_constant_op0(i64 %x) { define <2 x i64> @sdiv_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = sdiv <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -449,8 +465,8 @@ define <2 x i64> @sdiv_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @sdiv_constant_op1(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = sdiv exact <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv exact i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -460,8 +476,8 @@ define <2 x i64> @sdiv_constant_op1(i64 %x) { define <2 x i64> @sdiv_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = sdiv exact <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv exact i64 [[X:%.*]], 2 +; CHECK-NEXT: 
[[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -471,8 +487,8 @@ define <2 x i64> @sdiv_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @and_constant(i64 %x) { ; CHECK-LABEL: @and_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -482,8 +498,8 @@ define <2 x i64> @and_constant(i64 %x) { define <2 x i64> @and_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @and_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -493,8 +509,8 @@ define <2 x i64> @and_constant_not_undef_lane(i64 %x) { define <2 x i64> @or_constant(i64 %x) { ; CHECK-LABEL: @or_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = or <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = or i64 [[X:%.*]], -42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -504,8 +520,8 @@ define <2 x i64> @or_constant(i64 %x) { define <2 x i64> @or_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @or_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = or <2 x i64> 
[[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = or i64 [[X:%.*]], -42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -515,8 +531,8 @@ define <2 x i64> @or_constant_not_undef_lane(i64 %x) { define <2 x i64> @xor_constant(i64 %x) { ; CHECK-LABEL: @xor_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -526,8 +542,8 @@ define <2 x i64> @xor_constant(i64 %x) { define <2 x i64> @xor_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @xor_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -537,8 +553,8 @@ define <2 x i64> @xor_constant_not_undef_lane(i64 %x) { define <2 x double> @fadd_constant(double %x) { ; CHECK-LABEL: @fadd_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fadd <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fadd double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -548,8 +564,8 @@ define <2 x double> @fadd_constant(double %x) { define <2 x double> @fadd_constant_not_undef_lane(double %x) { ; CHECK-LABEL: @fadd_constant_not_undef_lane( -; 
CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = fadd <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fadd double [[X:%.*]], -4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -559,8 +575,8 @@ define <2 x double> @fadd_constant_not_undef_lane(double %x) { define <2 x double> @fsub_constant_op0(double %x) { ; CHECK-LABEL: @fsub_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fsub fast <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub fast double 4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -570,8 +586,8 @@ define <2 x double> @fsub_constant_op0(double %x) { define <2 x double> @fsub_constant_op0_not_undef_lane(double %x) { ; CHECK-LABEL: @fsub_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = fsub nsz <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub nsz double -4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -581,8 +597,8 @@ define <2 x double> @fsub_constant_op0_not_undef_lane(double %x) { define <2 x double> @fsub_constant_op1(double %x) { ; CHECK-LABEL: @fsub_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = fsub <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x 
double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -592,8 +608,8 @@ define <2 x double> @fsub_constant_op1(double %x) { define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) { ; CHECK-LABEL: @fsub_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fsub <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -603,8 +619,8 @@ define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) { define <2 x double> @fmul_constant(double %x) { ; CHECK-LABEL: @fmul_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fmul reassoc <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fmul reassoc double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -614,8 +630,8 @@ define <2 x double> @fmul_constant(double %x) { define <2 x double> @fmul_constant_not_undef_lane(double %x) { ; CHECK-LABEL: @fmul_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = fmul <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fmul double [[X:%.*]], -4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -625,8 +641,8 @@ define <2 x double> @fmul_constant_not_undef_lane(double %x) { define <2 x double> 
@fdiv_constant_op0(double %x) { ; CHECK-LABEL: @fdiv_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = fdiv nnan <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv nnan double 4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -636,8 +652,8 @@ define <2 x double> @fdiv_constant_op0(double %x) { define <2 x double> @fdiv_constant_op0_not_undef_lane(double %x) { ; CHECK-LABEL: @fdiv_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fdiv ninf <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv ninf double 4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -647,8 +663,8 @@ define <2 x double> @fdiv_constant_op0_not_undef_lane(double %x) { define <2 x double> @fdiv_constant_op1(double %x) { ; CHECK-LABEL: @fdiv_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fdiv <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -658,8 +674,8 @@ define <2 x double> @fdiv_constant_op1(double %x) { define <2 x double> @fdiv_constant_op1_not_undef_lane(double %x) { ; CHECK-LABEL: @fdiv_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fdiv <2 x double> [[INS]], +; CHECK-NEXT: 
[[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -669,8 +685,8 @@ define <2 x double> @fdiv_constant_op1_not_undef_lane(double %x) { define <2 x double> @frem_constant_op0(double %x) { ; CHECK-LABEL: @frem_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = frem fast <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = frem fast double 4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -680,8 +696,8 @@ define <2 x double> @frem_constant_op0(double %x) { define <2 x double> @frem_constant_op0_not_undef_lane(double %x) { ; CHECK-LABEL: @frem_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = frem <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = frem double -4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -691,8 +707,8 @@ define <2 x double> @frem_constant_op0_not_undef_lane(double %x) { define <2 x double> @frem_constant_op1(double %x) { ; CHECK-LABEL: @frem_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = frem ninf <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = frem ninf double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -702,8 
+718,8 @@ define <2 x double> @frem_constant_op1(double %x) { define <2 x double> @frem_constant_op1_not_undef_lane(double %x) { ; CHECK-LABEL: @frem_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = frem nnan <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = frem nnan double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 93f4f703d239b7..2cfab39d456228 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -176,6 +176,8 @@ TEST_F(AArch64GISelMITest, LowerBitCountingCTTZ2) { AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); + + B.setInsertPt(*EntryMBB, MIBCTTZ->getIterator()); EXPECT_TRUE(Helper.lower(*MIBCTTZ, 0, LLT::scalar(64)) == LegalizerHelper::LegalizeResult::Legalized); @@ -2583,6 +2585,7 @@ TEST_F(AArch64GISelMITest, BitcastLoad) { AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; + B.setInsertPt(*EntryMBB, Load->getIterator()); LegalizerHelper Helper(*MF, Info, Observer, B); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*Load, 0, S32)); @@ -2618,6 +2621,7 @@ TEST_F(AArch64GISelMITest, BitcastStore) { AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); + B.setInsertPt(*EntryMBB, Store->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*Store, 0, S32)); @@ -2651,6 +2655,7 @@ TEST_F(AArch64GISelMITest, BitcastSelect) { AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, 
Info, Observer, B); + B.setInsertPt(*EntryMBB, Select->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*Select, 0, S32)); @@ -2669,6 +2674,8 @@ TEST_F(AArch64GISelMITest, BitcastSelect) { // Doesn't make sense auto VCond = B.buildUndef(LLT::vector(4, 1)); auto VSelect = B.buildSelect(V4S8, VCond, Val0, Val1); + + B.setInsertPt(*EntryMBB, VSelect->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::UnableToLegalize, Helper.bitcast(*VSelect, 0, S32)); EXPECT_EQ(LegalizerHelper::LegalizeResult::UnableToLegalize, @@ -2694,10 +2701,15 @@ TEST_F(AArch64GISelMITest, BitcastBitOps) { AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); + B.setInsertPt(*EntryMBB, And->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*And, 0, S32)); + + B.setInsertPt(*EntryMBB, Or->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*Or, 0, S32)); + + B.setInsertPt(*EntryMBB, Xor->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*Xor, 0, S32)); @@ -2773,12 +2785,20 @@ TEST_F(AArch64GISelMITest, NarrowImplicitDef) { LegalizerHelper Helper(*MF, Info, Observer, B); // Perform Legalization + + B.setInsertPt(*EntryMBB, Implicit1->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*Implicit1, 0, S48)); + + B.setInsertPt(*EntryMBB, Implicit2->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*Implicit2, 0, S32)); + + B.setInsertPt(*EntryMBB, Implicit3->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*Implicit3, 0, S48)); + + B.setInsertPt(*EntryMBB, Implicit4->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*Implicit4, 0, S32)); @@ -2828,8 +2848,12 @@ TEST_F(AArch64GISelMITest, WidenFreeze) { LegalizerHelper Helper(*MF, Info, Observer, 
B); // Perform Legalization + + B.setInsertPt(*EntryMBB, FreezeScalar->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.widenScalar(*FreezeScalar, 0, S128)); + + B.setInsertPt(*EntryMBB, FreezeVector->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.widenScalar(*FreezeVector, 0, V2S64)); @@ -2879,12 +2903,20 @@ TEST_F(AArch64GISelMITest, NarrowFreeze) { LegalizerHelper Helper(*MF, Info, Observer, B); // Perform Legalization + + B.setInsertPt(*EntryMBB, FreezeScalar->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*FreezeScalar, 0, S32)); + + B.setInsertPt(*EntryMBB, FreezeOdd->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*FreezeOdd, 0, S32)); + + B.setInsertPt(*EntryMBB, FreezeVector->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*FreezeVector, 0, V2S16)); + + B.setInsertPt(*EntryMBB, FreezeVector1->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*FreezeVector1, 0, S16)); @@ -2954,8 +2986,12 @@ TEST_F(AArch64GISelMITest, FewerElementsFreeze) { LegalizerHelper Helper(*MF, Info, Observer, B); // Perform Legalization + + B.setInsertPt(*EntryMBB, FreezeVector1->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.fewerElementsVector(*FreezeVector1, 0, S32)); + + B.setInsertPt(*EntryMBB, FreezeVector2->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.fewerElementsVector(*FreezeVector2, 0, V2S16)); @@ -2998,6 +3034,7 @@ TEST_F(AArch64GISelMITest, MoreElementsFreeze) { LegalizerHelper Helper(*MF, Info, Observer, B); // Perform Legalization + B.setInsertPt(*EntryMBB, FreezeVector1->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.moreElementsVector(*FreezeVector1, 0, V4S32)); diff --git a/llvm/utils/FileCheck/FileCheck.cpp b/llvm/utils/FileCheck/FileCheck.cpp index 
3ee7c5a66a908f..8e41365de81a84 100644 --- a/llvm/utils/FileCheck/FileCheck.cpp +++ b/llvm/utils/FileCheck/FileCheck.cpp @@ -106,15 +106,6 @@ static cl::opt VerboseVerbose( cl::desc("Print information helpful in diagnosing internal FileCheck\n" "issues, or add it to the input dump if enabled. Implies\n" "-v.\n")); -static const char * DumpInputEnv = "FILECHECK_DUMP_INPUT_ON_FAILURE"; - -static cl::opt DumpInputOnFailure( - "dump-input-on-failure", - cl::init(std::getenv(DumpInputEnv) && *std::getenv(DumpInputEnv)), - cl::desc("Dump original input to stderr before failing.\n" - "The value can be also controlled using\n" - "FILECHECK_DUMP_INPUT_ON_FAILURE environment variable.\n" - "This option is deprecated in favor of -dump-input=fail.\n")); // The order of DumpInputValue members affects their precedence, as documented // for -dump-input below. @@ -678,7 +669,7 @@ int main(int argc, char **argv) { SMLoc()); if (DumpInput == DumpInputDefault) - DumpInput = DumpInputOnFailure ? DumpInputFail : DumpInputNever; + DumpInput = DumpInputFail; std::vector Diags; int ExitCode = FC.checkInput(SM, InputFileText, diff --git a/llvm/utils/lit/lit/TestingConfig.py b/llvm/utils/lit/lit/TestingConfig.py index dd2d3f52f89cb1..cfc0dab86e1105 100644 --- a/llvm/utils/lit/lit/TestingConfig.py +++ b/llvm/utils/lit/lit/TestingConfig.py @@ -26,9 +26,8 @@ def fromdefaults(litConfig): 'LSAN_OPTIONS', 'ADB', 'ANDROID_SERIAL', 'SANITIZER_IGNORE_CVE_2016_2143', 'TMPDIR', 'TMP', 'TEMP', 'TEMPDIR', 'AVRLIT_BOARD', 'AVRLIT_PORT', - 'FILECHECK_DUMP_INPUT_ON_FAILURE', 'FILECHECK_OPTS', - 'VCINSTALLDIR', 'VCToolsinstallDir', 'VSINSTALLDIR', - 'WindowsSdkDir', 'WindowsSDKLibVersion'] + 'FILECHECK_OPTS', 'VCINSTALLDIR', 'VCToolsinstallDir', + 'VSINSTALLDIR', 'WindowsSdkDir', 'WindowsSDKLibVersion'] if sys.platform == 'win32': pass_vars.append('INCLUDE') diff --git a/llvm/utils/lit/tests/lit.cfg b/llvm/utils/lit/tests/lit.cfg index ba9cb4da9cfd1f..f5686873ec891d 100644 --- a/llvm/utils/lit/tests/lit.cfg 
+++ b/llvm/utils/lit/tests/lit.cfg @@ -61,7 +61,7 @@ config.substitutions.append(('%{inputs}', os.path.join( config.test_source_root, 'Inputs'))) config.substitutions.append(('%{lit}', "{env} %{{python}} {lit}".format( - env="env -u FILECHECK_OPTS -u FILECHECK_DUMP_INPUT_ON_FAILURE", + env="env -u FILECHECK_OPTS", lit=os.path.join(lit_path, 'lit.py')))) config.substitutions.append(('%{python}', '"%s"' % (sys.executable))) diff --git a/mlir/test/Analysis/test-callgraph.mlir b/mlir/test/Analysis/test-callgraph.mlir index 8c295ff248e554..bb5ba60742006c 100644 --- a/mlir/test/Analysis/test-callgraph.mlir +++ b/mlir/test/Analysis/test-callgraph.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-print-callgraph -split-input-file 2>&1 | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -test-print-callgraph -split-input-file 2>&1 | FileCheck %s // CHECK-LABEL: Testing : "simple" module attributes {test.name = "simple"} { diff --git a/mlir/test/Analysis/test-dominance.mlir b/mlir/test/Analysis/test-dominance.mlir index 6366a49a62e3ef..9430038a538f96 100644 --- a/mlir/test/Analysis/test-dominance.mlir +++ b/mlir/test/Analysis/test-dominance.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-print-dominance -split-input-file 2>&1 | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -test-print-dominance -split-input-file 2>&1 | FileCheck %s // CHECK-LABEL: Testing : func_condBranch func @func_condBranch(%cond : i1) { diff --git a/mlir/test/Analysis/test-liveness.mlir b/mlir/test/Analysis/test-liveness.mlir index 9e1329f3609b23..3beb2186afb55f 100644 --- a/mlir/test/Analysis/test-liveness.mlir +++ b/mlir/test/Analysis/test-liveness.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-print-liveness -split-input-file 2>&1 | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -test-print-liveness -split-input-file 2>&1 | FileCheck %s // CHECK-LABEL: Testing : func_empty func @func_empty() { diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir 
b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index 925615c0674e7d..20d166bab05d1a 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-gpu-to-nvvm -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -convert-gpu-to-nvvm -split-input-file | FileCheck %s gpu.module @test_module { // CHECK-LABEL: func @gpu_index_ops() diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir index 4404cebec85319..61becff83c6cfd 100644 --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s gpu.module @test_module { // CHECK-LABEL: func @gpu_index_ops() diff --git a/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir b/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir index 44f170bc43bbc9..451fcbe173da0c 100644 --- a/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir +++ b/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -convert-affine-for-to-gpu="gpu-block-dims=0 gpu-thread-dims=1" %s | FileCheck --check-prefix=CHECK-THREADS %s --dump-input-on-failure -// RUN: mlir-opt -convert-affine-for-to-gpu="gpu-block-dims=1 gpu-thread-dims=0" %s | FileCheck --check-prefix=CHECK-BLOCKS %s --dump-input-on-failure +// RUN: mlir-opt -convert-affine-for-to-gpu="gpu-block-dims=0 gpu-thread-dims=1" %s | FileCheck --check-prefix=CHECK-THREADS %s +// RUN: mlir-opt -convert-affine-for-to-gpu="gpu-block-dims=1 gpu-thread-dims=0" %s | FileCheck --check-prefix=CHECK-BLOCKS %s // CHECK-THREADS-LABEL: @one_d_loop // CHECK-BLOCKS-LABEL: @one_d_loop diff --git a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir 
b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir index 52ed94cae567e7..de19331ce91192 100644 --- a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir +++ b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -convert-parallel-loops-to-gpu -split-input-file -verify-diagnostics %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -convert-parallel-loops-to-gpu -split-input-file -verify-diagnostics %s | FileCheck %s // 2-d parallel loop mapped to block.y and block.x diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir index fd1f980c536938..7c7098d76afa86 100644 --- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir +++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file --convert-shape-to-std --verify-diagnostics %s | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt --split-input-file --convert-shape-to-std --verify-diagnostics %s | FileCheck %s // Convert `size` to `index` type. 
// CHECK-LABEL: @size_id diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir index d15f10fd75ecee..51394ab615258c 100644 --- a/mlir/test/Dialect/GPU/outlining.mlir +++ b/mlir/test/Dialect/GPU/outlining.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect -gpu-kernel-outlining -split-input-file -verify-diagnostics %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -allow-unregistered-dialect -gpu-kernel-outlining -split-input-file -verify-diagnostics %s | FileCheck %s // CHECK: module attributes {gpu.container_module} diff --git a/mlir/test/Dialect/Linalg/fusion-tensor.mlir b/mlir/test/Dialect/Linalg/fusion-tensor.mlir index 6d6a409edbd2a8..5f1f90707a6ebf 100644 --- a/mlir/test/Dialect/Linalg/fusion-tensor.mlir +++ b/mlir/test/Dialect/Linalg/fusion-tensor.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-fusion-for-tensor-ops -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -linalg-fusion-for-tensor-ops -split-input-file | FileCheck %s // CHECK-DAG: [[MAP0:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0, d1)> #map0 = affine_map<(d0, d1) -> (d0, d1)> diff --git a/mlir/test/Dialect/Linalg/fusion.mlir b/mlir/test/Dialect/Linalg/fusion.mlir index 2f472aa6aaf2d4..db47e8eea6165c 100644 --- a/mlir/test/Dialect/Linalg/fusion.mlir +++ b/mlir/test/Dialect/Linalg/fusion.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-fusion -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -linalg-fusion -split-input-file | FileCheck %s func @f1(%A: memref, %B: memref, diff --git a/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir b/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir index de16e4b50f33ac..c14db3bed1c4b6 100644 --- a/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir +++ b/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-fusion -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s 
-linalg-fusion -split-input-file | FileCheck %s #map = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)> #id_2d = affine_map<(d0, d1) -> (d0, d1)> diff --git a/mlir/test/Dialect/Linalg/parallel_loops.mlir b/mlir/test/Dialect/Linalg/parallel_loops.mlir index 2174ddc3c269da..597990eac264eb 100644 --- a/mlir/test/Dialect/Linalg/parallel_loops.mlir +++ b/mlir/test/Dialect/Linalg/parallel_loops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-linalg-to-parallel-loops -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -convert-linalg-to-parallel-loops -split-input-file | FileCheck %s #map0 = affine_map<(d0, d1) -> (d0, d1)> func @linalg_generic_sum(%lhs: memref<2x2xf32>, diff --git a/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir index ed82c93622dff5..a744d14af74a9e 100644 --- a/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir +++ b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -convert-linalg-on-tensors-to-buffers -buffer-placement -split-input-file %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -convert-linalg-on-tensors-to-buffers -buffer-placement -split-input-file %s | FileCheck %s #map0 = affine_map<(d0) -> (d0)> diff --git a/mlir/test/Dialect/Linalg/tile_conv_padding.mlir b/mlir/test/Dialect/Linalg/tile_conv_padding.mlir index 98cecc3e81e2e8..273f6491315973 100644 --- a/mlir/test/Dialect/Linalg/tile_conv_padding.mlir +++ b/mlir/test/Dialect/Linalg/tile_conv_padding.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004 --dump-input-on-failure -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2" | FileCheck %s -check-prefix=TILE-20000 --dump-input-on-failure +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004 +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2" | FileCheck %s -check-prefix=TILE-20000 
// TILE-23004-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> // TILE-20000-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> diff --git a/mlir/test/Dialect/Linalg/tile_parallel.mlir b/mlir/test/Dialect/Linalg/tile_parallel.mlir index 963051b7c7b308..18d9d2016b1d24 100644 --- a/mlir/test/Dialect/Linalg/tile_parallel.mlir +++ b/mlir/test/Dialect/Linalg/tile_parallel.mlir @@ -1,7 +1,7 @@ -// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=2" | FileCheck %s -check-prefix=TILE-2 --dump-input-on-failure -// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=0,2" | FileCheck %s -check-prefix=TILE-02 --dump-input-on-failure -// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=0,0,2" | FileCheck %s -check-prefix=TILE-002 --dump-input-on-failure -// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=2,3,4" | FileCheck %s -check-prefix=TILE-234 --dump-input-on-failure +// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=2" | FileCheck %s -check-prefix=TILE-2 +// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=0,2" | FileCheck %s -check-prefix=TILE-02 +// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=0,0,2" | FileCheck %s -check-prefix=TILE-002 +// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=2,3,4" | FileCheck %s -check-prefix=TILE-234 #id_2d = affine_map<(i, j) -> (i, j)> #pointwise_2d_trait = { diff --git a/mlir/test/Dialect/SCF/ops.mlir b/mlir/test/Dialect/SCF/ops.mlir index c21451d8cf7fdf..1058983f5fb9a4 100644 --- a/mlir/test/Dialect/SCF/ops.mlir +++ b/mlir/test/Dialect/SCF/ops.mlir @@ -1,8 +1,8 @@ -// RUN: mlir-opt %s | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s | FileCheck %s // Verify the printed output can be parsed. 
-// RUN: mlir-opt %s | mlir-opt | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s | mlir-opt | FileCheck %s // Verify the generic form can be parsed. -// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s func @std_for(%arg0 : index, %arg1 : index, %arg2 : index) { scf.for %i0 = %arg0 to %arg1 step %arg2 { diff --git a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir index 6ed5ad36819e7c..8e6769961c10c6 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(parallel-loop-fusion)' -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(parallel-loop-fusion)' -split-input-file | FileCheck %s func @fuse_empty_loops() { %c2 = constant 2 : index diff --git a/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir b/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir index 5843eb6d4134a8..d7c0f1d3074e3a 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -parallel-loop-specialization -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -parallel-loop-specialization -split-input-file | FileCheck %s #map0 = affine_map<()[s0, s1] -> (1024, s0 - s1)> #map1 = affine_map<()[s0, s1] -> (64, s0 - s1)> diff --git a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir index 7b37830e8c5db9..14912436f96b27 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s 
-pass-pipeline='func(parallel-loop-tiling{parallel-loop-tile-sizes=1,4})' -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -pass-pipeline='func(parallel-loop-tiling{parallel-loop-tile-sizes=1,4})' -split-input-file | FileCheck %s func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, diff --git a/mlir/test/Dialect/Shape/ops.mlir b/mlir/test/Dialect/Shape/ops.mlir index d25a7f01535e0e..a6668187f078df 100644 --- a/mlir/test/Dialect/Shape/ops.mlir +++ b/mlir/test/Dialect/Shape/ops.mlir @@ -1,8 +1,8 @@ -// RUN: mlir-opt -split-input-file %s | mlir-opt | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -split-input-file %s | mlir-opt | FileCheck %s // Verify the printed output can be parsed. -// RUN: mlir-opt %s | mlir-opt | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s | mlir-opt | FileCheck %s // Verify the generic form can be parsed. -// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s // CHECK-LABEL: shape_num_elements func @shape_num_elements(%shape : !shape.shape) -> !shape.size { diff --git a/mlir/test/Dialect/Shape/shape-to-shape.mlir b/mlir/test/Dialect/Shape/shape-to-shape.mlir index d2338cddc5e1c2..b3be4c9de3a1bd 100644 --- a/mlir/test/Dialect/Shape/shape-to-shape.mlir +++ b/mlir/test/Dialect/Shape/shape-to-shape.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -shape-to-shape-lowering -split-input-file %s | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -shape-to-shape-lowering -split-input-file %s | FileCheck %s // CHECK-LABEL: func @num_elements_to_reduce( // CHECK-SAME: [[ARG:%.*]]: !shape.shape) -> [[SIZE_TY:!.*]] { diff --git a/mlir/test/Dialect/Standard/expand-atomic.mlir b/mlir/test/Dialect/Standard/expand-atomic.mlir index b4e65945f58aeb..2f5cc7c179ed45 100644 --- a/mlir/test/Dialect/Standard/expand-atomic.mlir +++ 
b/mlir/test/Dialect/Standard/expand-atomic.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -expand-atomic -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -expand-atomic -split-input-file | FileCheck %s // CHECK-LABEL: func @atomic_rmw_to_generic // CHECK-SAME: ([[F:%.*]]: memref<10xf32>, [[f:%.*]]: f32, [[i:%.*]]: index) diff --git a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir index 76d7a9a0e7df0b..da784205224a6e 100644 --- a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s -test-vector-contraction-conversion | FileCheck %s --dump-input-on-failure -// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-lower-matrix-intrinsics=1 | FileCheck %s --check-prefix=MATRIX --dump-input-on-failure -// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-outerproduct=1 | FileCheck %s --check-prefix=OUTERPRODUCT --dump-input-on-failure +// RUN: mlir-opt %s -test-vector-contraction-conversion | FileCheck %s +// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-lower-matrix-intrinsics=1 | FileCheck %s --check-prefix=MATRIX +// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-outerproduct=1 | FileCheck %s --check-prefix=OUTERPRODUCT #dotp_accesses = [ affine_map<(i) -> (i)>, diff --git a/mlir/test/Dialect/Vector/vector-flat-transforms.mlir b/mlir/test/Dialect/Vector/vector-flat-transforms.mlir index e715755738de86..6a1e6ee85a7d47 100644 --- a/mlir/test/Dialect/Vector/vector-flat-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-flat-transforms.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-flat-transpose=1 | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-flat-transpose=1 | FileCheck %s // Tests for lowering 2-D vector.transpose into 
vector.flat_transpose. // diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp index b48fd99c8f7d54..4d0888e55312ab 100644 --- a/mlir/test/EDSC/builder-api-test.cpp +++ b/mlir/test/EDSC/builder-api-test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// RUN: mlir-edsc-builder-api-test | FileCheck %s -dump-input-on-failure +// RUN: mlir-edsc-builder-api-test | FileCheck %s #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" #include "mlir/Dialect/Linalg/EDSC/Builders.h" diff --git a/mlir/test/IR/print-op-local-scope.mlir b/mlir/test/IR/print-op-local-scope.mlir index 93b25fca943ebc..2ff201cf6debbf 100644 --- a/mlir/test/IR/print-op-local-scope.mlir +++ b/mlir/test/IR/print-op-local-scope.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -mlir-print-local-scope | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -allow-unregistered-dialect %s -mlir-print-local-scope | FileCheck %s // CHECK: "foo.op"() : () -> memref (d0 * 2)>> "foo.op"() : () -> (memref (2*d0)>>) diff --git a/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir b/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir index adf6e30fe6c6c8..97c96008f26910 100644 --- a/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir +++ b/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -test-buffer-placement-preparation-with-allowed-memref-results -split-input-file %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -test-buffer-placement-preparation-with-allowed-memref-results -split-input-file %s | FileCheck %s // Since allowMemrefEscaping is on for Buffer Placement in this test pass, all // tensor typed function results are converted to memref and remain as function diff --git a/mlir/test/Transforms/buffer-placement-preparation.mlir 
b/mlir/test/Transforms/buffer-placement-preparation.mlir index cae2829ead1757..9b0755aad18009 100644 --- a/mlir/test/Transforms/buffer-placement-preparation.mlir +++ b/mlir/test/Transforms/buffer-placement-preparation.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -test-buffer-placement-preparation -split-input-file %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -test-buffer-placement-preparation -split-input-file %s | FileCheck %s // CHECK-LABEL: func @func_signature_conversion func @func_signature_conversion(%arg0: tensor<4x8xf32>) { diff --git a/mlir/test/Transforms/buffer-placement.mlir b/mlir/test/Transforms/buffer-placement.mlir index 4b401cc841afee..176e063a700be0 100644 --- a/mlir/test/Transforms/buffer-placement.mlir +++ b/mlir/test/Transforms/buffer-placement.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -buffer-placement -split-input-file %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -buffer-placement -split-input-file %s | FileCheck %s // This file checks the behaviour of BufferPlacement pass for moving Alloc and Dealloc // operations and inserting the missing the DeallocOps in their correct positions. 
diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir index 6e24bb3b2d832a..f1ad305d5c87fd 100644 --- a/mlir/test/Transforms/canonicalize.mlir +++ b/mlir/test/Transforms/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(canonicalize)' -split-input-file | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(canonicalize)' -split-input-file | FileCheck %s // CHECK-LABEL: func @test_subi_zero func @test_subi_zero(%arg0: i32) -> i32 { diff --git a/mlir/test/Transforms/sccp-callgraph.mlir b/mlir/test/Transforms/sccp-callgraph.mlir index add65d9e33c5ae..c30cdf7bfb97de 100644 --- a/mlir/test/Transforms/sccp-callgraph.mlir +++ b/mlir/test/Transforms/sccp-callgraph.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -sccp -split-input-file | FileCheck %s -dump-input-on-failure -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="module(sccp)" -split-input-file | FileCheck %s --check-prefix=NESTED -dump-input-on-failure +// RUN: mlir-opt -allow-unregistered-dialect %s -sccp -split-input-file | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="module(sccp)" -split-input-file | FileCheck %s --check-prefix=NESTED /// Check that a constant is properly propagated through the arguments and /// results of a private function. 
diff --git a/mlir/test/mlir-tblgen/op-attribute.td b/mlir/test/mlir-tblgen/op-attribute.td index b4c850269a1d26..fc10d4c2d66e57 100644 --- a/mlir/test/mlir-tblgen/op-attribute.td +++ b/mlir/test/mlir-tblgen/op-attribute.td @@ -1,6 +1,6 @@ -// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s --check-prefix=DECL --dump-input-on-failure -// RUN: mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s --check-prefix=DEF --dump-input-on-failure -// RUN: mlir-tblgen -print-records -I %S/../../include %s | FileCheck %s --check-prefix=RECORD --dump-input-on-failure +// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s --check-prefix=DECL +// RUN: mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s --check-prefix=DEF +// RUN: mlir-tblgen -print-records -I %S/../../include %s | FileCheck %s --check-prefix=RECORD include "mlir/IR/OpBase.td" diff --git a/mlir/test/mlir-tblgen/op-decl.td b/mlir/test/mlir-tblgen/op-decl.td index a101103b08fc0f..655d49cbd3a7cf 100644 --- a/mlir/test/mlir-tblgen/op-decl.td +++ b/mlir/test/mlir-tblgen/op-decl.td @@ -1,4 +1,4 @@ -// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck --dump-input-on-failure %s +// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s include "mlir/IR/OpBase.td" include "mlir/Interfaces/InferTypeOpInterface.td" diff --git a/mlir/test/mlir-tblgen/op-derived-attribute.mlir b/mlir/test/mlir-tblgen/op-derived-attribute.mlir index b11df48a319c88..ec4f4dcf7dae42 100644 --- a/mlir/test/mlir-tblgen/op-derived-attribute.mlir +++ b/mlir/test/mlir-tblgen/op-derived-attribute.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -test-derived-attr -verify-diagnostics %s | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -test-derived-attr -verify-diagnostics %s | FileCheck %s // CHECK-LABEL: verifyDerivedAttributes func @verifyDerivedAttributes() { diff --git a/mlir/test/mlir-tblgen/op-format-spec.td b/mlir/test/mlir-tblgen/op-format-spec.td index 
613f3d1d482963..47255d47f8a761 100644 --- a/mlir/test/mlir-tblgen/op-format-spec.td +++ b/mlir/test/mlir-tblgen/op-format-spec.td @@ -1,4 +1,4 @@ -// RUN: mlir-tblgen -gen-op-decls -asmformat-error-is-fatal=false -I %S/../../include %s -o=%t 2>&1 | FileCheck %s --dump-input-on-failure +// RUN: mlir-tblgen -gen-op-decls -asmformat-error-is-fatal=false -I %S/../../include %s -o=%t 2>&1 | FileCheck %s // This file contains tests for the specification of the declarative op format. diff --git a/mlir/test/mlir-tblgen/op-interface.td b/mlir/test/mlir-tblgen/op-interface.td index cb53a77ac0cb36..8e5167e6fe539c 100644 --- a/mlir/test/mlir-tblgen/op-interface.td +++ b/mlir/test/mlir-tblgen/op-interface.td @@ -1,5 +1,5 @@ -// RUN: mlir-tblgen -gen-op-interface-decls -I %S/../../include %s | FileCheck %s --check-prefix=DECL --dump-input-on-failure -// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s --check-prefix=OP_DECL --dump-input-on-failure +// RUN: mlir-tblgen -gen-op-interface-decls -I %S/../../include %s | FileCheck %s --check-prefix=DECL +// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s --check-prefix=OP_DECL include "mlir/IR/OpBase.td" diff --git a/mlir/test/mlir-tblgen/pattern.mlir b/mlir/test/mlir-tblgen/pattern.mlir index 50ec1688ddcce4..6154e6bc4c4579 100644 --- a/mlir/test/mlir-tblgen/pattern.mlir +++ b/mlir/test/mlir-tblgen/pattern.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -test-patterns -mlir-print-debuginfo %s | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -test-patterns -mlir-print-debuginfo %s | FileCheck %s // CHECK-LABEL: verifyFusedLocs func @verifyFusedLocs(%arg0 : i32) -> i32 { diff --git a/mlir/test/mlir-tblgen/predicate.td b/mlir/test/mlir-tblgen/predicate.td index a617208d157a0d..040d2b6de3935e 100644 --- a/mlir/test/mlir-tblgen/predicate.td +++ b/mlir/test/mlir-tblgen/predicate.td @@ -1,4 +1,4 @@ -// RUN: mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s --dump-input-on-failure +// RUN: 
mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s include "mlir/IR/OpBase.td" diff --git a/mlir/test/mlir-tblgen/return-types.mlir b/mlir/test/mlir-tblgen/return-types.mlir index d0eb364a6a9d78..01e6294564d157 100644 --- a/mlir/test/mlir-tblgen/return-types.mlir +++ b/mlir/test/mlir-tblgen/return-types.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-return-type -split-input-file -verify-diagnostics | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -test-return-type -split-input-file -verify-diagnostics | FileCheck %s // CHECK-LABEL: testCreateFunctions // This function tests invoking the create method with different inference