diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6b746cda53c71b0..9091f6341bd9b85 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -541,6 +541,9 @@ Improvements to Clang's diagnostics - Clang emits a ``-Wparentheses`` warning for expressions with consecutive comparisons like ``x < y < z``. Fixes #GH20456. +- Clang no longer emits a "declared here" note for a builtin function that has no declaration in source. + Fixes #GH93369. + Improvements to Clang's time-trace ---------------------------------- @@ -629,6 +632,9 @@ Bug Fixes in This Version - ``__is_array`` and ``__is_bounded_array`` no longer return ``true`` for zero-sized arrays. Fixes (#GH54705). +- Correctly reject declarations where a statement is required in C. + Fixes #GH92775 + Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -803,6 +809,7 @@ Bug Fixes to C++ Support with the same parameters not to be diagnosed. (Fixes #GH93456). - Clang now diagnoses unexpanded parameter packs in attributes. (Fixes #GH93269). - Clang now allows ``@$``` in raw string literals. Fixes (#GH93130). +- Fix an assertion failure when checking invalid ``this`` usage in the wrong context. (Fixes #GH91536). Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 8493026f5f7a692..00b475e5b42824b 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -467,15 +467,18 @@ class Parser : public CodeCompletionHandler { /// Flags describing a context in which we're parsing a statement. enum class ParsedStmtContext { + /// This context permits declarations in language modes where declarations + /// are not statements. + AllowDeclarationsInC = 0x1, /// This context permits standalone OpenMP directives. - AllowStandaloneOpenMPDirectives = 0x1, + AllowStandaloneOpenMPDirectives = 0x2, /// This context is at the top level of a GNU statement expression. - InStmtExpr = 0x2, + InStmtExpr = 0x4, /// The context of a regular substatement. SubStmt = 0, /// The context of a compound-statement. - Compound = AllowStandaloneOpenMPDirectives, + Compound = AllowDeclarationsInC | AllowStandaloneOpenMPDirectives, LLVM_MARK_AS_BITMASK_ENUM(InStmtExpr) }; diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp index 8c77b563657d907..d8e33ff421c06c8 100644 --- a/clang/lib/AST/APValue.cpp +++ b/clang/lib/AST/APValue.cpp @@ -90,7 +90,7 @@ QualType APValue::LValueBase::getType() const { // For a materialized temporary, the type of the temporary we materialized // may not be the type of the expression. if (const MaterializeTemporaryExpr *MTE = - clang::dyn_cast(Base)) { + llvm::dyn_cast(Base)) { SmallVector CommaLHSs; SmallVector Adjustments; const Expr *Temp = MTE->getSubExpr(); diff --git a/clang/lib/Analysis/MacroExpansionContext.cpp b/clang/lib/Analysis/MacroExpansionContext.cpp index 564e359668a5106..b212b7f2457927c 100644 --- a/clang/lib/Analysis/MacroExpansionContext.cpp +++ b/clang/lib/Analysis/MacroExpansionContext.cpp @@ -12,7 +12,7 @@ #define DEBUG_TYPE "macro-expansion-context" -static void dumpTokenInto(const clang::Preprocessor &PP, clang::raw_ostream &OS, +static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS, clang::Token Tok); namespace clang { diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp index b0af04451166ca2..c25203243ee49bc 100644 --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -239,7 +239,15 @@ StmtResult Parser::ParseStatementOrDeclarationAfterAttributes( auto IsStmtAttr = [](ParsedAttr &Attr) { return Attr.isStmtAttr(); }; bool AllAttrsAreStmtAttrs = llvm::all_of(CXX11Attrs, IsStmtAttr) && llvm::all_of(GNUAttrs, IsStmtAttr); - if (((GNUAttributeLoc.isValid() && !(HaveAttrs && AllAttrsAreStmtAttrs)) || + // In C, the grammar production for statement (C23 6.8.1p1) does not allow + // for declarations, which is different from C++ (C++23 [stmt.pre]p1). So + // in C++, we always allow a declaration, but in C we need to check whether + // we're in a statement context that allows declarations. e.g., in C, the + // following is invalid: if (1) int x; + if ((getLangOpts().CPlusPlus || getLangOpts().MicrosoftExt || + (StmtCtx & ParsedStmtContext::AllowDeclarationsInC) != + ParsedStmtContext()) && + ((GNUAttributeLoc.isValid() && !(HaveAttrs && AllAttrsAreStmtAttrs)) || isDeclarationStatement())) { SourceLocation DeclStart = Tok.getLocation(), DeclEnd; DeclGroupPtrTy Decl; diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index d3e9dcb4f4399a6..6595abbcdda5b11 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1444,10 +1444,10 @@ bool Sema::CheckCXXThisType(SourceLocation Loc, QualType Type) { // category are defined within such member functions as they are within // an implicit object member function). DeclContext *DC = getFunctionLevelDeclContext(); - if (const auto *Method = dyn_cast(DC); - Method && Method->isExplicitObjectMemberFunction()) { + const auto *Method = dyn_cast(DC); + if (Method && Method->isExplicitObjectMemberFunction()) { Diag(Loc, diag::err_invalid_this_use) << 1; - } else if (isLambdaCallWithExplicitObjectParameter(CurContext)) { + } else if (Method && isLambdaCallWithExplicitObjectParameter(CurContext)) { Diag(Loc, diag::err_invalid_this_use) << 1; } else { Diag(Loc, diag::err_invalid_this_use) << 0; diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index ef0a655b631ab42..be6ea20a956a393 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -5897,6 +5897,16 @@ void Sema::diagnoseTypo(const TypoCorrection &Correction, NamedDecl *ChosenDecl = Correction.isKeyword() ? nullptr : Correction.getFoundDecl(); + + // For builtin functions which aren't declared anywhere in source, + // don't emit the "declared here" note. + if (const auto *FD = dyn_cast_if_present(ChosenDecl); + FD && FD->getBuiltinID() && + PrevNote.getDiagID() == diag::note_previous_decl && + Correction.getCorrectionRange().getBegin() == FD->getBeginLoc()) { + ChosenDecl = nullptr; + } + if (PrevNote.getDiagID() && ChosenDecl) Diag(ChosenDecl->getLocation(), PrevNote) << CorrectedQuotedStr << (ErrorRecovery ? FixItHint() : FixTypo); diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index 82373fe96a8243d..6f538ed55cb72e6 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -684,10 +684,8 @@ ExprResult Sema::ActOnCXXAssumeAttr(Stmt *St, const ParsedAttr &A, } if (!getLangOpts().CPlusPlus23 && - A.getSyntax() == AttributeCommonInfo::AS_CXX11) { - llvm::dbgs() << "Syntax: " << int(A.getSyntax()) << "\n"; + A.getSyntax() == AttributeCommonInfo::AS_CXX11) Diag(A.getLoc(), diag::ext_cxx23_attr) << A << Range; - } return Assumption; } diff --git a/clang/test/C/C99/block-scopes.c b/clang/test/C/C99/block-scopes.c index 589047df3e52bcb..116e5d922593e0a 100644 --- a/clang/test/C/C99/block-scopes.c +++ b/clang/test/C/C99/block-scopes.c @@ -18,8 +18,9 @@ enum {a, b}; void different(void) { - if (sizeof(enum {b, a}) != sizeof(int)) + if (sizeof(enum {b, a}) != sizeof(int)) { _Static_assert(a == 1, ""); + } /* In C89, the 'b' found here would have been from the enum declaration in * the controlling expression of the selection statement, not from the global * declaration. In C99 and later, that enumeration is scoped to the 'if' diff --git a/clang/test/Parser/decls.c b/clang/test/Parser/decls.c new file mode 100644 index 000000000000000..39ef05bf4bd9999 --- /dev/null +++ b/clang/test/Parser/decls.c @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 %s -fsyntax-only -verify -pedantic + +// Test that we can parse declarations at global scope. +int v; + +void func(void) { + // Test that we can parse declarations within a compound statement. + int a; + { + int b; + } + + int z = ({ // expected-warning {{use of GNU statement expression extension}} + // Test that we can parse declarations within a GNU statement expression. + int w = 12; + w; + }); + + // Test that we diagnose declarations where a statement is required. + // See GH92775. + if (1) + int x; // expected-error {{expected expression}} + for (;;) + int c; // expected-error {{expected expression}} + + label: + int y; // expected-warning {{label followed by a declaration is a C23 extension}} + + // Test that lookup works as expected. + (void)a; + (void)v; + (void)z; + (void)b; // expected-error {{use of undeclared identifier 'b'}} + (void)w; // expected-error {{use of undeclared identifier 'w'}} + (void)x; // expected-error {{use of undeclared identifier 'x'}} + (void)c; // expected-error {{use of undeclared identifier 'c'}} + (void)y; +} + diff --git a/clang/test/SemaCXX/invalid-if-constexpr.cpp b/clang/test/SemaCXX/invalid-if-constexpr.cpp index 7643c47488f0574..0007f2739cbbd07 100644 --- a/clang/test/SemaCXX/invalid-if-constexpr.cpp +++ b/clang/test/SemaCXX/invalid-if-constexpr.cpp @@ -4,8 +4,7 @@ namespace GH61885 { void similar() { // expected-note {{'similar' declared here}} if constexpr (similer<>) {} // expected-error {{use of undeclared identifier 'similer'; did you mean 'similar'?}} } -void a() { if constexpr (__adl_swap<>) {}} // expected-error{{use of undeclared identifier '__adl_swap'; did you mean '__sync_swap'?}} \ - // expected-note {{'__sync_swap' declared here}} +void a() { if constexpr (__adl_swap<>) {}} // expected-error{{use of undeclared identifier '__adl_swap'; did you mean '__sync_swap'?}} int AA() { return true;} // expected-note {{'AA' declared here}} diff --git a/clang/test/SemaCXX/invalid-this-in-lambda.cpp b/clang/test/SemaCXX/invalid-this-in-lambda.cpp new file mode 100644 index 000000000000000..ae65bda025e2323 --- /dev/null +++ b/clang/test/SemaCXX/invalid-this-in-lambda.cpp @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 %s + +decltype([]()->decltype(this) { }) a; // expected-error {{invalid use of 'this' outside of a non-static member function}} + diff --git a/clang/test/SemaCXX/typo-correction-builtin-func.cpp b/clang/test/SemaCXX/typo-correction-builtin-func.cpp new file mode 100644 index 000000000000000..8d369034d1be335 --- /dev/null +++ b/clang/test/SemaCXX/typo-correction-builtin-func.cpp @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +// Test that clang does not emit 'declared here' note for builtin functions that don't have a declaration in source. + +void t0() { + constexpr float A = __builtin_isinfinity(); // expected-error {{use of undeclared identifier '__builtin_isinfinity'; did you mean '__builtin_isfinite'?}} + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} +} diff --git a/clang/test/SemaOpenACC/parallel-loc-and-stmt.c b/clang/test/SemaOpenACC/parallel-loc-and-stmt.c index ba29f6da8ba25d4..bbcdd823483a52d 100644 --- a/clang/test/SemaOpenACC/parallel-loc-and-stmt.c +++ b/clang/test/SemaOpenACC/parallel-loc-and-stmt.c @@ -33,9 +33,11 @@ int foo3; void func() { // FIXME: Should we disallow this on declarations, or consider this to be on - // the initialization? + // the initialization? This is currently rejected in C because + // Parser::ParseOpenACCDirectiveStmt() calls ParseStatement() and passes the + // statement context as "SubStmt" which does not allow for a declaration in C. #pragma acc parallel - int foo; + int foo; // expected-error {{expected expression}} #pragma acc parallel { diff --git a/clang/unittests/Interpreter/IncrementalProcessingTest.cpp b/clang/unittests/Interpreter/IncrementalProcessingTest.cpp index 54159173d91e39b..f3b091b0c0e6cbb 100644 --- a/clang/unittests/Interpreter/IncrementalProcessingTest.cpp +++ b/clang/unittests/Interpreter/IncrementalProcessingTest.cpp @@ -36,14 +36,6 @@ using namespace clang; namespace { -static bool HostSupportsJit() { - auto J = llvm::orc::LLJITBuilder().create(); - if (J) - return true; - LLVMConsumeError(llvm::wrap(J.takeError())); - return false; -} - // Incremental processing produces several modules, all using the same "main // file". Make sure CodeGen can cope with that, e.g. for static initializers. const char TestProgram1[] = "extern \"C\" int funcForProg1() { return 17; }\n" diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp index 9d1612beae872e4..635ded554497bad 100644 --- a/lld/MachO/ObjC.cpp +++ b/lld/MachO/ObjC.cpp @@ -379,12 +379,21 @@ class ObjcCategoryMerger { InfoWriteSection catPtrListInfo; }; - // Information about a pointer list in the original categories (method lists, - // protocol lists, etc) + // Information about a pointer list in the original categories or class(method + // lists, protocol lists, etc) struct PointerListInfo { + PointerListInfo() = default; + PointerListInfo(const PointerListInfo &) = default; PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct) : categoryPrefix(_categoryPrefix), pointersPerStruct(_pointersPerStruct) {} + + inline bool operator==(const PointerListInfo &cmp) { + return pointersPerStruct == cmp.pointersPerStruct && + structSize == cmp.structSize && structCount == cmp.structCount && + allPtrs == cmp.allPtrs; + } + const char *categoryPrefix; uint32_t pointersPerStruct = 0; @@ -395,9 +404,9 @@ class ObjcCategoryMerger { std::vector allPtrs; }; - // Full information about all the categories that extend a class. This will - // include all the additional methods, protocols, and properties that are - // contained in all the categories that extend a particular class. + // Full information describing an ObjC class . This will include all the + // additional methods, protocols, and properties that are contained in the + // class and all the categories that extend a particular class. struct ClassExtensionInfo { ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){}; @@ -449,6 +458,9 @@ class ObjcCategoryMerger { void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset, PointerListInfo &ptrList); + PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec, + uint32_t secOffset); + void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset, PointerListInfo &ptrList); @@ -456,9 +468,9 @@ class ObjcCategoryMerger { const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList); - void emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset, - const ClassExtensionInfo &extInfo, - const PointerListInfo &ptrList); + Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset, + const ClassExtensionInfo &extInfo, + const PointerListInfo &ptrList); Defined *emitCategory(const ClassExtensionInfo &extInfo); Defined *emitCatListEntrySec(const std::string &forCategoryName, @@ -474,6 +486,10 @@ class ObjcCategoryMerger { uint32_t offset); Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, uint32_t offset); + Defined *getClassRo(const Defined *classSym, bool getMetaRo); + void mergeCategoriesIntoBaseClass(const Defined *baseClass, + std::vector &categories); + void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset); void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec, uint32_t offset); @@ -552,6 +568,29 @@ ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, return dyn_cast_or_null(sym); } +// Get the class's ro_data symbol. If getMetaRo is true, then we will return +// the meta-class's ro_data symbol. Otherwise, we will return the class +// (instance) ro_data symbol. +Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym, + bool getMetaRo) { + ConcatInputSection *isec = dyn_cast(classSym->isec()); + if (!isec) + return nullptr; + + if (!getMetaRo) + return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset + + classSym->value); + + Defined *metaClass = tryGetDefinedAtIsecOffset( + isec, classLayout.metaClassOffset + classSym->value); + if (!metaClass) + return nullptr; + + return tryGetDefinedAtIsecOffset( + dyn_cast(metaClass->isec()), + classLayout.roDataOffset); +} + // Given an ConcatInputSection or CStringInputSection and an offset, if there is // a symbol(Defined) at that offset, then erase the symbol (mark it not live) void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset( @@ -663,6 +702,15 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec, "Protocol list end offset does not match expected size"); } +// Parse a protocol list and return the PointerListInfo for it +ObjcCategoryMerger::PointerListInfo +ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec, + uint32_t secOffset) { + PointerListInfo ptrList; + parseProtocolListInfo(isec, secOffset, ptrList); + return ptrList; +} + // Parse a pointer list that might be linked to ConcatInputSection at a given // offset. This can be used for instance methods, class methods, instance props // and class props since they have the same format. @@ -769,11 +817,11 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo, // Generate a protocol list (including header) and link it into the parent at // the specified offset. -void ObjcCategoryMerger::emitAndLinkProtocolList( +Defined *ObjcCategoryMerger::emitAndLinkProtocolList( Defined *parentSym, uint32_t linkAtOffset, const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) { if (ptrList.allPtrs.empty()) - return; + return nullptr; assert(ptrList.allPtrs.size() == ptrList.structCount); @@ -820,6 +868,8 @@ void ObjcCategoryMerger::emitAndLinkProtocolList( infoCategoryWriter.catPtrListInfo.relocTemplate); offset += target->wordSize; } + + return ptrListSym; } // Generate a pointer list (including header) and link it into the parent at the @@ -1265,10 +1315,15 @@ void ObjcCategoryMerger::removeRefsToErasedIsecs() { void ObjcCategoryMerger::doMerge() { collectAndValidateCategoriesData(); - for (auto &entry : categoryMap) - if (entry.second.size() > 1) + for (auto &[baseClass, catInfos] : categoryMap) { + if (auto *baseClassDef = dyn_cast(baseClass)) { + // Merge all categories into the base class + mergeCategoriesIntoBaseClass(baseClassDef, catInfos); + } else if (catInfos.size() > 1) { // Merge all categories into a new, single category - mergeCategoriesIntoSingleCategory(entry.second); + mergeCategoriesIntoSingleCategory(catInfos); + } + } // Erase all categories that were merged eraseMergedCategories(); @@ -1302,3 +1357,101 @@ void objc::mergeCategories() { } void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); } + +void ObjcCategoryMerger::mergeCategoriesIntoBaseClass( + const Defined *baseClass, std::vector &categories) { + assert(categories.size() >= 1 && "Expected at least one category to merge"); + + // Collect all the info from the categories + ClassExtensionInfo extInfo(catLayout); + for (auto &catInfo : categories) { + parseCatInfoToExtInfo(catInfo, extInfo); + } + + // Get metadata for the base class + Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true); + ConcatInputSection *metaIsec = dyn_cast(metaRo->isec()); + Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false); + ConcatInputSection *classIsec = dyn_cast(classRo->isec()); + + // Now collect the info from the base class from the various lists in the + // class metadata + + // Protocol lists are a special case - the same protocol list is in classRo + // and metaRo, so we only need to parse it once + parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset, + extInfo.protocols); + + // Check that the classRo and metaRo protocol lists are identical + assert( + parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset) == + parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset) && + "Category merger expects classRo and metaRo to have the same protocol " + "list"); + + parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset, + extInfo.classMethods); + parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset, + extInfo.instanceMethods); + + parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset, + extInfo.classProps); + parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset, + extInfo.instanceProps); + + // Erase the old lists - these will be generated and replaced + eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset); + eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset); + eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset); + eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset); + eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset); + eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset); + + // Emit the newly merged lists - first into the meta RO then into the class RO + // First we emit and link the protocol list into the meta RO. Then we link it + // in the classRo as well (they're supposed to be identical) + if (Defined *protoListSym = + emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset, + extInfo, extInfo.protocols)) { + createSymbolReference(classRo, protoListSym, + roClassLayout.baseProtocolsOffset, + infoCategoryWriter.catBodyInfo.relocTemplate); + } + + emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo, + extInfo.classMethods); + emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo, + extInfo.instanceMethods); + + emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo, + extInfo.classProps); + + emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo, + extInfo.instanceProps); + + // Mark all the categories as merged - this will be used to erase them later + for (auto &catInfo : categories) + catInfo.wasMerged = true; +} + +// Erase the symbol at a given offset in an InputSection +void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec, + uint32_t offset) { + Defined *sym = tryGetDefinedAtIsecOffset(isec, offset); + if (!sym) + return; + + // Remove the symbol from isec->symbols + assert(isa(sym) && "Can only erase a Defined"); + llvm::erase(isec->symbols, sym); + + // Remove the relocs that refer to this symbol + auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; }; + llvm::erase_if(isec->relocs, removeAtOff); + + // Now, if the symbol fully occupies a ConcatInputSection, we can also erase + // the whole ConcatInputSection + if (ConcatInputSection *cisec = dyn_cast(sym->isec())) + if (cisec->data.size() == sym->size) + eraseISec(cisec); +} diff --git a/lld/test/MachO/objc-category-merging-complete-test.s b/lld/test/MachO/objc-category-merging-complete-test.s index 74400177b550dcb..cf3e19e2f9c8b4a 100644 --- a/lld/test/MachO/objc-category-merging-complete-test.s +++ b/lld/test/MachO/objc-category-merging-complete-test.s @@ -1,6 +1,7 @@ # REQUIRES: aarch64 # RUN: rm -rf %t; split-file %s %t && cd %t +############ Test merging multiple categories into a single category ############ ## Create a dylib to link against(a64_file1.dylib) and merge categories in the main binary (file2_merge_a64.exe) # RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o a64_file1.o a64_file1.s # RUN: %lld -arch arm64 a64_file1.o -o a64_file1.dylib -dylib @@ -12,6 +13,10 @@ # RUN: llvm-objdump --objc-meta-data --macho a64_file2_no_merge.exe | FileCheck %s --check-prefixes=NO_MERGE_CATS # RUN: llvm-objdump --objc-meta-data --macho a64_file2_merge.exe | FileCheck %s --check-prefixes=MERGE_CATS +############ Test merging multiple categories into the base class ############ +# RUN: %lld -arch arm64 -o a64_file2_merge_into_class.exe -objc_category_merging a64_file1.o a64_file2.o +# RUN: llvm-objdump --objc-meta-data --macho a64_file2_merge_into_class.exe | FileCheck %s --check-prefixes=MERGE_CATS_CLS + MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass(Category02|Category03) MERGE_CATS-NEXT: name {{.*}} Category02|Category03 @@ -101,6 +106,211 @@ NO_MERGE_CATS-NEXT: 24 NO_MERGE_CATS-NEXT: 2 +MERGE_CATS_CLS: _OBJC_CLASS_$_MyBaseClass +MERGE_CATS_CLS-NEXT: isa {{.*}} _OBJC_METACLASS_$_MyBaseClass +MERGE_CATS_CLS-NEXT: superclass 0x0 +MERGE_CATS_CLS-NEXT: cache {{.*}} __objc_empty_cache +MERGE_CATS_CLS-NEXT: vtable 0x0 +MERGE_CATS_CLS-NEXT: data {{.*}} (struct class_ro_t *) +MERGE_CATS_CLS-NEXT: flags 0x2 RO_ROOT +MERGE_CATS_CLS-NEXT: instanceStart 0 +MERGE_CATS_CLS-NEXT: instanceSize 4 +MERGE_CATS_CLS-NEXT: reserved 0x0 +MERGE_CATS_CLS-NEXT: ivarLayout 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyBaseClass +MERGE_CATS_CLS-NEXT: baseMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 8 +MERGE_CATS_CLS-NEXT: name {{.*}} class02InstanceMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass(Category02) class02InstanceMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol02Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass(Category02) myProtocol02Method] +MERGE_CATS_CLS-NEXT: name {{.*}} class03InstanceMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass(Category03) class03InstanceMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol03Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass(Category03) myProtocol03Method] +MERGE_CATS_CLS-NEXT: name {{.*}} baseInstanceMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass baseInstanceMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol01Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass myProtocol01Method] +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass MyProtocol01Prop] +MERGE_CATS_CLS-NEXT: name {{.*}} setMyProtocol01Prop: +MERGE_CATS_CLS-NEXT: types {{.*}} v20@0:8i16 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass setMyProtocol01Prop:] +MERGE_CATS_CLS-NEXT: baseProtocols {{.*}} +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: list[0] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 2 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol02Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: list[1] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 2 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol03Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: list[2] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol01Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} setMyProtocol01Prop: +MERGE_CATS_CLS-NEXT: types {{.*}} v20@0:8i16 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: ivars {{.*}} +MERGE_CATS_CLS-NEXT: entsize 32 +MERGE_CATS_CLS-NEXT: count 1 +MERGE_CATS_CLS-NEXT: offset {{.*}} 0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: type {{.*}} i +MERGE_CATS_CLS-NEXT: alignment 2 +MERGE_CATS_CLS-NEXT: size 4 +MERGE_CATS_CLS-NEXT: weakIvarLayout 0x0 +MERGE_CATS_CLS-NEXT: baseProperties {{.*}} +MERGE_CATS_CLS-NEXT: entsize 16 +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS_CLS-NEXT: attributes {{.*}} Ti,R,D +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS_CLS-NEXT: attributes {{.*}} Ti,R,D +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: attributes {{.*}} Ti,N,VMyProtocol01Prop +MERGE_CATS_CLS-NEXT: Meta Class +MERGE_CATS_CLS-NEXT: isa {{.*}} _OBJC_METACLASS_$_MyBaseClass +MERGE_CATS_CLS-NEXT: superclass {{.*}} _OBJC_CLASS_$_MyBaseClass +MERGE_CATS_CLS-NEXT: cache {{.*}} __objc_empty_cache +MERGE_CATS_CLS-NEXT: vtable 0x0 +MERGE_CATS_CLS-NEXT: data {{.*}} (struct class_ro_t *) +MERGE_CATS_CLS-NEXT: flags 0x3 RO_META RO_ROOT +MERGE_CATS_CLS-NEXT: instanceStart 40 +MERGE_CATS_CLS-NEXT: instanceSize 40 +MERGE_CATS_CLS-NEXT: reserved 0x0 +MERGE_CATS_CLS-NEXT: ivarLayout 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyBaseClass +MERGE_CATS_CLS-NEXT: baseMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 5 +MERGE_CATS_CLS-NEXT: name {{.*}} class02ClassMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass(Category02) class02ClassMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass(Category02) MyProtocol02Prop] +MERGE_CATS_CLS-NEXT: name {{.*}} class03ClassMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass(Category03) class03ClassMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass(Category03) MyProtocol03Prop] +MERGE_CATS_CLS-NEXT: name {{.*}} baseClassMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass baseClassMethod] +MERGE_CATS_CLS-NEXT: baseProtocols {{.*}} +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: list[0] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 2 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol02Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: list[1] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 2 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol03Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: list[2] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol01Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} setMyProtocol01Prop: +MERGE_CATS_CLS-NEXT: types {{.*}} v20@0:8i16 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: ivars 0x0 +MERGE_CATS_CLS-NEXT: weakIvarLayout 0x0 +MERGE_CATS_CLS-NEXT: baseProperties 0x0 +MERGE_CATS_CLS: __OBJC_$_CATEGORY_MyBaseClass_$_Category04 + + #--- a64_file1.s ## @protocol MyProtocol01 diff --git a/lld/test/MachO/objc-category-merging-extern-class-minimal.s b/lld/test/MachO/objc-category-merging-minimal.s similarity index 59% rename from lld/test/MachO/objc-category-merging-extern-class-minimal.s rename to lld/test/MachO/objc-category-merging-minimal.s index 5dd8924df5ad683..fcd90f178b150e0 100644 --- a/lld/test/MachO/objc-category-merging-extern-class-minimal.s +++ b/lld/test/MachO/objc-category-merging-minimal.s @@ -1,7 +1,8 @@ # REQUIRES: aarch64 # RUN: rm -rf %t; split-file %s %t && cd %t -## Create a dylib with a fake base class to link against +############ Test merging multiple categories into a single category ############ +## Create a dylib with a fake base class to link against in when merging between categories # RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o a64_fakedylib.o a64_fakedylib.s # RUN: %lld -arch arm64 a64_fakedylib.o -o a64_fakedylib.dylib -dylib @@ -14,6 +15,15 @@ # RUN: llvm-objdump --objc-meta-data --macho merge_cat_minimal_no_merge.dylib | FileCheck %s --check-prefixes=NO_MERGE_CATS # RUN: llvm-objdump --objc-meta-data --macho merge_cat_minimal_merge.dylib | FileCheck %s --check-prefixes=MERGE_CATS +############ Test merging multiple categories into the base class ############ +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_base_class_minimal.o merge_base_class_minimal.s +# RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_yes_merge.dylib -objc_category_merging merge_base_class_minimal.o merge_cat_minimal.o +# RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_no_merge.dylib merge_base_class_minimal.o merge_cat_minimal.o + +# RUN: llvm-objdump --objc-meta-data --macho merge_base_class_minimal_no_merge.dylib | FileCheck %s --check-prefixes=NO_MERGE_INTO_BASE +# RUN: llvm-objdump --objc-meta-data --macho merge_base_class_minimal_yes_merge.dylib | FileCheck %s --check-prefixes=YES_MERGE_INTO_BASE + + #### Check merge categories enabled ### # Check that the original categories are not there MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 @@ -44,6 +54,28 @@ NO_MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 NO_MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 +#### Check merge cateogires into base class is disabled #### +NO_MERGE_INTO_BASE: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 +NO_MERGE_INTO_BASE: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 + +#### Check merge cateogires into base class is enabled and categories are merged into base class #### +YES_MERGE_INTO_BASE-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 +YES_MERGE_INTO_BASE-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 + +YES_MERGE_INTO_BASE: _OBJC_CLASS_$_MyBaseClass +YES_MERGE_INTO_BASE-NEXT: _OBJC_METACLASS_$_MyBaseClass +YES_MERGE_INTO_BASE: baseMethods +YES_MERGE_INTO_BASE-NEXT: entsize 24 +YES_MERGE_INTO_BASE-NEXT: count 3 +YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat01_InstanceMethod +YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8 +YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category01) cat01_InstanceMethod] +YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat02_InstanceMethod +YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8 +YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category02) cat02_InstanceMethod] +YES_MERGE_INTO_BASE-NEXT: name {{.*}} baseInstanceMethod +YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8 +YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass baseInstanceMethod] #--- a64_fakedylib.s @@ -156,3 +188,94 @@ L_OBJC_IMAGE_INFO: .addrsig .addrsig_sym __OBJC_$_CATEGORY_MyBaseClass_$_Category01 + +#--- merge_base_class_minimal.s +; clang -c merge_base_class_minimal.mm -O3 -target arm64-apple-macos -arch arm64 -S -o merge_base_class_minimal.s +; ================== Generated from ObjC: ================== +; __attribute__((objc_root_class)) +; @interface MyBaseClass +; - (void)baseInstanceMethod; +; @end +; +; @implementation MyBaseClass +; - (void)baseInstanceMethod {} +; @end +; ================== Generated from ObjC ================== + .section __TEXT,__text,regular,pure_instructions + .build_version macos, 11, 0 + .p2align 2 +"-[MyBaseClass baseInstanceMethod]": + .cfi_startproc +; %bb.0: + ret + .cfi_endproc + .section __DATA,__objc_data + .globl _OBJC_CLASS_$_MyBaseClass + .p2align 3, 0x0 +_OBJC_CLASS_$_MyBaseClass: + .quad _OBJC_METACLASS_$_MyBaseClass + .quad 0 + .quad 0 + .quad 0 + .quad __OBJC_CLASS_RO_$_MyBaseClass + .globl _OBJC_METACLASS_$_MyBaseClass + .p2align 3, 0x0 +_OBJC_METACLASS_$_MyBaseClass: + .quad _OBJC_METACLASS_$_MyBaseClass + .quad _OBJC_CLASS_$_MyBaseClass + .quad 0 + .quad 0 + .quad __OBJC_METACLASS_RO_$_MyBaseClass + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_: + .asciz "MyBaseClass" + .section __DATA,__objc_const + .p2align 3, 0x0 +__OBJC_METACLASS_RO_$_MyBaseClass: + .long 3 + .long 40 + .long 40 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_: + .asciz "baseInstanceMethod" + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_: + .asciz "v16@0:8" + .section __DATA,__objc_const + .p2align 3, 0x0 +__OBJC_$_INSTANCE_METHODS_MyBaseClass: + .long 24 + .long 1 + .quad l_OBJC_METH_VAR_NAME_ + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyBaseClass baseInstanceMethod]" + .p2align 3, 0x0 +__OBJC_CLASS_RO_$_MyBaseClass: + .long 2 + .long 0 + .long 0 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad __OBJC_$_INSTANCE_METHODS_MyBaseClass + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .section __DATA,__objc_classlist,regular,no_dead_strip + .p2align 3, 0x0 +l_OBJC_LABEL_CLASS_$: + .quad _OBJC_CLASS_$_MyBaseClass + .section __DATA,__objc_imageinfo,regular,no_dead_strip +L_OBJC_IMAGE_INFO: + .long 0 + .long 64 +.subsections_via_symbols diff --git a/lldb/bindings/headers.swig b/lldb/bindings/headers.swig index ffdc3c31ec883ac..c91504604b6ac68 100644 --- a/lldb/bindings/headers.swig +++ b/lldb/bindings/headers.swig @@ -8,6 +8,8 @@ %{ #include "lldb/lldb-public.h" #include "lldb/API/SBAddress.h" +#include "lldb/API/SBAddressRange.h" +#include "lldb/API/SBAddressRangeList.h" #include "lldb/API/SBAttachInfo.h" #include "lldb/API/SBBlock.h" #include "lldb/API/SBBreakpoint.h" diff --git a/lldb/bindings/interface/SBAddressRangeDocstrings.i b/lldb/bindings/interface/SBAddressRangeDocstrings.i new file mode 100644 index 000000000000000..650195704d73e6e --- /dev/null +++ b/lldb/bindings/interface/SBAddressRangeDocstrings.i @@ -0,0 +1,3 @@ +%feature("docstring", +"API clients can get address range information." +) lldb::SBAddressRange; diff --git a/lldb/bindings/interface/SBAddressRangeExtensions.i b/lldb/bindings/interface/SBAddressRangeExtensions.i new file mode 100644 index 000000000000000..31bcfcb64590bcd --- /dev/null +++ b/lldb/bindings/interface/SBAddressRangeExtensions.i @@ -0,0 +1,11 @@ +%extend lldb::SBAddressRange { +#ifdef SWIGPYTHON + %pythoncode%{ + def __repr__(self): + import lldb + stream = lldb.SBStream() + self.GetDescription(stream, lldb.target if lldb.target else lldb.SBTarget()) + return stream.GetData() + %} +#endif +} diff --git a/lldb/bindings/interface/SBAddressRangeListDocstrings.i b/lldb/bindings/interface/SBAddressRangeListDocstrings.i new file mode 100644 index 000000000000000..e4b96b9ca59312d --- /dev/null +++ b/lldb/bindings/interface/SBAddressRangeListDocstrings.i @@ -0,0 +1,3 @@ +%feature("docstring", +"Represents a list of :py:class:`SBAddressRange`." +) lldb::SBAddressRangeList; diff --git a/lldb/bindings/interface/SBAddressRangeListExtensions.i b/lldb/bindings/interface/SBAddressRangeListExtensions.i new file mode 100644 index 000000000000000..e281a84d73d27df --- /dev/null +++ b/lldb/bindings/interface/SBAddressRangeListExtensions.i @@ -0,0 +1,29 @@ +%extend lldb::SBAddressRangeList { +#ifdef SWIGPYTHON + %pythoncode%{ + def __len__(self): + '''Return the number of address ranges in a lldb.SBAddressRangeList object.''' + return self.GetSize() + + def __iter__(self): + '''Iterate over all the address ranges in a lldb.SBAddressRangeList object.''' + return lldb_iter(self, 'GetSize', 'GetAddressRangeAtIndex') + + def __getitem__(self, idx): + '''Get the address range at a given index in an lldb.SBAddressRangeList object.''' + if not isinstance(idx, int): + raise TypeError("unsupported index type: %s" % type(idx)) + count = len(self) + if not (-count <= idx < count): + raise IndexError("list index out of range") + idx %= count + return self.GetAddressRangeAtIndex(idx) + + def __repr__(self): + import lldb + stream = lldb.SBStream() + self.GetDescription(stream, lldb.target if lldb.target else lldb.SBTarget()) + return stream.GetData() + %} +#endif +} diff --git a/lldb/bindings/interfaces.swig b/lldb/bindings/interfaces.swig index 2a29a8dd7ef0b4d..0953f4c72a9101b 100644 --- a/lldb/bindings/interfaces.swig +++ b/lldb/bindings/interfaces.swig @@ -12,6 +12,8 @@ /* Docstrings for SB classes and methods */ %include "./interface/SBAddressDocstrings.i" +%include "./interface/SBAddressRangeDocstrings.i" +%include "./interface/SBAddressRangeListDocstrings.i" %include "./interface/SBAttachInfoDocstrings.i" %include "./interface/SBBlockDocstrings.i" %include "./interface/SBBreakpointDocstrings.i" @@ -86,6 +88,8 @@ /* API headers */ %include "lldb/API/SBAddress.h" +%include "lldb/API/SBAddressRange.h" +%include "lldb/API/SBAddressRangeList.h" %include "lldb/API/SBAttachInfo.h" %include "lldb/API/SBBlock.h" %include "lldb/API/SBBreakpoint.h" @@ -163,6 +167,8 @@ /* Extensions for SB classes */ %include "./interface/SBAddressExtensions.i" +%include "./interface/SBAddressRangeExtensions.i" +%include "./interface/SBAddressRangeListExtensions.i" %include "./interface/SBBlockExtensions.i" %include "./interface/SBBreakpointExtensions.i" %include "./interface/SBBreakpointListExtensions.i" diff --git a/lldb/include/lldb/API/LLDB.h b/lldb/include/lldb/API/LLDB.h index b256544326a224e..d8cc9f5067fe94d 100644 --- a/lldb/include/lldb/API/LLDB.h +++ b/lldb/include/lldb/API/LLDB.h @@ -10,6 +10,8 @@ #define LLDB_API_LLDB_H #include "lldb/API/SBAddress.h" +#include "lldb/API/SBAddressRange.h" +#include "lldb/API/SBAddressRangeList.h" #include "lldb/API/SBAttachInfo.h" #include "lldb/API/SBBlock.h" #include "lldb/API/SBBreakpoint.h" diff --git a/lldb/include/lldb/API/SBAddress.h b/lldb/include/lldb/API/SBAddress.h index 5e5f355ccc390c4..430dad4862dbffa 100644 --- a/lldb/include/lldb/API/SBAddress.h +++ b/lldb/include/lldb/API/SBAddress.h @@ -86,6 +86,7 @@ class LLDB_API SBAddress { lldb::SBLineEntry GetLineEntry(); protected: + friend class SBAddressRange; friend class SBBlock; friend class SBBreakpoint; friend class SBBreakpointLocation; diff --git a/lldb/include/lldb/API/SBAddressRange.h b/lldb/include/lldb/API/SBAddressRange.h new file mode 100644 index 000000000000000..152bd82426af1c3 --- /dev/null +++ b/lldb/include/lldb/API/SBAddressRange.h @@ -0,0 +1,66 @@ +//===-- SBAddressRange.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_API_SBADDRESSRANGE_H +#define LLDB_API_SBADDRESSRANGE_H + +#include "lldb/API/SBDefines.h" + +namespace lldb { + +class LLDB_API SBAddressRange { +public: + SBAddressRange(); + + SBAddressRange(const lldb::SBAddressRange &rhs); + + SBAddressRange(lldb::SBAddress addr, lldb::addr_t byte_size); + + ~SBAddressRange(); + + const lldb::SBAddressRange &operator=(const lldb::SBAddressRange &rhs); + + void Clear(); + + /// Check the address range refers to a valid base address and has a byte + /// size greater than zero. + /// + /// \return + /// True if the address range is valid, false otherwise. + bool IsValid() const; + + /// Get the base address of the range. + /// + /// \return + /// Base address object. + lldb::SBAddress GetBaseAddress() const; + + /// Get the byte size of this range. + /// + /// \return + /// The size in bytes of this address range. + lldb::addr_t GetByteSize() const; + + bool operator==(const SBAddressRange &rhs); + + bool operator!=(const SBAddressRange &rhs); + + bool GetDescription(lldb::SBStream &description, const SBTarget target); + +private: + friend class SBAddressRangeList; + friend class SBBlock; + friend class SBFunction; + friend class SBProcess; + + AddressRangeUP m_opaque_up; +}; + +} // namespace lldb + +#endif // LLDB_API_SBADDRESSRANGE_H diff --git a/lldb/include/lldb/API/SBAddressRangeList.h b/lldb/include/lldb/API/SBAddressRangeList.h new file mode 100644 index 000000000000000..a123287ef1b4fa3 --- /dev/null +++ b/lldb/include/lldb/API/SBAddressRangeList.h @@ -0,0 +1,54 @@ +//===-- SBAddressRangeList.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_API_SBADDRESSRANGELIST_H +#define LLDB_API_SBADDRESSRANGELIST_H + +#include + +#include "lldb/API/SBDefines.h" + +namespace lldb_private { +class AddressRangeListImpl; +} + +namespace lldb { + +class LLDB_API SBAddressRangeList { +public: + SBAddressRangeList(); + + SBAddressRangeList(const lldb::SBAddressRangeList &rhs); + + ~SBAddressRangeList(); + + const lldb::SBAddressRangeList & + operator=(const lldb::SBAddressRangeList &rhs); + + uint32_t GetSize() const; + + void Clear(); + + SBAddressRange GetAddressRangeAtIndex(uint64_t idx); + + void Append(const lldb::SBAddressRange &addr_range); + + void Append(const lldb::SBAddressRangeList &addr_range_list); + + bool GetDescription(lldb::SBStream &description, const SBTarget &target); + +private: + friend class SBBlock; + friend class SBProcess; + + std::unique_ptr m_opaque_up; +}; + +} // namespace lldb + +#endif // LLDB_API_SBADDRESSRANGELIST_H diff --git a/lldb/include/lldb/API/SBBlock.h b/lldb/include/lldb/API/SBBlock.h index 2570099f7652f3a..de4bb22be269256 100644 --- a/lldb/include/lldb/API/SBBlock.h +++ b/lldb/include/lldb/API/SBBlock.h @@ -9,6 +9,8 @@ #ifndef LLDB_API_SBBLOCK_H #define LLDB_API_SBBLOCK_H +#include "lldb/API/SBAddressRange.h" +#include "lldb/API/SBAddressRangeList.h" #include "lldb/API/SBDefines.h" #include "lldb/API/SBFrame.h" #include "lldb/API/SBTarget.h" @@ -52,6 +54,8 @@ class LLDB_API SBBlock { lldb::SBAddress GetRangeEndAddress(uint32_t idx); + lldb::SBAddressRangeList GetRanges(); + uint32_t GetRangeIndexForBlockAddress(lldb::SBAddress block_addr); lldb::SBValueList GetVariables(lldb::SBFrame &frame, bool arguments, diff --git a/lldb/include/lldb/API/SBDefines.h b/lldb/include/lldb/API/SBDefines.h index 1181920677b46f5..87c0a1c3661ca30 100644 --- a/lldb/include/lldb/API/SBDefines.h +++ b/lldb/include/lldb/API/SBDefines.h @@ -43,6 +43,8 @@ namespace lldb { class LLDB_API SBAddress; +class LLDB_API SBAddressRange; +class LLDB_API SBAddressRangeList; class LLDB_API SBAttachInfo; class LLDB_API SBBlock; class LLDB_API SBBreakpoint; diff --git a/lldb/include/lldb/API/SBFunction.h b/lldb/include/lldb/API/SBFunction.h index 71b372a818e4b5e..df607fdc7ebf59d 100644 --- a/lldb/include/lldb/API/SBFunction.h +++ b/lldb/include/lldb/API/SBFunction.h @@ -10,6 +10,7 @@ #define LLDB_API_SBFUNCTION_H #include "lldb/API/SBAddress.h" +#include "lldb/API/SBAddressRangeList.h" #include "lldb/API/SBDefines.h" #include "lldb/API/SBInstructionList.h" @@ -44,6 +45,8 @@ class LLDB_API SBFunction { lldb::SBAddress GetEndAddress(); + lldb::SBAddressRangeList GetRanges(); + const char *GetArgumentName(uint32_t arg_idx); uint32_t GetPrologueByteSize(); diff --git a/lldb/include/lldb/API/SBStream.h b/lldb/include/lldb/API/SBStream.h index 0e33f05b69916f2..71caf41fd754913 100644 --- a/lldb/include/lldb/API/SBStream.h +++ b/lldb/include/lldb/API/SBStream.h @@ -62,6 +62,8 @@ class LLDB_API SBStream { protected: friend class SBAddress; + friend class SBAddressRange; + friend class SBAddressRangeList; friend class SBBlock; friend class SBBreakpoint; friend class SBBreakpointLocation; diff --git a/lldb/include/lldb/API/SBTarget.h b/lldb/include/lldb/API/SBTarget.h index feeaa1cb71132bc..35c2ed9c20a2387 100644 --- a/lldb/include/lldb/API/SBTarget.h +++ b/lldb/include/lldb/API/SBTarget.h @@ -943,6 +943,7 @@ class LLDB_API SBTarget { protected: friend class SBAddress; + friend class SBAddressRange; friend class SBBlock; friend class SBBreakpoint; friend class SBBreakpointList; diff --git a/lldb/include/lldb/Core/AddressRange.h b/lldb/include/lldb/Core/AddressRange.h index 4a33c2d79587653..68a3ad0edd2d794 100644 --- a/lldb/include/lldb/Core/AddressRange.h +++ b/lldb/include/lldb/Core/AddressRange.h @@ -86,6 +86,8 @@ class AddressRange { /// (LLDB_INVALID_ADDRESS) and a zero byte size. void Clear(); + bool IsValid() const; + /// Check if a section offset address is contained in this range. /// /// \param[in] so_addr @@ -236,12 +238,24 @@ class AddressRange { /// The new size in bytes of this address range. void SetByteSize(lldb::addr_t byte_size) { m_byte_size = byte_size; } + bool GetDescription(Stream *s, Target *target) const; + + bool operator==(const AddressRange &rhs); + + bool operator!=(const AddressRange &rhs); + protected: // Member variables Address m_base_addr; ///< The section offset base address of this range. lldb::addr_t m_byte_size = 0; ///< The size in bytes of this address range. }; +// Forward-declarable wrapper. +class AddressRanges : public std::vector { +public: + using std::vector::vector; +}; + } // namespace lldb_private #endif // LLDB_CORE_ADDRESSRANGE_H diff --git a/lldb/include/lldb/Core/AddressRangeListImpl.h b/lldb/include/lldb/Core/AddressRangeListImpl.h new file mode 100644 index 000000000000000..46ebfe73d4d92d2 --- /dev/null +++ b/lldb/include/lldb/Core/AddressRangeListImpl.h @@ -0,0 +1,51 @@ +//===-- AddressRangeListImpl.h ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_CORE_ADDRESSRANGELISTIMPL_H +#define LLDB_CORE_ADDRESSRANGELISTIMPL_H + +#include "lldb/Core/AddressRange.h" +#include + +namespace lldb { +class SBBlock; +} + +namespace lldb_private { + +class AddressRangeListImpl { +public: + AddressRangeListImpl(); + + AddressRangeListImpl(const AddressRangeListImpl &rhs) = default; + + AddressRangeListImpl &operator=(const AddressRangeListImpl &rhs); + + size_t GetSize() const; + + void Reserve(size_t capacity); + + void Append(const AddressRange &sb_region); + + void Append(const AddressRangeListImpl &list); + + void Clear(); + + lldb_private::AddressRange GetAddressRangeAtIndex(size_t index); + +private: + friend class lldb::SBBlock; + + AddressRanges &ref(); + + AddressRanges m_ranges; +}; + +} // namespace lldb_private + +#endif // LLDB_CORE_ADDRESSRANGE_H diff --git a/lldb/include/lldb/Symbol/Block.h b/lldb/include/lldb/Symbol/Block.h index 02fd2add5310334..c9c4d5ad767d7e8 100644 --- a/lldb/include/lldb/Symbol/Block.h +++ b/lldb/include/lldb/Symbol/Block.h @@ -355,6 +355,8 @@ class Block : public UserID, public SymbolContextScope { // be able to get at any of the address ranges in a block. bool GetRangeAtIndex(uint32_t range_idx, AddressRange &range); + AddressRanges GetRanges(); + bool GetStartAddress(Address &addr); void SetDidParseVariables(bool b, bool set_children); diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h index 10ba921b9dac8c5..6d880b4da03c99d 100644 --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -19,6 +19,8 @@ class ASTResultSynthesizer; class ASTStructExtractor; class Address; class AddressRange; +class AddressRanges; +class AddressRangeList; class AddressResolver; class ArchSpec; class Architecture; @@ -308,6 +310,7 @@ template class StreamBuffer; namespace lldb { typedef std::shared_ptr ABISP; +typedef std::unique_ptr AddressRangeUP; typedef std::shared_ptr BatonSP; typedef std::shared_ptr BlockSP; typedef std::shared_ptr BreakpointSP; diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt index e8228afe103f9c1..63971016093151a 100644 --- a/lldb/source/API/CMakeLists.txt +++ b/lldb/source/API/CMakeLists.txt @@ -42,6 +42,8 @@ set_target_properties(lldb-sbapi-dwarf-enums PROPERTIES FOLDER "LLDB/Tablegennin add_lldb_library(liblldb SHARED ${option_framework} SBAddress.cpp + SBAddressRange.cpp + SBAddressRangeList.cpp SBAttachInfo.cpp SBBlock.cpp SBBreakpoint.cpp diff --git a/lldb/source/API/SBAddressRange.cpp b/lldb/source/API/SBAddressRange.cpp new file mode 100644 index 000000000000000..9b1affdade439c6 --- /dev/null +++ b/lldb/source/API/SBAddressRange.cpp @@ -0,0 +1,103 @@ +//===-- SBAddressRange.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/API/SBAddressRange.h" +#include "Utils.h" +#include "lldb/API/SBAddress.h" +#include "lldb/API/SBStream.h" +#include "lldb/API/SBTarget.h" +#include "lldb/Core/AddressRange.h" +#include "lldb/Core/Section.h" +#include "lldb/Utility/Instrumentation.h" +#include "lldb/Utility/Stream.h" +#include +#include + +using namespace lldb; +using namespace lldb_private; + +SBAddressRange::SBAddressRange() + : m_opaque_up(std::make_unique()) { + LLDB_INSTRUMENT_VA(this); +} + +SBAddressRange::SBAddressRange(const SBAddressRange &rhs) { + LLDB_INSTRUMENT_VA(this, rhs); + + m_opaque_up = clone(rhs.m_opaque_up); +} + +SBAddressRange::SBAddressRange(lldb::SBAddress addr, lldb::addr_t byte_size) + : m_opaque_up(std::make_unique(addr.ref(), byte_size)) { + LLDB_INSTRUMENT_VA(this, addr, byte_size); +} + +SBAddressRange::~SBAddressRange() = default; + +const SBAddressRange &SBAddressRange::operator=(const SBAddressRange &rhs) { + LLDB_INSTRUMENT_VA(this, rhs); + + if (this != &rhs) + m_opaque_up = clone(rhs.m_opaque_up); + return *this; +} + +bool SBAddressRange::operator==(const SBAddressRange &rhs) { + LLDB_INSTRUMENT_VA(this, rhs); + + if (!IsValid() || !rhs.IsValid()) + return false; + return m_opaque_up->operator==(*(rhs.m_opaque_up)); +} + +bool SBAddressRange::operator!=(const SBAddressRange &rhs) { + LLDB_INSTRUMENT_VA(this, rhs); + + return !(*this == rhs); +} + +void SBAddressRange::Clear() { + LLDB_INSTRUMENT_VA(this); + + m_opaque_up.reset(); +} + +bool SBAddressRange::IsValid() const { + LLDB_INSTRUMENT_VA(this); + + return m_opaque_up && m_opaque_up->IsValid(); +} + +lldb::SBAddress SBAddressRange::GetBaseAddress() const { + LLDB_INSTRUMENT_VA(this); + + if (!IsValid()) + return lldb::SBAddress(); + return lldb::SBAddress(m_opaque_up->GetBaseAddress()); +} + +lldb::addr_t SBAddressRange::GetByteSize() const { + LLDB_INSTRUMENT_VA(this); + + if (!IsValid()) + return 0; + return m_opaque_up->GetByteSize(); +} + +bool SBAddressRange::GetDescription(SBStream &description, + const SBTarget target) { + LLDB_INSTRUMENT_VA(this, description, target); + + Stream &stream = description.ref(); + if (!IsValid()) { + stream << ""; + return true; + } + m_opaque_up->GetDescription(&stream, target.GetSP().get()); + return true; +} diff --git a/lldb/source/API/SBAddressRangeList.cpp b/lldb/source/API/SBAddressRangeList.cpp new file mode 100644 index 000000000000000..20660b3ff208829 --- /dev/null +++ b/lldb/source/API/SBAddressRangeList.cpp @@ -0,0 +1,94 @@ +//===-- SBAddressRangeList.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/API/SBAddressRangeList.h" +#include "Utils.h" +#include "lldb/API/SBAddressRange.h" +#include "lldb/API/SBStream.h" +#include "lldb/API/SBTarget.h" +#include "lldb/Core/AddressRangeListImpl.h" +#include "lldb/Utility/Instrumentation.h" +#include "lldb/Utility/Stream.h" + +#include + +using namespace lldb; +using namespace lldb_private; + +SBAddressRangeList::SBAddressRangeList() + : m_opaque_up(std::make_unique()) { + LLDB_INSTRUMENT_VA(this); +} + +SBAddressRangeList::SBAddressRangeList(const SBAddressRangeList &rhs) + : m_opaque_up(std::make_unique(*rhs.m_opaque_up)) { + LLDB_INSTRUMENT_VA(this, rhs); +} + +SBAddressRangeList::~SBAddressRangeList() = default; + +const SBAddressRangeList & +SBAddressRangeList::operator=(const SBAddressRangeList &rhs) { + LLDB_INSTRUMENT_VA(this, rhs); + + if (this != &rhs) + *m_opaque_up = *rhs.m_opaque_up; + return *this; +} + +uint32_t SBAddressRangeList::GetSize() const { + LLDB_INSTRUMENT_VA(this); + + return m_opaque_up->GetSize(); +} + +SBAddressRange SBAddressRangeList::GetAddressRangeAtIndex(uint64_t idx) { + LLDB_INSTRUMENT_VA(this, idx); + + SBAddressRange sb_addr_range; + (*sb_addr_range.m_opaque_up) = m_opaque_up->GetAddressRangeAtIndex(idx); + return sb_addr_range; +} + +void SBAddressRangeList::Clear() { + LLDB_INSTRUMENT_VA(this); + + m_opaque_up->Clear(); +} + +void SBAddressRangeList::Append(const SBAddressRange &sb_addr_range) { + LLDB_INSTRUMENT_VA(this, sb_addr_range); + + m_opaque_up->Append(*sb_addr_range.m_opaque_up); +} + +void SBAddressRangeList::Append(const SBAddressRangeList &sb_addr_range_list) { + LLDB_INSTRUMENT_VA(this, sb_addr_range_list); + + m_opaque_up->Append(*sb_addr_range_list.m_opaque_up); +} + +bool SBAddressRangeList::GetDescription(SBStream &description, + const SBTarget &target) { + LLDB_INSTRUMENT_VA(this, description, target); + + const uint32_t num_ranges = GetSize(); + bool is_first = true; + Stream &stream = description.ref(); + stream << "["; + for (uint32_t i = 0; i < num_ranges; ++i) { + if (is_first) { + is_first = false; + } else { + stream.Printf(", "); + } + GetAddressRangeAtIndex(i).GetDescription(description, target); + } + stream << "]"; + return true; +} diff --git a/lldb/source/API/SBBlock.cpp b/lldb/source/API/SBBlock.cpp index 7d7565340836b1d..2577b14920f0652 100644 --- a/lldb/source/API/SBBlock.cpp +++ b/lldb/source/API/SBBlock.cpp @@ -13,6 +13,7 @@ #include "lldb/API/SBStream.h" #include "lldb/API/SBValue.h" #include "lldb/Core/AddressRange.h" +#include "lldb/Core/AddressRangeListImpl.h" #include "lldb/Core/ValueObjectVariable.h" #include "lldb/Symbol/Block.h" #include "lldb/Symbol/Function.h" @@ -219,6 +220,15 @@ lldb::SBAddress SBBlock::GetRangeEndAddress(uint32_t idx) { return sb_addr; } +lldb::SBAddressRangeList SBBlock::GetRanges() { + LLDB_INSTRUMENT_VA(this); + + lldb::SBAddressRangeList sb_ranges; + if (m_opaque_ptr) + sb_ranges.m_opaque_up->ref() = m_opaque_ptr->GetRanges(); + return sb_ranges; +} + uint32_t SBBlock::GetRangeIndexForBlockAddress(lldb::SBAddress block_addr) { LLDB_INSTRUMENT_VA(this, block_addr); diff --git a/lldb/source/API/SBFunction.cpp b/lldb/source/API/SBFunction.cpp index a01c7f79bbd31fd..6a97352fc2c2fdb 100644 --- a/lldb/source/API/SBFunction.cpp +++ b/lldb/source/API/SBFunction.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBFunction.h" +#include "lldb/API/SBAddressRange.h" #include "lldb/API/SBProcess.h" #include "lldb/API/SBStream.h" #include "lldb/Core/Disassembler.h" @@ -160,6 +161,19 @@ SBAddress SBFunction::GetEndAddress() { return addr; } +lldb::SBAddressRangeList SBFunction::GetRanges() { + LLDB_INSTRUMENT_VA(this); + + lldb::SBAddressRangeList ranges; + if (m_opaque_ptr) { + lldb::SBAddressRange range; + (*range.m_opaque_up) = m_opaque_ptr->GetAddressRange(); + ranges.Append(std::move(range)); + } + + return ranges; +} + const char *SBFunction::GetArgumentName(uint32_t arg_idx) { LLDB_INSTRUMENT_VA(this, arg_idx); diff --git a/lldb/source/Core/AddressRange.cpp b/lldb/source/Core/AddressRange.cpp index 1830f2ccd47fece..6cef7e149cd20bd 100644 --- a/lldb/source/Core/AddressRange.cpp +++ b/lldb/source/Core/AddressRange.cpp @@ -14,6 +14,7 @@ #include "lldb/Utility/FileSpec.h" #include "lldb/Utility/Stream.h" #include "lldb/lldb-defines.h" +#include "lldb/lldb-types.h" #include "llvm/Support/Compiler.h" @@ -145,6 +146,10 @@ void AddressRange::Clear() { m_byte_size = 0; } +bool AddressRange::IsValid() const { + return m_base_addr.IsValid() && (m_byte_size > 0); +} + bool AddressRange::Dump(Stream *s, Target *target, Address::DumpStyle style, Address::DumpStyle fallback_style) const { addr_t vmaddr = LLDB_INVALID_ADDRESS; @@ -203,3 +208,41 @@ void AddressRange::DumpDebug(Stream *s) const { static_cast(m_base_addr.GetSection().get()), m_base_addr.GetOffset(), GetByteSize()); } + +bool AddressRange::GetDescription(Stream *s, Target *target) const { + addr_t start_addr = m_base_addr.GetLoadAddress(target); + if (start_addr != LLDB_INVALID_ADDRESS) { + // We have a valid target and the address was resolved, or we have a base + // address with no section. Just print out a raw address range: [, + // ) + s->Printf("[0x%" PRIx64 "-0x%" PRIx64 ")", start_addr, + start_addr + GetByteSize()); + return true; + } + + // Either no target or the address wasn't resolved, print as + // [-) + const char *file_name = ""; + const auto section_sp = m_base_addr.GetSection(); + if (section_sp) { + if (const auto object_file = section_sp->GetObjectFile()) + file_name = object_file->GetFileSpec().GetFilename().AsCString(); + } + start_addr = m_base_addr.GetFileAddress(); + const addr_t end_addr = (start_addr == LLDB_INVALID_ADDRESS) + ? LLDB_INVALID_ADDRESS + : start_addr + GetByteSize(); + s->Printf("%s[0x%" PRIx64 "-0x%" PRIx64 ")", file_name, start_addr, end_addr); + return true; +} + +bool AddressRange::operator==(const AddressRange &rhs) { + if (!IsValid() || !rhs.IsValid()) + return false; + return m_base_addr == rhs.GetBaseAddress() && + m_byte_size == rhs.GetByteSize(); +} + +bool AddressRange::operator!=(const AddressRange &rhs) { + return !(*this == rhs); +} diff --git a/lldb/source/Core/AddressRangeListImpl.cpp b/lldb/source/Core/AddressRangeListImpl.cpp new file mode 100644 index 000000000000000..d405cf0fa3ec353 --- /dev/null +++ b/lldb/source/Core/AddressRangeListImpl.cpp @@ -0,0 +1,50 @@ +//===-- AddressRangeListImpl.cpp ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Core/AddressRangeListImpl.h" + +using namespace lldb; +using namespace lldb_private; + +AddressRangeListImpl::AddressRangeListImpl() : m_ranges() {} + +AddressRangeListImpl & +AddressRangeListImpl::operator=(const AddressRangeListImpl &rhs) { + if (this == &rhs) + return *this; + m_ranges = rhs.m_ranges; + return *this; +} + +size_t AddressRangeListImpl::GetSize() const { return m_ranges.size(); } + +void AddressRangeListImpl::Reserve(size_t capacity) { + m_ranges.reserve(capacity); +} + +void AddressRangeListImpl::Append(const AddressRange &sb_region) { + m_ranges.emplace_back(sb_region); +} + +void AddressRangeListImpl::Append(const AddressRangeListImpl &list) { + Reserve(GetSize() + list.GetSize()); + + for (const auto &range : list.m_ranges) + Append(range); +} + +void AddressRangeListImpl::Clear() { m_ranges.clear(); } + +lldb_private::AddressRange +AddressRangeListImpl::GetAddressRangeAtIndex(size_t index) { + if (index >= GetSize()) + return AddressRange(); + return m_ranges[index]; +} + +AddressRanges &AddressRangeListImpl::ref() { return m_ranges; } diff --git a/lldb/source/Core/CMakeLists.txt b/lldb/source/Core/CMakeLists.txt index f24dbbd45a8e8cb..dbc620b91b1ed13 100644 --- a/lldb/source/Core/CMakeLists.txt +++ b/lldb/source/Core/CMakeLists.txt @@ -20,6 +20,7 @@ endif() add_lldb_library(lldbCore Address.cpp AddressRange.cpp + AddressRangeListImpl.cpp AddressResolver.cpp AddressResolverFileLine.cpp Communication.cpp diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp index ca582cb1d5a46f4..ddaa7a8a597b4ff 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp @@ -13,6 +13,8 @@ #include "lldb/Symbol/CompilerType.h" #include "lldb/Target/Process.h" #include "lldb/Target/Target.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" #include "lldb/Utility/StringLexer.h" #include "clang/Basic/TargetInfo.h" @@ -234,12 +236,15 @@ clang::QualType AppleObjCTypeEncodingParser::BuildObjCObjectPointerType( auto types = decl_vendor->FindTypes(ConstString(name), /*max_matches*/ 1); - // The user can forward-declare something that has no definition. The runtime - // doesn't prohibit this at all. This is a rare and very weird case. We keep - // this assert in debug builds so we catch other weird cases. - lldbassert(!types.empty()); - if (types.empty()) + if (types.empty()) { + // The user can forward-declare something that has no definition. The + // runtime doesn't prohibit this at all. This is a rare and very weird + // case. Assert assert in debug builds so we catch other weird cases. + assert(false && "forward declaration without definition"); + LLDB_LOG(GetLog(LLDBLog::Types), + "forward declaration without definition: {0}", name); return ast_ctx.getObjCIdType(); + } return ClangUtil::GetQualType(types.front().GetPointerType()); } else { diff --git a/lldb/source/Symbol/Block.cpp b/lldb/source/Symbol/Block.cpp index 6eeabe0ff5e4d01..f7d9c0d2d330656 100644 --- a/lldb/source/Symbol/Block.cpp +++ b/lldb/source/Symbol/Block.cpp @@ -314,6 +314,22 @@ bool Block::GetRangeAtIndex(uint32_t range_idx, AddressRange &range) { return false; } +AddressRanges Block::GetRanges() { + AddressRanges ranges; + Function *function = CalculateSymbolContextFunction(); + if (!function) + return ranges; + for (size_t i = 0, e = m_ranges.GetSize(); i < e; ++i) { + ranges.emplace_back(); + auto &range = ranges.back(); + const Range &vm_range = m_ranges.GetEntryRef(i); + range.GetBaseAddress() = function->GetAddressRange().GetBaseAddress(); + range.GetBaseAddress().Slide(vm_range.GetRangeBase()); + range.SetByteSize(vm_range.GetByteSize()); + } + return ranges; +} + bool Block::GetStartAddress(Address &addr) { if (m_ranges.IsEmpty()) return false; diff --git a/lldb/test/API/python_api/address_range/Makefile b/lldb/test/API/python_api/address_range/Makefile new file mode 100644 index 000000000000000..99998b20bcb0502 --- /dev/null +++ b/lldb/test/API/python_api/address_range/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/python_api/address_range/TestAddressRange.py b/lldb/test/API/python_api/address_range/TestAddressRange.py new file mode 100644 index 000000000000000..8c27558af4752d4 --- /dev/null +++ b/lldb/test/API/python_api/address_range/TestAddressRange.py @@ -0,0 +1,256 @@ +""" +Test SBAddressRange APIs. +""" + +import lldb +from lldbsuite.test.lldbtest import * + + +class AddressRangeTestCase(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def setUp(self): + TestBase.setUp(self) + + self.build() + exe = self.getBuildArtifact("a.out") + + self.dbg.SetAsync(True) + + self.target = self.dbg.CreateTarget(exe) + self.assertTrue(self.target, VALID_TARGET) + self.launch_info = self.target.GetLaunchInfo() + self.launch_info.SetWorkingDirectory(self.get_process_working_directory()) + + self.bp1 = self.target.BreakpointCreateByName("main", "a.out") + self.bp2 = self.target.BreakpointCreateByName("foo", "a.out") + self.bp3 = self.target.BreakpointCreateByName("bar", "a.out") + + self.assertTrue(self.bp1.IsValid()) + self.assertTrue(self.bp2.IsValid()) + self.assertTrue(self.bp3.IsValid()) + + self.addr1 = self.bp1.GetLocationAtIndex(0).GetAddress() + self.addr2 = self.bp2.GetLocationAtIndex(0).GetAddress() + self.addr3 = self.bp3.GetLocationAtIndex(0).GetAddress() + + self.assertTrue(self.addr1.IsValid()) + self.assertTrue(self.addr2.IsValid()) + self.assertTrue(self.addr3.IsValid()) + + def test_address_range_default(self): + """Testing default constructor.""" + empty_range = lldb.SBAddressRange() + self.assertEqual(empty_range.IsValid(), False) + + def test_address_range_construction(self): + """Make sure the construction and getters work.""" + range = lldb.SBAddressRange(self.addr1, 8) + self.assertEqual(range.IsValid(), True) + self.assertEqual(range.GetBaseAddress(), self.addr1) + self.assertEqual(range.GetByteSize(), 8) + + def test_address_range_clear(self): + """Make sure the clear method works.""" + range = lldb.SBAddressRange(self.addr1, 8) + self.assertEqual(range.IsValid(), True) + self.assertEqual(range.GetBaseAddress(), self.addr1) + self.assertEqual(range.GetByteSize(), 8) + + range.Clear() + self.assertEqual(range.IsValid(), False) + + def test_function(self): + """Make sure the range works in SBFunction APIs.""" + + # Setup breakpoints in main + loc = self.bp1.GetLocationAtIndex(0) + loc_addr = loc.GetAddress() + func = loc_addr.GetFunction() + ranges = func.GetRanges() + self.assertEqual(ranges.GetSize(), 1) + + range = ranges.GetAddressRangeAtIndex(0) + self.assertEqual( + range.GetByteSize(), + func.GetEndAddress().GetOffset() - func.GetStartAddress().GetOffset(), + ) + self.assertEqual( + range.GetBaseAddress().GetOffset(), + func.GetStartAddress().GetOffset(), + ) + + def test_block(self): + """Make sure the range works in SBBlock APIs.""" + loc = self.bp1.GetLocationAtIndex(0) + loc_addr = loc.GetAddress() + block = loc_addr.GetBlock() + + ranges = block.GetRanges() + self.assertEqual(ranges.GetSize(), 1) + + range = ranges.GetAddressRangeAtIndex(0) + self.assertEqual( + range.GetByteSize(), + block.GetRangeEndAddress(0).GetOffset() + - block.GetRangeStartAddress(0).GetOffset(), + ) + self.assertEqual( + range.GetBaseAddress().GetOffset(), + block.GetRangeStartAddress(0).GetOffset(), + ) + + def test_address_range_list(self): + """Make sure the SBAddressRangeList works by adding and getting ranges.""" + range1 = lldb.SBAddressRange(self.addr1, 8) + range2 = lldb.SBAddressRange(self.addr2, 16) + range3 = lldb.SBAddressRange(self.addr3, 32) + + range_list = lldb.SBAddressRangeList() + self.assertEqual(range_list.GetSize(), 0) + + range_list.Append(range1) + range_list.Append(range2) + range_list.Append(range3) + self.assertEqual(range_list.GetSize(), 3) + self.assertRaises(IndexError, lambda: range_list[3]) + + range1_copy = range_list.GetAddressRangeAtIndex(0) + self.assertEqual(range1.GetByteSize(), range1_copy.GetByteSize()) + self.assertEqual( + range1.GetBaseAddress().GetOffset(), + range1_copy.GetBaseAddress().GetOffset(), + ) + + range2_copy = range_list.GetAddressRangeAtIndex(1) + self.assertEqual(range2.GetByteSize(), range2_copy.GetByteSize()) + self.assertEqual( + range2.GetBaseAddress().GetOffset(), + range2_copy.GetBaseAddress().GetOffset(), + ) + + range3_copy = range_list.GetAddressRangeAtIndex(2) + self.assertEqual(range3.GetByteSize(), range3_copy.GetByteSize()) + self.assertEqual( + range3.GetBaseAddress().GetOffset(), + range3_copy.GetBaseAddress().GetOffset(), + ) + + range_list.Clear() + self.assertEqual(range_list.GetSize(), 0) + + def test_address_range_list_len(self): + """Make sure the len() operator works.""" + range = lldb.SBAddressRange(self.addr1, 8) + + range_list = lldb.SBAddressRangeList() + self.assertEqual(len(range_list), 0) + + range_list.Append(range) + self.assertEqual(len(range_list), 1) + + def test_address_range_list_iterator(self): + """Make sure the SBAddressRangeList iterator works.""" + range1 = lldb.SBAddressRange(self.addr1, 8) + range2 = lldb.SBAddressRange(self.addr2, 16) + range3 = lldb.SBAddressRange(self.addr3, 32) + + range_list = lldb.SBAddressRangeList() + range_list.Append(range1) + range_list.Append(range2) + range_list.Append(range3) + self.assertEqual(range_list.GetSize(), 3) + + # Test the iterator + for range in range_list: + self.assertTrue(range.IsValid()) + + def test_address_range_print_invalid(self): + """Make sure the SBAddressRange can be printed when invalid.""" + range = lldb.SBAddressRange() + self.assertEqual(str(range), "") + + def test_address_range_print_resolved(self): + """Make sure the SBAddressRange can be printed when resolved.""" + lldb.target = self.target + error = lldb.SBError() + process = self.target.Launch(self.launch_info, error) + self.assertTrue(error.Success(), "Make sure process launched successfully") + self.assertTrue(process, PROCESS_IS_VALID) + self.assertState(process.GetState(), lldb.eStateStopped, PROCESS_STOPPED) + + loc = self.bp1.GetLocationAtIndex(0) + loc_addr = loc.GetAddress() + func = loc_addr.GetFunction() + range = func.GetRanges().GetAddressRangeAtIndex(0) + range_str = str(range) + # [0x1000-0x2000] // Resolved with target or addresses without sections + self.assertRegex(range_str, "^\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") + process.Kill() + + def test_address_range_print_no_section_resolved(self): + """Make sure the SBAddressRange can be printed with no secion.""" + lldb.target = self.target + error = lldb.SBError() + process = self.target.Launch(self.launch_info, error) + self.assertTrue(error.Success(), "Make sure process launched successfully") + self.assertTrue(process, PROCESS_IS_VALID) + self.assertState(process.GetState(), lldb.eStateStopped, PROCESS_STOPPED) + + loc = self.bp1.GetLocationAtIndex(0) + loc_addr = loc.GetAddress() + func = loc_addr.GetFunction() + range = func.GetRanges().GetAddressRangeAtIndex(0) + + addr = lldb.SBAddress() + addr.SetAddress(lldb.SBSection(), range.GetBaseAddress().GetOffset()) + self.assertFalse(addr.GetSection().IsValid()) + range = lldb.SBAddressRange(addr, range.GetByteSize()) + + range_str = str(range) + # [0x1000-0x2000] // Resolved with target or addresses without sections + self.assertRegex(range_str, "^\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") + process.Kill() + + def test_address_range_print_not_resolved(self): + """Make sure the SBAddressRange can be printed when not resolved.""" + range = lldb.SBAddressRange(self.addr1, 8) + range_str = str(range) + # a.out[0x1000-0x2000] // Without target + self.assertRegex(range_str, "^a.out\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") + + def test_address_range_list_print(self): + """Make sure the SBAddressRangeList can be printed.""" + range1 = lldb.SBAddressRange(self.addr1, 8) + range2 = lldb.SBAddressRange(self.addr2, 16) + range3 = lldb.SBAddressRange(self.addr3, 32) + self.dbg.SetAsync(True) + + range_list = lldb.SBAddressRangeList() + self.assertEqual(range_list.GetSize(), 0) + + range_list.Append(range1) + range_list.Append(range2) + range_list.Append(range3) + self.assertEqual(range_list.GetSize(), 3) + + range_list_str = str(range_list) + self.assertTrue(range_list_str.startswith("[")) + self.assertGreater(range_list_str.count(","), 1) + self.assertTrue(range_list_str.endswith("]")) + + def test_address_range_list_indexing(self): + """Make sure the SBAddressRangeList can be printed.""" + range1 = lldb.SBAddressRange(self.addr1, 8) + range2 = lldb.SBAddressRange(self.addr2, 16) + range_list = lldb.SBAddressRangeList() + range_list.Append(range1) + range_list.Append(range2) + + self.assertEqual(range_list.GetSize(), 2) + self.assertRaises(IndexError, lambda: range_list[2]) + self.assertRaises(TypeError, lambda: range_list["0"]) + self.assertEqual(range_list[0], range1) + self.assertEqual(range_list[1], range2) + self.assertEqual(range_list[-1], range2) + self.assertEqual(range_list[-2], range1) diff --git a/lldb/test/API/python_api/address_range/main.cpp b/lldb/test/API/python_api/address_range/main.cpp new file mode 100644 index 000000000000000..b6eaec4a23699b4 --- /dev/null +++ b/lldb/test/API/python_api/address_range/main.cpp @@ -0,0 +1,8 @@ +void foo() {} +void bar() {} + +int main() { + foo(); + bar(); + return 0; +} diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 5828cc156cc785e..1d016b28347d273 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1761,11 +1761,6 @@ class ScalarEvolution { ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, bool AllowPredicates = false); - /// Return a symbolic upper bound for the backedge taken count of the loop. - /// This is more general than getConstantMaxBackedgeTakenCount as it returns - /// an arbitrary expression as opposed to only constants. - const SCEV *computeSymbolicMaxBackedgeTakenCount(const Loop *L); - // Helper functions for computeExitLimitFromCond to avoid exponential time // complexity. diff --git a/llvm/include/llvm/Transforms/Scalar/Reassociate.h b/llvm/include/llvm/Transforms/Scalar/Reassociate.h index f3a2e0f4380eb02..84d72df6fc4d81b 100644 --- a/llvm/include/llvm/Transforms/Scalar/Reassociate.h +++ b/llvm/include/llvm/Transforms/Scalar/Reassociate.h @@ -63,6 +63,16 @@ struct Factor { Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {} }; +struct OverflowTracking { + bool HasNUW; + bool HasNSW; + bool AllKnownNonNegative; + // Note: AllKnownNonNegative can be true in a case where one of the operands + // is negative, but one the operators is not NSW. AllKnownNonNegative should + // not be used independently of HasNSW + OverflowTracking() : HasNUW(true), HasNSW(true), AllKnownNonNegative(true) {} +}; + class XorOpnd; } // end namespace reassociate @@ -103,7 +113,7 @@ class ReassociatePass : public PassInfoMixin { void ReassociateExpression(BinaryOperator *I); void RewriteExprTree(BinaryOperator *I, SmallVectorImpl &Ops, - bool HasNUW); + reassociate::OverflowTracking Flags); Value *OptimizeExpression(BinaryOperator *I, SmallVectorImpl &Ops); Value *OptimizeAdd(Instruction *I, diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index bc8b9b8479e4ff2..bd4c2a35ebf2cb0 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1983,20 +1983,25 @@ getDependenceDistanceStrideAndSize( return MemoryDepChecker::Dependence::IndirectUnsafe; // Check if we can prove that Sink only accesses memory after Src's end or - // vice versa. - const auto &[SrcStart, SrcEnd] = - getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE); - const auto &[SinkStart, SinkEnd] = - getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE); - - if (!isa(SrcStart) && - !isa(SrcEnd) && - !isa(SinkStart) && - !isa(SinkEnd)) { - if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart)) - return MemoryDepChecker::Dependence::NoDep; - if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart)) - return MemoryDepChecker::Dependence::NoDep; + // vice versa. At the moment this is limited to cases where either source or + // sink are loop invariant to avoid compile-time increases. This is not + // required for correctness. + if (SE.isLoopInvariant(Src, InnermostLoop) || + SE.isLoopInvariant(Sink, InnermostLoop)) { + const auto &[SrcStart, SrcEnd] = + getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE); + const auto &[SinkStart, SinkEnd] = + getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE); + + if (!isa(SrcStart) && + !isa(SrcEnd) && + !isa(SinkStart) && + !isa(SinkEnd)) { + if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart)) + return MemoryDepChecker::Dependence::NoDep; + if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart)) + return MemoryDepChecker::Dependence::NoDep; + } } // Need accesses with constant strides and the same direction. We don't want diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 8d971e6a78e420f..bb56b41fe15d583 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -8647,8 +8647,28 @@ ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const { const SCEV * ScalarEvolution::BackedgeTakenInfo::getSymbolicMax(const Loop *L, ScalarEvolution *SE) { - if (!SymbolicMax) - SymbolicMax = SE->computeSymbolicMaxBackedgeTakenCount(L); + if (!SymbolicMax) { + // Form an expression for the maximum exit count possible for this loop. We + // merge the max and exact information to approximate a version of + // getConstantMaxBackedgeTakenCount which isn't restricted to just + // constants. + SmallVector ExitCounts; + + for (const auto &ENT : ExitNotTaken) { + const SCEV *ExitCount = ENT.SymbolicMaxNotTaken; + if (!isa(ExitCount)) { + assert(SE->DT.dominates(ENT.ExitingBlock, L->getLoopLatch()) && + "We should only have known counts for exiting blocks that " + "dominate latch!"); + ExitCounts.push_back(ExitCount); + } + } + if (ExitCounts.empty()) + SymbolicMax = SE->getCouldNotCompute(); + else + SymbolicMax = + SE->getUMinFromMismatchedTypes(ExitCounts, /*Sequential*/ true); + } return SymbolicMax; } @@ -14964,30 +14984,6 @@ bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, return false; } -const SCEV * -ScalarEvolution::computeSymbolicMaxBackedgeTakenCount(const Loop *L) { - SmallVector ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - - // Form an expression for the maximum exit count possible for this loop. We - // merge the max and exact information to approximate a version of - // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. - SmallVector ExitCounts; - for (BasicBlock *ExitingBB : ExitingBlocks) { - const SCEV *ExitCount = - getExitCount(L, ExitingBB, ScalarEvolution::SymbolicMaximum); - if (!isa(ExitCount)) { - assert(DT.dominates(ExitingBB, L->getLoopLatch()) && - "We should only have known counts for exiting blocks that " - "dominate latch!"); - ExitCounts.push_back(ExitCount); - } - } - if (ExitCounts.empty()) - return getCouldNotCompute(); - return getUMinFromMismatchedTypes(ExitCounts, /*Sequential*/ true); -} - /// A rewriter to replace SCEV expressions in Map with the corresponding entry /// in the map. It skips AddRecExpr because we cannot guarantee that the /// replacement is loop invariant in the loop of the AddRec. diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index c04f7208c61f2a8..d8b0f52ecf9e32e 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3972,7 +3972,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { // target can override this with custom lowering and calling the // implementation functions. LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (LI.isLegalOrCustom({G_UMIN, Ty}) && LI.isLegalOrCustom({G_UMAX, Ty})) + if (LI.isLegalOrCustom({G_UMIN, Ty})) return lowerAddSubSatToMinMax(MI); return lowerAddSubSatToAddoSubo(MI); } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 93d866384b48291..2f4fdf5208d0761 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11186,17 +11186,19 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { return SDValue(); } -// FIXME: This should be checking for no signed zeros on individual operands, as -// well as no nans. static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, - SDValue RHS, + SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI) { - const TargetOptions &Options = DAG.getTarget().Options; EVT VT = LHS.getValueType(); + if (!VT.isFloatingPoint()) + return false; + + const TargetOptions &Options = DAG.getTarget().Options; - return Options.NoSignedZerosFPMath && VT.isFloatingPoint() && + return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) && TLI.isProfitableToCombineMinNumMaxNum(VT) && - DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS); + (Flags.hasNoNaNs() || + (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS))); } static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, @@ -11674,7 +11676,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // select (fcmp gt x, y), x, y -> fmaxnum x, y // // This is OK if we don't care what happens if either operand is a NaN. - if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI)) + if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI)) if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC)) return FMinMax; @@ -12267,7 +12269,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { // This is OK if we don't care about what happens if either operand is a // NaN. // - if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) { + if (N0.hasOneUse() && + isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) { if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC)) return FMinMax; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index fc96ecdc6628089..fb1424f75e097d3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2488,6 +2488,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMINIMUM: case ISD::FMAXNUM: case ISD::FMINNUM: + case ISD::FMAXNUM_IEEE: + case ISD::FMINNUM_IEEE: case ISD::FMUL: case ISD::FPOW: case ISD::FREM: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 40e621f0db22099..14e8708fd3f38f2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4237,8 +4237,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::SHL: case ISD::VP_SHL: case ISD::SRA: case ISD::VP_SRA: case ISD::SRL: case ISD::VP_SRL: - case ISD::FMINNUM: case ISD::VP_FMINNUM: - case ISD::FMAXNUM: case ISD::VP_FMAXNUM: + case ISD::FMINNUM: + case ISD::FMINNUM_IEEE: + case ISD::VP_FMINNUM: + case ISD::FMAXNUM: + case ISD::FMAXNUM_IEEE: + case ISD::VP_FMAXNUM: case ISD::FMINIMUM: case ISD::VP_FMINIMUM: case ISD::FMAXIMUM: diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index de342e89657367b..68126992ddcd726 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -388,15 +388,29 @@ using WebAssembly::WasmEnableEmSjLj; using WebAssembly::WasmEnableSjLj; static void basicCheckForEHAndSjLj(TargetMachine *TM) { - // Before checking, we make sure TargetOptions.ExceptionModel is the same as + + // You can't enable two modes of EH at the same time + if (WasmEnableEmEH && WasmEnableEH) + report_fatal_error( + "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-eh"); + // You can't enable two modes of SjLj at the same time + if (WasmEnableEmSjLj && WasmEnableSjLj) + report_fatal_error( + "-enable-emscripten-sjlj not allowed with -wasm-enable-sjlj"); + // You can't mix Emscripten EH with Wasm SjLj. + if (WasmEnableEmEH && WasmEnableSjLj) + report_fatal_error( + "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-sjlj"); + + // Here we make sure TargetOptions.ExceptionModel is the same as // MCAsmInfo.ExceptionsType. Normally these have to be the same, because clang // stores the exception model info in LangOptions, which is later transferred // to TargetOptions and MCAsmInfo. But when clang compiles bitcode directly, // clang's LangOptions is not used and thus the exception model info is not // correctly transferred to TargetOptions and MCAsmInfo, so we make sure we - // have the correct exception model in WebAssemblyMCAsmInfo constructor. - // But in this case TargetOptions is still not updated, so we make sure they - // are the same. + // have the correct exception model in WebAssemblyMCAsmInfo constructor. But + // in this case TargetOptions is still not updated, so we make sure they are + // the same. TM->Options.ExceptionModel = TM->getMCAsmInfo()->getExceptionHandlingType(); // Basic Correctness checking related to -exception-model @@ -418,18 +432,6 @@ static void basicCheckForEHAndSjLj(TargetMachine *TM) { "-exception-model=wasm only allowed with at least one of " "-wasm-enable-eh or -wasm-enable-sjlj"); - // You can't enable two modes of EH at the same time - if (WasmEnableEmEH && WasmEnableEH) - report_fatal_error( - "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-eh"); - // You can't enable two modes of SjLj at the same time - if (WasmEnableEmSjLj && WasmEnableSjLj) - report_fatal_error( - "-enable-emscripten-sjlj not allowed with -wasm-enable-sjlj"); - // You can't mix Emscripten EH with Wasm SjLj. - if (WasmEnableEmEH && WasmEnableSjLj) - report_fatal_error( - "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-sjlj"); // Currently it is allowed to mix Wasm EH with Emscripten SjLj as an interim // measure, but some code will error out at compile time in this combination. // See WebAssemblyLowerEmscriptenEHSjLj pass for details. diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index c7e25c9f3d2c92e..3fe5478408d4572 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -22,8 +22,6 @@ // // Future loop memory idioms to recognize: // memcmp, strlen, etc. -// Future floating point idioms to recognize in -ffast-math mode: -// fpowi // // This could recognize common matrix multiplies and dot product idioms and // replace them with calls to BLAS (if linked in??). @@ -1107,7 +1105,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( GV->setAlignment(Align(16)); Value *PatternPtr = GV; NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes}); - + // Set the TBAA info if present. if (AATags.TBAA) NewCall->setMetadata(LLVMContext::MD_tbaa, AATags.TBAA); @@ -1117,7 +1115,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( if (AATags.NoAlias) NewCall->setMetadata(LLVMContext::MD_noalias, AATags.NoAlias); - } + } NewCall->setDebugLoc(TheStore->getDebugLoc()); diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp index d91320863e241df..c903e47a93cafd7 100644 --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -471,7 +471,7 @@ using RepeatedValue = std::pair; static bool LinearizeExprTree(Instruction *I, SmallVectorImpl &Ops, ReassociatePass::OrderedSet &ToRedo, - bool &HasNUW) { + reassociate::OverflowTracking &Flags) { assert((isa(I) || isa(I)) && "Expected a UnaryOperator or BinaryOperator!"); LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n'); @@ -512,6 +512,7 @@ static bool LinearizeExprTree(Instruction *I, using LeafMap = DenseMap; LeafMap Leaves; // Leaf -> Total weight so far. SmallVector LeafOrder; // Ensure deterministic leaf output order. + const DataLayout DL = I->getModule()->getDataLayout(); #ifndef NDEBUG SmallPtrSet Visited; // For checking the iteration scheme. @@ -520,8 +521,10 @@ static bool LinearizeExprTree(Instruction *I, std::pair P = Worklist.pop_back_val(); I = P.first; // We examine the operands of this binary operator. - if (isa(I)) - HasNUW &= I->hasNoUnsignedWrap(); + if (isa(I)) { + Flags.HasNUW &= I->hasNoUnsignedWrap(); + Flags.HasNSW &= I->hasNoSignedWrap(); + } for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands. Value *Op = I->getOperand(OpIdx); @@ -648,6 +651,8 @@ static bool LinearizeExprTree(Instruction *I, // Ensure the leaf is only output once. It->second = 0; Ops.push_back(std::make_pair(V, Weight)); + if (Opcode == Instruction::Add && Flags.AllKnownNonNegative && Flags.HasNSW) + Flags.AllKnownNonNegative &= isKnownNonNegative(V, SimplifyQuery(DL)); } // For nilpotent operations or addition there may be no operands, for example @@ -666,7 +671,7 @@ static bool LinearizeExprTree(Instruction *I, /// linearized and optimized, emit them in-order. void ReassociatePass::RewriteExprTree(BinaryOperator *I, SmallVectorImpl &Ops, - bool HasNUW) { + OverflowTracking Flags) { assert(Ops.size() > 1 && "Single values should be used directly!"); // Since our optimizations should never increase the number of operations, the @@ -834,8 +839,12 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I, // Note that it doesn't hold for mul if one of the operands is zero. // TODO: We can preserve NUW flag if we prove that all mul operands // are non-zero. - if (HasNUW && ExpressionChangedStart->getOpcode() == Instruction::Add) - ExpressionChangedStart->setHasNoUnsignedWrap(); + if (ExpressionChangedStart->getOpcode() == Instruction::Add) { + if (Flags.HasNUW) + ExpressionChangedStart->setHasNoUnsignedWrap(); + if (Flags.HasNSW && (Flags.AllKnownNonNegative || Flags.HasNUW)) + ExpressionChangedStart->setHasNoSignedWrap(); + } } } @@ -1192,8 +1201,8 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) { return nullptr; SmallVector Tree; - bool HasNUW = true; - MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts, HasNUW); + OverflowTracking Flags; + MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts, Flags); SmallVector Factors; Factors.reserve(Tree.size()); for (unsigned i = 0, e = Tree.size(); i != e; ++i) { @@ -1235,7 +1244,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) { if (!FoundFactor) { // Make sure to restore the operands to the expression tree. - RewriteExprTree(BO, Factors, HasNUW); + RewriteExprTree(BO, Factors, Flags); return nullptr; } @@ -1247,7 +1256,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) { RedoInsts.insert(BO); V = Factors[0].Op; } else { - RewriteExprTree(BO, Factors, HasNUW); + RewriteExprTree(BO, Factors, Flags); V = BO; } @@ -2373,8 +2382,8 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { // First, walk the expression tree, linearizing the tree, collecting the // operand information. SmallVector Tree; - bool HasNUW = true; - MadeChange |= LinearizeExprTree(I, Tree, RedoInsts, HasNUW); + OverflowTracking Flags; + MadeChange |= LinearizeExprTree(I, Tree, RedoInsts, Flags); SmallVector Ops; Ops.reserve(Tree.size()); for (const RepeatedValue &E : Tree) @@ -2567,7 +2576,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { dbgs() << '\n'); // Now that we ordered and optimized the expressions, splat them back into // the expression tree, removing any unneeded nodes. - RewriteExprTree(I, Ops, HasNUW); + RewriteExprTree(I, Ops, Flags); } void diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll index 809b15b2004952e..81d8b01fe7fb720 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll @@ -130,8 +130,16 @@ define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) { ; CHECK-LABEL: 'neg_dist_dep_type_size_equivalence' ; CHECK-NEXT: loop: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding. +; CHECK-NEXT: Unknown data dependence. ; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 -> +; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8 +; CHECK-EMPTY: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8 -> +; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8 +; CHECK-EMPTY: ; CHECK-NEXT: BackwardVectorizableButPreventsForwarding: ; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 -> ; CHECK-NEXT: store double %val, ptr %gep.iv.101.i64, align 8 diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll index 845ff078ee0eb4d..416742a94e0d36a 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll @@ -45,8 +45,13 @@ exit: define void @different_non_constant_strides_known_backward_distance_larger_than_trip_count(ptr %A) { ; CHECK-LABEL: 'different_non_constant_strides_known_backward_distance_larger_than_trip_count' ; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. ; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %l = load i32, ptr %gep, align 4 -> +; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: ; CHECK-EMPTY: diff --git a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll new file mode 100644 index 000000000000000..50a3336a7483c74 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll @@ -0,0 +1,1757 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s + +; Test if fcmp+select patterns form min/max instructions when allowed +; by flags. + +; TODO: Merge with fmin_legacy.ll/fmax_legacy.ll + +define float @v_test_fmin_legacy_ule_f32_safe(float %a, float %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f32_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f32_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f32_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmin_legacy_ule_f32_nnan_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule float %a, %b + %val = select nnan i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmin_legacy_ule_f32_nsz_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule float %a, %b + %val = select nsz i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmin_legacy_ule_f32_nnan_nsz_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule float %a, %b + %val = select nnan nsz i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_uge_f32_safe(float %a, float %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f32_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f32_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f32_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_uge_f32_nnan_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge float %a, %b + %val = select nnan i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_uge_f32_nsz_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge float %a, %b + %val = select nsz i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_uge_f32_nnan_nsz_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge float %a, %b + %val = select nnan nsz i1 %cmp, float %a, float %b + ret float %val +} + +define <2 x float> @v_test_fmin_legacy_ule_v2f32_safe(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x float> %a, %b + %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x float> %a, %b + %val = select nnan <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmin_legacy_ule_v2f32_nsz_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x float> %a, %b + %val = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x float> %a, %b + %val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmax_legacy_uge_v2f32_safe(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x float> %a, %b + %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x float> %a, %b + %val = select nnan <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmax_legacy_uge_v2f32_nsz_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x float> %a, %b + %val = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x float> %a, %b + %val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define half @v_test_fmin_legacy_ule_f16_safe(half %a, half %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule half %a, %b + %val = select i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmin_legacy_ule_f16_nnan_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule half %a, %b + %val = select nnan i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmin_legacy_ule_f16_nsz_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule half %a, %b + %val = select nsz i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmin_legacy_ule_f16_nnan_nsz_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule half %a, %b + %val = select nnan nsz i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmax_legacy_uge_f16_safe(half %a, half %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge half %a, %b + %val = select i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmax_legacy_uge_f16_nnan_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge half %a, %b + %val = select nnan i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmax_legacy_uge_f16_nsz_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge half %a, %b + %val = select nsz i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmax_legacy_uge_f16_nnan_nsz_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge half %a, %b + %val = select nnan nsz i1 %cmp, half %a, half %b + ret half %val +} + +define <2 x half> @v_test_fmin_legacy_ule_v2f16_safe(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x half> %a, %b + %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x half> %a, %b + %val = select nnan <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmin_legacy_ule_v2f16_nsz_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x half> %a, %b + %val = select nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x half> %a, %b + %val = select nnan nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmax_legacy_uge_v2f16_safe(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x half> %a, %b + %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x half> %a, %b + %val = select nnan <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmax_legacy_uge_v2f16_nsz_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x half> %a, %b + %val = select nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x half> %a, %b + %val = select nnan nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <4 x half> @v_test_fmin_legacy_ule_v4f16_safe(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <4 x half> %a, %b + %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <4 x half> %a, %b + %val = select nnan <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmin_legacy_ule_v4f16_nsz_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <4 x half> %a, %b + %val = select nsz <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX9-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <4 x half> %a, %b + %val = select nnan nsz <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmax_legacy_uge_v4f16_safe(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <4 x half> %a, %b + %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <4 x half> %a, %b + %val = select nnan <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmax_legacy_uge_v4f16_nsz_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <4 x half> %a, %b + %val = select nsz <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX9-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <4 x half> %a, %b + %val = select nnan nsz <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define float @v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float %arg1) { +; GFX7-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_f32_e32 v0, v0, v0 +; GFX7-NEXT: v_add_f32_e32 v1, v1, v1 +; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_f32_e32 v0, v0, v0 +; GFX9-NEXT: v_add_f32_e32 v1, v1, v1 +; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %a = fadd nnan float %arg0, %arg0 + %b = fadd nnan float %arg1, %arg1 + %cmp = fcmp ule float %a, %b + %val = select nsz i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float %arg1) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_f32_e32 v0, v0, v0 +; GFX7-NEXT: v_add_f32_e32 v1, v1, v1 +; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_f32_e32 v0, v0, v0 +; GFX9-NEXT: v_add_f32_e32 v1, v1, v1 +; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %a = fadd nnan float %arg0, %arg0 + %b = fadd nnan float %arg1, %arg1 + %cmp = fcmp uge float %a, %b + %val = select nsz i1 %cmp, float %a, float %b + ret float %val +} diff --git a/llvm/test/CodeGen/WebAssembly/eh-option-errors.ll b/llvm/test/CodeGen/WebAssembly/eh-option-errors.ll new file mode 100644 index 000000000000000..74d02ddc405d3f2 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/eh-option-errors.ll @@ -0,0 +1,19 @@ +target triple = "wasm32-unknown-unknown" + +; RUN: not --crash llc < %s -enable-emscripten-cxx-exceptions -wasm-enable-eh 2>&1 | FileCheck %s --check-prefix=EM_EH_W_WASM_EH +; EM_EH_W_WASM_EH: LLVM ERROR: -enable-emscripten-cxx-exceptions not allowed with -wasm-enable-eh + +; RUN: not --crash llc < %s -enable-emscripten-sjlj -wasm-enable-sjlj 2>&1 | FileCheck %s --check-prefix=EM_SJLJ_W_WASM_SJLJ +; EM_SJLJ_W_WASM_SJLJ: LLVM ERROR: -enable-emscripten-sjlj not allowed with -wasm-enable-sjlj + +; RUN: not --crash llc < %s -enable-emscripten-cxx-exceptions -wasm-enable-sjlj 2>&1 | FileCheck %s --check-prefix=EM_EH_W_WASM_SJLJ +; EM_EH_W_WASM_SJLJ: LLVM ERROR: -enable-emscripten-cxx-exceptions not allowed with -wasm-enable-sjlj + +; RUN: not --crash llc < %s -wasm-enable-eh -exception-model=dwarf 2>&1 | FileCheck %s --check-prefix=EH_MODEL_DWARF +; EH_MODEL_DWARF: LLVM ERROR: -exception-model should be either 'none' or 'wasm' + +; RUN: not --crash llc < %s -enable-emscripten-cxx-exceptions -exception-model=wasm 2>&1 | FileCheck %s --check-prefix=EM_EH_W_MODEL_WASM +; EM_EH_W_MODEL_WASM: LLVM ERROR: -exception-model=wasm not allowed with -enable-emscripten-cxx-exceptions + +; RUN: not --crash llc < %s -exception-model=wasm 2>&1 | FileCheck %s --check-prefix=MODEL_WASM_WO_WASM_EH_SJLJ +; MODEL_WASM_WO_WASM_EH_SJLJ: LLVM ERROR: -exception-model=wasm only allowed with at least one of -wasm-enable-eh or -wasm-enable-sjlj diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll b/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll index 4a63c812d6ae9ac..66872a542298625 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll @@ -1,7 +1,6 @@ ; RUN: llc < %s -enable-emscripten-cxx-exceptions | FileCheck %s --check-prefix=EH ; RUN: llc < %s -enable-emscripten-sjlj | FileCheck %s --check-prefix=SJLJ ; RUN: llc < %s | FileCheck %s --check-prefix=NONE -; RUN: not --crash llc < %s -enable-emscripten-cxx-exceptions -exception-model=wasm 2>&1 | FileCheck %s --check-prefix=WASM-EH-EM-EH target triple = "wasm32-unknown-unknown" @@ -97,5 +96,3 @@ declare void @free(ptr) attributes #0 = { returns_twice } attributes #1 = { noreturn } attributes #2 = { nounwind } - -; WASM-EH-EM-EH: LLVM ERROR: -exception-model=wasm not allowed with -enable-emscripten-cxx-exceptions diff --git a/llvm/test/Transforms/Reassociate/local-cse.ll b/llvm/test/Transforms/Reassociate/local-cse.ll index 4d0467e263f5538..d0d609f022b46b8 100644 --- a/llvm/test/Transforms/Reassociate/local-cse.ll +++ b/llvm/test/Transforms/Reassociate/local-cse.ll @@ -26,16 +26,16 @@ define void @chain_spanning_several_blocks(i64 %inv1, i64 %inv2, i64 %inv3, i64 ; LOCAL_CSE-LABEL: define void @chain_spanning_several_blocks ; LOCAL_CSE-SAME: (i64 [[INV1:%.*]], i64 [[INV2:%.*]], i64 [[INV3:%.*]], i64 [[INV4:%.*]], i64 [[INV5:%.*]]) { ; LOCAL_CSE-NEXT: bb1: -; LOCAL_CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw i64 [[INV2]], [[INV1]] +; LOCAL_CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw nsw i64 [[INV2]], [[INV1]] ; LOCAL_CSE-NEXT: br label [[BB2:%.*]] ; LOCAL_CSE: bb2: ; LOCAL_CSE-NEXT: [[VAL_BB2:%.*]] = call i64 @get_val() -; LOCAL_CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV4]] -; LOCAL_CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw i64 [[CHAIN_A1]], [[VAL_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_B1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV5]] -; LOCAL_CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw i64 [[CHAIN_B1]], [[VAL_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_C0:%.*]] = add nuw i64 [[INV3]], [[INV1]] -; LOCAL_CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw i64 [[CHAIN_C0]], [[VAL_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV4]] +; LOCAL_CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[VAL_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_B1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV5]] +; LOCAL_CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw nsw i64 [[CHAIN_B1]], [[VAL_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_C0:%.*]] = add nuw nsw i64 [[INV3]], [[INV1]] +; LOCAL_CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw nsw i64 [[CHAIN_C0]], [[VAL_BB2]] ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_A2]]) ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_B2]]) ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_C1]]) @@ -47,11 +47,11 @@ define void @chain_spanning_several_blocks(i64 %inv1, i64 %inv2, i64 %inv3, i64 ; CSE-NEXT: br label [[BB2:%.*]] ; CSE: bb2: ; CSE-NEXT: [[VAL_BB2:%.*]] = call i64 @get_val() -; CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw i64 [[VAL_BB2]], [[INV1]] -; CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV2]] +; CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw nsw i64 [[VAL_BB2]], [[INV1]] +; CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV2]] ; CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[INV4]] ; CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[INV5]] -; CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV3]] +; CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV3]] ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_A2]]) ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_B2]]) ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_C1]]) @@ -90,19 +90,19 @@ define void @chain_spanning_several_blocks_no_entry_anchor() { ; LOCAL_CSE-NEXT: br label [[BB1:%.*]] ; LOCAL_CSE: bb1: ; LOCAL_CSE-NEXT: [[INV1_BB1:%.*]] = call i64 @get_val() -; LOCAL_CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw i64 [[INV1_BB1]], [[INV2_BB0]] +; LOCAL_CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw nsw i64 [[INV1_BB1]], [[INV2_BB0]] ; LOCAL_CSE-NEXT: br label [[BB2:%.*]] ; LOCAL_CSE: bb2: ; LOCAL_CSE-NEXT: [[INV3_BB2:%.*]] = call i64 @get_val() ; LOCAL_CSE-NEXT: [[INV4_BB2:%.*]] = call i64 @get_val() ; LOCAL_CSE-NEXT: [[INV5_BB2:%.*]] = call i64 @get_val() ; LOCAL_CSE-NEXT: [[VAL_BB2:%.*]] = call i64 @get_val() -; LOCAL_CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV4_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw i64 [[CHAIN_A1]], [[VAL_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_B1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV5_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw i64 [[CHAIN_B1]], [[VAL_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_C0:%.*]] = add nuw i64 [[VAL_BB2]], [[INV1_BB1]] -; LOCAL_CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw i64 [[CHAIN_C0]], [[INV3_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV4_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[VAL_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_B1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV5_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw nsw i64 [[CHAIN_B1]], [[VAL_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_C0:%.*]] = add nuw nsw i64 [[VAL_BB2]], [[INV1_BB1]] +; LOCAL_CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw nsw i64 [[CHAIN_C0]], [[INV3_BB2]] ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_A2]]) ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_B2]]) ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_C1]]) @@ -120,11 +120,11 @@ define void @chain_spanning_several_blocks_no_entry_anchor() { ; CSE-NEXT: [[INV4_BB2:%.*]] = call i64 @get_val() ; CSE-NEXT: [[INV5_BB2:%.*]] = call i64 @get_val() ; CSE-NEXT: [[VAL_BB2:%.*]] = call i64 @get_val() -; CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw i64 [[VAL_BB2]], [[INV1_BB1]] -; CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV2_BB0]] +; CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw nsw i64 [[VAL_BB2]], [[INV1_BB1]] +; CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV2_BB0]] ; CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[INV4_BB2]] ; CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[INV5_BB2]] -; CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV3_BB2]] +; CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV3_BB2]] ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_A2]]) ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_B2]]) ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_C1]]) diff --git a/llvm/test/Transforms/Reassociate/reassoc-add-nsw.ll b/llvm/test/Transforms/Reassociate/reassoc-add-nsw.ll new file mode 100644 index 000000000000000..fcebc4980e6d7dc --- /dev/null +++ b/llvm/test/Transforms/Reassociate/reassoc-add-nsw.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=reassociate -S | FileCheck %s +define i32 @nsw_preserve_nonnegative(ptr %ptr0, ptr %ptr1, ptr %ptr2) { +; CHECK-LABEL: define i32 @nsw_preserve_nonnegative( +; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], ptr [[PTR2:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[PTR0]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[PTR1]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[PTR2]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[ADD0:%.*]] = add nsw i32 [[V1]], [[V0]] +; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD0]], [[V2]] +; CHECK-NEXT: ret i32 [[ADD1]] +; + %v0 = load i32, ptr %ptr0, !range !1 + %v1 = load i32, ptr %ptr1, !range !1 + %v2 = load i32, ptr %ptr2, !range !1 + %add0 = add nsw i32 %v1, %v2 + %add1 = add nsw i32 %add0, %v0 + ret i32 %add1 +} + +define i32 @nsw_preserve_nuw_nsw(ptr %ptr0, ptr %ptr1, ptr %ptr2) { +; CHECK-LABEL: define i32 @nsw_preserve_nuw_nsw( +; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], ptr [[PTR2:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[PTR0]], align 4 +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[PTR1]], align 4 +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[PTR2]], align 4 +; CHECK-NEXT: [[ADD0:%.*]] = add nuw nsw i32 [[V1]], [[V0]] +; CHECK-NEXT: [[ADD1:%.*]] = add nuw nsw i32 [[ADD0]], [[V2]] +; CHECK-NEXT: ret i32 [[ADD1]] +; + %v0 = load i32, ptr %ptr0 + %v1 = load i32, ptr %ptr1 + %v2 = load i32, ptr %ptr2 + %add0 = add nuw nsw i32 %v1, %v2 + %add1 = add nuw nsw i32 %add0, %v0 + ret i32 %add1 +} + +define i32 @nsw_dont_preserve_negative(ptr %ptr0, ptr %ptr1, ptr %ptr2) { +; CHECK-LABEL: define i32 @nsw_dont_preserve_negative( +; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], ptr [[PTR2:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[PTR0]], align 4 +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[PTR1]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[PTR2]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[ADD0:%.*]] = add i32 [[V1]], [[V0]] +; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ADD0]], [[V2]] +; CHECK-NEXT: ret i32 [[ADD1]] +; + %v0 = load i32, ptr %ptr0 + %v1 = load i32, ptr %ptr1, !range !1 + %v2 = load i32, ptr %ptr2, !range !1 + %add0 = add nsw i32 %v1, %v2 + %add1 = add nsw i32 %add0, %v0 + ret i32 %add1 +} + +define i32 @nsw_nopreserve_notallnsw(ptr %ptr0, ptr %ptr1, ptr %ptr2) { +; CHECK-LABEL: define i32 @nsw_nopreserve_notallnsw( +; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], ptr [[PTR2:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[PTR0]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[PTR1]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[PTR2]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[ADD0:%.*]] = add i32 [[V1]], [[V0]] +; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ADD0]], [[V2]] +; CHECK-NEXT: ret i32 [[ADD1]] +; + %v0 = load i32, ptr %ptr0, !range !1 + %v1 = load i32, ptr %ptr1, !range !1 + %v2 = load i32, ptr %ptr2, !range !1 + %add0 = add nsw i32 %v1, %v2 + %add1 = add i32 %add0, %v0 + ret i32 %add1 +} + +; Positive 32 bit integers +!1 = !{i32 0, i32 2147483648} +;. +; CHECK: [[RNG0]] = !{i32 0, i32 -2147483648} +;. diff --git a/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn index c99c1b5483355be..f0bf6a8f3dbaf81 100644 --- a/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn @@ -40,6 +40,8 @@ target(liblldb_type, "liblldb") { include_dirs = [ ".." ] sources = [ "SBAddress.cpp", + "SBAddressRange.cpp", + "SBAddressRangeList.cpp", "SBAttachInfo.cpp", "SBBlock.cpp", "SBBreakpoint.cpp", diff --git a/llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn index 30a9fb3ecceaa03..0c9632a0a1915fd 100644 --- a/llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn @@ -45,6 +45,7 @@ static_library("Core") { sources = [ "Address.cpp", "AddressRange.cpp", + "AddressRangeListImpl.cpp", "AddressResolver.cpp", "AddressResolverFileLine.cpp", "Communication.cpp", diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h index bb49d6c256f21bd..d6d038ef65bdf41 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h @@ -65,12 +65,6 @@ void populateSparseAssembler(RewritePatternSet &patterns, bool directOut); std::unique_ptr createSparseAssembler(); std::unique_ptr createSparseAssembler(bool directOut); -//===----------------------------------------------------------------------===// -// The SparseEncodingPropagation pass. -//===----------------------------------------------------------------------===// - -std::unique_ptr createSparseEncodingPropagationPass(); - //===----------------------------------------------------------------------===// // The SparseReinterpretMap pass. //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td index 94c3ca60030eeb1..2f844cee5ff5283 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td @@ -40,42 +40,6 @@ def SparseAssembler : Pass<"sparse-assembler", "ModuleOp"> { ]; } -def SparseEncodingPropagation : Pass<"sparse-encoding-propagation", "func::FuncOp"> { - let summary = "Propagate sparse tensor encodings"; - let description = [{ - A pass that propagates sparse tensor encodings. - - Background: To avoid introducing repetitive operations, sparse tensors - in MLIR try to reuse tensor operations whenever available. However, most - tensor operations are canonicalized/transformed without the knowledge - of sparsity. The pass tries to propagate missing sparse encodings. - - For example: - ```mlir - %s = tensor.extract_slice %input[0, 0,] [2, 1] [1, 1] - : tensor<2x3xf32, #sparse> to tensor<2x1xf32, #sparse> - - // After rank reducing (by tensor dialect transformation) - %t = tensor.extract_slice %input[0, 0,] [2, 1] [1, 1] - : tensor<2x3xf32, #sparse> to tensor<2xf32> - %s = tensor.expand_shape [[0, 1]] %t - : tensor<2xf32> to tensor<2x1xf32, #sparse> - - // After sparsity propagation - %t = tensor.extract_slice %input[0, 0,] [2, 1] [1, 1] - : tensor<2x3xf32, #sparse> to tensor<2xf32, #sparse1> - %s = tensor.expand_shape [[0, 1]] %t - : tensor<2xf32, #sparse1> to tensor<2x1xf32, #sparse> - ``` - }]; - - let constructor = "mlir::createSparseEncodingPropagationPass()"; - let dependentDialects = [ - "sparse_tensor::SparseTensorDialect", - "tensor::TensorDialect", - ]; -} - def SparseReinterpretMap : Pass<"sparse-reinterpret-map", "ModuleOp"> { let summary = "Reinterprets sparse tensor type mappings"; let description = [{ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp index f57353b5892b5a1..b42d58634a36c4e 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp @@ -23,7 +23,6 @@ namespace mlir { #define GEN_PASS_DEF_SPARSEASSEMBLER -#define GEN_PASS_DEF_SPARSEENCODINGPROPAGATION #define GEN_PASS_DEF_SPARSEREINTERPRETMAP #define GEN_PASS_DEF_PRESPARSIFICATIONREWRITE #define GEN_PASS_DEF_SPARSIFICATIONPASS @@ -61,14 +60,6 @@ struct SparseAssembler : public impl::SparseAssemblerBase { } }; -struct SparseEncodingPropagation - : public impl::SparseEncodingPropagationBase { - SparseEncodingPropagation() = default; - SparseEncodingPropagation(const SparseEncodingPropagation &pass) = default; - - void runOnOperation() override {} -}; - struct SparseReinterpretMap : public impl::SparseReinterpretMapBase { SparseReinterpretMap() = default; @@ -407,10 +398,6 @@ std::unique_ptr mlir::createSparseAssembler() { return std::make_unique(); } -std::unique_ptr mlir::createSparseEncodingPropagationPass() { - return std::make_unique(); -} - std::unique_ptr mlir::createSparseReinterpretMapPass() { return std::make_unique(); } diff --git a/openmp/tools/archer/ompt-tsan.cpp b/openmp/tools/archer/ompt-tsan.cpp index de77e25db2d399d..d7658077e83ae0c 100644 --- a/openmp/tools/archer/ompt-tsan.cpp +++ b/openmp/tools/archer/ompt-tsan.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -29,7 +30,6 @@ #include #include #include -#include #include "omp-tools.h" @@ -146,18 +146,28 @@ void __attribute__((weak)) __tsan_flush_memory() {} static ArcherFlags *archer_flags; #ifndef TsanHappensBefore + +template static void __ompt_tsan_func(Args...) {} + +#define DECLARE_TSAN_FUNCTION(name, ...) \ + static void (*name)(__VA_ARGS__) = __ompt_tsan_func<__VA_ARGS__>; + // Thread Sanitizer is a tool that finds races in code. // See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations . // tsan detects these exact functions by name. extern "C" { -static void (*AnnotateHappensAfter)(const char *, int, const volatile void *); -static void (*AnnotateHappensBefore)(const char *, int, const volatile void *); -static void (*AnnotateIgnoreWritesBegin)(const char *, int); -static void (*AnnotateIgnoreWritesEnd)(const char *, int); -static void (*AnnotateNewMemory)(const char *, int, const volatile void *, - size_t); -static void (*__tsan_func_entry)(const void *); -static void (*__tsan_func_exit)(void); +DECLARE_TSAN_FUNCTION(AnnotateHappensAfter, const char *, int, + const volatile void *) +DECLARE_TSAN_FUNCTION(AnnotateHappensBefore, const char *, int, + const volatile void *) +DECLARE_TSAN_FUNCTION(AnnotateIgnoreWritesBegin, const char *, int) +DECLARE_TSAN_FUNCTION(AnnotateIgnoreWritesEnd, const char *, int) +DECLARE_TSAN_FUNCTION(AnnotateNewMemory, const char *, int, + const volatile void *, size_t) +DECLARE_TSAN_FUNCTION(__tsan_func_entry, const void *) +DECLARE_TSAN_FUNCTION(__tsan_func_exit) + +// RunningOnValgrind is used to detect absence of TSan and must intentionally be a nullptr. static int (*RunningOnValgrind)(void); } @@ -1142,7 +1152,10 @@ static void ompt_tsan_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id, #define findTsanFunction(f, fSig) \ do { \ - if (NULL == (f = fSig dlsym(RTLD_DEFAULT, #f))) \ + void *fp = dlsym(RTLD_DEFAULT, #f); \ + if (fp) \ + f = fSig fp; \ + else \ printf("Unable to find TSan function " #f ".\n"); \ } while (0) diff --git a/polly/include/polly/ScheduleTreeTransform.h b/polly/include/polly/ScheduleTreeTransform.h index ee504c4e5f5244d..6bd5a3abf9ea283 100644 --- a/polly/include/polly/ScheduleTreeTransform.h +++ b/polly/include/polly/ScheduleTreeTransform.h @@ -47,9 +47,9 @@ struct ScheduleTreeVisitor { return getDerived().visitSequence(Node.as(), std::forward(args)...); case isl_schedule_node_set: + assert(isl_schedule_node_n_children(Node.get()) >= 2); return getDerived().visitSet(Node.as(), std::forward(args)...); - assert(isl_schedule_node_n_children(Node.get()) >= 2); case isl_schedule_node_leaf: assert(isl_schedule_node_n_children(Node.get()) == 0); return getDerived().visitLeaf(Node.as(),