diff --git a/clang-tools-extra/clang-tidy/CMakeLists.txt b/clang-tools-extra/clang-tidy/CMakeLists.txt
index 02573534ccaef8..923976197ebe86 100644
--- a/clang-tools-extra/clang-tidy/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/CMakeLists.txt
@@ -46,6 +46,7 @@ endif()
 # If you add a check, also add it to ClangTidyForceLinker.h in this directory.
 add_subdirectory(android)
 add_subdirectory(abseil)
+add_subdirectory(altera)
 add_subdirectory(boost)
 add_subdirectory(bugprone)
 add_subdirectory(cert)
@@ -71,6 +72,7 @@ add_subdirectory(zircon)
 set(ALL_CLANG_TIDY_CHECKS
   clangTidyAndroidModule
   clangTidyAbseilModule
+  clangTidyAlteraModule
   clangTidyBoostModule
   clangTidyBugproneModule
   clangTidyCERTModule
diff --git a/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h b/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h
index 1d6bd2a4fd6214..63e681f878db2d 100644
--- a/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h
+++ b/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h
@@ -20,6 +20,11 @@ extern volatile int AbseilModuleAnchorSource;
 static int LLVM_ATTRIBUTE_UNUSED AbseilModuleAnchorDestination =
     AbseilModuleAnchorSource;
 
+// This anchor is used to force the linker to link the AlteraModule.
+extern volatile int AlteraModuleAnchorSource;
+static int LLVM_ATTRIBUTE_UNUSED AlteraModuleAnchorDestination =
+    AlteraModuleAnchorSource;
+
 // This anchor is used to force the linker to link the AndroidModule.
 extern volatile int AndroidModuleAnchorSource;
 static int LLVM_ATTRIBUTE_UNUSED AndroidModuleAnchorDestination =
     AndroidModuleAnchorSource;
diff --git a/clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp b/clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp
new file mode 100644
index 00000000000000..d91f67ac148565
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp
@@ -0,0 +1,39 @@
+//===--- AlteraTidyModule.cpp - clang-tidy --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../ClangTidy.h"
+#include "../ClangTidyModule.h"
+#include "../ClangTidyModuleRegistry.h"
+#include "StructPackAlignCheck.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang {
+namespace tidy {
+namespace altera {
+
+class AlteraModule : public ClangTidyModule {
+public:
+  void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
+    CheckFactories.registerCheck<StructPackAlignCheck>(
+        "altera-struct-pack-align");
+  }
+};
+
+} // namespace altera
+
+// Register the AlteraTidyModule using this statically initialized variable.
+static ClangTidyModuleRegistry::Add<AlteraModule>
+    X("altera-module", "Adds Altera FPGA OpenCL lint checks.");
+
+// This anchor is used to force the linker to link in the generated object file
+// and thus register the AlteraModule.
+volatile int AlteraModuleAnchorSource = 0;
+
+} // namespace tidy
+} // namespace clang
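As an aside, the module class above is the single extension point for the whole `altera-` check group: each check class is bound to its user-visible name inside `addCheckFactories`. The following sketch shows how the module would grow if a second check were added later; `AlteraKernelNameRestrictionCheck` and its check name are invented here for illustration and are not part of this patch.

```cpp
// Hypothetical sketch: a second check registered in the same module.
class AlteraModuleSketch : public ClangTidyModule {
public:
  void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
    CheckFactories.registerCheck<StructPackAlignCheck>(
        "altera-struct-pack-align");
    // Invented name, for illustration only.
    CheckFactories.registerCheck<AlteraKernelNameRestrictionCheck>(
        "altera-kernel-name-restriction");
  }
};
```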
diff --git a/clang-tools-extra/clang-tidy/altera/CMakeLists.txt b/clang-tools-extra/clang-tidy/altera/CMakeLists.txt
new file mode 100644
index 00000000000000..ed28d9f4892d28
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/altera/CMakeLists.txt
@@ -0,0 +1,22 @@
+set(LLVM_LINK_COMPONENTS
+  FrontendOpenMP
+  support
+  )
+
+add_clang_library(clangTidyAlteraModule
+  AlteraTidyModule.cpp
+  StructPackAlignCheck.cpp
+
+  LINK_LIBS
+  clangTidy
+  clangTidyUtils
+  )
+
+clang_target_link_libraries(clangTidyAlteraModule
+  PRIVATE
+  clangAnalysis
+  clangAST
+  clangASTMatchers
+  clangBasic
+  clangLex
+  )
diff --git a/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp
new file mode 100644
index 00000000000000..9f28a22a9d03ec
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp
@@ -0,0 +1,144 @@
+//===--- StructPackAlignCheck.cpp - clang-tidy ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "StructPackAlignCheck.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/RecordLayout.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include <math.h>
+#include <sstream>
+
+using namespace clang::ast_matchers;
+
+namespace clang {
+namespace tidy {
+namespace altera {
+
+void StructPackAlignCheck::registerMatchers(MatchFinder *Finder) {
+  Finder->addMatcher(recordDecl(isStruct(), isDefinition(),
+                                unless(isExpansionInSystemHeader()))
+                         .bind("struct"),
+                     this);
+}
+
+CharUnits
+StructPackAlignCheck::computeRecommendedAlignment(CharUnits MinByteSize) {
+  CharUnits NewAlign = CharUnits::fromQuantity(1);
+  if (!MinByteSize.isPowerOfTwo()) {
+    int MSB = (int)MinByteSize.getQuantity();
+    for (; MSB > 0; MSB /= 2) {
+      NewAlign = NewAlign.alignTo(
+          CharUnits::fromQuantity(((int)NewAlign.getQuantity()) * 2));
+      // Abort if the computed alignment meets the maximum configured alignment.
+      if (NewAlign.getQuantity() >= MaxConfiguredAlignment)
+        break;
+    }
+  } else {
+    NewAlign = MinByteSize;
+  }
+  return NewAlign;
+}
+
+void StructPackAlignCheck::check(const MatchFinder::MatchResult &Result) {
+  const auto *Struct = Result.Nodes.getNodeAs<RecordDecl>("struct");
+
+  // Do not trigger on templated struct declarations because the packing and
+  // alignment requirements are unknown.
+  if (Struct->isTemplated())
+    return;
+
+  // Get sizing info for the struct.
+  llvm::SmallVector<std::pair<unsigned int, unsigned int>, 10> FieldSizes;
+  unsigned int TotalBitSize = 0;
+  for (const FieldDecl *StructField : Struct->fields()) {
+    // For each StructField, record how big it is (in bits).
+    // Would be good to use a pair of <offset, size> to advise a better
+    // packing order.
+    unsigned int StructFieldWidth =
+        (unsigned int)Result.Context
+            ->getTypeInfo(StructField->getType().getTypePtr())
+            .Width;
+    FieldSizes.emplace_back(StructFieldWidth, StructField->getFieldIndex());
+    // FIXME: Recommend a reorganization of the struct (sort by StructField
+    // size, largest to smallest).
+    TotalBitSize += StructFieldWidth;
+  }
+
+  uint64_t CharSize = Result.Context->getCharWidth();
+  CharUnits CurrSize = Result.Context->getASTRecordLayout(Struct).getSize();
+  CharUnits MinByteSize =
+      CharUnits::fromQuantity(ceil((float)TotalBitSize / CharSize));
+  CharUnits MaxAlign = CharUnits::fromQuantity(
+      ceil((float)Struct->getMaxAlignment() / CharSize));
+  CharUnits CurrAlign =
+      Result.Context->getASTRecordLayout(Struct).getAlignment();
+  CharUnits NewAlign = computeRecommendedAlignment(MinByteSize);
+
+  bool IsPacked = Struct->hasAttr<PackedAttr>();
+  bool NeedsPacking = (MinByteSize < CurrSize) && (MaxAlign != NewAlign) &&
+                      (CurrSize != NewAlign);
+  bool NeedsAlignment = CurrAlign.getQuantity() != NewAlign.getQuantity();
+
+  if (!NeedsAlignment && !NeedsPacking)
+    return;
+
+  // If it's using much more space than it needs, suggest packing.
+  // (Do not suggest packing if it is currently explicitly aligned to what the
+  // minimum byte size would suggest as the new alignment.)
+  if (NeedsPacking && !IsPacked) {
+    diag(Struct->getLocation(),
+         "accessing fields in struct %0 is inefficient due to padding; only "
+         "needs %1 bytes but is using %2 bytes")
+        << Struct << (int)MinByteSize.getQuantity()
+        << (int)CurrSize.getQuantity()
+        << FixItHint::CreateInsertion(Struct->getEndLoc().getLocWithOffset(1),
+                                      " __attribute__((packed))");
+    diag(Struct->getLocation(),
+         "use \"__attribute__((packed))\" to reduce the amount of padding "
+         "applied to struct %0",
+         DiagnosticIDs::Note)
+        << Struct;
+  }
+
+  FixItHint FixIt;
+  AlignedAttr *Attribute = Struct->getAttr<AlignedAttr>();
+  std::string NewAlignQuantity = std::to_string((int)NewAlign.getQuantity());
+  if (Attribute) {
+    std::ostringstream FixItString;
+    FixItString << "aligned(" << NewAlignQuantity << ")";
+    FixIt =
+        FixItHint::CreateReplacement(Attribute->getRange(), FixItString.str());
+  } else {
+    std::ostringstream FixItString;
+    FixItString << " __attribute__((aligned(" << NewAlignQuantity << ")))";
+    FixIt = FixItHint::CreateInsertion(Struct->getEndLoc().getLocWithOffset(1),
+                                       FixItString.str());
+  }
+
+  // And suggest the minimum power-of-two alignment for the struct as a whole
+  // (with and without packing).
+  if (NeedsAlignment) {
+    diag(Struct->getLocation(),
+         "accessing fields in struct %0 is inefficient due to poor alignment; "
+         "currently aligned to %1 bytes, but recommended alignment is %2 bytes")
+        << Struct << (int)CurrAlign.getQuantity() << NewAlignQuantity << FixIt;
+
+    diag(Struct->getLocation(),
+         "use \"__attribute__((aligned(%0)))\" to align struct %1 to %0 bytes",
+         DiagnosticIDs::Note)
+        << NewAlignQuantity << Struct;
+  }
+}
+
+void StructPackAlignCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
+  Options.store(Opts, "MaxConfiguredAlignment", MaxConfiguredAlignment);
+}
+
+} // namespace altera
+} // namespace tidy
+} // namespace clang
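To make the arithmetic in `check()` concrete: for a `char`/`double`/`char` struct, `TotalBitSize` is 8 + 64 + 8 = 80 bits, so `MinByteSize` is 10; the record layout's `CurrSize` is 24; and since 10 is not a power of two, `computeRecommendedAlignment` rounds up to 16. A minimal standalone sketch of those numbers, assuming a typical x86-64 ABI (8-byte-aligned `double`) and GCC/Clang attribute syntax:

```cpp
struct Error {   // TotalBitSize = 8 + 64 + 8 = 80 bits -> MinByteSize = 10
  char a;        // 1 byte, then 7 bytes of padding
  double b;      // 8 bytes
  char c;        // 1 byte, then 7 bytes of tail padding
};
static_assert(sizeof(Error) == 24, "CurrSize: 24 bytes for 10 bytes of data");

// What the two fix-its produce: packed to 10 data bytes, then padded up to
// the recommended power-of-two alignment of 16.
struct Fixed {
  char a;
  double b;
  char c;
} __attribute__((packed)) __attribute__((aligned(16)));
static_assert(sizeof(Fixed) == 16, "10 data bytes rounded up to alignment");
static_assert(alignof(Fixed) == 16, "recommended alignment");
```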
diff --git a/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h
new file mode 100644
index 00000000000000..510e03030590c0
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h
@@ -0,0 +1,41 @@
+//===--- StructPackAlignCheck.h - clang-tidy --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_STRUCTPACKALIGNCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_STRUCTPACKALIGNCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace altera {
+
+/// Finds structs that are inefficiently packed or aligned, and recommends
+/// packing and/or aligning of said structs as needed.
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/altera-struct-pack-align.html
+class StructPackAlignCheck : public ClangTidyCheck {
+public:
+  StructPackAlignCheck(StringRef Name, ClangTidyContext *Context)
+      : ClangTidyCheck(Name, Context),
+        MaxConfiguredAlignment(Options.get("MaxConfiguredAlignment", 128)) {}
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+  void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
+
+private:
+  const unsigned MaxConfiguredAlignment;
+  CharUnits computeRecommendedAlignment(CharUnits MinByteSize);
+};
+
+} // namespace altera
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_STRUCTPACKALIGNCHECK_H
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 781fef27c47617..53c3894914e528 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -67,6 +67,27 @@ The improvements are...
 Improvements to clang-tidy
 --------------------------
 
+New modules
+^^^^^^^^^^^
+
+- New :doc:`altera <clang-tidy/checks/list>` module.
+
+  Includes checks related to OpenCL for FPGA coding guidelines, based on the
+  `Altera SDK for OpenCL: Best Practices Guide
+  <https://www.altera.com/en_US/pdfs/literature/hb/opencl-sdk/aocl-best-practices-guide.pdf>`_.
+
+New checks
+^^^^^^^^^^
+
+- New :doc:`altera-struct-pack-align
+  <clang-tidy/checks/altera-struct-pack-align>` check.
+
+  Finds structs that are inefficiently packed or aligned, and recommends
+  packing and/or aligning of said structs as needed.
+
+- New :doc:`bugprone-misplaced-pointer-arithmetic-in-alloc
+  <clang-tidy/checks/bugprone-misplaced-pointer-arithmetic-in-alloc>` check.
+
 - New :doc:`bugprone-redundant-branch-condition
   <clang-tidy/checks/bugprone-redundant-branch-condition>` check.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/altera-struct-pack-align.rst b/clang-tools-extra/docs/clang-tidy/checks/altera-struct-pack-align.rst
new file mode 100644
index 00000000000000..b03a4fcf7fcf33
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/altera-struct-pack-align.rst
@@ -0,0 +1,54 @@
+.. title:: clang-tidy - altera-struct-pack-align
+
+altera-struct-pack-align
+========================
+
+Finds structs that are inefficiently packed or aligned, and recommends
+packing and/or aligning of said structs as needed.
+
+Structs that are not packed take up more space than they should, and accessing
+structs that are not well aligned is inefficient.
+
+Fix-its are provided to fix both of these issues by inserting and/or amending
+relevant struct attributes.
+
+Based on the `Altera SDK for OpenCL: Best Practices Guide
+<https://www.altera.com/en_US/pdfs/literature/hb/opencl-sdk/aocl-best-practices-guide.pdf>`_.
+
+.. code-block:: c++
+
+  // The following struct is originally aligned to 4 bytes, and thus takes up
+  // 12 bytes of memory instead of 10. Packing the struct will make it use
+  // only 10 bytes of memory, and aligning it to 16 bytes will make it
+  // efficient to access.
+  struct example {
+    char a;    // 1 byte
+    double b;  // 8 bytes
+    char c;    // 1 byte
+  };
+
+  // The following struct is arranged in such a way that packing is not needed.
+  // However, it is aligned to 4 bytes instead of 8, and thus needs to be
+  // explicitly aligned.
+  struct implicitly_packed_example {
+    char a;  // 1 byte
+    char b;  // 1 byte
+    char c;  // 1 byte
+    char d;  // 1 byte
+    int e;   // 4 bytes
+  };
+
+  // The following struct is explicitly aligned and packed.
+  struct good_example {
+    char a;    // 1 byte
+    double b;  // 8 bytes
+    char c;    // 1 byte
+  } __attribute__((packed)) __attribute__((aligned(16)));
+
+  // Explicitly aligning a struct to the wrong value will result in a warning.
+  // The following example should be aligned to 16 bytes, not 32.
+  struct badly_aligned_example {
+    char a;    // 1 byte
+    double b;  // 8 bytes
+    char c;    // 1 byte
+  } __attribute__((packed)) __attribute__((aligned(32)));
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index 91414ee8c90f32..c569ce704d979e 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -30,6 +30,7 @@ Clang-Tidy Checks
    `abseil-time-comparison <abseil-time-comparison.html>`_, "Yes"
    `abseil-time-subtraction <abseil-time-subtraction.html>`_, "Yes"
    `abseil-upgrade-duration-conversions <abseil-upgrade-duration-conversions.html>`_, "Yes"
+   `altera-struct-pack-align <altera-struct-pack-align.html>`_,
    `android-cloexec-accept <android-cloexec-accept.html>`_, "Yes"
    `android-cloexec-accept4 <android-cloexec-accept4.html>`_,
    `android-cloexec-creat <android-cloexec-creat.html>`_, "Yes"
diff --git a/clang-tools-extra/docs/clang-tidy/index.rst b/clang-tools-extra/docs/clang-tidy/index.rst
index b9a4a7d694b4f0..a85c721541784a 100644
--- a/clang-tools-extra/docs/clang-tidy/index.rst
+++ b/clang-tools-extra/docs/clang-tidy/index.rst
@@ -58,6 +58,7 @@ There are currently the following groups of checks:
 Name prefix            Description
 ====================== =========================================================
 ``abseil-``            Checks related to Abseil library.
+``altera-``            Checks related to OpenCL programming for FPGAs.
 ``android-``           Checks related to Android.
 ``boost-``             Checks related to Boost library.
 ``bugprone-``          Checks that target bugprone code constructs.
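The documentation above describes two fix-its; as a sketch of their combined effect on the first example (the second struct is renamed only so the before/after pair compiles in one translation unit):

```cpp
// Before: triggers both the padding and the alignment diagnostics.
struct example {
  char a;
  double b;
  char c;
};

// After applying both suggested fix-its:
struct example_fixed {
  char a;
  double b;
  char c;
} __attribute__((packed)) __attribute__((aligned(16)));
```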
diff --git a/clang-tools-extra/test/clang-tidy/checkers/altera-struct-pack-align.cpp b/clang-tools-extra/test/clang-tidy/checkers/altera-struct-pack-align.cpp
new file mode 100644
index 00000000000000..615b6cafe87a25
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/altera-struct-pack-align.cpp
@@ -0,0 +1,101 @@
+// RUN: %check_clang_tidy %s altera-struct-pack-align %t -- -header-filter=.*
+
+// Struct needs both alignment and packing
+struct error {
+  char a;
+  double b;
+  char c;
+};
+// CHECK-MESSAGES: :[[@LINE-5]]:8: warning: accessing fields in struct 'error' is inefficient due to padding; only needs 10 bytes but is using 24 bytes [altera-struct-pack-align]
+// CHECK-MESSAGES: :[[@LINE-6]]:8: note: use "__attribute__((packed))" to reduce the amount of padding applied to struct 'error'
+// CHECK-MESSAGES: :[[@LINE-7]]:8: warning: accessing fields in struct 'error' is inefficient due to poor alignment; currently aligned to 8 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align]
+// CHECK-MESSAGES: :[[@LINE-8]]:8: note: use "__attribute__((aligned(16)))" to align struct 'error' to 16 bytes
+// CHECK-FIXES: __attribute__((packed))
+// CHECK-FIXES: __attribute__((aligned(16)));
+
+// Struct is explicitly packed, but needs alignment
+struct error_packed {
+  char a;
+  double b;
+  char c;
+} __attribute__((packed));
+// CHECK-MESSAGES: :[[@LINE-5]]:8: warning: accessing fields in struct 'error_packed' is inefficient due to poor alignment; currently aligned to 1 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align]
+// CHECK-MESSAGES: :[[@LINE-6]]:8: note: use "__attribute__((aligned(16)))" to align struct 'error_packed' to 16 bytes
+// CHECK-FIXES: __attribute__((aligned(16)))
+
+// Struct is properly packed, but needs alignment
+struct align_only {
+  char a;
+  char b;
+  char c;
+  char d;
+  int e;
+  double f;
+};
+// CHECK-MESSAGES: :[[@LINE-8]]:8: warning: accessing fields in struct 'align_only' is inefficient due to poor alignment; currently aligned to 8 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align]
+// CHECK-MESSAGES: :[[@LINE-9]]:8: note: use "__attribute__((aligned(16)))" to align struct 'align_only' to 16 bytes
+// CHECK-FIXES: __attribute__((aligned(16)));
+
+// Struct is perfectly packed but wrongly aligned
+struct bad_align {
+  char a;
+  double b;
+  char c;
+} __attribute__((packed)) __attribute__((aligned(8)));
+// CHECK-MESSAGES: :[[@LINE-5]]:8: warning: accessing fields in struct 'bad_align' is inefficient due to poor alignment; currently aligned to 8 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align]
+// CHECK-MESSAGES: :[[@LINE-6]]:8: note: use "__attribute__((aligned(16)))" to align struct 'bad_align' to 16 bytes
+// CHECK-FIXES: __attribute__((aligned(16)));
+
+struct bad_align2 {
+  char a;
+  double b;
+  char c;
+} __attribute__((packed)) __attribute__((aligned(32)));
+// CHECK-MESSAGES: :[[@LINE-5]]:8: warning: accessing fields in struct 'bad_align2' is inefficient due to poor alignment; currently aligned to 32 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align]
+// CHECK-MESSAGES: :[[@LINE-6]]:8: note: use "__attribute__((aligned(16)))" to align struct 'bad_align2' to 16 bytes
+// CHECK-FIXES: __attribute__((aligned(16)));
+
+struct bad_align3 {
+  char a;
+  double b;
+  char c;
+} __attribute__((packed)) __attribute__((aligned(4)));
+// CHECK-MESSAGES: :[[@LINE-5]]:8: warning: accessing fields in struct 'bad_align3' is inefficient due to poor alignment; currently aligned to 4 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align]
+// CHECK-MESSAGES: :[[@LINE-6]]:8: note: use "__attribute__((aligned(16)))" to align struct 'bad_align3' to 16 bytes
+// CHECK-FIXES: __attribute__((aligned(16)));
+
+// Struct is both perfectly packed and aligned
+struct success {
+  char a;
+  double b;
+  char c;
+} __attribute__((packed)) __attribute__((aligned(16)));
+//Should take 10 bytes and be aligned to 16 bytes
+
+// Struct is properly packed, and explicitly aligned
+struct success2 {
+  int a;
+  int b;
+  int c;
+} __attribute__((aligned(16)));
+
+// If struct is properly aligned, packing not needed
+struct success3 {
+  char a;
+  double b;
+  char c;
+} __attribute__((aligned(16)));
+
+// If struct is templated, warnings should not be triggered
+template <typename A, typename B>
+struct success4 {
+  A a;
+  B b;
+  int c;
+};
+
+// Warnings should not trigger on struct instantiations
+void no_trigger_on_instantiation() {
+  struct bad_align3 instantiated { 'a', 0.001, 'b' };
+}
+
diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index 5ac0e6b6ef0cb1..f015951c7ec727 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -136,38 +136,19 @@ if( CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR )
   set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} )
 
   if(LLVM_INCLUDE_TESTS)
-    if(CMAKE_VERSION VERSION_LESS 3.12)
-      include(FindPythonInterp)
-      if(NOT PYTHONINTERP_FOUND)
-        message(FATAL_ERROR
-"Unable to find Python interpreter, required for builds and testing.
-
-Please install Python or specify the PYTHON_EXECUTABLE CMake variable.")
-      endif()
-
-      if( ${PYTHON_VERSION_STRING} VERSION_LESS 2.7 )
-        message(FATAL_ERROR "Python 2.7 or newer is required")
+    find_package(Python3 COMPONENTS Interpreter)
+    if(NOT Python3_Interpreter_FOUND)
+      message(WARNING "Python3 not found, using python2 as a fallback")
+      find_package(Python2 COMPONENTS Interpreter REQUIRED)
+      if(Python2_VERSION VERSION_LESS 2.7)
+        message(SEND_ERROR "Python 2.7 or newer is required")
       endif()
 
+      # Treat python2 as python3
       add_executable(Python3::Interpreter IMPORTED)
       set_target_properties(Python3::Interpreter PROPERTIES
-        IMPORTED_LOCATION ${PYTHON_EXECUTABLE})
-      set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE})
-    else()
-      find_package(Python3 COMPONENTS Interpreter)
-      if(NOT Python3_Interpreter_FOUND)
-        message(WARNING "Python3 not found, using python2 as a fallback")
-        find_package(Python2 COMPONENTS Interpreter REQUIRED)
-        if(Python2_VERSION VERSION_LESS 2.7)
-          message(SEND_ERROR "Python 2.7 or newer is required")
-        endif()
-
-        # Treat python2 as python3
-        add_executable(Python3::Interpreter IMPORTED)
-        set_target_properties(Python3::Interpreter PROPERTIES
-          IMPORTED_LOCATION ${Python2_EXECUTABLE})
-        set(Python3_EXECUTABLE ${Python2_EXECUTABLE})
-      endif()
+        IMPORTED_LOCATION ${Python2_EXECUTABLE})
+      set(Python3_EXECUTABLE ${Python2_EXECUTABLE})
     endif()
 
   # Check prebuilt llvm/utils.
diff --git a/clang/include/clang/AST/IgnoreExpr.h b/clang/include/clang/AST/IgnoreExpr.h
index 15d31f3af99546..1c2b538e5b6353 100644
--- a/clang/include/clang/AST/IgnoreExpr.h
+++ b/clang/include/clang/AST/IgnoreExpr.h
@@ -14,12 +14,13 @@
 #define LLVM_CLANG_AST_IGNOREEXPR_H
 
 #include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
 
 namespace clang {
 namespace detail {
 /// Given an expression E and functions Fn_1,...,Fn_n : Expr * -> Expr *,
 /// Return Fn_n(...(Fn_1(E)))
-inline Expr *IgnoreExprNodesImpl(Expr *E) { return E; };
+inline Expr *IgnoreExprNodesImpl(Expr *E) { return E; }
 template <typename FnTy, typename... FnTys>
 Expr *IgnoreExprNodesImpl(Expr *E, FnTy &&Fn, FnTys &&... Fns) {
   return IgnoreExprNodesImpl(Fn(E), std::forward<FnTys>(Fns)...);
@@ -38,23 +39,122 @@ template <typename... FnTys> Expr *IgnoreExprNodes(Expr *E, FnTys &&... Fns) {
   return E;
 }
 
-Expr *IgnoreImplicitCastsSingleStep(Expr *E);
+template <typename... FnTys>
+const Expr *IgnoreExprNodes(const Expr *E, FnTys &&...Fns) {
+  return IgnoreExprNodes(const_cast<Expr *>(E), std::forward<FnTys>(Fns)...);
+}
+
+inline Expr *IgnoreImplicitCastsSingleStep(Expr *E) {
+  if (auto *ICE = dyn_cast<ImplicitCastExpr>(E))
+    return ICE->getSubExpr();
+
+  if (auto *FE = dyn_cast<FullExpr>(E))
+    return FE->getSubExpr();
+
+  return E;
+}
+
+inline Expr *IgnoreImplicitCastsExtraSingleStep(Expr *E) {
+  // FIXME: Skip MaterializeTemporaryExpr and SubstNonTypeTemplateParmExpr in
+  // addition to what IgnoreImpCasts() skips to account for the current
+  // behaviour of IgnoreParenImpCasts().
+  Expr *SubE = IgnoreImplicitCastsSingleStep(E);
+  if (SubE != E)
+    return SubE;
+
+  if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E))
+    return MTE->getSubExpr();
+
+  if (auto *NTTP = dyn_cast<SubstNonTypeTemplateParmExpr>(E))
+    return NTTP->getReplacement();
+
+  return E;
+}
+
+inline Expr *IgnoreCastsSingleStep(Expr *E) {
+  if (auto *CE = dyn_cast<CastExpr>(E))
+    return CE->getSubExpr();
+
+  if (auto *FE = dyn_cast<FullExpr>(E))
+    return FE->getSubExpr();
+
+  if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E))
+    return MTE->getSubExpr();
+
+  if (auto *NTTP = dyn_cast<SubstNonTypeTemplateParmExpr>(E))
+    return NTTP->getReplacement();
+
+  return E;
+}
+
+inline Expr *IgnoreLValueCastsSingleStep(Expr *E) {
+  // Skip what IgnoreCastsSingleStep skips, except that only
+  // lvalue-to-rvalue casts are skipped.
+  if (auto *CE = dyn_cast<CastExpr>(E))
+    if (CE->getCastKind() != CK_LValueToRValue)
+      return E;
 
-Expr *IgnoreImplicitCastsExtraSingleStep(Expr *E);
+  return IgnoreCastsSingleStep(E);
+}
+
+inline Expr *IgnoreBaseCastsSingleStep(Expr *E) {
+  if (auto *CE = dyn_cast<CastExpr>(E))
+    if (CE->getCastKind() == CK_DerivedToBase ||
+        CE->getCastKind() == CK_UncheckedDerivedToBase ||
+        CE->getCastKind() == CK_NoOp)
+      return CE->getSubExpr();
+
+  return E;
+}
+
+inline Expr *IgnoreImplicitSingleStep(Expr *E) {
+  Expr *SubE = IgnoreImplicitCastsSingleStep(E);
+  if (SubE != E)
+    return SubE;
+
+  if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E))
+    return MTE->getSubExpr();
+
+  if (auto *BTE = dyn_cast<CXXBindTemporaryExpr>(E))
+    return BTE->getSubExpr();
+
+  return E;
+}
+
+inline Expr *IgnoreImplicitAsWrittenSingleStep(Expr *E) {
+  if (auto *ICE = dyn_cast<ImplicitCastExpr>(E))
+    return ICE->getSubExprAsWritten();
 
-Expr *IgnoreCastsSingleStep(Expr *E);
+  return IgnoreImplicitSingleStep(E);
+}
 
-Expr *IgnoreLValueCastsSingleStep(Expr *E);
+inline Expr *IgnoreParensOnlySingleStep(Expr *E) {
+  if (auto *PE = dyn_cast<ParenExpr>(E))
+    return PE->getSubExpr();
+  return E;
+}
 
-Expr *IgnoreBaseCastsSingleStep(Expr *E);
+inline Expr *IgnoreParensSingleStep(Expr *E) {
+  if (auto *PE = dyn_cast<ParenExpr>(E))
+    return PE->getSubExpr();
 
-Expr *IgnoreImplicitSingleStep(Expr *E);
+  if (auto *UO = dyn_cast<UnaryOperator>(E)) {
+    if (UO->getOpcode() == UO_Extension)
+      return UO->getSubExpr();
+  }
 
-Expr *IgnoreImplicitAsWrittenSingleStep(Expr *E);
+  else if (auto *GSE = dyn_cast<GenericSelectionExpr>(E)) {
+    if (!GSE->isResultDependent())
+      return GSE->getResultExpr();
+  }
 
-Expr *IgnoreParensOnlySingleStep(Expr *E);
+  else if (auto *CE = dyn_cast<ChooseExpr>(E)) {
+    if (!CE->isConditionDependent())
+      return CE->getChosenSubExpr();
+  }
 
-Expr *IgnoreParensSingleStep(Expr *E);
+  return E;
+}
 
 } // namespace clang
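The single-step functions above are building blocks: `IgnoreExprNodes` keeps re-applying every step passed to it until none of them makes progress, so callers can mix and match exactly which implicit nodes to see through. A minimal usage sketch, assuming `clang/AST/IgnoreExpr.h` as defined above:

```cpp
#include "clang/AST/IgnoreExpr.h"

using namespace clang;

// Strips parentheses and implicit casts to a fixed point, but deliberately
// keeps MaterializeTemporaryExpr and CXXBindTemporaryExpr nodes, which the
// broader IgnoreImplicitSingleStep would also remove.
static Expr *skipParensAndImplicitCasts(Expr *E) {
  return IgnoreExprNodes(E, IgnoreParensSingleStep,
                         IgnoreImplicitCastsSingleStep);
}
```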
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index d856f784e0eeae..46f7ffc97ce779 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -5092,6 +5092,9 @@ def err_fold_expression_empty : Error<
   "with no fallback value">;
 def err_fold_expression_bad_operand : Error<
   "expression not permitted as operand of fold expression">;
+def err_fold_expression_limit_exceeded: Error<
+  "instantiating fold expression with %0 arguments exceeded expression nesting "
+  "limit of %1">, DefaultFatal, NoSFINAE;
 def err_unexpected_typedef : Error<
   "unexpected type name %0: expected expression">;
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 53d0285d370276..129ac0355c87f5 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -12415,6 +12415,7 @@ class Sema final {
 
   /// The struct behind the CFErrorRef pointer.
   RecordDecl *CFError = nullptr;
+  bool isCFError(RecordDecl *D);
 
   /// Retrieve the identifier "NSError".
   IdentifierInfo *getNSErrorIdent();
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index a444843c500603..a61af452313486 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -349,7 +349,6 @@ let ParentPackage = APIModeling in {
 
 def StdCLibraryFunctionsChecker : Checker<"StdCLibraryFunctions">,
   HelpText<"Improve modeling of the C standard library functions">,
-  Dependencies<[CallAndMessageModeling]>,
   CheckerOptions<[
     CmdLineOption<Boolean,
                   "DisplayLoadedSummaries",
@@ -436,7 +435,7 @@ def StdCLibraryFunctionArgsChecker : Checker<"StdCLibraryFunctionArgs">,
            "such as whether the parameter of isalpha is in the range [0, 255] "
            "or is EOF.">,
   Dependencies<[StdCLibraryFunctionsChecker]>,
-  WeakDependencies<[NonNullParamChecker]>,
+  WeakDependencies<[CallAndMessageChecker, NonNullParamChecker, StreamChecker]>,
   Documentation<HasDocumentation>;
 
 } // end "alpha.unix"
diff --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt
index dfd26fd97bc6d8..35099fd0dacf83 100644
--- a/clang/lib/AST/CMakeLists.txt
+++ b/clang/lib/AST/CMakeLists.txt
@@ -55,7 +55,6 @@ add_clang_library(clangAST
   ExternalASTMerger.cpp
   ExternalASTSource.cpp
   FormatString.cpp
-  IgnoreExpr.cpp
   InheritViz.cpp
   Interp/ByteCodeEmitter.cpp
   Interp/ByteCodeExprGen.cpp
diff --git a/clang/lib/AST/IgnoreExpr.cpp b/clang/lib/AST/IgnoreExpr.cpp
deleted file mode 100644
index 65aaaeb6a1ed00..00000000000000
--- a/clang/lib/AST/IgnoreExpr.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-//===--- IgnoreExpr.cpp - Ignore intermediate Expressions -----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements common functions to ignore intermediate expression nodes
-//
-//===----------------------------------------------------------------------===//
-
-#include "clang/AST/IgnoreExpr.h"
-#include "clang/AST/Expr.h"
-#include "clang/AST/ExprCXX.h"
-
-using namespace clang;
-
-Expr *clang::IgnoreImplicitCastsSingleStep(Expr *E) {
-  if (auto *ICE = dyn_cast<ImplicitCastExpr>(E))
-    return ICE->getSubExpr();
-
-  if (auto *FE = dyn_cast<FullExpr>(E))
-    return FE->getSubExpr();
-
-  return E;
-}
-
-Expr *clang::IgnoreImplicitCastsExtraSingleStep(Expr *E) {
-  // FIXME: Skip MaterializeTemporaryExpr and SubstNonTypeTemplateParmExpr in
-  // addition to what IgnoreImpCasts() skips to account for the current
-  // behaviour of IgnoreParenImpCasts().
-  Expr *SubE = IgnoreImplicitCastsSingleStep(E);
-  if (SubE != E)
-    return SubE;
-
-  if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E))
-    return MTE->getSubExpr();
-
-  if (auto *NTTP = dyn_cast<SubstNonTypeTemplateParmExpr>(E))
-    return NTTP->getReplacement();
-
-  return E;
-}
-
-Expr *clang::IgnoreCastsSingleStep(Expr *E) {
-  if (auto *CE = dyn_cast<CastExpr>(E))
-    return CE->getSubExpr();
-
-  if (auto *FE = dyn_cast<FullExpr>(E))
-    return FE->getSubExpr();
-
-  if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E))
-    return MTE->getSubExpr();
-
-  if (auto *NTTP = dyn_cast<SubstNonTypeTemplateParmExpr>(E))
-    return NTTP->getReplacement();
-
-  return E;
-}
-
-Expr *clang::IgnoreLValueCastsSingleStep(Expr *E) {
-  // Skip what IgnoreCastsSingleStep skips, except that only
-  // lvalue-to-rvalue casts are skipped.
-  if (auto *CE = dyn_cast<CastExpr>(E))
-    if (CE->getCastKind() != CK_LValueToRValue)
-      return E;
-
-  return IgnoreCastsSingleStep(E);
-}
-
-Expr *clang::IgnoreBaseCastsSingleStep(Expr *E) {
-  if (auto *CE = dyn_cast<CastExpr>(E))
-    if (CE->getCastKind() == CK_DerivedToBase ||
-        CE->getCastKind() == CK_UncheckedDerivedToBase ||
-        CE->getCastKind() == CK_NoOp)
-      return CE->getSubExpr();
-
-  return E;
-}
-
-Expr *clang::IgnoreImplicitSingleStep(Expr *E) {
-  Expr *SubE = IgnoreImplicitCastsSingleStep(E);
-  if (SubE != E)
-    return SubE;
-
-  if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E))
-    return MTE->getSubExpr();
-
-  if (auto *BTE = dyn_cast<CXXBindTemporaryExpr>(E))
-    return BTE->getSubExpr();
-
-  return E;
-}
-
-Expr *clang::IgnoreImplicitAsWrittenSingleStep(Expr *E) {
-  if (auto *ICE = dyn_cast<ImplicitCastExpr>(E))
-    return ICE->getSubExprAsWritten();
-
-  return IgnoreImplicitSingleStep(E);
-}
-
-Expr *clang::IgnoreParensOnlySingleStep(Expr *E) {
-  if (auto *PE = dyn_cast<ParenExpr>(E))
-    return PE->getSubExpr();
-  return E;
-}
-
-Expr *clang::IgnoreParensSingleStep(Expr *E) {
-  if (auto *PE = dyn_cast<ParenExpr>(E))
-    return PE->getSubExpr();
-
-  if (auto *UO = dyn_cast<UnaryOperator>(E)) {
-    if (UO->getOpcode() == UO_Extension)
-      return UO->getSubExpr();
-  }
-
-  else if (auto *GSE = dyn_cast<GenericSelectionExpr>(E)) {
-    if (!GSE->isResultDependent())
-      return GSE->getResultExpr();
-  }
-
-  else if (auto *CE = dyn_cast<ChooseExpr>(E)) {
-    if (!CE->isConditionDependent())
-      return CE->getChosenSubExpr();
-  }
-
-  return E;
-}
diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp
index 5b97265a6d8ae5..64e0da9e64b122 100644
--- a/clang/lib/Analysis/ThreadSafety.cpp
+++ b/clang/lib/Analysis/ThreadSafety.cpp
@@ -1266,21 +1266,13 @@ ClassifyDiagnostic(const AttrTy *A) {
 }
 
 bool ThreadSafetyAnalyzer::inCurrentScope(const CapabilityExpr &CapE) {
-  const threadSafety::til::SExpr *SExp = CapE.sexpr();
-  assert(SExp && "Null expressions should be ignored");
-
-  // Global variables are always in scope.
-  if (isa<til::LiteralPtr>(SExp))
-    return true;
-
-  // Members are in scope from methods of the same class.
-  if (const auto *P = dyn_cast<til::Project>(SExp)) {
-    if (!CurrentMethod)
+  if (!CurrentMethod)
       return false;
-    const ValueDecl *VD = P->clangDecl();
-    return VD->getDeclContext() == CurrentMethod->getDeclContext();
+  if (const auto *P = dyn_cast_or_null<til::Project>(CapE.sexpr())) {
+    const auto *VD = P->clangDecl();
+    if (VD)
+      return VD->getDeclContext() == CurrentMethod->getDeclContext();
   }
-
   return false;
 }
diff --git a/clang/lib/Analysis/ThreadSafetyCommon.cpp b/clang/lib/Analysis/ThreadSafetyCommon.cpp
index aee91857600717..1b8c55e56d4704 100644
--- a/clang/lib/Analysis/ThreadSafetyCommon.cpp
+++ b/clang/lib/Analysis/ThreadSafetyCommon.cpp
@@ -274,7 +274,7 @@ til::SExpr *SExprBuilder::translateDeclRefExpr(const DeclRefExpr *DRE,
   const auto *VD = cast<ValueDecl>(DRE->getDecl()->getCanonicalDecl());
 
   // Function parameters require substitution and/or renaming.
-  if (const auto *PV = dyn_cast<ParmVarDecl>(VD)) {
+  if (const auto *PV = dyn_cast_or_null<ParmVarDecl>(VD)) {
     unsigned I = PV->getFunctionScopeIndex();
     const DeclContext *D = PV->getDeclContext();
     if (Ctx && Ctx->FunArgs) {
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 709185707bd9c5..2abbe3e81e0a2e 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -84,7 +84,7 @@ CudaArchToStringMap arch_names[] = {
     GFX(810),  // stoney
     GFX(900),  // vega, instinct
     GFX(902), GFX(904), GFX(906), GFX(908), GFX(909),
-    GFX(1010), GFX(1011), GFX(1012),
+    GFX(1010), GFX(1011), GFX(1012), GFX(1030), GFX(1031)
     // clang-format on
 };
 #undef SM
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index c1def6c88f0a66..b9260892bd2150 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2982,7 +2982,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
         ((ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
           ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) &&
          !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
-          ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
+          ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
         ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
         ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
     if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 5dd6a7a9da40b0..841f0b41e9a7fc 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -56,6 +56,13 @@ static bool isLambdaParameterList(const FormatToken *Left) {
          Left->Previous->MatchingParen->is(TT_LambdaLSquare);
 }
 
+/// Returns \c true if the token is followed by a boolean condition, \c false
+/// otherwise.
+static bool isKeywordWithCondition(const FormatToken &Tok) {
+  return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
+                     tok::kw_constexpr, tok::kw_catch);
+}
+
 /// A parser that gathers additional information about tokens.
 ///
 /// The \c TokenAnnotator tries to match parenthesis and square brackets and
@@ -108,6 +115,12 @@ class AnnotatingParser {
 
     while (CurrentToken) {
       if (CurrentToken->is(tok::greater)) {
+        // Try to do a better job at looking for ">>" within the condition of
+        // a statement.
+        if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) &&
+            Left->ParentBracket != tok::less &&
+            isKeywordWithCondition(*Line.First))
+          return false;
         Left->MatchingParen = CurrentToken;
         CurrentToken->MatchingParen = Left;
         // In TT_Proto, we must distinguish between:
@@ -2768,13 +2781,6 @@ bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const {
                                                   Right.ParameterCount > 0);
 }
 
-/// Returns \c true if the token is followed by a boolean condition, \c false
-/// otherwise.
-static bool isKeywordWithCondition(const FormatToken &Tok) {
-  return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
-                     tok::kw_constexpr, tok::kw_catch);
-}
-
 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
                                           const FormatToken &Left,
                                           const FormatToken &Right) {
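The TokenAnnotator change above moves `isKeywordWithCondition` earlier so the angle-bracket matcher can consult it: inside the condition of an `if`/`for`/`while`/`switch`, a `>>` token pair is now kept as a shift operator instead of being matched as two closing template brackets. A small sketch of the kind of code this affects:

```cpp
// Inside a statement condition, ">>" is a shift, not the end of a nested
// template argument list such as 'A<B<C>>'.
bool hasHighBits(unsigned Bits) {
  if (Bits >> 4) // annotated as an operator, so formatting stays sane
    return true;
  return false;
}
```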
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 03442fb03b3aa2..d8ea9c03725920 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -4043,32 +4043,9 @@ classifyPointerDeclarator(Sema &S, QualType type, Declarator &declarator,
     if (auto recordType = type->getAs<RecordType>()) {
       RecordDecl *recordDecl = recordType->getDecl();
 
-      bool isCFError = false;
-      if (S.CFError) {
-        // If we already know about CFError, test it directly.
-        isCFError = (S.CFError == recordDecl);
-      } else {
-        // Check whether this is CFError, which we identify based on its bridge
-        // to NSError. CFErrorRef used to be declared with "objc_bridge" but is
-        // now declared with "objc_bridge_mutable", so look for either one of
-        // the two attributes.
-        if (recordDecl->getTagKind() == TTK_Struct && numNormalPointers > 0) {
-          IdentifierInfo *bridgedType = nullptr;
-          if (auto bridgeAttr = recordDecl->getAttr<ObjCBridgeAttr>())
-            bridgedType = bridgeAttr->getBridgedType();
-          else if (auto bridgeAttr =
-                       recordDecl->getAttr<ObjCBridgeMutableAttr>())
-            bridgedType = bridgeAttr->getBridgedType();
-
-          if (bridgedType == S.getNSErrorIdent()) {
-            S.CFError = recordDecl;
-            isCFError = true;
-          }
-        }
-      }
-
       // If this is CFErrorRef*, report it as such.
-      if (isCFError && numNormalPointers == 2 && numTypeSpecifierPointers < 2) {
+      if (numNormalPointers == 2 && numTypeSpecifierPointers < 2 &&
+          S.isCFError(recordDecl)) {
         return PointerDeclaratorKind::CFErrorRefPointer;
       }
       break;
@@ -4092,6 +4069,31 @@ classifyPointerDeclarator(Sema &S, QualType type, Declarator &declarator,
   }
 }
 
+bool Sema::isCFError(RecordDecl *RD) {
+  // If we already know about CFError, test it directly.
+  if (CFError)
+    return CFError == RD;
+
+  // Check whether this is CFError, which we identify based on its bridge to
+  // NSError. CFErrorRef used to be declared with "objc_bridge" but is now
+  // declared with "objc_bridge_mutable", so look for either one of the two
+  // attributes.
+  if (RD->getTagKind() == TTK_Struct) {
+    IdentifierInfo *bridgedType = nullptr;
+    if (auto bridgeAttr = RD->getAttr<ObjCBridgeAttr>())
+      bridgedType = bridgeAttr->getBridgedType();
+    else if (auto bridgeAttr = RD->getAttr<ObjCBridgeMutableAttr>())
+      bridgedType = bridgeAttr->getBridgedType();
+
+    if (bridgedType == getNSErrorIdent()) {
+      CFError = RD;
+      return true;
+    }
+  }
+
+  return false;
+}
+
 static FileID getNullabilityCompletenessCheckFileID(Sema &S,
                                                     SourceLocation loc) {
   // If we're anywhere in a function, method, or closure context, don't perform
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 4c8293f3bf4c00..6457b192477e37 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -28,6 +28,7 @@
 #include "clang/AST/StmtCXX.h"
 #include "clang/AST/StmtObjC.h"
 #include "clang/AST/StmtOpenMP.h"
+#include "clang/Basic/DiagnosticParse.h"
 #include "clang/Basic/OpenMPKinds.h"
 #include "clang/Sema/Designator.h"
 #include "clang/Sema/Lookup.h"
@@ -13193,6 +13194,18 @@ TreeTransform<Derived>::TransformCXXFoldExpr(CXXFoldExpr *E) {
         E->getEllipsisLoc(), RHS.get(), E->getEndLoc(), NumExpansions);
   }
 
+  // Formally a fold expression expands to nested parenthesized expressions.
+  // Enforce this limit to avoid creating trees so deep we can't safely
+  // traverse them.
+  if (NumExpansions && SemaRef.getLangOpts().BracketDepth < NumExpansions) {
+    SemaRef.Diag(E->getEllipsisLoc(),
+                 clang::diag::err_fold_expression_limit_exceeded)
+        << *NumExpansions << SemaRef.getLangOpts().BracketDepth
+        << E->getSourceRange();
+    SemaRef.Diag(E->getEllipsisLoc(), diag::note_bracket_depth);
+    return ExprError();
+  }
+
   // The transform has determined that we should perform an elementwise
   // expansion of the pattern. Do so.
   ExprResult Result = getDerived().TransformExpr(E->getInit());
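To see why the guard keys off `BracketDepth`: a fold over a pack of N elements formally expands to N nested parenthesized binary expressions, so a huge pack produces an AST too deep to traverse recursively. A sketch of code that now hits the new fatal diagnostic instead of crashing the compiler (the 256 figure is Clang's default `-fbracket-depth`):

```cpp
// Expands to (((0 + N1) + N2) + ...): one nesting level per pack element.
template <int... Ns> constexpr int sum() { return (0 + ... + Ns); }

// sum<1, 2, 3>() == 6 instantiates fine; instantiating sum over a pack with
// more elements than the bracket depth (default 256) now emits
// err_fold_expression_limit_exceeded during template instantiation.
```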
diff --git a/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
index 285d2da104f1ac..88e80c481a5a7f 100644
--- a/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
@@ -83,7 +83,7 @@ class PthreadLockChecker : public Checker<check::PostCall, check::DeadSymbols,
   typedef void (PthreadLockChecker::*FnCheck)(const CallEvent &Call,
                                               CheckerContext &C,
-                                              CheckerKind checkkind) const;
+                                              CheckerKind CheckKind) const;
   CallDescriptionMap<FnCheck> PThreadCallbacks = {
       // Init.
       {{"pthread_mutex_init", 2}, &PthreadLockChecker::InitAnyLock},
@@ -167,46 +167,49 @@ class PthreadLockChecker : public Checker<check::PostCall, check::DeadSymbols,
 
 private:
-  void reportUseDestroyedBug(const CallEvent &Call, CheckerContext &C,
-                             unsigned ArgNo, CheckerKind checkkind) const;
+  void reportBug(CheckerContext &C, std::unique_ptr<BugType> BT[],
+                 const Expr *MtxExpr, CheckerKind CheckKind,
+                 StringRef Desc) const;
+
   // Init.
   void InitAnyLock(const CallEvent &Call, CheckerContext &C,
-                   CheckerKind checkkind) const;
-  void InitLockAux(const CallEvent &Call, CheckerContext &C, unsigned ArgNo,
-                   SVal Lock, CheckerKind checkkind) const;
+                   CheckerKind CheckKind) const;
+  void InitLockAux(const CallEvent &Call, CheckerContext &C,
+                   const Expr *MtxExpr, SVal MtxVal,
+                   CheckerKind CheckKind) const;
 
   // Lock, Try-lock.
   void AcquirePthreadLock(const CallEvent &Call, CheckerContext &C,
-                          CheckerKind checkkind) const;
+                          CheckerKind CheckKind) const;
   void AcquireXNULock(const CallEvent &Call, CheckerContext &C,
-                      CheckerKind checkkind) const;
+                      CheckerKind CheckKind) const;
   void TryPthreadLock(const CallEvent &Call, CheckerContext &C,
-                      CheckerKind checkkind) const;
+                      CheckerKind CheckKind) const;
   void TryXNULock(const CallEvent &Call, CheckerContext &C,
-                  CheckerKind checkkind) const;
+                  CheckerKind CheckKind) const;
   void TryFuchsiaLock(const CallEvent &Call, CheckerContext &C,
-                      CheckerKind checkkind) const;
+                      CheckerKind CheckKind) const;
   void TryC11Lock(const CallEvent &Call, CheckerContext &C,
-                  CheckerKind checkkind) const;
-  void AcquireLockAux(const CallEvent &Call, CheckerContext &C, unsigned ArgNo,
-                      SVal lock, bool isTryLock, LockingSemantics semantics,
-                      CheckerKind checkkind) const;
+                  CheckerKind CheckKind) const;
+  void AcquireLockAux(const CallEvent &Call, CheckerContext &C,
+                      const Expr *MtxExpr, SVal MtxVal, bool IsTryLock,
+                      LockingSemantics Semantics, CheckerKind CheckKind) const;
 
   // Release.
   void ReleaseAnyLock(const CallEvent &Call, CheckerContext &C,
-                      CheckerKind checkkind) const;
-  void ReleaseLockAux(const CallEvent &Call, CheckerContext &C, unsigned ArgNo,
-                      SVal lock, CheckerKind checkkind) const;
+                      CheckerKind CheckKind) const;
+  void ReleaseLockAux(const CallEvent &Call, CheckerContext &C,
+                      const Expr *MtxExpr, SVal MtxVal,
+                      CheckerKind CheckKind) const;
 
   // Destroy.
   void DestroyPthreadLock(const CallEvent &Call, CheckerContext &C,
-                          CheckerKind checkkind) const;
+                          CheckerKind CheckKind) const;
   void DestroyXNULock(const CallEvent &Call, CheckerContext &C,
-                      CheckerKind checkkind) const;
-  void DestroyLockAux(const CallEvent &Call, CheckerContext &C, unsigned ArgNo,
-                      SVal Lock, LockingSemantics semantics,
-                      CheckerKind checkkind) const;
+                      CheckerKind CheckKind) const;
+  void DestroyLockAux(const CallEvent &Call, CheckerContext &C,
+                      const Expr *MtxExpr, SVal MtxVal,
+                      LockingSemantics Semantics, CheckerKind CheckKind) const;
 
 public:
   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
@@ -226,18 +229,18 @@ class PthreadLockChecker : public Checker<check::PostCall, check::DeadSymbols,
   mutable std::unique_ptr<BugType> BT_initlock[CK_NumCheckKinds];
   mutable std::unique_ptr<BugType> BT_lor[CK_NumCheckKinds];
 
-  void initBugType(CheckerKind checkKind) const {
-    if (BT_doublelock[checkKind])
+  void initBugType(CheckerKind CheckKind) const {
+    if (BT_doublelock[CheckKind])
       return;
-    BT_doublelock[checkKind].reset(
-        new BugType{CheckNames[checkKind], "Double locking", "Lock checker"});
-    BT_doubleunlock[checkKind].reset(
-        new BugType{CheckNames[checkKind], "Double unlocking", "Lock checker"});
-    BT_destroylock[checkKind].reset(new BugType{
-        CheckNames[checkKind], "Use destroyed lock", "Lock checker"});
-    BT_initlock[checkKind].reset(new BugType{
-        CheckNames[checkKind], "Init invalid lock", "Lock checker"});
-    BT_lor[checkKind].reset(new BugType{CheckNames[checkKind],
+    BT_doublelock[CheckKind].reset(
+        new BugType{CheckNames[CheckKind], "Double locking", "Lock checker"});
+    BT_doubleunlock[CheckKind].reset(
+        new BugType{CheckNames[CheckKind], "Double unlocking", "Lock checker"});
+    BT_destroylock[CheckKind].reset(new BugType{
+        CheckNames[CheckKind], "Use destroyed lock", "Lock checker"});
+    BT_initlock[CheckKind].reset(new BugType{
+        CheckNames[CheckKind], "Init invalid lock", "Lock checker"});
+    BT_lor[CheckKind].reset(new BugType{CheckNames[CheckKind],
                                         "Lock order reversal", "Lock checker"});
   }
 };
@@ -341,53 +344,53 @@ void PthreadLockChecker::printState(raw_ostream &Out, ProgramStateRef State,
 
 void PthreadLockChecker::AcquirePthreadLock(const CallEvent &Call,
                                             CheckerContext &C,
-                                            CheckerKind checkKind) const {
-  AcquireLockAux(Call, C, 0, Call.getArgSVal(0), false, PthreadSemantics,
-                 checkKind);
+                                            CheckerKind CheckKind) const {
+  AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), false,
+                 PthreadSemantics, CheckKind);
 }
 
 void PthreadLockChecker::AcquireXNULock(const CallEvent &Call,
                                         CheckerContext &C,
-                                        CheckerKind checkKind) const {
-  AcquireLockAux(Call, C, 0, Call.getArgSVal(0), false, XNUSemantics,
-                 checkKind);
+                                        CheckerKind CheckKind) const {
+  AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), false,
+                 XNUSemantics, CheckKind);
 }
 
 void PthreadLockChecker::TryPthreadLock(const CallEvent &Call,
                                         CheckerContext &C,
-                                        CheckerKind checkKind) const {
-  AcquireLockAux(Call, C, 0, Call.getArgSVal(0), true, PthreadSemantics,
-                 checkKind);
+                                        CheckerKind CheckKind) const {
+  AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), true,
+                 PthreadSemantics, CheckKind);
 }
 
 void PthreadLockChecker::TryXNULock(const CallEvent &Call, CheckerContext &C,
-                                    CheckerKind checkKind) const {
-  AcquireLockAux(Call, C, 0, Call.getArgSVal(0), true, PthreadSemantics,
-                 checkKind);
+                                    CheckerKind CheckKind) const {
+  AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), true,
+                 PthreadSemantics, CheckKind);
 }
 
 void PthreadLockChecker::TryFuchsiaLock(const CallEvent &Call,
                                         CheckerContext &C,
-                                        CheckerKind checkKind) const {
-  AcquireLockAux(Call, C, 0, Call.getArgSVal(0), true, PthreadSemantics,
-                 checkKind);
+                                        CheckerKind CheckKind) const {
+  AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), true,
+                 PthreadSemantics, CheckKind);
 }
 
 void PthreadLockChecker::TryC11Lock(const CallEvent &Call, CheckerContext &C,
-                                    CheckerKind checkKind) const {
-  AcquireLockAux(Call, C, 0, Call.getArgSVal(0), true, PthreadSemantics,
-                 checkKind);
+                                    CheckerKind CheckKind) const {
+  AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), true,
+                 PthreadSemantics, CheckKind);
 }
 
 void PthreadLockChecker::AcquireLockAux(const CallEvent &Call,
-                                        CheckerContext &C, unsigned ArgNo,
-                                        SVal lock, bool isTryLock,
-                                        enum LockingSemantics semantics,
-                                        CheckerKind checkKind) const {
-  if (!ChecksEnabled[checkKind])
+                                        CheckerContext &C, const Expr *MtxExpr,
+                                        SVal MtxVal, bool IsTryLock,
+                                        enum LockingSemantics Semantics,
+                                        CheckerKind CheckKind) const {
+  if (!ChecksEnabled[CheckKind])
     return;
 
-  const MemRegion *lockR = lock.getAsRegion();
+  const MemRegion *lockR = MtxVal.getAsRegion();
   if (!lockR)
     return;
 
@@ -398,28 +401,23 @@ void PthreadLockChecker::AcquireLockAux(const CallEvent &Call,
 
   if (const LockState *LState = state->get<LockMap>(lockR)) {
     if (LState->isLocked()) {
-      ExplodedNode *N = C.generateErrorNode();
-      if (!N)
-        return;
-      initBugType(checkKind);
-      auto report = std::make_unique<PathSensitiveBugReport>(
-          *BT_doublelock[checkKind], "This lock has already been acquired", N);
-      report->addRange(Call.getArgExpr(ArgNo)->getSourceRange());
-      C.emitReport(std::move(report));
+      reportBug(C, BT_doublelock, MtxExpr, CheckKind,
+                "This lock has already been acquired");
      return;
    } else if (LState->isDestroyed()) {
-      reportUseDestroyedBug(Call, C, ArgNo, checkKind);
+      reportBug(C, BT_destroylock, MtxExpr, CheckKind,
+                "This lock has already been destroyed");
      return;
    }
  }
 
   ProgramStateRef lockSucc = state;
-  if (isTryLock) {
+  if (IsTryLock) {
     // Bifurcate the state, and allow a mode where the lock acquisition fails.
     SVal RetVal = Call.getReturnValue();
     if (auto DefinedRetVal = RetVal.getAs<DefinedSVal>()) {
       ProgramStateRef lockFail;
-      switch (semantics) {
+      switch (Semantics) {
       case PthreadSemantics:
         std::tie(lockFail, lockSucc) = state->assume(*DefinedRetVal);
         break;
@@ -434,7 +432,7 @@ void PthreadLockChecker::AcquireLockAux(const CallEvent &Call,
     }
     // We might want to handle the case when the mutex lock function was inlined
     // and returned an Unknown or Undefined value.
-  } else if (semantics == PthreadSemantics) {
+  } else if (Semantics == PthreadSemantics) {
     // Assume that the return value was 0.
     SVal RetVal = Call.getReturnValue();
     if (auto DefinedRetVal = RetVal.getAs<DefinedSVal>()) {
@@ -447,7 +445,7 @@ void PthreadLockChecker::AcquireLockAux(const CallEvent &Call,
     // and returned an Unknown or Undefined value.
   } else {
     // XNU locking semantics return void on non-try locks
-    assert((semantics == XNUSemantics) && "Unknown locking semantics");
+    assert((Semantics == XNUSemantics) && "Unknown locking semantics");
     lockSucc = state;
   }
 
@@ -459,18 +457,18 @@ void PthreadLockChecker::AcquireLockAux(const CallEvent &Call,
 
 void PthreadLockChecker::ReleaseAnyLock(const CallEvent &Call,
                                         CheckerContext &C,
-                                        CheckerKind checkKind) const {
-  ReleaseLockAux(Call, C, 0, Call.getArgSVal(0), checkKind);
+                                        CheckerKind CheckKind) const {
+  ReleaseLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), CheckKind);
 }
 
 void PthreadLockChecker::ReleaseLockAux(const CallEvent &Call,
-                                        CheckerContext &C, unsigned ArgNo,
-                                        SVal lock,
-                                        CheckerKind checkKind) const {
-  if (!ChecksEnabled[checkKind])
+                                        CheckerContext &C, const Expr *MtxExpr,
+                                        SVal MtxVal,
+                                        CheckerKind CheckKind) const {
+  if (!ChecksEnabled[CheckKind])
     return;
 
-  const MemRegion *lockR = lock.getAsRegion();
+  const MemRegion *lockR = MtxVal.getAsRegion();
   if (!lockR)
     return;
 
@@ -481,18 +479,12 @@ void PthreadLockChecker::ReleaseLockAux(const CallEvent &Call,
 
   if (const LockState *LState = state->get<LockMap>(lockR)) {
     if (LState->isUnlocked()) {
-      ExplodedNode *N = C.generateErrorNode();
-      if (!N)
-        return;
-      initBugType(checkKind);
-      auto Report = std::make_unique<PathSensitiveBugReport>(
-          *BT_doubleunlock[checkKind], "This lock has already been unlocked",
-          N);
-      Report->addRange(Call.getArgExpr(ArgNo)->getSourceRange());
-      C.emitReport(std::move(Report));
+      reportBug(C, BT_doubleunlock, MtxExpr, CheckKind,
+                "This lock has already been unlocked");
       return;
     } else if (LState->isDestroyed()) {
-      reportUseDestroyedBug(Call, C, ArgNo, checkKind);
+      reportBug(C, BT_destroylock, MtxExpr, CheckKind,
+                "This lock has already been destroyed");
       return;
     }
   }
@@ -502,17 +494,9 @@ void PthreadLockChecker::ReleaseLockAux(const CallEvent &Call,
   if (!LS.isEmpty()) {
     const MemRegion *firstLockR = LS.getHead();
     if (firstLockR != lockR) {
-      ExplodedNode *N = C.generateErrorNode();
-      if (!N)
-        return;
-      initBugType(checkKind);
-      auto report = std::make_unique<PathSensitiveBugReport>(
-          *BT_lor[checkKind],
-          "This was not the most recently acquired lock. Possible "
-          "lock order reversal",
-          N);
-      report->addRange(Call.getArgExpr(ArgNo)->getSourceRange());
-      C.emitReport(std::move(report));
+      reportBug(C, BT_lor, MtxExpr, CheckKind,
+                "This was not the most recently acquired lock. Possible lock "
+                "order reversal");
       return;
     }
     // Record that the lock was released.
@@ -525,25 +509,27 @@ void PthreadLockChecker::ReleaseLockAux(const CallEvent &Call,
 
 void PthreadLockChecker::DestroyPthreadLock(const CallEvent &Call,
                                             CheckerContext &C,
-                                            CheckerKind checkKind) const {
-  DestroyLockAux(Call, C, 0, Call.getArgSVal(0), PthreadSemantics, checkKind);
+                                            CheckerKind CheckKind) const {
+  DestroyLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0),
+                 PthreadSemantics, CheckKind);
 }
 
 void PthreadLockChecker::DestroyXNULock(const CallEvent &Call,
                                         CheckerContext &C,
-                                        CheckerKind checkKind) const {
-  DestroyLockAux(Call, C, 0, Call.getArgSVal(0), XNUSemantics, checkKind);
+                                        CheckerKind CheckKind) const {
+  DestroyLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), XNUSemantics,
+                 CheckKind);
 }
 
 void PthreadLockChecker::DestroyLockAux(const CallEvent &Call,
-                                        CheckerContext &C, unsigned ArgNo,
-                                        SVal Lock,
-                                        enum LockingSemantics semantics,
-                                        CheckerKind checkKind) const {
-  if (!ChecksEnabled[checkKind])
+                                        CheckerContext &C, const Expr *MtxExpr,
+                                        SVal MtxVal,
+                                        enum LockingSemantics Semantics,
+                                        CheckerKind CheckKind) const {
+  if (!ChecksEnabled[CheckKind])
     return;
 
-  const MemRegion *LockR = Lock.getAsRegion();
+  const MemRegion *LockR = MtxVal.getAsRegion();
   if (!LockR)
     return;
 
@@ -556,7 +542,7 @@ void PthreadLockChecker::DestroyLockAux(const CallEvent &Call,
   const LockState *LState = State->get<LockMap>(LockR);
   // Checking the return value of the destroy method only in the case of
   // PthreadSemantics
-  if (semantics == PthreadSemantics) {
+  if (Semantics == PthreadSemantics) {
     if (!LState || LState->isUnlocked()) {
       SymbolRef sym = Call.getReturnValue().getAsSymbol();
       if (!sym) {
@@ -581,36 +567,26 @@ void PthreadLockChecker::DestroyLockAux(const CallEvent &Call,
       return;
     }
   }
-  StringRef Message;
-  if (LState->isLocked()) {
-    Message = "This lock is still locked";
-  } else {
-    Message = "This lock has already been destroyed";
-  }
+  StringRef Message = LState->isLocked()
+                          ? "This lock is still locked"
+                          : "This lock has already been destroyed";
 
-  ExplodedNode *N = C.generateErrorNode();
-  if (!N)
-    return;
-  initBugType(checkKind);
-  auto Report = std::make_unique<PathSensitiveBugReport>(
-      *BT_destroylock[checkKind], Message, N);
-  Report->addRange(Call.getArgExpr(ArgNo)->getSourceRange());
-  C.emitReport(std::move(Report));
+  reportBug(C, BT_destroylock, MtxExpr, CheckKind, Message);
 }
 
 void PthreadLockChecker::InitAnyLock(const CallEvent &Call, CheckerContext &C,
-                                     CheckerKind checkKind) const {
-  InitLockAux(Call, C, 0, Call.getArgSVal(0), checkKind);
+                                     CheckerKind CheckKind) const {
+  InitLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), CheckKind);
 }
 
 void PthreadLockChecker::InitLockAux(const CallEvent &Call, CheckerContext &C,
-                                     unsigned ArgNo, SVal Lock,
-                                     CheckerKind checkKind) const {
-  if (!ChecksEnabled[checkKind])
+                                     const Expr *MtxExpr, SVal MtxVal,
+                                     CheckerKind CheckKind) const {
+  if (!ChecksEnabled[CheckKind])
     return;
 
-  const MemRegion *LockR = Lock.getAsRegion();
+  const MemRegion *LockR = MtxVal.getAsRegion();
   if (!LockR)
     return;
 
@@ -627,35 +603,24 @@ void PthreadLockChecker::InitLockAux(const CallEvent &Call, CheckerContext &C,
     return;
   }
 
-  StringRef Message;
-
-  if (LState->isLocked()) {
-    Message = "This lock is still being held";
-  } else {
-    Message = "This lock has already been initialized";
-  }
+  StringRef Message = LState->isLocked()
"This lock is still being held" + : "This lock has already been initialized"; - ExplodedNode *N = C.generateErrorNode(); - if (!N) - return; - initBugType(checkKind); - auto Report = std::make_unique( - *BT_initlock[checkKind], Message, N); - Report->addRange(Call.getArgExpr(ArgNo)->getSourceRange()); - C.emitReport(std::move(Report)); + reportBug(C, BT_initlock, MtxExpr, CheckKind, Message); } -void PthreadLockChecker::reportUseDestroyedBug(const CallEvent &Call, - CheckerContext &C, - unsigned ArgNo, - CheckerKind checkKind) const { +void PthreadLockChecker::reportBug(CheckerContext &C, + std::unique_ptr BT[], + const Expr *MtxExpr, CheckerKind CheckKind, + StringRef Desc) const { ExplodedNode *N = C.generateErrorNode(); if (!N) return; - initBugType(checkKind); - auto Report = std::make_unique( - *BT_destroylock[checkKind], "This lock has already been destroyed", N); - Report->addRange(Call.getArgExpr(ArgNo)->getSourceRange()); + initBugType(CheckKind); + auto Report = + std::make_unique(*BT[CheckKind], Desc, N); + Report->addRange(MtxExpr->getSourceRange()); C.emitReport(std::move(Report)); } diff --git a/clang/lib/Tooling/ArgumentsAdjusters.cpp b/clang/lib/Tooling/ArgumentsAdjusters.cpp index a857b57fbf7bca..bcfb5b39a07706 100644 --- a/clang/lib/Tooling/ArgumentsAdjusters.cpp +++ b/clang/lib/Tooling/ArgumentsAdjusters.cpp @@ -21,6 +21,16 @@ namespace clang { namespace tooling { +static StringRef getDriverMode(const CommandLineArguments &Args) { + for (const auto &Arg : Args) { + StringRef ArgRef = Arg; + if (ArgRef.consume_front("--driver-mode=")) { + return ArgRef; + } + } + return StringRef(); +} + /// Add -fsyntax-only option and drop options that triggers output generation. ArgumentsAdjuster getClangSyntaxOnlyAdjuster() { return [](const CommandLineArguments &Args, StringRef /*unused*/) { @@ -93,20 +103,28 @@ ArgumentsAdjuster getClangStripSerializeDiagnosticAdjuster() { ArgumentsAdjuster getClangStripDependencyFileAdjuster() { return [](const CommandLineArguments &Args, StringRef /*unused*/) { + auto UsingClDriver = (getDriverMode(Args) == "cl"); + CommandLineArguments AdjustedArgs; for (size_t i = 0, e = Args.size(); i < e; ++i) { StringRef Arg = Args[i]; - // All dependency-file options begin with -M. These include -MM, - // -MF, -MG, -MP, -MT, -MQ, -MD, and -MMD. - if (!Arg.startswith("-M") && !Arg.startswith("/showIncludes") && - !Arg.startswith("-showIncludes")) { - AdjustedArgs.push_back(Args[i]); + + // These flags take an argument: -MX foo. Skip the next argument also. + if (!UsingClDriver && (Arg == "-MF" || Arg == "-MT" || Arg == "-MQ")) { + ++i; continue; } + // When not using the cl driver mode, dependency file generation options + // begin with -M. These include -MM, -MF, -MG, -MP, -MT, -MQ, -MD, and + // -MMD. + if (!UsingClDriver && Arg.startswith("-M")) + continue; + // Under MSVC's cl driver mode, dependency file generation is controlled + // using /showIncludes + if (Arg.startswith("/showIncludes") || Arg.startswith("-showIncludes")) + continue; - if (Arg == "-MF" || Arg == "-MT" || Arg == "-MQ") - // These flags take an argument: -MX foo. Skip the next argument also. 
diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp
index a9f326439a2a57..1942290b5abc58 100644
--- a/clang/lib/Tooling/Syntax/BuildTree.cpp
+++ b/clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -13,6 +13,8 @@
 #include "clang/AST/DeclarationName.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/AST/IgnoreExpr.h"
+#include "clang/AST/OperationKinds.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/TypeLoc.h"
@@ -44,8 +46,44 @@
 
 using namespace clang;
 
+// Ignores the implicit `CXXConstructExpr` for copy/move constructor calls
+// generated by the compiler, as well as in implicit conversions like the one
+// wrapping `1` in `X x = 1;`.
+static Expr *IgnoreImplicitConstructorSingleStep(Expr *E) {
+  if (auto *C = dyn_cast<CXXConstructExpr>(E)) {
+    auto NumArgs = C->getNumArgs();
+    if (NumArgs == 1 || (NumArgs > 1 && isa<CXXDefaultArgExpr>(C->getArg(1)))) {
+      Expr *A = C->getArg(0);
+      if (C->getParenOrBraceRange().isInvalid())
+        return A;
+    }
+  }
+  return E;
+}
+
+// In:
+// struct X {
+//   X(int)
+// };
+// X x = X(1);
+// Ignores the implicit `CXXFunctionalCastExpr` that wraps
+// `CXXConstructExpr X(1)`.
+static Expr *IgnoreCXXFunctionalCastExprWrappingConstructor(Expr *E) {
+  if (auto *F = dyn_cast<CXXFunctionalCastExpr>(E)) {
+    if (F->getCastKind() == CK_ConstructorConversion)
+      return F->getSubExpr();
+  }
+  return E;
+}
+
+static Expr *IgnoreImplicit(Expr *E) {
+  return IgnoreExprNodes(E, IgnoreImplicitSingleStep,
+                         IgnoreImplicitConstructorSingleStep,
+                         IgnoreCXXFunctionalCastExprWrappingConstructor);
+}
+
 LLVM_ATTRIBUTE_UNUSED
-static bool isImplicitExpr(Expr *E) { return E->IgnoreImplicit() != E; }
+static bool isImplicitExpr(Expr *E) { return IgnoreImplicit(E) != E; }
 
 namespace {
 /// Get start location of the Declarator from the TypeLoc.
@@ -116,6 +154,13 @@ struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> {
 };
 } // namespace
 
+static CallExpr::arg_range dropDefaultArgs(CallExpr::arg_range Args) {
+  auto firstDefaultArg = std::find_if(Args.begin(), Args.end(), [](auto it) {
+    return isa<CXXDefaultArgExpr>(it);
+  });
+  return llvm::make_range(Args.begin(), firstDefaultArg);
+}
+
 static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) {
   switch (E.getOperator()) {
   // Comparison
@@ -740,7 +785,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
       for (auto *D : DS->decls())
         Builder.noticeDeclWithoutSemicolon(D);
     } else if (auto *E = dyn_cast_or_null<Expr>(S)) {
-      return RecursiveASTVisitor::TraverseStmt(E->IgnoreImplicit());
+      return RecursiveASTVisitor::TraverseStmt(IgnoreImplicit(E));
     }
     return RecursiveASTVisitor::TraverseStmt(S);
   }
@@ -1073,7 +1118,11 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
     return true;
   }
 
-  syntax::CallArguments *buildCallArguments(CallExpr::arg_range Args) {
+  /// Builds `CallArguments` syntax node from arguments that appear in source
+  /// code, i.e. not default arguments.
+  syntax::CallArguments *
+  buildCallArguments(CallExpr::arg_range ArgsAndDefaultArgs) {
+    auto Args = dropDefaultArgs(ArgsAndDefaultArgs);
     for (const auto &Arg : Args) {
       Builder.markExprChild(Arg, syntax::NodeRole::ListElement);
       const auto *DelimiterToken =
@@ -1111,6 +1160,14 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
     return true;
   }
 
+  bool WalkUpFromCXXConstructExpr(CXXConstructExpr *S) {
+    // Ignore the implicit calls to default constructors.
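The new IgnoreImplicit builds on clang's IgnoreExprNodes, which applies a set of single-step simplifiers until none of them makes progress; the WalkUpFromCXXConstructExpr override resumes below. A minimal sketch of that fixpoint loop, with Node standing in for clang::Expr (illustrative, not the actual header):

#include <initializer_list>

// Toy stand-in for clang::Expr: a node that may wrap another node.
struct Node {
  Node *Inner = nullptr;
  bool Implicit = false;
};

using SingleStep = Node *(*)(Node *);

// One single-step simplifier: peel one implicit wrapper, or return E as-is.
static Node *ignoreImplicitSingleStep(Node *E) {
  return (E->Implicit && E->Inner) ? E->Inner : E;
}

// The fixpoint driver: keep applying every step until nothing changes.
static Node *ignoreNodes(Node *E, std::initializer_list<SingleStep> Steps) {
  Node *Last = nullptr;
  while (E != Last) {
    Last = E;
    for (SingleStep Fn : Steps)
      E = Fn(E);
  }
  return E;
}

int main() {
  Node Leaf;
  Node Wrap1{&Leaf, true}, Wrap2{&Wrap1, true};
  return ignoreNodes(&Wrap2, {ignoreImplicitSingleStep}) == &Leaf ? 0 : 1;
}

Because each step either strips exactly one layer or leaves the expression unchanged, the loop terminates, and adding a new kind of implicit wrapper only requires adding one more single-step function.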
+ if ((S->getNumArgs() == 0 || isa(S->getArg(0))) && + S->getParenOrBraceRange().isInvalid()) + return true; + return RecursiveASTVisitor::WalkUpFromCXXConstructExpr(S); + } + bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { // To construct a syntax tree of the same shape for calls to built-in and // user-defined operators, ignore the `DeclRefExpr` that refers to the @@ -1187,6 +1244,8 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } } + bool WalkUpFromCXXDefaultArgExpr(CXXDefaultArgExpr *S) { return true; } + bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { auto Tokens = Builder.getDeclarationRange(S); if (Tokens.front().kind() == tok::coloncolon) { @@ -1579,7 +1638,7 @@ void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { if (!Child) return; - Child = Child->IgnoreImplicit(); + Child = IgnoreImplicit(Child); syntax::Tree *ChildNode = Mapping.find(Child); assert(ChildNode != nullptr); diff --git a/clang/test/Analysis/analyzer-enabled-checkers.c b/clang/test/Analysis/analyzer-enabled-checkers.c index 7c00e78c16acd8..bef786a1a59b6d 100644 --- a/clang/test/Analysis/analyzer-enabled-checkers.c +++ b/clang/test/Analysis/analyzer-enabled-checkers.c @@ -6,11 +6,11 @@ // CHECK: OVERVIEW: Clang Static Analyzer Enabled Checkers List // CHECK-EMPTY: -// CHECK-NEXT: core.CallAndMessageModeling // CHECK-NEXT: apiModeling.StdCLibraryFunctions // CHECK-NEXT: apiModeling.TrustNonnull // CHECK-NEXT: apiModeling.llvm.CastValue // CHECK-NEXT: apiModeling.llvm.ReturnValue +// CHECK-NEXT: core.CallAndMessageModeling // CHECK-NEXT: core.CallAndMessage // CHECK-NEXT: core.DivideZero // CHECK-NEXT: core.DynamicTypePropagation diff --git a/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c b/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c new file mode 100644 index 00000000000000..9ad1be05385172 --- /dev/null +++ b/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c @@ -0,0 +1,66 @@ +// Here we test the order of the Checkers when StdCLibraryFunctionArgs is +// enabled. + +// RUN: %clang --analyze %s --target=x86_64-pc-linux-gnu \ +// RUN: -Xclang -analyzer-checker=core \ +// RUN: -Xclang -analyzer-checker=apiModeling.StdCLibraryFunctions \ +// RUN: -Xclang -analyzer-config \ +// RUN: -Xclang apiModeling.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -Xclang -analyzer-checker=alpha.unix.StdCLibraryFunctionArgs \ +// RUN: -Xclang -analyzer-checker=alpha.unix.Stream \ +// RUN: -Xclang -analyzer-list-enabled-checkers \ +// RUN: -Xclang -analyzer-display-progress \ +// RUN: 2>&1 | FileCheck %s --implicit-check-not=ANALYZE \ +// RUN: --implicit-check-not=\. 
+ +// CHECK: OVERVIEW: Clang Static Analyzer Enabled Checkers List +// CHECK-EMPTY: +// CHECK-NEXT: core.CallAndMessageModeling +// CHECK-NEXT: core.CallAndMessage +// CHECK-NEXT: core.NonNullParamChecker +// CHECK-NEXT: alpha.unix.Stream +// CHECK-NEXT: apiModeling.StdCLibraryFunctions +// CHECK-NEXT: alpha.unix.StdCLibraryFunctionArgs +// CHECK-NEXT: apiModeling.TrustNonnull +// CHECK-NEXT: apiModeling.llvm.CastValue +// CHECK-NEXT: apiModeling.llvm.ReturnValue +// CHECK-NEXT: core.DivideZero +// CHECK-NEXT: core.DynamicTypePropagation +// CHECK-NEXT: core.NonnilStringConstants +// CHECK-NEXT: core.NullDereference +// CHECK-NEXT: core.StackAddrEscapeBase +// CHECK-NEXT: core.StackAddressEscape +// CHECK-NEXT: core.UndefinedBinaryOperatorResult +// CHECK-NEXT: core.VLASize +// CHECK-NEXT: core.builtin.BuiltinFunctions +// CHECK-NEXT: core.builtin.NoReturnFunctions +// CHECK-NEXT: core.uninitialized.ArraySubscript +// CHECK-NEXT: core.uninitialized.Assign +// CHECK-NEXT: core.uninitialized.Branch +// CHECK-NEXT: core.uninitialized.CapturedBlockVariable +// CHECK-NEXT: core.uninitialized.UndefReturn +// CHECK-NEXT: deadcode.DeadStores +// CHECK-NEXT: nullability.NullabilityBase +// CHECK-NEXT: nullability.NullPassedToNonnull +// CHECK-NEXT: nullability.NullReturnedFromNonnull +// CHECK-NEXT: security.insecureAPI.SecuritySyntaxChecker +// CHECK-NEXT: security.insecureAPI.UncheckedReturn +// CHECK-NEXT: security.insecureAPI.getpw +// CHECK-NEXT: security.insecureAPI.gets +// CHECK-NEXT: security.insecureAPI.mkstemp +// CHECK-NEXT: security.insecureAPI.mktemp +// CHECK-NEXT: security.insecureAPI.vfork +// CHECK-NEXT: unix.API +// CHECK-NEXT: unix.cstring.CStringModeling +// CHECK-NEXT: unix.DynamicMemoryModeling +// CHECK-NEXT: unix.Malloc +// CHECK-NEXT: unix.MallocSizeof +// CHECK-NEXT: unix.MismatchedDeallocator +// CHECK-NEXT: unix.Vfork +// CHECK-NEXT: unix.cstring.BadSizeArg +// CHECK-NEXT: unix.cstring.NullArg + +int main() { + int i; + (void)(10 / i); +} diff --git a/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c b/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c new file mode 100644 index 00000000000000..0ad3c277dfd7de --- /dev/null +++ b/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c @@ -0,0 +1,64 @@ +// Check that the more specific checkers report and not the generic +// StdCLibraryFunctionArgs checker. + +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=apiModeling.StdCLibraryFunctions \ +// RUN: -analyzer-config apiModeling.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctionArgs \ +// RUN: -analyzer-checker=alpha.unix.Stream \ +// RUN: -triple x86_64-unknown-linux-gnu \ +// RUN: -verify + + +// Make sure that all used functions have their summary loaded. 
+ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=apiModeling.StdCLibraryFunctions \ +// RUN: -analyzer-config apiModeling.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctionArgs \ +// RUN: -analyzer-checker=alpha.unix.Stream \ +// RUN: -analyzer-config apiModeling.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -triple x86_64-unknown-linux 2>&1 | FileCheck %s + +// CHECK: Loaded summary for: int isalnum(int) +// CHECK: Loaded summary for: unsigned long fread(void *restrict, size_t, size_t, FILE *restrict) __attribute__((nonnull(1))) +// CHECK: Loaded summary for: int fileno(FILE *stream) + +void initializeSummaryMap(); +// We analyze this function first, and the call expression inside initializes +// the summary map. This way we force the loading of the summaries. The +// summaries would not be loaded without this because during the first bug +// report in WeakDependency::checkPreCall we stop further evaluation. And +// StdLibraryFunctionsChecker lazily initializes its summary map from its +// checkPreCall. +void analyzeThisFirst() { + initializeSummaryMap(); +} + +typedef __typeof(sizeof(int)) size_t; +struct FILE; +typedef struct FILE FILE; + +int isalnum(int); +size_t fread(void *restrict, size_t, size_t, FILE *restrict) __attribute__((nonnull(1))); +int fileno(FILE *stream); + +void test_uninit_arg() { + int v; + int r = isalnum(v); // \ + // expected-warning{{1st function call argument is an uninitialized value [core.CallAndMessage]}} + (void)r; +} + +void test_notnull_arg(FILE *F) { + int *p = 0; + fread(p, sizeof(int), 5, F); // \ + expected-warning{{Null pointer passed to 1st parameter expecting 'nonnull' [core.NonNullParamChecker]}} +} + +void test_notnull_stream_arg() { + fileno(0); // \ + // expected-warning{{Stream pointer might be NULL [alpha.unix.Stream]}} +} diff --git a/clang/test/CodeGen/x86-64-inline-asm.c b/clang/test/CodeGen/X86/x86-64-inline-asm.c similarity index 100% rename from clang/test/CodeGen/x86-64-inline-asm.c rename to clang/test/CodeGen/X86/x86-64-inline-asm.c diff --git a/clang/test/CodeGen/x86-GCC-inline-asm-Y-constraints.c b/clang/test/CodeGen/X86/x86-GCC-inline-asm-Y-constraints.c similarity index 100% rename from clang/test/CodeGen/x86-GCC-inline-asm-Y-constraints.c rename to clang/test/CodeGen/X86/x86-GCC-inline-asm-Y-constraints.c diff --git a/clang/test/CodeGen/x86-atomic-long_double.c b/clang/test/CodeGen/X86/x86-atomic-long_double.c similarity index 100% rename from clang/test/CodeGen/x86-atomic-long_double.c rename to clang/test/CodeGen/X86/x86-atomic-long_double.c diff --git a/clang/test/CodeGen/x86-bswap.c b/clang/test/CodeGen/X86/x86-bswap.c similarity index 100% rename from clang/test/CodeGen/x86-bswap.c rename to clang/test/CodeGen/X86/x86-bswap.c diff --git a/clang/test/CodeGen/x86-builtins-vector-width.c b/clang/test/CodeGen/X86/x86-builtins-vector-width.c similarity index 100% rename from clang/test/CodeGen/x86-builtins-vector-width.c rename to clang/test/CodeGen/X86/x86-builtins-vector-width.c diff --git a/clang/test/CodeGen/x86-builtins.c b/clang/test/CodeGen/X86/x86-builtins.c similarity index 100% rename from clang/test/CodeGen/x86-builtins.c rename to clang/test/CodeGen/X86/x86-builtins.c diff --git a/clang/test/CodeGen/x86-cf-protection.c b/clang/test/CodeGen/X86/x86-cf-protection.c similarity index 100% rename from clang/test/CodeGen/x86-cf-protection.c rename to clang/test/CodeGen/X86/x86-cf-protection.c diff --git 
a/clang/test/CodeGen/x86-crc-builtins.c b/clang/test/CodeGen/X86/x86-crc-builtins.c similarity index 100% rename from clang/test/CodeGen/x86-crc-builtins.c rename to clang/test/CodeGen/X86/x86-crc-builtins.c diff --git a/clang/test/CodeGen/x86-enqcmd-builtins.c b/clang/test/CodeGen/X86/x86-enqcmd-builtins.c similarity index 100% rename from clang/test/CodeGen/x86-enqcmd-builtins.c rename to clang/test/CodeGen/X86/x86-enqcmd-builtins.c diff --git a/clang/test/CodeGen/x86-inline-asm-min-vector-width.c b/clang/test/CodeGen/X86/x86-inline-asm-min-vector-width.c similarity index 100% rename from clang/test/CodeGen/x86-inline-asm-min-vector-width.c rename to clang/test/CodeGen/X86/x86-inline-asm-min-vector-width.c diff --git a/clang/test/CodeGen/x86-inline-asm-v-constraint.c b/clang/test/CodeGen/X86/x86-inline-asm-v-constraint.c similarity index 100% rename from clang/test/CodeGen/x86-inline-asm-v-constraint.c rename to clang/test/CodeGen/X86/x86-inline-asm-v-constraint.c diff --git a/clang/test/CodeGen/x86-long-double.cpp b/clang/test/CodeGen/X86/x86-long-double.cpp similarity index 100% rename from clang/test/CodeGen/x86-long-double.cpp rename to clang/test/CodeGen/X86/x86-long-double.cpp diff --git a/clang/test/CodeGen/x86-nontemporal.c b/clang/test/CodeGen/X86/x86-nontemporal.c similarity index 100% rename from clang/test/CodeGen/x86-nontemporal.c rename to clang/test/CodeGen/X86/x86-nontemporal.c diff --git a/clang/test/CodeGen/x86-serialize-intrin.c b/clang/test/CodeGen/X86/x86-serialize-intrin.c similarity index 100% rename from clang/test/CodeGen/x86-serialize-intrin.c rename to clang/test/CodeGen/X86/x86-serialize-intrin.c diff --git a/clang/test/CodeGen/x86-soft-float.c b/clang/test/CodeGen/X86/x86-soft-float.c similarity index 100% rename from clang/test/CodeGen/x86-soft-float.c rename to clang/test/CodeGen/X86/x86-soft-float.c diff --git a/clang/test/CodeGen/x86-tsxldtrk-builtins.c b/clang/test/CodeGen/X86/x86-tsxldtrk-builtins.c similarity index 100% rename from clang/test/CodeGen/x86-tsxldtrk-builtins.c rename to clang/test/CodeGen/X86/x86-tsxldtrk-builtins.c diff --git a/clang/test/CodeGen/x86-vec-i128.c b/clang/test/CodeGen/X86/x86-vec-i128.c similarity index 100% rename from clang/test/CodeGen/x86-vec-i128.c rename to clang/test/CodeGen/X86/x86-vec-i128.c diff --git a/clang/test/CodeGen/x86-vec-struct-packing.c b/clang/test/CodeGen/X86/x86-vec-struct-packing.c similarity index 100% rename from clang/test/CodeGen/x86-vec-struct-packing.c rename to clang/test/CodeGen/X86/x86-vec-struct-packing.c diff --git a/clang/test/CodeGen/x86-vector-width.c b/clang/test/CodeGen/X86/x86-vector-width.c similarity index 100% rename from clang/test/CodeGen/x86-vector-width.c rename to clang/test/CodeGen/X86/x86-vector-width.c diff --git a/clang/test/CodeGen/x86.c b/clang/test/CodeGen/X86/x86.c similarity index 100% rename from clang/test/CodeGen/x86.c rename to clang/test/CodeGen/X86/x86.c diff --git a/clang/test/CodeGen/x86_32-arguments-darwin.c b/clang/test/CodeGen/X86/x86_32-arguments-darwin.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-darwin.c rename to clang/test/CodeGen/X86/x86_32-arguments-darwin.c diff --git a/clang/test/CodeGen/x86_32-arguments-iamcu.c b/clang/test/CodeGen/X86/x86_32-arguments-iamcu.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-iamcu.c rename to clang/test/CodeGen/X86/x86_32-arguments-iamcu.c diff --git a/clang/test/CodeGen/x86_32-arguments-linux.c b/clang/test/CodeGen/X86/x86_32-arguments-linux.c similarity index 100% 
rename from clang/test/CodeGen/x86_32-arguments-linux.c rename to clang/test/CodeGen/X86/x86_32-arguments-linux.c diff --git a/clang/test/CodeGen/x86_32-arguments-nommx.c b/clang/test/CodeGen/X86/x86_32-arguments-nommx.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-nommx.c rename to clang/test/CodeGen/X86/x86_32-arguments-nommx.c diff --git a/clang/test/CodeGen/x86_32-arguments-realign.c b/clang/test/CodeGen/X86/x86_32-arguments-realign.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-realign.c rename to clang/test/CodeGen/X86/x86_32-arguments-realign.c diff --git a/clang/test/CodeGen/x86_32-arguments-win32.c b/clang/test/CodeGen/X86/x86_32-arguments-win32.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-win32.c rename to clang/test/CodeGen/X86/x86_32-arguments-win32.c diff --git a/clang/test/CodeGen/x86_32-fpcc-struct-return.c b/clang/test/CodeGen/X86/x86_32-fpcc-struct-return.c similarity index 100% rename from clang/test/CodeGen/x86_32-fpcc-struct-return.c rename to clang/test/CodeGen/X86/x86_32-fpcc-struct-return.c diff --git a/clang/test/CodeGen/x86_32-inline-asm.c b/clang/test/CodeGen/X86/x86_32-inline-asm.c similarity index 100% rename from clang/test/CodeGen/x86_32-inline-asm.c rename to clang/test/CodeGen/X86/x86_32-inline-asm.c diff --git a/clang/test/CodeGen/x86_32-xsave.c b/clang/test/CodeGen/X86/x86_32-xsave.c similarity index 100% rename from clang/test/CodeGen/x86_32-xsave.c rename to clang/test/CodeGen/X86/x86_32-xsave.c diff --git a/clang/test/CodeGen/x86_64-PR42672.c b/clang/test/CodeGen/X86/x86_64-PR42672.c similarity index 100% rename from clang/test/CodeGen/x86_64-PR42672.c rename to clang/test/CodeGen/X86/x86_64-PR42672.c diff --git a/clang/test/CodeGen/x86_64-arguments-darwin.c b/clang/test/CodeGen/X86/x86_64-arguments-darwin.c similarity index 100% rename from clang/test/CodeGen/x86_64-arguments-darwin.c rename to clang/test/CodeGen/X86/x86_64-arguments-darwin.c diff --git a/clang/test/CodeGen/x86_64-arguments-nacl.c b/clang/test/CodeGen/X86/x86_64-arguments-nacl.c similarity index 100% rename from clang/test/CodeGen/x86_64-arguments-nacl.c rename to clang/test/CodeGen/X86/x86_64-arguments-nacl.c diff --git a/clang/test/CodeGen/x86_64-arguments-win32.c b/clang/test/CodeGen/X86/x86_64-arguments-win32.c similarity index 100% rename from clang/test/CodeGen/x86_64-arguments-win32.c rename to clang/test/CodeGen/X86/x86_64-arguments-win32.c diff --git a/clang/test/CodeGen/x86_64-arguments.c b/clang/test/CodeGen/X86/x86_64-arguments.c similarity index 100% rename from clang/test/CodeGen/x86_64-arguments.c rename to clang/test/CodeGen/X86/x86_64-arguments.c diff --git a/clang/test/CodeGen/x86_64-atomic-128.c b/clang/test/CodeGen/X86/x86_64-atomic-128.c similarity index 100% rename from clang/test/CodeGen/x86_64-atomic-128.c rename to clang/test/CodeGen/X86/x86_64-atomic-128.c diff --git a/clang/test/CodeGen/x86_64-floatvectors.c b/clang/test/CodeGen/X86/x86_64-floatvectors.c similarity index 100% rename from clang/test/CodeGen/x86_64-floatvectors.c rename to clang/test/CodeGen/X86/x86_64-floatvectors.c diff --git a/clang/test/CodeGen/x86_64-instrument-functions.c b/clang/test/CodeGen/X86/x86_64-instrument-functions.c similarity index 100% rename from clang/test/CodeGen/x86_64-instrument-functions.c rename to clang/test/CodeGen/X86/x86_64-instrument-functions.c diff --git a/clang/test/CodeGen/x86_64-longdouble.c b/clang/test/CodeGen/X86/x86_64-longdouble.c similarity index 100% rename from 
clang/test/CodeGen/x86_64-longdouble.c rename to clang/test/CodeGen/X86/x86_64-longdouble.c diff --git a/clang/test/CodeGen/x86_64-mno-sse.c b/clang/test/CodeGen/X86/x86_64-mno-sse.c similarity index 100% rename from clang/test/CodeGen/x86_64-mno-sse.c rename to clang/test/CodeGen/X86/x86_64-mno-sse.c diff --git a/clang/test/CodeGen/x86_64-mno-sse2.c b/clang/test/CodeGen/X86/x86_64-mno-sse2.c similarity index 100% rename from clang/test/CodeGen/x86_64-mno-sse2.c rename to clang/test/CodeGen/X86/x86_64-mno-sse2.c diff --git a/clang/test/CodeGen/x86_64-profiling-keep-fp.c b/clang/test/CodeGen/X86/x86_64-profiling-keep-fp.c similarity index 100% rename from clang/test/CodeGen/x86_64-profiling-keep-fp.c rename to clang/test/CodeGen/X86/x86_64-profiling-keep-fp.c diff --git a/clang/test/CodeGen/x86_64-xsave.c b/clang/test/CodeGen/X86/x86_64-xsave.c similarity index 100% rename from clang/test/CodeGen/x86_64-xsave.c rename to clang/test/CodeGen/X86/x86_64-xsave.c diff --git a/clang/test/CodeGen/x86_inlineasm_curly_bracket_escape.c b/clang/test/CodeGen/X86/x86_inlineasm_curly_bracket_escape.c similarity index 100% rename from clang/test/CodeGen/x86_inlineasm_curly_bracket_escape.c rename to clang/test/CodeGen/X86/x86_inlineasm_curly_bracket_escape.c diff --git a/clang/test/Driver/darwin-infer-simulator-sdkroot.c b/clang/test/Driver/darwin-infer-simulator-sdkroot.c index a084bf6346b621..7d4d4070b81a11 100644 --- a/clang/test/Driver/darwin-infer-simulator-sdkroot.c +++ b/clang/test/Driver/darwin-infer-simulator-sdkroot.c @@ -17,7 +17,7 @@ // // RUN: rm -rf %t/SDKs/iPhoneSimulator8.0.sdk // RUN: mkdir -p %t/SDKs/iPhoneSimulator8.0.sdk -// RUN: env SDKROOT=%t/SDKs/iPhoneSimulator8.0.sdk %clang %s -mlinker-version=400 -### 2>&1 \ +// RUN: env SDKROOT=%t/SDKs/iPhoneSimulator8.0.sdk %clang -arch x86_64 %s -mlinker-version=400 -### 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-SIMULATOR %s // // CHECK-SIMULATOR: clang @@ -27,6 +27,18 @@ // CHECK-SIMULATOR: "-ios_simulator_version_min" "8.0.0" // // +// RUN: rm -rf %t/SDKs/iPhoneSimulator14.0.sdk +// RUN: mkdir -p %t/SDKs/iPhoneSimulator14.0.sdk +// RUN: env SDKROOT=%t/SDKs/iPhoneSimulator14.0.sdk %clang -arch arm64 %s -mlinker-version=400 -### 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-SIMULATOR-ARM64 %s +// +// CHECK-SIMULATOR-ARM64: clang +// CHECK-SIMULATOR-ARM64: "-cc1" +// CHECK-SIMULATOR-ARM64: -apple-ios14.0.0-simulator" +// CHECK-SIMULATOR-ARM64: ld +// CHECK-SIMULATOR-ARM64: "-ios_simulator_version_min" "14.0.0" +// +// // RUN: rm -rf %t/SDKs/WatchOS3.0.sdk // RUN: mkdir -p %t/SDKs/WatchOS3.0.sdk // RUN: env SDKROOT=%t/SDKs/WatchOS3.0.sdk %clang %s -mlinker-version=400 -### 2>&1 \ @@ -43,7 +55,7 @@ // // RUN: rm -rf %t/SDKs/WatchSimulator3.0.sdk // RUN: mkdir -p %t/SDKs/WatchSimulator3.0.sdk -// RUN: env SDKROOT=%t/SDKs/WatchSimulator3.0.sdk %clang %s -mlinker-version=400 -### 2>&1 \ +// RUN: env SDKROOT=%t/SDKs/WatchSimulator3.0.sdk %clang -arch x86_64 %s -mlinker-version=400 -### 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-WATCH-SIMULATOR %s // // CHECK-WATCH-SIMULATOR: clang @@ -53,6 +65,18 @@ // CHECK-WATCH-SIMULATOR: "-watchos_simulator_version_min" "3.0.0" // // +// RUN: rm -rf %t/SDKs/WatchSimulator7.0.sdk +// RUN: mkdir -p %t/SDKs/WatchSimulator7.0.sdk +// RUN: env SDKROOT=%t/SDKs/WatchSimulator7.0.sdk %clang -arch arm64 %s -mlinker-version=400 -### 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-WATCH-SIMULATOR-ARM64 %s +// +// CHECK-WATCH-SIMULATOR-ARM64: clang +// CHECK-WATCH-SIMULATOR-ARM64: "-cc1" +// 
CHECK-WATCH-SIMULATOR-ARM64: -apple-watchos7.0.0-simulator" +// CHECK-WATCH-SIMULATOR-ARM64: ld +// CHECK-WATCH-SIMULATOR-ARM64: "-watchos_simulator_version_min" "7.0.0" +// +// // RUN: rm -rf %t/SDKs/AppleTVOS10.0.sdk // RUN: mkdir -p %t/SDKs/AppleTVOS10.0.sdk // RUN: env SDKROOT=%t/SDKs/AppleTVOS10.0.sdk %clang %s -mlinker-version=400 -### 2>&1 \ @@ -67,7 +91,7 @@ // // RUN: rm -rf %t/SDKs/AppleTVSimulator10.0.sdk // RUN: mkdir -p %t/SDKs/AppleTVSimulator10.0.sdk -// RUN: env SDKROOT=%t/SDKs/AppleTVSimulator10.0.sdk %clang %s -mlinker-version=400 -### 2>&1 \ +// RUN: env SDKROOT=%t/SDKs/AppleTVSimulator10.0.sdk %clang -arch x86_64 %s -mlinker-version=400 -### 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-TV-SIMULATOR %s // // CHECK-TV-SIMULATOR: clang @@ -75,3 +99,16 @@ // CHECK-TV-SIMULATOR: -apple-tvos10.0.0-simulator" // CHECK-TV-SIMULATOR: ld // CHECK-TV-SIMULATOR: "-tvos_simulator_version_min" "10.0.0" +// +// +// RUN: rm -rf %t/SDKs/AppleTVSimulator14.0.sdk +// RUN: mkdir -p %t/SDKs/AppleTVSimulator14.0.sdk +// RUN: env SDKROOT=%t/SDKs/AppleTVSimulator14.0.sdk %clang -arch arm64 %s -mlinker-version=400 -### 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-TV-SIMULATOR-ARM64 %s +// +// CHECK-TV-SIMULATOR-ARM64: clang +// CHECK-TV-SIMULATOR-ARM64: "-cc1" +// CHECK-TV-SIMULATOR-ARM64: -apple-tvos14.0.0-simulator" +// CHECK-TV-SIMULATOR-ARM64: ld +// CHECK-TV-SIMULATOR-ARM64: "-tvos_simulator_version_min" "14.0.0" + diff --git a/clang/test/Driver/hip-offload-arch.hip b/clang/test/Driver/hip-offload-arch.hip new file mode 100644 index 00000000000000..4cd37b5815f737 --- /dev/null +++ b/clang/test/Driver/hip-offload-arch.hip @@ -0,0 +1,10 @@ +// REQUIRES: clang-driver, x86-registered-target, amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --offload-arch=gfx1030 \ +// RUN: --offload-arch=gfx1031 \ +// RUN: -nogpuinc -nogpulib \ +// RUN: %s 2>&1 | FileCheck %s + +// CHECK: {{"[^"]*clang[^"]*".* "-target-cpu" "gfx1030"}} +// CHECK: {{"[^"]*clang[^"]*".* "-target-cpu" "gfx1031"}} diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index 5326596fee93c5..3c369ace32d51f 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -2525,6 +2525,7 @@ // CHECK_AMDFAM10_M32: #define __SSE4A__ 1 // CHECK_AMDFAM10_M32: #define __SSE_MATH__ 1 // CHECK_AMDFAM10_M32: #define __SSE__ 1 +// CHECK_AMDFAM10_M32-NOT: #define __SSSE3__ 1 // CHECK_AMDFAM10_M32: #define __amdfam10 1 // CHECK_AMDFAM10_M32: #define __amdfam10__ 1 // CHECK_AMDFAM10_M32: #define __i386 1 @@ -2547,6 +2548,7 @@ // CHECK_AMDFAM10_M64: #define __SSE4A__ 1 // CHECK_AMDFAM10_M64: #define __SSE_MATH__ 1 // CHECK_AMDFAM10_M64: #define __SSE__ 1 +// CHECK_AMDFAM10_M64-NOT: #define __SSSE3__ 1 // CHECK_AMDFAM10_M64: #define __amd64 1 // CHECK_AMDFAM10_M64: #define __amd64__ 1 // CHECK_AMDFAM10_M64: #define __amdfam10 1 diff --git a/clang/test/SemaCXX/fold_expr_expansion_limit.cpp b/clang/test/SemaCXX/fold_expr_expansion_limit.cpp new file mode 100644 index 00000000000000..600278da78287c --- /dev/null +++ b/clang/test/SemaCXX/fold_expr_expansion_limit.cpp @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -fsyntax-only -fbracket-depth 2 -verify -std=c++17 %s + +template struct seq { + constexpr bool zero() { return (true && ... 
&& (V == 0)); }; // expected-error {{instantiating fold expression with 3 arguments exceeded expression nesting limit of 2}} \ + expected-note {{use -fbracket-depth}} +}; +constexpr unsigned N = 3; +auto x = __make_integer_seq{}; +static_assert(!x.zero(), ""); // expected-note {{in instantiation of member function}} diff --git a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp index d1520b1decbd32..91bd15def577dd 100644 --- a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp +++ b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp @@ -5036,8 +5036,7 @@ void spawn_fake_flight_control_thread(void) { } extern const char *deque_log_msg(void) __attribute__((requires_capability(Logger))); -void logger_entry(void) __attribute__((requires_capability(Logger))) - __attribute__((requires_capability(!FlightControl))) { +void logger_entry(void) __attribute__((requires_capability(Logger))) { const char *msg; while ((msg = deque_log_msg())) { @@ -5045,13 +5044,13 @@ void logger_entry(void) __attribute__((requires_capability(Logger))) } } -void spawn_fake_logger_thread(void) __attribute__((requires_capability(!FlightControl))) { +void spawn_fake_logger_thread(void) { acquire(Logger); logger_entry(); release(Logger); } -int main(void) __attribute__((requires_capability(!FlightControl))) { +int main(void) { spawn_fake_flight_control_thread(); spawn_fake_logger_thread(); diff --git a/clang/test/SemaCXX/warn-thread-safety-negative.cpp b/clang/test/SemaCXX/warn-thread-safety-negative.cpp index 68e30f4a3225bd..456fe16e6574e4 100644 --- a/clang/test/SemaCXX/warn-thread-safety-negative.cpp +++ b/clang/test/SemaCXX/warn-thread-safety-negative.cpp @@ -81,35 +81,6 @@ class Foo { } // end namespace SimpleTest -Mutex globalMutex; - -namespace ScopeTest { - -void f() EXCLUSIVE_LOCKS_REQUIRED(!globalMutex); -void fq() EXCLUSIVE_LOCKS_REQUIRED(!::globalMutex); - -namespace ns { - Mutex globalMutex; - void f() EXCLUSIVE_LOCKS_REQUIRED(!globalMutex); - void fq() EXCLUSIVE_LOCKS_REQUIRED(!ns::globalMutex); -} - -void testGlobals() EXCLUSIVE_LOCKS_REQUIRED(!ns::globalMutex) { - f(); // expected-warning {{calling function 'f' requires negative capability '!globalMutex'}} - fq(); // expected-warning {{calling function 'fq' requires negative capability '!globalMutex'}} - ns::f(); - ns::fq(); -} - -void testNamespaceGlobals() EXCLUSIVE_LOCKS_REQUIRED(!globalMutex) { - f(); - fq(); - ns::f(); // expected-warning {{calling function 'f' requires negative capability '!globalMutex'}} - ns::fq(); // expected-warning {{calling function 'fq' requires negative capability '!globalMutex'}} -} - -} // end namespace ScopeTest - namespace DoubleAttribute { struct Foo { diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index b198efa4af9ecd..98e002003159c9 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -7565,6 +7565,21 @@ TEST_F(FormatTest, UnderstandsTemplateParameters) { verifyFormat("static_assert(is_convertible::value, \"AAA\");"); verifyFormat("Constructor(A... a) : a_(X{std::forward(a)}...) 
{}"); verifyFormat("< < < < < < < < < < < < < < < < < < < < < < < < < < < < < <"); + verifyFormat("some_templated_type"); +} + +TEST_F(FormatTest, UnderstandsShiftOperators) { + verifyFormat("if (i < x >> 1)"); + verifyFormat("while (i < x >> 1)"); + verifyFormat("for (unsigned i = 0; i < i; ++i, v = v >> 1)"); + verifyFormat("for (unsigned i = 0; i < x >> 1; ++i, v = v >> 1)"); + verifyFormat( + "for (std::vector::iterator i = 0; i < x >> 1; ++i, v = v >> 1)"); + verifyFormat("Foo.call>()"); + verifyFormat("if (Foo.call>() == 0)"); + verifyFormat("for (std::vector>::iterator i = 0; i < x >> 1; " + "++i, v = v >> 1)"); + verifyFormat("if (w>, 1>::t)"); } TEST_F(FormatTest, BitshiftOperatorWidth) { diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp index aab20008a49748..225885437267be 100644 --- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -548,9 +548,6 @@ namespace n { struct S { }; } void test() { - // FIXME: Remove the `UnknownExpression` wrapping `s1` and `s2`. This - // `UnknownExpression` comes from a leaf `CXXConstructExpr` in the - // ClangAST. We need to ignore leaf implicit nodes. [[::n::S s1]]; [[n::S s2]]; } @@ -564,8 +561,7 @@ SimpleDeclaration | `-'::' ListDelimiter |-'S' `-SimpleDeclarator Declarator - `-UnknownExpression - `-'s1' + `-'s1' )txt", R"txt( SimpleDeclaration @@ -575,8 +571,7 @@ SimpleDeclaration | `-'::' ListDelimiter |-'S' `-SimpleDeclarator Declarator - `-UnknownExpression - `-'s2' + `-'s2' )txt"})); } @@ -608,8 +603,7 @@ SimpleDeclaration | `-'::' ListDelimiter |-'S' `-SimpleDeclarator Declarator - `-UnknownExpression - `-'s1' + `-'s1' )txt", R"txt( SimpleDeclaration @@ -623,8 +617,7 @@ SimpleDeclaration | `-'::' ListDelimiter |-'S' `-SimpleDeclarator Declarator - `-UnknownExpression - `-'s2' + `-'s2' )txt"})); } @@ -1745,19 +1738,15 @@ TEST_P(SyntaxTreeTest, OverloadedOperator_Plus) { struct X { friend X operator+(X, const X&); }; -// FIXME: Remove additional `UnknownExpression` wrapping `x`. For that, ignore -// implicit copy constructor called on `x`. This should've been ignored already, -// as we `IgnoreImplicit` when traversing an `Stmt`. 
void test(X x, X y) { [[x + y]]; } )cpp", {R"txt( BinaryOperatorExpression Expression -|-UnknownExpression LeftHandSide -| `-IdExpression -| `-UnqualifiedId UnqualifiedId -| `-'x' +|-IdExpression LeftHandSide +| `-UnqualifiedId UnqualifiedId +| `-'x' |-'+' OperatorToken `-IdExpression RightHandSide `-UnqualifiedId UnqualifiedId @@ -2744,6 +2733,54 @@ CallExpression Expression )txt"})); } +TEST_P(SyntaxTreeTest, CallExpression_DefaultArguments) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +void f(int i = 1, char c = '2'); +void test() { + [[f()]]; + [[f(1)]]; + [[f(1, '2')]]; +} +)cpp", + {R"txt( +CallExpression Expression +|-IdExpression Callee +| `-UnqualifiedId UnqualifiedId +| `-'f' +|-'(' OpenParen +`-')' CloseParen + )txt", + R"txt( +CallExpression Expression +|-IdExpression Callee +| `-UnqualifiedId UnqualifiedId +| `-'f' +|-'(' OpenParen +|-CallArguments Arguments +| `-IntegerLiteralExpression ListElement +| `-'1' LiteralToken +`-')' CloseParen + )txt", + R"txt( +CallExpression Expression +|-IdExpression Callee +| `-UnqualifiedId UnqualifiedId +| `-'f' +|-'(' OpenParen +|-CallArguments Arguments +| |-IntegerLiteralExpression ListElement +| | `-'1' LiteralToken +| |-',' ListDelimiter +| `-CharacterLiteralExpression ListElement +| `-''2'' LiteralToken +`-')' CloseParen +)txt"})); +} + TEST_P(SyntaxTreeTest, MultipleDeclaratorsGrouping) { EXPECT_TRUE(treeDumpEqual( R"cpp( @@ -3821,26 +3858,137 @@ TranslationUnit Detached )txt")); } +TEST_P(SyntaxTreeTest, InitDeclarator_Equal) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct S { S(int);}; +void test() { + [[S s = 1]]; +} +)cpp", + {R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + |-'s' + |-'=' + `-IntegerLiteralExpression + `-'1' LiteralToken +)txt"})); +} + TEST_P(SyntaxTreeTest, InitDeclarator_Brace) { if (!GetParam().isCXX11OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( -int a {}; +struct S { + S(); + S(int); + S(int, float); +}; +void test(){ + // FIXME: 's...' is a declarator and '{...}' is initializer + [[S s0{}]]; + [[S s1{1}]]; + [[S s2{1, 2.}]]; +} )cpp", - R"txt( -TranslationUnit Detached -`-SimpleDeclaration - |-'int' - |-SimpleDeclarator Declarator - | |-'a' - | `-UnknownExpression - | `-UnknownExpression - | |-'{' - | `-'}' - `-';' -)txt")); + {R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + `-UnknownExpression + |-'s0' + |-'{' + `-'}' + )txt", + R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + `-UnknownExpression + |-'s1' + |-'{' + |-IntegerLiteralExpression + | `-'1' LiteralToken + `-'}' + )txt", + R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + `-UnknownExpression + |-'s2' + |-'{' + |-IntegerLiteralExpression + | `-'1' LiteralToken + |-',' + |-FloatingLiteralExpression + | `-'2.' 
LiteralToken + `-'}' +)txt"})); +} + +TEST_P(SyntaxTreeTest, InitDeclarator_EqualBrace) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct S { + S(); + S(int); + S(int, float); +}; +void test() { + // FIXME: '= {...}' is initializer + [[S s0 = {}]]; + [[S s1 = {1}]]; + [[S s2 = {1, 2.}]]; +} +)cpp", + {R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + |-'s0' + |-'=' + `-UnknownExpression + |-'{' + `-'}' + )txt", + R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + |-'s1' + |-'=' + `-UnknownExpression + |-'{' + |-IntegerLiteralExpression + | `-'1' LiteralToken + `-'}' + )txt", + R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + |-'s2' + |-'=' + `-UnknownExpression + |-'{' + |-IntegerLiteralExpression + | `-'1' LiteralToken + |-',' + |-FloatingLiteralExpression + | `-'2.' LiteralToken + `-'}' +)txt"})); } TEST_P(SyntaxTreeTest, InitDeclarator_Paren) { @@ -3851,23 +3999,284 @@ TEST_P(SyntaxTreeTest, InitDeclarator_Paren) { R"cpp( struct S { S(int); + S(int, float); +}; +// FIXME: 's...' is a declarator and '(...)' is initializer +[[S s1(1);]] +[[S s2(1, 2.);]] +)cpp", + {R"txt( +SimpleDeclaration +|-'S' +|-SimpleDeclarator Declarator +| `-UnknownExpression +| |-'s1' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| `-')' +`-';' + )txt", + R"txt( +SimpleDeclaration +|-'S' +|-SimpleDeclarator Declarator +| `-UnknownExpression +| |-'s2' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| |-',' +| |-FloatingLiteralExpression +| | `-'2.' LiteralToken +| `-')' +`-';' +)txt"})); +} + +TEST_P(SyntaxTreeTest, InitDeclarator_Paren_DefaultArguments) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct S { + S(int i = 1, float = 2.); }; -[[S s(1);]] +[[S s0;]] +// FIXME: 's...' is a declarator and '(...)' is initializer +[[S s1(1);]] +[[S s2(1, 2.);]] )cpp", {R"txt( SimpleDeclaration |-'S' |-SimpleDeclarator Declarator +| `-'s0' +`-';' + )txt", + R"txt( +SimpleDeclaration +|-'S' +|-SimpleDeclarator Declarator +| `-UnknownExpression +| |-'s1' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| `-')' +`-';' + )txt", + R"txt( +SimpleDeclaration +|-'S' +|-SimpleDeclarator Declarator | `-UnknownExpression -| |-'s' +| |-'s2' | |-'(' | |-IntegerLiteralExpression | | `-'1' LiteralToken +| |-',' +| |-FloatingLiteralExpression +| | `-'2.' 
LiteralToken | `-')' `-';' )txt"})); } +TEST_P(SyntaxTreeTest, ImplicitConversion_Argument) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(int); +}; +void TakeX(const X&); +void test() { + [[TakeX(1)]]; +} +)cpp", + {R"txt( +CallExpression Expression +|-IdExpression Callee +| `-UnqualifiedId UnqualifiedId +| `-'TakeX' +|-'(' OpenParen +|-CallArguments Arguments +| `-IntegerLiteralExpression ListElement +| `-'1' LiteralToken +`-')' CloseParen +)txt"})); +} + +TEST_P(SyntaxTreeTest, ImplicitConversion_Return) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(int); +}; +X CreateX(){ + [[return 1;]] +} +)cpp", + {R"txt( +ReturnStatement Statement +|-'return' IntroducerKeyword +|-IntegerLiteralExpression ReturnValue +| `-'1' LiteralToken +`-';' +)txt"})); +} + +TEST_P(SyntaxTreeTest, ConstructorCall_ZeroArguments) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(); +}; +X test() { + [[return X();]] +} +)cpp", + {R"txt( +ReturnStatement Statement +|-'return' IntroducerKeyword +|-UnknownExpression ReturnValue +| |-'X' +| |-'(' +| `-')' +`-';' +)txt"})); +} + +TEST_P(SyntaxTreeTest, ConstructorCall_OneArgument) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(int); +}; +X test() { + [[return X(1);]] +} +)cpp", + {R"txt( +ReturnStatement Statement +|-'return' IntroducerKeyword +|-UnknownExpression ReturnValue +| |-'X' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| `-')' +`-';' +)txt"})); +} + +TEST_P(SyntaxTreeTest, ConstructorCall_MultipleArguments) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(int, char); +}; +X test() { + [[return X(1, '2');]] +} +)cpp", + {R"txt( +ReturnStatement Statement +|-'return' IntroducerKeyword +|-UnknownExpression ReturnValue +| |-'X' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| |-',' +| |-CharacterLiteralExpression +| | `-''2'' LiteralToken +| `-')' +`-';' +)txt"})); +} + +TEST_P(SyntaxTreeTest, ConstructorCall_DefaultArguments) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(int i = 1, char c = '2'); +}; +X test() { + auto x0 = [[X()]]; + auto x1 = [[X(1)]]; + auto x2 = [[X(1, '2')]]; +} +)cpp", + {R"txt( +UnknownExpression +|-'X' +|-'(' +`-')' +)txt", + R"txt( +UnknownExpression +|-'X' +|-'(' +|-IntegerLiteralExpression +| `-'1' LiteralToken +`-')' +)txt", + R"txt( +UnknownExpression +|-'X' +|-'(' +|-IntegerLiteralExpression +| `-'1' LiteralToken +|-',' +|-CharacterLiteralExpression +| `-''2'' LiteralToken +`-')' +)txt"})); +} + +TEST_P(SyntaxTreeTest, TypeConversion_FunctionalNotation) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +float test() { + [[return float(1);]] +} +)cpp", + {R"txt( +ReturnStatement Statement +|-'return' IntroducerKeyword +|-UnknownExpression ReturnValue +| |-'float' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| `-')' +`-';' +)txt"})); +} + TEST_P(SyntaxTreeTest, ArrayDeclarator_Simple) { EXPECT_TRUE(treeDumpEqual( R"cpp( @@ -4106,6 +4515,61 @@ TranslationUnit Detached )txt")); } +TEST_P(SyntaxTreeTest, ParametersAndQualifiers_InFreeFunctions_Default_One) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +int 
func1([[int a = 1]]); +)cpp", + {R"txt( +ParameterDeclarationList Parameters +`-SimpleDeclaration ListElement + |-'int' + `-SimpleDeclarator Declarator + |-'a' + |-'=' + `-IntegerLiteralExpression + `-'1' LiteralToken +)txt"})); +} + +TEST_P(SyntaxTreeTest, + ParametersAndQualifiers_InFreeFunctions_Default_Multiple) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +int func2([[int *ap, int a = 1, char c = '2']]); +)cpp", + {R"txt( +ParameterDeclarationList Parameters +|-SimpleDeclaration ListElement +| |-'int' +| `-SimpleDeclarator Declarator +| |-'*' +| `-'ap' +|-',' ListDelimiter +|-SimpleDeclaration ListElement +| |-'int' +| `-SimpleDeclarator Declarator +| |-'a' +| |-'=' +| `-IntegerLiteralExpression +| `-'1' LiteralToken +|-',' ListDelimiter +`-SimpleDeclaration ListElement + |-'char' + `-SimpleDeclarator Declarator + |-'c' + |-'=' + `-CharacterLiteralExpression + `-''2'' LiteralToken +)txt"})); +} + TEST_P(SyntaxTreeTest, ParametersAndQualifiers_InVariadicFunctionTemplate_ParameterPack) { if (!GetParam().isCXX11OrLater() || GetParam().hasDelayedTemplateParsing()) { diff --git a/clang/unittests/Tooling/ToolingTest.cpp b/clang/unittests/Tooling/ToolingTest.cpp index cc6f453284d719..691a847d5a7158 100644 --- a/clang/unittests/Tooling/ToolingTest.cpp +++ b/clang/unittests/Tooling/ToolingTest.cpp @@ -563,6 +563,40 @@ TEST(ClangToolTest, StripDependencyFileAdjusterShowIncludes) { EXPECT_TRUE(HasFlag("-c")); } +// Check getClangStripDependencyFileAdjuster doesn't strip args when using the +// MSVC cl.exe driver +TEST(ClangToolTest, StripDependencyFileAdjusterMsvc) { + FixedCompilationDatabase Compilations( + "/", {"--driver-mode=cl", "-MD", "-MDd", "-MT", "-O1", "-MTd", "-MP"}); + + ClangTool Tool(Compilations, std::vector(1, "/a.cc")); + Tool.mapVirtualFile("/a.cc", "void a() {}"); + + std::unique_ptr Action( + newFrontendActionFactory()); + + CommandLineArguments FinalArgs; + ArgumentsAdjuster CheckFlagsAdjuster = + [&FinalArgs](const CommandLineArguments &Args, StringRef /*unused*/) { + FinalArgs = Args; + return Args; + }; + Tool.clearArgumentsAdjusters(); + Tool.appendArgumentsAdjuster(getClangStripDependencyFileAdjuster()); + Tool.appendArgumentsAdjuster(CheckFlagsAdjuster); + Tool.run(Action.get()); + + auto HasFlag = [&FinalArgs](const std::string &Flag) { + return llvm::find(FinalArgs, Flag) != FinalArgs.end(); + }; + EXPECT_TRUE(HasFlag("-MD")); + EXPECT_TRUE(HasFlag("-MDd")); + EXPECT_TRUE(HasFlag("-MT")); + EXPECT_TRUE(HasFlag("-O1")); + EXPECT_TRUE(HasFlag("-MTd")); + EXPECT_TRUE(HasFlag("-MP")); +} + // Check getClangStripPluginsAdjuster strips plugin related args. TEST(ClangToolTest, StripPluginsAdjuster) { FixedCompilationDatabase Compilations( diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt index 0a0294f937dbab..9967e293749bd8 100644 --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -81,34 +81,19 @@ if (COMPILER_RT_STANDALONE_BUILD) set_target_properties(intrinsics_gen PROPERTIES FOLDER "Compiler-RT Misc") endif() - if(CMAKE_VERSION VERSION_LESS 3.12) - # Find Python interpreter. - include(FindPythonInterp) - if(NOT PYTHONINTERP_FOUND) - message(FATAL_ERROR " - Unable to find Python interpreter required testing. 
Please install Python - or specify the PYTHON_EXECUTABLE CMake variable.") + find_package(Python3 COMPONENTS Interpreter) + if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") endif() + # Treat python2 as python3 add_executable(Python3::Interpreter IMPORTED) set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) - set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) - else() - find_package(Python3 COMPONENTS Interpreter) - if(NOT Python3_Interpreter_FOUND) - message(WARNING "Python3 not found, using python2 as a fallback") - find_package(Python2 COMPONENTS Interpreter REQUIRED) - if(Python2_VERSION VERSION_LESS 2.7) - message(SEND_ERROR "Python 2.7 or newer is required") - endif() - - # Treat python2 as python3 - add_executable(Python3::Interpreter IMPORTED) - set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${Python2_EXECUTABLE}) - set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) - endif() + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) endif() # Ensure that fat libraries are built correctly on Darwin diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index 7334b7200fc4c8..a15c569b42ba08 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -730,6 +730,9 @@ struct Allocator { // -------------------------- Chunk lookup ---------------------- // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg). + // Returns nullptr if AsanChunk is not yet initialized just after + // get_allocator().Allocate(), or is being destroyed just before + // get_allocator().Deallocate(). AsanChunk *GetAsanChunk(void *alloc_beg) { if (!alloc_beg) return nullptr; @@ -747,26 +750,6 @@ struct Allocator { return reinterpret_cast(alloc_beg); } - AsanChunk *GetAsanChunkDebug(void *alloc_beg) { - if (!alloc_beg) - return nullptr; - if (!allocator.FromPrimary(alloc_beg)) { - uptr *meta = reinterpret_cast(allocator.GetMetaData(alloc_beg)); - AsanChunk *m = reinterpret_cast(meta[1]); - Printf("GetAsanChunkDebug1 alloc_beg %p meta %p m %p\n", alloc_beg, meta, - m); - return m; - } - uptr *alloc_magic = reinterpret_cast(alloc_beg); - Printf( - "GetAsanChunkDebug2 alloc_beg %p alloc_magic %p alloc_magic[0] %p " - "alloc_magic[1] %p\n", - alloc_beg, alloc_magic, alloc_magic[0], alloc_magic[1]); - if (alloc_magic[0] == kAllocBegMagic) - return reinterpret_cast(alloc_magic[1]); - return reinterpret_cast(alloc_beg); - } - AsanChunk *GetAsanChunkByAddr(uptr p) { void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast(p)); return GetAsanChunk(alloc_beg); @@ -779,14 +762,6 @@ struct Allocator { return GetAsanChunk(alloc_beg); } - AsanChunk *GetAsanChunkByAddrFastLockedDebug(uptr p) { - void *alloc_beg = - allocator.GetBlockBeginFastLockedDebug(reinterpret_cast(p)); - Printf("GetAsanChunkByAddrFastLockedDebug p %p alloc_beg %p\n", p, - alloc_beg); - return GetAsanChunkDebug(alloc_beg); - } - uptr AllocationSize(uptr p) { AsanChunk *m = GetAsanChunkByAddr(p); if (!m) return 0; @@ -1090,38 +1065,19 @@ uptr PointsIntoChunk(void* p) { return 0; } -// Debug code. Delete once issue #1193 is chased down. 
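The nullptr cases called out in the new GetAsanChunk comment come from how a chunk is located inside a block; a simplified sketch of the primary-allocator path (the magic value mirrors kAllocBegMagic in asan_allocator.cpp, everything else is illustrative):

#include <cstdint>

struct AsanChunk; // opaque in this sketch

// A primary-allocator block either *is* the chunk, or begins with the pair
// [kAllocBegMagic, AsanChunk*] when the chunk was placed past a left redzone.
static const uintptr_t kAllocBegMagic = 0xCC6E96B9;

AsanChunk *getChunkSketch(void *alloc_beg) {
  if (!alloc_beg)
    return nullptr; // block not yet initialized, or already being destroyed
  uintptr_t *alloc_magic = reinterpret_cast<uintptr_t *>(alloc_beg);
  if (alloc_magic[0] == kAllocBegMagic)
    return reinterpret_cast<AsanChunk *>(alloc_magic[1]);
  return reinterpret_cast<AsanChunk *>(alloc_beg);
}

Between get_allocator().Allocate() and the write of that header, the magic word is not yet in place, which is exactly the race the removed debug scaffolding below was chasing.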
-extern "C" SANITIZER_WEAK_ATTRIBUTE const char *__lsan_current_stage; - -void GetUserBeginDebug(uptr chunk) { - Printf("GetUserBeginDebug1 chunk %p\n", chunk); - __asan::AsanChunk *m = - __asan::instance.GetAsanChunkByAddrFastLockedDebug(chunk); - Printf("GetUserBeginDebug2 m %p\n", m); -} - uptr GetUserBegin(uptr chunk) { __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLocked(chunk); - if (!m) { - Printf( - "ASAN is about to crash with a CHECK failure.\n" - "The ASAN developers are trying to chase down this bug,\n" - "so if you've encountered this bug please let us know.\n" - "See also: https://github.com/google/sanitizers/issues/1193\n" - "Internal ref b/149237057\n" - "chunk: %p caller %p __lsan_current_stage %s\n", - chunk, GET_CALLER_PC(), __lsan_current_stage); - GetUserBeginDebug(chunk); - } - CHECK(m); - return m->Beg(); + return m ? m->Beg() : 0; } LsanMetadata::LsanMetadata(uptr chunk) { - metadata_ = reinterpret_cast(chunk - __asan::kChunkHeaderSize); + metadata_ = chunk ? reinterpret_cast(chunk - __asan::kChunkHeaderSize) + : nullptr; } bool LsanMetadata::allocated() const { + if (!metadata_) + return false; __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); return atomic_load(&m->chunk_state, memory_order_relaxed) == __asan::CHUNK_ALLOCATED; diff --git a/compiler-rt/lib/asan/asan_flags.cpp b/compiler-rt/lib/asan/asan_flags.cpp index c5c70eaed737fe..cb6a89fe32ce75 100644 --- a/compiler-rt/lib/asan/asan_flags.cpp +++ b/compiler-rt/lib/asan/asan_flags.cpp @@ -26,10 +26,6 @@ namespace __asan { Flags asan_flags_dont_use_directly; // use via flags(). -static const char *MaybeCallAsanDefaultOptions() { - return (&__asan_default_options) ? __asan_default_options() : ""; -} - static const char *MaybeUseAsanDefaultOptionsCompileDefinition() { #ifdef ASAN_DEFAULT_OPTIONS return SANITIZER_STRINGIFY(ASAN_DEFAULT_OPTIONS); @@ -108,14 +104,14 @@ void InitializeFlags() { asan_parser.ParseString(asan_compile_def); // Override from user-specified string. 
- const char *asan_default_options = MaybeCallAsanDefaultOptions(); + const char *asan_default_options = __asan_default_options(); asan_parser.ParseString(asan_default_options); #if CAN_SANITIZE_UB - const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions(); + const char *ubsan_default_options = __ubsan_default_options(); ubsan_parser.ParseString(ubsan_default_options); #endif #if CAN_SANITIZE_LEAKS - const char *lsan_default_options = __lsan::MaybeCallLsanDefaultOptions(); + const char *lsan_default_options = __lsan_default_options(); lsan_parser.ParseString(lsan_default_options); #endif diff --git a/compiler-rt/lib/asan/asan_interface_internal.h b/compiler-rt/lib/asan/asan_interface_internal.h index f14cbbcb76a358..3e6e6602887465 100644 --- a/compiler-rt/lib/asan/asan_interface_internal.h +++ b/compiler-rt/lib/asan/asan_interface_internal.h @@ -173,8 +173,8 @@ extern "C" { SANITIZER_INTERFACE_ATTRIBUTE void __asan_print_accumulated_stats(); - SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE - const char* __asan_default_options(); + SANITIZER_INTERFACE_ATTRIBUTE + const char *__asan_default_options(); SANITIZER_INTERFACE_ATTRIBUTE extern uptr __asan_shadow_memory_dynamic_address; diff --git a/compiler-rt/lib/builtins/paritydi2.c b/compiler-rt/lib/builtins/paritydi2.c index 58e85f89e04371..350dceb8cef592 100644 --- a/compiler-rt/lib/builtins/paritydi2.c +++ b/compiler-rt/lib/builtins/paritydi2.c @@ -17,5 +17,9 @@ COMPILER_RT_ABI int __paritydi2(di_int a) { dwords x; x.all = a; - return __paritysi2(x.s.high ^ x.s.low); + su_int x2 = x.s.high ^ x.s.low; + x2 ^= x2 >> 16; + x2 ^= x2 >> 8; + x2 ^= x2 >> 4; + return (0x6996 >> (x2 & 0xF)) & 1; } diff --git a/compiler-rt/lib/builtins/parityti2.c b/compiler-rt/lib/builtins/parityti2.c index 79e920d8a02df5..011c8dd455620c 100644 --- a/compiler-rt/lib/builtins/parityti2.c +++ b/compiler-rt/lib/builtins/parityti2.c @@ -18,8 +18,14 @@ COMPILER_RT_ABI int __parityti2(ti_int a) { twords x; + dwords x2; x.all = a; - return __paritydi2(x.s.high ^ x.s.low); + x2.all = x.s.high ^ x.s.low; + su_int x3 = x2.s.high ^ x2.s.low; + x3 ^= x3 >> 16; + x3 ^= x3 >> 8; + x3 ^= x3 >> 4; + return (0x6996 >> (x3 & 0xF)) & 1; } #endif // CRT_HAS_128BIT diff --git a/compiler-rt/lib/cfi/cfi.cpp b/compiler-rt/lib/cfi/cfi.cpp index fd48f71643b6fe..b75c72b215c275 100644 --- a/compiler-rt/lib/cfi/cfi.cpp +++ b/compiler-rt/lib/cfi/cfi.cpp @@ -379,7 +379,7 @@ void InitializeFlags() { __ubsan::RegisterUbsanFlags(&ubsan_parser, uf); RegisterCommonFlags(&ubsan_parser); - const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions(); + const char *ubsan_default_options = __ubsan_default_options(); ubsan_parser.ParseString(ubsan_default_options); ubsan_parser.ParseStringFromEnv("UBSAN_OPTIONS"); #endif diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp index caafd1dbb0a7ba..57df1238c398ca 100644 --- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -755,6 +755,8 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.FeaturesDir = Flags.features_dir; ValidateDirectoryExists(Options.FeaturesDir, Flags.create_missing_dirs); } + if (Flags.mutation_graph_file) + Options.MutationGraphFile = Flags.mutation_graph_file; if (Flags.collect_data_flow) Options.CollectDataFlow = Flags.collect_data_flow; if (Flags.stop_file) diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def index fdb8362cef9d4f..c9a787e03833d5 
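A note on the __paritydi2/__parityti2 rewrite above: it inlines the classic XOR-fold plus 16-entry table trick instead of calling __paritysi2. A self-checking sketch of why (0x6996 >> (x & 0xF)) & 1 is the parity of x: XOR-folding halves preserves overall parity, and 0x6996 = 0b0110100110010110 is a lookup table whose bit i equals the parity of i.

#include <cassert>
#include <cstdint>

int parity32(uint32_t x) {
  x ^= x >> 16; // fold halves; XOR preserves the total number of 1-bits mod 2
  x ^= x >> 8;
  x ^= x >> 4;  // parity of x now lives in the low 4 bits
  return (0x6996 >> (x & 0xF)) & 1;
}

int main() {
  assert(parity32(0) == 0);
  assert(parity32(1) == 1);
  assert(parity32(0x3) == 0);        // two bits set: even parity
  assert(parity32(0xFFFFFFFF) == 0); // 32 bits set: even parity
}

The 64- and 128-bit builtins first XOR the high and low halves down to 32 bits, then apply exactly this sequence.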
100644 --- a/compiler-rt/lib/fuzzer/FuzzerFlags.def +++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -88,6 +88,11 @@ FUZZER_FLAG_STRING(features_dir, "internal flag. Used to dump feature sets on di "Every time a new input is added to the corpus, a corresponding file in the features_dir" " is created containing the unique features of that input." " Features are stored in binary format.") +FUZZER_FLAG_STRING(mutation_graph_file, "Saves a graph (in DOT format) to" + " mutation_graph_file. The graph contains a vertex for each input that has" + " unique coverage; directed edges are provided between parents and children" + " where the child has unique coverage, and are recorded with the type of" + " mutation that caused the child.") FUZZER_FLAG_INT(use_counters, 1, "Use coverage counters") FUZZER_FLAG_INT(use_memmem, 1, "Use hints from intercepting memmem, strstr, etc") diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.cpp b/compiler-rt/lib/fuzzer/FuzzerIO.cpp index c3330c3425d091..54a7219fc0e0fc 100644 --- a/compiler-rt/lib/fuzzer/FuzzerIO.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerIO.cpp @@ -77,6 +77,19 @@ void WriteToFile(const uint8_t *Data, size_t Size, const std::string &Path) { fclose(Out); } +void AppendToFile(const std::string &Data, const std::string &Path) { + AppendToFile(reinterpret_cast(Data.data()), Data.size(), + Path); +} + +void AppendToFile(const uint8_t *Data, size_t Size, const std::string &Path) { + FILE *Out = fopen(Path.c_str(), "a"); + if (!Out) + return; + fwrite(Data, sizeof(Data[0]), Size, Out); + fclose(Out); +} + void ReadDirToVectorOfUnits(const char *Path, Vector *V, long *Epoch, size_t MaxSize, bool ExitOnError) { long E = Epoch ? *Epoch : 0; diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.h b/compiler-rt/lib/fuzzer/FuzzerIO.h index 6e3a0b470c5f6a..abd25110d07d49 100644 --- a/compiler-rt/lib/fuzzer/FuzzerIO.h +++ b/compiler-rt/lib/fuzzer/FuzzerIO.h @@ -29,6 +29,9 @@ void WriteToFile(const uint8_t *Data, size_t Size, const std::string &Path); void WriteToFile(const std::string &Data, const std::string &Path); void WriteToFile(const Unit &U, const std::string &Path); +void AppendToFile(const uint8_t *Data, size_t Size, const std::string &Path); +void AppendToFile(const std::string &Data, const std::string &Path); + void ReadDirToVectorOfUnits(const char *Path, Vector *V, long *Epoch, size_t MaxSize, bool ExitOnError); diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp index f9986dd8eea51c..ce8c2fb7471448 100644 --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -463,6 +463,37 @@ static void RenameFeatureSetFile(const std::string &FeaturesDir, DirPlusFile(FeaturesDir, NewFile)); } +static void WriteEdgeToMutationGraphFile(const std::string &MutationGraphFile, + const InputInfo *II, + const InputInfo *BaseII, + const std::string &MS) { + if (MutationGraphFile.empty()) + return; + + std::string Sha1 = Sha1ToString(II->Sha1); + + std::string OutputString; + + // Add a new vertex. + OutputString.append("\""); + OutputString.append(Sha1); + OutputString.append("\"\n"); + + // Add a new edge if there is base input. 
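Note that WriteEdgeToMutationGraphFile, whose edge-writing branch continues below, appends raw vertex and edge lines, so the file named by -mutation_graph_file is a DOT fragment without a digraph wrapper. A small post-processing sketch (a hypothetical helper, not part of libFuzzer) that makes it renderable:

#include <fstream>
#include <iostream>
#include <string>

int main(int argc, char **argv) {
  if (argc != 2)
    return 1;
  std::ifstream In(argv[1]); // file written via -mutation_graph_file=...
  std::cout << "digraph mutations {\n";
  for (std::string Line; std::getline(In, Line);)
    std::cout << "  " << Line << "\n";
  std::cout << "}\n";
}

A fragment produced with -mutation_graph_file=graph.txt could then be rendered with `./wrap graph.txt | dot -Tsvg > graph.svg`, assuming Graphviz is installed.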
+ if (BaseII) { + std::string BaseSha1 = Sha1ToString(BaseII->Sha1); + OutputString.append("\""); + OutputString.append(BaseSha1); + OutputString.append("\" -> \""); + OutputString.append(Sha1); + OutputString.append("\" [label=\""); + OutputString.append(MS); + OutputString.append("\"];\n"); + } + + AppendToFile(OutputString, MutationGraphFile); +} + bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, InputInfo *II, bool ForceAddToCorpus, bool *FoundUniqFeatures) { @@ -497,6 +528,8 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, TimeOfUnit, UniqFeatureSetTmp, DFT, II); WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1), NewII->UniqFeatureSet); + WriteEdgeToMutationGraphFile(Options.MutationGraphFile, NewII, II, + MD.MutationSequence()); return true; } if (II && FoundUniqFeaturesOfII && diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp index df9ada45bb0391..121b450e8b8c56 100644 --- a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp @@ -494,6 +494,15 @@ void MutationDispatcher::PrintMutationSequence() { } } +std::string MutationDispatcher::MutationSequence() { + std::string MS; + for (auto M : CurrentMutatorSequence) { + MS += M.Name; + MS += "-"; + } + return MS; +} + size_t MutationDispatcher::Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { return MutateImpl(Data, Size, MaxSize, Mutators); } diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.h b/compiler-rt/lib/fuzzer/FuzzerMutate.h index 6cbce80276248c..3ce3159f6893be 100644 --- a/compiler-rt/lib/fuzzer/FuzzerMutate.h +++ b/compiler-rt/lib/fuzzer/FuzzerMutate.h @@ -26,6 +26,8 @@ class MutationDispatcher { void StartMutationSequence(); /// Print the current sequence of mutations. void PrintMutationSequence(); + /// Return the current sequence of mutations. + std::string MutationSequence(); /// Indicate that the current sequence of mutations was successful. void RecordSuccessfulMutationSequence(); /// Mutates data by invoking user-provided mutator. diff --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h index b17a7474d38f05..706e1c64c706ca 100644 --- a/compiler-rt/lib/fuzzer/FuzzerOptions.h +++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h @@ -59,6 +59,7 @@ struct FuzzingOptions { std::string DataFlowTrace; std::string CollectDataFlow; std::string FeaturesDir; + std::string MutationGraphFile; std::string StopFile; bool SaveArtifacts = true; bool PrintNEW = true; // Print a status line when new units are found; diff --git a/compiler-rt/lib/hwasan/hwasan.cpp b/compiler-rt/lib/hwasan/hwasan.cpp index 11b4d3891bc2cf..c5322110cb662a 100644 --- a/compiler-rt/lib/hwasan/hwasan.cpp +++ b/compiler-rt/lib/hwasan/hwasan.cpp @@ -112,7 +112,7 @@ static void InitializeFlags() { if (__hwasan_default_options) parser.ParseString(__hwasan_default_options()); #if HWASAN_CONTAINS_UBSAN - const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions(); + const char *ubsan_default_options = __ubsan_default_options(); ubsan_parser.ParseString(ubsan_default_options); #endif diff --git a/compiler-rt/lib/lsan/lsan.cpp b/compiler-rt/lib/lsan/lsan.cpp index 80a6e2fa70169d..c8cc045783d451 100644 --- a/compiler-rt/lib/lsan/lsan.cpp +++ b/compiler-rt/lib/lsan/lsan.cpp @@ -73,7 +73,7 @@ static void InitializeFlags() { RegisterCommonFlags(&parser); // Override from user-specified string. 
- const char *lsan_default_options = MaybeCallLsanDefaultOptions(); + const char *lsan_default_options = __lsan_default_options(); parser.ParseString(lsan_default_options); parser.ParseStringFromEnv("LSAN_OPTIONS"); diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp index 67f85f2f31de46..41b5ae5483299b 100644 --- a/compiler-rt/lib/lsan/lsan_common.cpp +++ b/compiler-rt/lib/lsan/lsan_common.cpp @@ -25,8 +25,6 @@ #include "sanitizer_common/sanitizer_thread_registry.h" #include "sanitizer_common/sanitizer_tls_get_addr.h" -extern "C" const char *__lsan_current_stage = "unknown"; - #if CAN_SANITIZE_LEAKS namespace __lsan { @@ -110,10 +108,6 @@ void InitializeRootRegions() { root_regions = new (placeholder) InternalMmapVector(); } -const char *MaybeCallLsanDefaultOptions() { - return (&__lsan_default_options) ? __lsan_default_options() : ""; -} - void InitCommonLsan() { InitializeRootRegions(); if (common_flags()->detect_leaks) { @@ -366,7 +360,6 @@ static void FloodFillTag(Frontier *frontier, ChunkTag tag) { // ForEachChunk callback. If the chunk is marked as leaked, marks all chunks // which are reachable from it as indirectly leaked. static void MarkIndirectlyLeakedCb(uptr chunk, void *arg) { - __lsan_current_stage = "MarkIndirectlyLeakedCb"; chunk = GetUserBegin(chunk); LsanMetadata m(chunk); if (m.allocated() && m.tag() != kReachable) { @@ -379,7 +372,6 @@ static void MarkIndirectlyLeakedCb(uptr chunk, void *arg) { // frontier. static void CollectIgnoredCb(uptr chunk, void *arg) { CHECK(arg); - __lsan_current_stage = "CollectIgnoredCb"; chunk = GetUserBegin(chunk); LsanMetadata m(chunk); if (m.allocated() && m.tag() == kIgnored) { @@ -409,7 +401,6 @@ struct InvalidPCParam { static void MarkInvalidPCCb(uptr chunk, void *arg) { CHECK(arg); InvalidPCParam *param = reinterpret_cast(arg); - __lsan_current_stage = "MarkInvalidPCCb"; chunk = GetUserBegin(chunk); LsanMetadata m(chunk); if (m.allocated() && m.tag() != kReachable && m.tag() != kIgnored) { @@ -485,7 +476,6 @@ static void ClassifyAllChunks(SuspendedThreadsList const &suspended_threads, // ForEachChunk callback. Resets the tags to pre-leak-check state. static void ResetTagsCb(uptr chunk, void *arg) { (void)arg; - __lsan_current_stage = "ResetTagsCb"; chunk = GetUserBegin(chunk); LsanMetadata m(chunk); if (m.allocated() && m.tag() != kIgnored) @@ -502,7 +492,6 @@ static void PrintStackTraceById(u32 stack_trace_id) { static void CollectLeaksCb(uptr chunk, void *arg) { CHECK(arg); LeakReport *leak_report = reinterpret_cast(arg); - __lsan_current_stage = "CollectLeaksCb"; chunk = GetUserBegin(chunk); LsanMetadata m(chunk); if (!m.allocated()) return; @@ -900,12 +889,11 @@ int __lsan_do_recoverable_leak_check() { return 0; } -#if !SANITIZER_SUPPORTS_WEAK_HOOKS -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -const char * __lsan_default_options() { +SANITIZER_INTERFACE_WEAK_DEF(const char *, __lsan_default_options, void) { return ""; } +#if !SANITIZER_SUPPORTS_WEAK_HOOKS SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE int __lsan_is_turned_off() { return 0; diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp index 3028f79f041c33..d651a376789bd7 100644 --- a/compiler-rt/lib/msan/msan.cpp +++ b/compiler-rt/lib/msan/msan.cpp @@ -172,10 +172,9 @@ static void InitializeFlags() { #endif // Override from user-specified string. 
- if (__msan_default_options) - parser.ParseString(__msan_default_options()); + parser.ParseString(__msan_default_options()); #if MSAN_CONTAINS_UBSAN - const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions(); + const char *ubsan_default_options = __ubsan_default_options(); ubsan_parser.ParseString(ubsan_default_options); #endif @@ -726,12 +725,9 @@ void __msan_finish_switch_fiber(const void **bottom_old, uptr *size_old) { } } -#if !SANITIZER_SUPPORTS_WEAK_HOOKS -extern "C" { -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -const char* __msan_default_options() { return ""; } -} // extern "C" -#endif +SANITIZER_INTERFACE_WEAK_DEF(const char *, __msan_default_options, void) { + return ""; +} extern "C" { SANITIZER_INTERFACE_ATTRIBUTE diff --git a/compiler-rt/lib/msan/msan_interface_internal.h b/compiler-rt/lib/msan/msan_interface_internal.h index 17922a888b9c91..1edacbc7504f5d 100644 --- a/compiler-rt/lib/msan/msan_interface_internal.h +++ b/compiler-rt/lib/msan/msan_interface_internal.h @@ -129,8 +129,8 @@ void __msan_set_keep_going(int keep_going); SANITIZER_INTERFACE_ATTRIBUTE int __msan_set_poison_in_malloc(int do_poison); -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -/* OPTIONAL */ const char* __msan_default_options(); +SANITIZER_INTERFACE_ATTRIBUTE +const char *__msan_default_options(); // For testing. SANITIZER_INTERFACE_ATTRIBUTE diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h index 0cf483da1e5c8c..33f89d6d49928c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h @@ -142,12 +142,6 @@ class CombinedAllocator { return secondary_.GetBlockBeginFastLocked(p); } - void *GetBlockBeginFastLockedDebug(void *p) { - if (primary_.PointerIsMine(p)) - return primary_.GetBlockBeginDebug(p); - return secondary_.GetBlockBeginFastLocked(p); - } - uptr GetActuallyAllocatedSize(void *p) { if (primary_.PointerIsMine(p)) return primary_.GetActuallyAllocatedSize(p); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h index 2c25a687c5f088..b90dabbf776929 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h @@ -211,7 +211,6 @@ class SizeClassAllocator32 { uptr res = beg + (n * (u32)size); return reinterpret_cast(res); } - void *GetBlockBeginDebug(const void *p) { return GetBlockBegin(p); } uptr GetActuallyAllocatedSize(void *p) { CHECK(PointerIsMine(p)); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h index a6126fc6265eb5..774c09e4249526 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h @@ -199,30 +199,6 @@ class SizeClassAllocator64 { return nullptr; } - void *GetBlockBeginDebug(const void *p) { - uptr class_id = GetSizeClass(p); - uptr size = ClassIdToSize(class_id); - Printf("GetBlockBeginDebug1 p %p class_id %p size %p\n", p, class_id, size); - if (!size) - return nullptr; - uptr chunk_idx = GetChunkIdx((uptr)p, size); - uptr reg_beg = GetRegionBegin(p); - uptr beg = chunk_idx * size; - uptr next_beg = beg + size; - Printf( - "GetBlockBeginDebug2 chunk_idx %p reg_beg %p beg %p next_beg %p " - 
"kNumClasses %p\n", - chunk_idx, reg_beg, beg, next_beg, kNumClasses); - if (class_id >= kNumClasses) - return nullptr; - const RegionInfo *region = AddressSpaceView::Load(GetRegionInfo(class_id)); - Printf("GetBlockBeginDebug3 region %p region->mapped_user %p\n", region, - region->mapped_user); - if (region->mapped_user >= next_beg) - return reinterpret_cast(reg_beg + beg); - return nullptr; - } - uptr GetActuallyAllocatedSize(void *p) { CHECK(PointerIsMine(p)); return ClassIdToSize(GetSizeClass(p)); diff --git a/compiler-rt/lib/tsan/rtl/tsan_flags.cpp b/compiler-rt/lib/tsan/rtl/tsan_flags.cpp index 44bf325cd35bb6..49e4a9c21da9c7 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_flags.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_flags.cpp @@ -87,7 +87,7 @@ void InitializeFlags(Flags *f, const char *env, const char *env_option_name) { // Let a frontend override. parser.ParseString(__tsan_default_options()); #if TSAN_CONTAINS_UBSAN - const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions(); + const char *ubsan_default_options = __ubsan_default_options(); ubsan_parser.ParseString(ubsan_default_options); #endif // Override from command line. diff --git a/compiler-rt/lib/ubsan/ubsan_flags.cpp b/compiler-rt/lib/ubsan/ubsan_flags.cpp index 721c2273f133a3..25cefd46ce27ce 100644 --- a/compiler-rt/lib/ubsan/ubsan_flags.cpp +++ b/compiler-rt/lib/ubsan/ubsan_flags.cpp @@ -21,10 +21,6 @@ namespace __ubsan { -const char *MaybeCallUbsanDefaultOptions() { - return (&__ubsan_default_options) ? __ubsan_default_options() : ""; -} - static const char *GetFlag(const char *flag) { // We cannot call getenv() from inside a preinit array initializer if (SANITIZER_CAN_USE_PREINIT_ARRAY) { @@ -66,7 +62,7 @@ void InitializeFlags() { RegisterUbsanFlags(&parser, f); // Override from user-specified string. - parser.ParseString(MaybeCallUbsanDefaultOptions()); + parser.ParseString(__ubsan_default_options()); // Override from environment variable. parser.ParseStringFromEnv("UBSAN_OPTIONS"); InitializeCommonFlags(); diff --git a/compiler-rt/lib/ubsan/ubsan_flags.h b/compiler-rt/lib/ubsan/ubsan_flags.h index daa0d7c701e041..c47009bafe5399 100644 --- a/compiler-rt/lib/ubsan/ubsan_flags.h +++ b/compiler-rt/lib/ubsan/ubsan_flags.h @@ -34,8 +34,6 @@ inline Flags *flags() { return &ubsan_flags; } void InitializeFlags(); void RegisterUbsanFlags(FlagParser *parser, Flags *f); -const char *MaybeCallUbsanDefaultOptions(); - } // namespace __ubsan extern "C" { diff --git a/compiler-rt/test/asan/TestCases/Linux/asan_prelink_test.cpp b/compiler-rt/test/asan/TestCases/Linux/asan_prelink_test.cpp index e00c215e92b117..9c70b61291b36d 100644 --- a/compiler-rt/test/asan/TestCases/Linux/asan_prelink_test.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/asan_prelink_test.cpp @@ -1,11 +1,12 @@ // Test if asan works with prelink. -// It does not actually use prelink, but relies on ld's flag -Ttext-segment -// or gold's flag -Ttext (we try the first flag first, if that fails we +// It does not actually use prelink, but relies on GNU ld's -Ttext-segment, +// LLD's --image-base, or gold's -Ttext (we try the first flag first, if that fails we // try the second flag). 
// // RUN: %clangxx_asan -c %s -o %t.o // RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %t.so -Wl,-Ttext-segment=0x3600000000 ||\ -// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %t.so -Wl,-Ttext=0x3600000000 +// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %t.so -Wl,--image-base=0x3600000000 ||\ +// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %t.so -Wl,-Ttext=0x3600000000 // RUN: %clangxx_asan %t.o %t.so -Wl,-R. -o %t // RUN: %env_asan_opts=verbosity=1 %run %t 2>&1 | FileCheck %s diff --git a/compiler-rt/test/asan/TestCases/lsan_crash.cpp b/compiler-rt/test/asan/TestCases/lsan_crash.cpp new file mode 100644 index 00000000000000..23c2569a0b73c8 --- /dev/null +++ b/compiler-rt/test/asan/TestCases/lsan_crash.cpp @@ -0,0 +1,31 @@ +// RUN: %clangxx_asan -O2 %s -o %t && %run %t + +#include +#include +#include +#include +#include + +std::atomic done; + +void foo() { + std::unique_ptr mem; + + while (!done) + mem.reset(new char[1000000]); +} + +int main() { + std::vector threads; + for (int i = 0; i < 10; ++i) + threads.emplace_back(foo); + + for (int i = 0; i < 100; ++i) + __lsan_do_recoverable_leak_check(); + + done = true; + for (auto &t : threads) + t.join(); + + return 0; +} diff --git a/compiler-rt/test/fuzzer/mutation-graph.test b/compiler-rt/test/fuzzer/mutation-graph.test new file mode 100644 index 00000000000000..7774a500395e02 --- /dev/null +++ b/compiler-rt/test/fuzzer/mutation-graph.test @@ -0,0 +1,17 @@ +REQUIRES: linux, x86_64 +RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest + +RUN: rm -rf %t-SimpleTestGraph + +RUN: not %run %t-SimpleTest -seed=1 -max_len=3 -mutation_graph_file=%t-SimpleTestGraph 2>&1 | FileCheck %s +CHECK: BINGO + +RUN: cat %t-SimpleTestGraph | FileCheck %s --check-prefix=GRAPH + +# A vertex and edge that correspond to the discovery of "H" +GRAPH: "7cf184f4c67ad58283ecb19349720b0cae756829" +GRAPH: {{.*}} -> "7cf184f4c67ad58283ecb19349720b0cae756829" [label="{{.*}}"]; + +# A vertex and edge that correspond to the discovery of "Hi" +GRAPH: "94dd9e08c129c785f7f256e82fbe0a30e6d1ae40" +GRAPH: {{.*}} -> "94dd9e08c129c785f7f256e82fbe0a30e6d1ae40" [label="{{.*}}"]; diff --git a/compiler-rt/test/profile/Inputs/instrprof-gcov-multiple-bbs-single-line.c.gcov b/compiler-rt/test/profile/Inputs/instrprof-gcov-multiple-bbs-single-line.c.gcov index d1104b7f5bbf2b..4debf8fc1b680d 100644 --- a/compiler-rt/test/profile/Inputs/instrprof-gcov-multiple-bbs-single-line.c.gcov +++ b/compiler-rt/test/profile/Inputs/instrprof-gcov-multiple-bbs-single-line.c.gcov @@ -3,7 +3,7 @@ // CHECK-NEXT: -: 0:Data:instrprof-gcov-multiple-bbs-single-line.gcda // CHECK-NEXT: -: 0:Runs:1 // CHECK-NEXT: -: 0:Programs:1 -// CHECK-NEXT:function main called 1 returned 100% blocks executed 80% +// CHECK-NEXT:function main called 1 returned 100% blocks executed 77% // CHECK-NEXT: 1: 1:int main(void) // CHECK-NEXT: -: 2:{ // CHECK-NEXT: -: 3: int var; diff --git a/flang/docs/OpenMP-4.5-grammar.txt b/flang/docs/OpenMP-4.5-grammar.txt index c74072ba1ef27c..180494bbf509ea 100644 --- a/flang/docs/OpenMP-4.5-grammar.txt +++ b/flang/docs/OpenMP-4.5-grammar.txt @@ -344,6 +344,8 @@ ATOMIC [seq_cst] atomic-clause -> READ | WRITE | UPDATE | CAPTURE +2.13.6 end-atomic -> END ATOMIC + 2.13.7 flush -> FLUSH [(variable-name-list)] 2.13.8 ordered -> ORDERED ordered-construct-clause [[[,] ordered-construct-clause]...] 
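A note on consuming the graph produced by the libFuzzer change above: WriteEdgeToMutationGraphFile appends bare DOT vertex and edge statements (as mutation-graph.test checks), so the file is not a complete DOT document. Below is a minimal post-processing sketch, assuming only the fragment format shown in the patch; the helper name wrap-graph is made up for the example.

#include <fstream>
#include <iostream>
#include <string>

// Wraps the bare vertex/edge fragments emitted via -mutation_graph_file in a
// digraph so Graphviz can render them.
int main(int argc, char **argv) {
  if (argc != 2) {
    std::cerr << "usage: wrap-graph <mutation_graph_file>\n";
    return 1;
  }
  std::ifstream In(argv[1]);
  if (!In) {
    std::cerr << "cannot open " << argv[1] << "\n";
    return 1;
  }
  std::cout << "digraph mutation_graph {\n";
  for (std::string Line; std::getline(In, Line);)
    std::cout << "  " << Line << "\n";
  std::cout << "}\n";
  return 0;
}

The wrapped output can then be fed to a tool such as dot to visualize which mutation sequences produced inputs with new coverage.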
diff --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md new file mode 100644 index 00000000000000..b4b00ee65ffb20 --- /dev/null +++ b/flang/docs/ReleaseNotes.md @@ -0,0 +1,87 @@ +# Flang 12.0.0 (In-Progress) Release Notes + +> **warning** +> +> These are in-progress notes for the upcoming LLVM 12.0.0 release. +> Release notes for previous releases can be found on [the Download +> Page](https://releases.llvm.org/download.html). + +## Introduction + +This document contains the release notes for the Flang Fortran frontend, +part of the LLVM Compiler Infrastructure, release 12.0.0. Here we +describe the status of Flang in some detail, including major +improvements from the previous release and new feature work. For the +general LLVM release notes, see [the LLVM +documentation](https://llvm.org/docs/ReleaseNotes.html). All LLVM +releases may be downloaded from the [LLVM releases web +site](https://llvm.org/releases/). + +Note that if you are reading this file from a Git checkout, this +document applies to the *next* release, not the current one. To see the +release notes for a specific release, please see the [releases +page](https://llvm.org/releases/). + +## Known Issues + +These are issues that couldn't be fixed before the release. See the bug +reports for the latest status. + + * ... + +## Introducing Flang + +Flang is LLVM's Fortran front end and is new for the LLVM 11 release. + +Flang is still a work in progress for this release and is included for +experimentation and feedback. + +Flang is able to parse a comprehensive subset of the Fortran language +and check it for correctness. Flang is not yet able to generate LLVM IR +for the source code and thus is unable to compile a running binary. + +Flang is able to unparse the input source code into a canonical form and +emit it to allow testing. Flang can also invoke an external Fortran +compiler on this canonical input. + +Flang's parser has comprehensive support for: + * Fortran 2018 + * OpenMP 4.5 + * OpenACC 3.0 + +Interested users are invited to try to compile their Fortran codes with +flang and report any issues in parsing or semantic checking in +[bugzilla](https://bugs.llvm.org/enter_bug.cgi?product=flang). + +### Major missing features + + * Flang is not supported on Windows platforms. + +## Using Flang + +Usage: `flang hello.f90 -o hello.bin` + +By default, Flang will parse the Fortran file `hello.f90` then unparse it to a +canonical Fortran source file. Flang will then invoke an external +Fortran compiler to compile this source file and link it, placing the +resulting executable in `hello.bin`. + +To specify the external Fortran compiler, set the `F18_FC` environment +variable to the name of the compiler binary and ensure that it is on your +`PATH`. The default value for `F18_FC` is `gfortran`. + +When invoked with no source input, Flang will wait for input on stdin. +When invoked in this way, Flang performs the same actions as if +called with `-fdebug-measure-parse-tree -funparse` and does not invoke +`F18_FC`. + +For a full list of options that Flang supports, run `flang --help`. + +## Additional Information + +Flang's documentation is located in the `flang/docs/` directory in the +LLVM monorepo. + +If you have any questions or comments about Flang, please feel free to +contact us via the [mailing +list](https://lists.llvm.org/mailman/listinfo/flang-dev).
diff --git a/flang/docs/ReleaseNotes.rst b/flang/docs/ReleaseNotes.rst deleted file mode 100644 index bbc7377412d631..00000000000000 --- a/flang/docs/ReleaseNotes.rst +++ /dev/null @@ -1,96 +0,0 @@ -======================================== -Flang 11.0.0 (In-Progress) Release Notes -======================================== - -.. contents:: - :local: - :depth: 2 - -.. warning:: - - These are in-progress notes for the upcoming LLVM 11.0.0 release. - Release notes for previous releases can be found on - `the Download Page `_. - -Introduction -============ - -This document contains the release notes for the Flang Fortran -frontend, part of the LLVM Compiler Infrastructure, release 11.0.0. Here we -describe the status of Flang in some detail, including major -improvements from the previous release and new feature work. For the -general LLVM release notes, see `the LLVM -documentation `_. All LLVM -releases may be downloaded from the `LLVM releases web -site `_. - -Note that if you are reading this file from a Git checkout, this document -applies to the *next* release, not -the current one. To see the release notes for a specific release, please -see the `releases page `_. - -Known Issues -============ - -These are issues that couldn't be fixed before the release. See the bug reports for the latest status. - -- ... - -Introducing Flang -================= - -Flang is LLVM's Fortran front end and is new for the LLVM 11 release. - -Flang is still a work in progress for this release and is included for -experimentation and feedback. - -Flang status ------------- - -Flang is able to parse a comprehensive subset of the Fortran language -and check it for correctness. Flang is not yet able to generate LLVM IR for -the source code and thus is unable to compile a running binary. - -Flang is able to unparse the input source code into a canonical form and emit -it to allow testing. Flang can also invoke an external Fortran compiler on this -canonical input. - -Flang's parser has comprehensive support for: -- Fortran 2018 -- OpenMP 4.5 -- OpenACC 3.0 - -Major missing features ----------------------- - -- Flang is not supported on Windows platforms. - -Using Flang -=========== - -Usage: ``flang hello.f90 -o hello.bin`` - -Flang will parse the Fortran file ``hello.f90`` then unparse it to a canonical -Fortran source file. Flang will then invoke an external Fortran compiler to -compile this source file and link it, placing the resulting executable -in ``hello.bin``. - -To specify the external Fortran compiler, set the ``F18_FC`` environment -variable to the name of the compiler binary and ensure it is on your ``PATH``. -The default value for ``F18_FC`` is ``gfortran``. - -When invoked with no source input, Flang will wait for input on standard in. -When invoked in this way, Flang performs the same actions as if called with -``-fdebug-measure-parse-tree -funparse`` and does not invoke ``F18_FC``. - -For a full list of options that Flang supports, run ``flang --help``. - -Additional Information -====================== - -Flang's documentation is located in the ``flang/docs/`` directory in -the LLVM monorepo. - -If you have any questions or comments about Flang, please feel free to -contact us via the `mailing -list `_. 
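For context on the compiler-rt refactoring above: replacing the MaybeCall*DefaultOptions() wrappers with SANITIZER_INTERFACE_WEAK_DEF means __lsan_default_options(), __msan_default_options(), and __ubsan_default_options() are always defined (weakly, returning "") and can be called unconditionally. The user-facing contract is unchanged: a program may still supply a strong definition to set default flags. A minimal sketch for LSan follows; the flag string is only an example.

#include <cstdlib>

// Strong definition that overrides the weak default provided via
// SANITIZER_INTERFACE_WEAK_DEF; the string uses LSAN_OPTIONS syntax.
extern "C" const char *__lsan_default_options() {
  return "use_stacks=0:use_registers=0";
}

int main() {
  std::malloc(16); // deliberately leaked so LSan has something to report
  return 0;
}

Built with -fsanitize=address (which embeds LSan), the leak report honors the defaults above unless the LSAN_OPTIONS environment variable overrides them, since the environment is parsed after the default-options string.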
diff --git a/flang/include/flang/Common/enum-set.h b/flang/include/flang/Common/enum-set.h index a7bdc757a1c97b..5d2eda57aa8197 100644 --- a/flang/include/flang/Common/enum-set.h +++ b/flang/include/flang/Common/enum-set.h @@ -37,8 +37,8 @@ template class EnumSet { constexpr EnumSet() {} constexpr EnumSet(const std::initializer_list &enums) { - for (auto x : enums) { - set(x); + for (auto it{enums.begin()}; it != enums.end(); ++it) { + set(*it); } } constexpr EnumSet(const EnumSet &) = default; diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 41ff9631d1011c..921e6172bf89ba 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -445,6 +445,9 @@ class ParseTreeDumper { NODE(parser, OmpAtomicCapture) NODE(OmpAtomicCapture, Stmt1) NODE(OmpAtomicCapture, Stmt2) + NODE(parser, OmpAtomicMemoryOrderClause) + NODE(parser, OmpAtomicMemoryOrderClauseList) + NODE(parser, OmpAtomicMemoryOrderClausePostList) NODE(parser, OmpAtomicRead) NODE(parser, OmpAtomicUpdate) NODE(parser, OmpAtomicWrite) @@ -464,7 +467,6 @@ class ParseTreeDumper { #include "llvm/Frontend/OpenMP/OMP.cpp.inc" NODE(parser, OmpClauseList) NODE(parser, OmpCriticalDirective) - NODE(OmpCriticalDirective, Hint) NODE(parser, OmpDeclareTargetSpecifier) NODE(parser, OmpDeclareTargetWithClause) NODE(parser, OmpDeclareTargetWithList) @@ -487,6 +489,7 @@ class ParseTreeDumper { NODE(parser, OmpEndCriticalDirective) NODE(parser, OmpEndLoopDirective) NODE(parser, OmpEndSectionsDirective) + NODE(parser, OmpHintExpr) NODE(parser, OmpIfClause) NODE_ENUM(OmpIfClause, DirectiveNameModifier) NODE(parser, OmpLinearClause) @@ -499,10 +502,12 @@ class ParseTreeDumper { NODE(parser, OmpMapType) NODE(OmpMapType, Always) NODE_ENUM(OmpMapType, Type) - NODE(parser, OmpMemoryClause) - NODE_ENUM(OmpMemoryClause, MemoryOrder) - NODE(parser, OmpMemoryClauseList) - NODE(parser, OmpMemoryClausePostList) + NODE(parser, OmpMemoryOrderClause) + static std::string GetNodeName(const llvm::omp::Clause &x) { + return llvm::Twine( + "llvm::omp::Clause = ", llvm::omp::getOpenMPClauseName(x)) + .str(); + } NODE(parser, OmpNowait) NODE(parser, OmpObject) NODE(parser, OmpObjectList) @@ -549,7 +554,6 @@ class ParseTreeDumper { NODE(parser, OpenMPDeclareSimdConstruct) NODE(parser, OpenMPDeclareTargetConstruct) NODE(parser, OmpFlushMemoryClause) - NODE_ENUM(OmpFlushMemoryClause, FlushMemoryOrder) NODE(parser, OpenMPFlushConstruct) NODE(parser, OpenMPLoopConstruct) NODE(parser, OpenMPSimpleStandaloneConstruct) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 166e573b5cec34..a9fb92cf2584b7 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3591,12 +3591,14 @@ struct OpenMPDeclarativeConstruct { u; }; +// HINT(hint-expression) +WRAPPER_CLASS(OmpHintExpr, ConstantExpr); + // 2.13.2 CRITICAL [Name] END CRITICAL [Name] struct OmpCriticalDirective { TUPLE_CLASS_BOILERPLATE(OmpCriticalDirective); - WRAPPER_CLASS(Hint, ConstantExpr); CharBlock source; - std::tuple, std::optional> t; + std::tuple, std::optional> t; }; struct OmpEndCriticalDirective { TUPLE_CLASS_BOILERPLATE(OmpEndCriticalDirective); @@ -3608,44 +3610,56 @@ struct OpenMPCriticalConstruct { std::tuple t; }; -// 2.13.6 atomic -> ATOMIC [seq_cst[,]] atomic-clause [[,]seq_cst] | -// ATOMIC [seq_cst] +// 2.17.7 atomic -> ATOMIC [clause[,]] atomic-clause [[,]clause] | +// ATOMIC [clause] +// clause -> 
memory-order-clause | HINT(hint-expression) +// memory-order-clause -> SEQ_CST | ACQ_REL | RELEASE | ACQUIRE | RELAXED // atomic-clause -> READ | WRITE | UPDATE | CAPTURE // END ATOMIC EMPTY_CLASS(OmpEndAtomic); -// ATOMIC Memory related clause -struct OmpMemoryClause { - ENUM_CLASS(MemoryOrder, SeqCst) - WRAPPER_CLASS_BOILERPLATE(OmpMemoryClause, MemoryOrder); +// Memory order clause +struct OmpMemoryOrderClause { + WRAPPER_CLASS_BOILERPLATE(OmpMemoryOrderClause, llvm::omp::Clause); CharBlock source; }; -WRAPPER_CLASS(OmpMemoryClauseList, std::list); -WRAPPER_CLASS(OmpMemoryClausePostList, std::list); +// ATOMIC Memory order clause or hint expression +struct OmpAtomicMemoryOrderClause { + UNION_CLASS_BOILERPLATE(OmpAtomicMemoryOrderClause); + std::variant u; +}; + +WRAPPER_CLASS( + OmpAtomicMemoryOrderClauseList, std::list); +WRAPPER_CLASS( + OmpAtomicMemoryOrderClausePostList, std::list); // ATOMIC READ struct OmpAtomicRead { TUPLE_CLASS_BOILERPLATE(OmpAtomicRead); - std::tuple, std::optional> + std::tuple, + std::optional> t; }; // ATOMIC WRITE struct OmpAtomicWrite { TUPLE_CLASS_BOILERPLATE(OmpAtomicWrite); - std::tuple, std::optional> + std::tuple, + std::optional> t; }; // ATOMIC UPDATE struct OmpAtomicUpdate { TUPLE_CLASS_BOILERPLATE(OmpAtomicUpdate); - std::tuple, std::optional> + std::tuple, + std::optional> t; }; @@ -3654,16 +3668,16 @@ struct OmpAtomicCapture { TUPLE_CLASS_BOILERPLATE(OmpAtomicCapture); WRAPPER_CLASS(Stmt1, Statement); WRAPPER_CLASS(Stmt2, Statement); - std::tuple + std::tuple t; }; // ATOMIC struct OmpAtomic { TUPLE_CLASS_BOILERPLATE(OmpAtomic); - std::tuple, - std::optional> + std::tuple, std::optional> t; }; @@ -3707,8 +3721,7 @@ struct OpenMPCancelConstruct { // release // acquire struct OmpFlushMemoryClause { - ENUM_CLASS(FlushMemoryOrder, AcqRel, Release, Acquire) - WRAPPER_CLASS_BOILERPLATE(OmpFlushMemoryClause, FlushMemoryOrder); + WRAPPER_CLASS_BOILERPLATE(OmpFlushMemoryClause, llvm::omp::Clause); CharBlock source; }; diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index cd5ee0de556dc0..a7f4a1ae492c78 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -300,9 +300,9 @@ TYPE_PARSER(sourced(construct(verbatim("CANCEL"_tok), // release // acquire TYPE_PARSER(sourced(construct( - "ACQ_REL" >> pure(OmpFlushMemoryClause::FlushMemoryOrder::AcqRel) || - "RELEASE" >> pure(OmpFlushMemoryClause::FlushMemoryOrder::Release) || - "ACQUIRE" >> pure(OmpFlushMemoryClause::FlushMemoryOrder::Acquire)))) + "ACQ_REL" >> pure(llvm::omp::Clause::OMPC_acq_rel) || + "RELEASE" >> pure(llvm::omp::Clause::OMPC_release) || + "ACQUIRE" >> pure(llvm::omp::Clause::OMPC_acquire)))) TYPE_PARSER(sourced(construct(verbatim("FLUSH"_tok), maybe(Parser{}), @@ -384,51 +384,74 @@ TYPE_PARSER(construct(Parser{}) || construct(Parser{}, parenthesized(optionalList(actualArgSpec)))))) -// 2.13.6 ATOMIC [seq_cst[,]] atomic-clause [[,]seq_cst] | ATOMIC [seq_cst] -// atomic-clause -> READ | WRITE | UPDATE | CAPTURE +// Hint Expression => HINT(hint-expression) +TYPE_PARSER("HINT" >> construct(parenthesized(constantExpr))) + +// 2.17.7 atomic -> ATOMIC [clause [,]] atomic-clause [[,] clause] | +// ATOMIC [clause] +// clause -> memory-order-clause | HINT(hint-expression) +// memory-order-clause -> SEQ_CST | ACQ_REL | RELEASE | ACQUIRE | RELAXED +// atomic-clause -> READ | WRITE | UPDATE | CAPTURE // OMP END ATOMIC TYPE_PARSER(construct(startOmpLine >> "END ATOMIC"_tok)) -// ATOMIC Memory related clause 
-TYPE_PARSER(sourced(construct( - "SEQ_CST" >> pure(OmpMemoryClause::MemoryOrder::SeqCst)))) +// Memory order clause +TYPE_PARSER(sourced(construct( + "SEQ_CST" >> pure(llvm::omp::Clause::OMPC_seq_cst) || + "ACQ_REL" >> pure(llvm::omp::Clause::OMPC_acq_rel) || + "RELEASE" >> pure(llvm::omp::Clause::OMPC_release) || + "ACQUIRE" >> pure(llvm::omp::Clause::OMPC_acquire) || + "RELAXED" >> pure(llvm::omp::Clause::OMPC_relaxed)))) -// ATOMIC Memory Clause List -TYPE_PARSER(construct( - many(maybe(","_tok) >> Parser{}))) +// ATOMIC Memory order clause or Hint expression +TYPE_PARSER( + construct(Parser{}) || + construct(Parser{})) -TYPE_PARSER(construct( - many(maybe(","_tok) >> Parser{}))) +// ATOMIC Memory order Clause List +TYPE_PARSER(construct( + many(maybe(","_tok) >> Parser{}))) -// OMP [SEQ_CST] ATOMIC READ [SEQ_CST] -TYPE_PARSER("ATOMIC" >> - construct(Parser{} / maybe(","_tok), - verbatim("READ"_tok), Parser{} / endOmpLine, - statement(assignmentStmt), maybe(Parser{} / endOmpLine))) +TYPE_PARSER(construct( + many(maybe(","_tok) >> Parser{}))) -// OMP ATOMIC [SEQ_CST] CAPTURE [SEQ_CST] +// OMP ATOMIC [MEMORY-ORDER-CLAUSE-LIST] READ [MEMORY-ORDER-CLAUSE-LIST] TYPE_PARSER("ATOMIC" >> - construct(Parser{} / maybe(","_tok), - verbatim("CAPTURE"_tok), Parser{} / endOmpLine, - statement(assignmentStmt), statement(assignmentStmt), - Parser{} / endOmpLine)) + construct( + Parser{} / maybe(","_tok), + verbatim("READ"_tok), + Parser{} / endOmpLine, + statement(assignmentStmt), maybe(Parser{} / endOmpLine))) -// OMP ATOMIC [SEQ_CST] UPDATE [SEQ_CST] +// OMP ATOMIC [MEMORY-ORDER-CLAUSE-LIST] CAPTURE [MEMORY-ORDER-CLAUSE-LIST] +TYPE_PARSER( + "ATOMIC" >> construct( + Parser{} / maybe(","_tok), + verbatim("CAPTURE"_tok), + Parser{} / endOmpLine, + statement(assignmentStmt), statement(assignmentStmt), + Parser{} / endOmpLine)) + +// OMP ATOMIC [MEMORY-ORDER-CLAUSE-LIST] UPDATE [MEMORY-ORDER-CLAUSE-LIST] TYPE_PARSER("ATOMIC" >> - construct(Parser{} / maybe(","_tok), - verbatim("UPDATE"_tok), Parser{} / endOmpLine, + construct( + Parser{} / maybe(","_tok), + verbatim("UPDATE"_tok), + Parser{} / endOmpLine, statement(assignmentStmt), maybe(Parser{} / endOmpLine))) -// OMP ATOMIC [SEQ_CST] +// OMP ATOMIC [MEMORY-ORDER-CLAUSE-LIST] TYPE_PARSER(construct(verbatim("ATOMIC"_tok), - Parser{} / endOmpLine, statement(assignmentStmt), - maybe(Parser{} / endOmpLine))) + Parser{} / endOmpLine, + statement(assignmentStmt), maybe(Parser{} / endOmpLine))) -// ATOMIC [SEQ_CST] WRITE [SEQ_CST] +// OMP ATOMIC [MEMORY-ORDER-CLAUSE-LIST] WRITE [MEMORY-ORDER-CLAUSE-LIST] TYPE_PARSER("ATOMIC" >> - construct(Parser{} / maybe(","_tok), - verbatim("WRITE"_tok), Parser{} / endOmpLine, + construct( + Parser{} / maybe(","_tok), + verbatim("WRITE"_tok), + Parser{} / endOmpLine, statement(assignmentStmt), maybe(Parser{} / endOmpLine))) // Atomic Construct @@ -444,9 +467,7 @@ TYPE_PARSER(startOmpLine >> verbatim("END CRITICAL"_tok), maybe(parenthesized(name)))) / endOmpLine) TYPE_PARSER(sourced(construct(verbatim("CRITICAL"_tok), - maybe(parenthesized(name)), - maybe("HINT" >> construct( - parenthesized(constantExpr))))) / + maybe(parenthesized(name)), maybe(Parser{}))) / endOmpLine) TYPE_PARSER(construct( diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index e26795d0825bb6..ab94aa2e00c262 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2222,19 +2222,36 @@ class UnparseVisitor { break; } } - void Unparse(const OmpMemoryClause &x) { + void Unparse(const OmpHintExpr &x) { 
Word("HINT("), Walk(x.v), Put(')'); } + void Unparse(const OmpMemoryOrderClause &x) { switch (x.v) { - case OmpMemoryClause::MemoryOrder::SeqCst: + case llvm::omp::Clause::OMPC_seq_cst: Word("SEQ_CST"); break; + case llvm::omp::Clause::OMPC_acq_rel: + Word("ACQ_REL"); + break; + case llvm::omp::Clause::OMPC_release: + Word("RELEASE"); + break; + case llvm::omp::Clause::OMPC_acquire: + Word("ACQUIRE"); + break; + case llvm::omp::Clause::OMPC_relaxed: + Word("RELAXED"); + break; + default: + break; } } - void Unparse(const OmpMemoryClauseList &x) { Walk(" ", x.v, " "); } - void Unparse(const OmpMemoryClausePostList &x) { Walk(" ", x.v, " "); } + void Unparse(const OmpAtomicMemoryOrderClauseList &x) { Walk(" ", x.v, " "); } + void Unparse(const OmpAtomicMemoryOrderClausePostList &x) { + Walk(" ", x.v, " "); + } void Unparse(const OmpAtomic &x) { BeginOpenMP(); Word("!$OMP ATOMIC"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put("\n"); EndOpenMP(); Walk(std::get>(x.t)); @@ -2245,9 +2262,9 @@ class UnparseVisitor { void Unparse(const OmpAtomicCapture &x) { BeginOpenMP(); Word("!$OMP ATOMIC"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Word(" CAPTURE"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put("\n"); EndOpenMP(); Walk(std::get(x.t)); @@ -2260,9 +2277,9 @@ class UnparseVisitor { void Unparse(const OmpAtomicRead &x) { BeginOpenMP(); Word("!$OMP ATOMIC"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Word(" READ"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put("\n"); EndOpenMP(); Walk(std::get>(x.t)); @@ -2273,9 +2290,9 @@ class UnparseVisitor { void Unparse(const OmpAtomicUpdate &x) { BeginOpenMP(); Word("!$OMP ATOMIC"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Word(" UPDATE"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put("\n"); EndOpenMP(); Walk(std::get>(x.t)); @@ -2286,9 +2303,9 @@ class UnparseVisitor { void Unparse(const OmpAtomicWrite &x) { BeginOpenMP(); Word("!$OMP ATOMIC"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Word(" WRITE"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put("\n"); EndOpenMP(); Walk(std::get>(x.t)); @@ -2300,8 +2317,7 @@ class UnparseVisitor { BeginOpenMP(); Word("!$OMP CRITICAL"); Walk(" (", std::get>(x.t), ")"); - Walk(" HINT(", std::get>(x.t), - ")"); + Walk(std::get>(x.t)); Put("\n"); EndOpenMP(); } @@ -2431,15 +2447,17 @@ class UnparseVisitor { } void Unparse(const OmpFlushMemoryClause &x) { switch (x.v) { - case OmpFlushMemoryClause::FlushMemoryOrder::AcqRel: + case llvm::omp::Clause::OMPC_acq_rel: Word("ACQ_REL "); break; - case OmpFlushMemoryClause::FlushMemoryOrder::Release: + case llvm::omp::Clause::OMPC_release: Word("RELEASE "); break; - case OmpFlushMemoryClause::FlushMemoryOrder::Acquire: + case llvm::omp::Clause::OMPC_acquire: Word("ACQUIRE "); break; + default: + break; } } void Unparse(const OpenMPFlushConstruct &x) { diff --git a/flang/test/Semantics/omp-atomic.f90 b/flang/test/Semantics/omp-atomic.f90 index d5cb87aaba32da..8d3f95a770454f 100644 --- a/flang/test/Semantics/omp-atomic.f90 +++ b/flang/test/Semantics/omp-atomic.f90 @@ -1,5 +1,5 @@ ! RUN: %S/test_errors.sh %s %t %f18 -fopenmp - +use omp_lib ! 
Check OpenMP 2.13.6 atomic Construct a = 1.0 @@ -11,12 +11,32 @@ a = b !$omp end atomic + !$omp atomic read acquire hint(OMP_LOCK_HINT_CONTENDED) + a = b + + !$omp atomic release hint(OMP_LOCK_HINT_UNCONTENDED) write + a = b + !$omp atomic capture seq_cst b = a a = a + 1 !$omp end atomic + !$omp atomic hint(1) acq_rel capture + b = a + a = a + 1 + !$omp end atomic + + !ERROR: expected end of line + !ERROR: expected end of line + !$omp atomic read write + a = a + 1 + !$omp atomic a = a + 1 + + !$omp atomic relaxed + a = a + 1 + !$omp end parallel end diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index ea0aa0a259a22c..8e7df5d19610e6 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -41,33 +41,19 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXX_STANDALONE_BUIL endif() if (LIBCXX_STANDALONE_BUILD) - if(CMAKE_VERSION VERSION_LESS 3.12) - include(FindPythonInterp) - if( NOT PYTHONINTERP_FOUND ) - message(WARNING "Failed to find python interpreter. " - "The libc++ test suite will be disabled.") - set(LLVM_INCLUDE_TESTS OFF) - else() - add_executable(Python3::Interpreter IMPORTED) - set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) - set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) + find_package(Python3 COMPONENTS Interpreter) + if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") endif() - else() - find_package(Python3 COMPONENTS Interpreter) - if(NOT Python3_Interpreter_FOUND) - message(WARNING "Python3 not found, using python2 as a fallback") - find_package(Python2 COMPONENTS Interpreter REQUIRED) - if(Python2_VERSION VERSION_LESS 2.7) - message(SEND_ERROR "Python 2.7 or newer is required") - endif() - # Treat python2 as python3 - add_executable(Python3::Interpreter IMPORTED) - set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${Python2_EXECUTABLE}) - set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) - endif() + # Treat python2 as python3 + add_executable(Python3::Interpreter IMPORTED) + set_target_properties(Python3::Interpreter PROPERTIES + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) endif() endif() @@ -110,7 +96,7 @@ option(LIBCXX_INCLUDE_TESTS "Build the libc++ tests." ${LLVM_INCLUDE_TESTS}) option(LIBCXX_ENABLE_PARALLEL_ALGORITHMS "Enable the parallel algorithms library. This requires the PSTL to be available." OFF) option(LIBCXX_TEST_GDB_PRETTY_PRINTERS "Test gdb pretty printers." OFF) set(LIBCXX_TEST_CONFIG "${CMAKE_CURRENT_SOURCE_DIR}/test/configs/legacy.cfg.in" CACHE STRING - "The Lit testing configuration to use when running the tests." 
FORCE) # TODO: Stop using 'FORCE' once we can assume all CMake build dirs have been re-generated + "The Lit testing configuration to use when running the tests.") set(LIBCXX_TEST_PARAMS "" CACHE STRING "A list of parameters to run the Lit test suite with.") diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index d54ee8fa32913e..82b696f76eec78 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -148,6 +148,8 @@ def configure(self): self.lit_config ) + self.lit_config.note("All available features: {}".format(self.config.available_features)) + def print_config_info(self): if self.cxx.use_modules: self.lit_config.note('Using modules flags: %s' % diff --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp index e6f2609d679b93..cc298c9bbb8386 100644 --- a/libunwind/src/AddressSpace.hpp +++ b/libunwind/src/AddressSpace.hpp @@ -98,22 +98,15 @@ extern char __eh_frame_hdr_end; extern char __exidx_start; extern char __exidx_end; -#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) - -// ELF-based systems may use dl_iterate_phdr() to access sections -// containing unwinding information. The ElfW() macro for pointer-size -// independent ELF header traversal is not provided by on some -// systems (e.g., FreeBSD). On these systems the data structures are -// just called Elf_XXX. Define ElfW() locally. -#ifndef _WIN32 -#include -#else +#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_WIN32) + #include #include -#endif -#if !defined(ElfW) -#define ElfW(type) Elf_##type -#endif + +#elif defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) || \ + defined(_LIBUNWIND_USE_DL_UNWIND_FIND_EXIDX) + +#include #endif @@ -351,23 +344,14 @@ LocalAddressSpace::getEncodedP(pint_t &addr, pint_t end, uint8_t encoding, return result; } -#ifdef __APPLE__ -#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL) -#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL) -#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_WIN32) -#elif defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32) -#elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__) -// Code inside findUnwindSections handles all these cases. -// -// Although the above ifdef chain is ugly, there doesn't seem to be a cleaner -// way to handle it. The generalized boolean expression is: -// -// A OR (B AND C) OR (D AND C) OR (B AND E) OR (F AND E) OR (D AND G) -// -// Running it through various boolean expression simplifiers gives expressions -// that don't help at all. -#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +#if defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) +// The ElfW() macro for pointer-size independent ELF header traversal is not +// provided by on some systems (e.g., FreeBSD). On these systems the +// data structures are just called Elf_XXX. Define ElfW() locally. 
+#if !defined(ElfW) + #define ElfW(type) Elf_##type +#endif #if !defined(Elf_Half) typedef ElfW(Half) Elf_Half; #endif @@ -482,9 +466,7 @@ static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, return 0; } -#else // defined(LIBUNWIND_SUPPORT_DWARF_UNWIND) -// Given all the #ifdef's above, the code here is for -// defined(LIBUNWIND_ARM_EHABI) +#elif defined(_LIBUNWIND_ARM_EHABI) static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, void *data) { @@ -516,8 +498,9 @@ static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, } return found_obj && found_hdr; } -#endif // defined(LIBUNWIND_SUPPORT_DWARF_UNWIND) -#endif // defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + +#endif +#endif // defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, @@ -601,16 +584,14 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, (void)targetAddr; (void)info; return true; -#elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__) - // For ARM EHABI, Bionic didn't implement dl_iterate_phdr until API 21. After - // API 21, dl_iterate_phdr exists, but dl_unwind_find_exidx is much faster. +#elif defined(_LIBUNWIND_USE_DL_UNWIND_FIND_EXIDX) int length = 0; info.arm_section = (uintptr_t)dl_unwind_find_exidx((_Unwind_Ptr)targetAddr, &length); info.arm_section_length = (uintptr_t)length * sizeof(EHABIIndexEntry); if (info.arm_section && info.arm_section_length) return true; -#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +#elif defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) dl_iterate_cb_data cb_data = {this, &info, targetAddr}; int found = dl_iterate_phdr(findUnwindSectionsByPhdr, &cb_data); return static_cast(found); diff --git a/libunwind/src/config.h b/libunwind/src/config.h index fd177dd7338c15..0885dccda07eba 100644 --- a/libunwind/src/config.h +++ b/libunwind/src/config.h @@ -34,7 +34,18 @@ #else #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 #endif +#elif defined(_LIBUNWIND_IS_BAREMETAL) + #if !defined(_LIBUNWIND_ARM_EHABI) + #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 + #define _LIBUNWIND_SUPPORT_DWARF_INDEX 1 + #endif +#elif defined(__BIONIC__) && defined(_LIBUNWIND_ARM_EHABI) + // For ARM EHABI, Bionic didn't implement dl_iterate_phdr until API 21. After + // API 21, dl_iterate_phdr exists, but dl_unwind_find_exidx is much faster. + #define _LIBUNWIND_USE_DL_UNWIND_FIND_EXIDX 1 #else + // Assume an ELF system with a dl_iterate_phdr function. + #define _LIBUNWIND_USE_DL_ITERATE_PHDR 1 #if !defined(_LIBUNWIND_ARM_EHABI) #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 #define _LIBUNWIND_SUPPORT_DWARF_INDEX 1 diff --git a/libunwind/test/frameheadercache_test.pass.cpp b/libunwind/test/frameheadercache_test.pass.cpp index ebbc00464e0727..7f2d8e22b9f578 100644 --- a/libunwind/test/frameheadercache_test.pass.cpp +++ b/libunwind/test/frameheadercache_test.pass.cpp @@ -3,27 +3,10 @@ #include "../src/config.h" // Only run this test under supported configurations. -// The frame header cache should work fine for other architectures, -// but the #ifdefs end up being even more complicated than this. -#if defined(__x86_64__) && defined(_LIBUNWIND_USE_FRAME_HEADER_CACHE) - -// This #if chain is ugly, but see the comments in AddressSpace.hpp for -// the reasoning. 
- -#ifdef __APPLE__ -int main() { return 0; } -#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL) -int main() { return 0; } -#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL) -int main() { return 0; } -#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_WIN32) -int main() { return 0; } -#elif defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32) -int main() { return 0; } -#elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__) -int main() { return 0; } -#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +#if defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) && \ + defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) && \ + defined(_LIBUNWIND_USE_FRAME_HEADER_CACHE) #include #include @@ -84,9 +67,7 @@ int main() { abort(); return 0; } -#else -int main() { return 0; } -#endif + #else int main() { return 0;} #endif diff --git a/lld/CMakeLists.txt b/lld/CMakeLists.txt index 7dae682cdef07a..34a7a68da42c50 100644 --- a/lld/CMakeLists.txt +++ b/lld/CMakeLists.txt @@ -57,38 +57,19 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) include(CheckAtomic) if(LLVM_INCLUDE_TESTS) - if(CMAKE_VERSION VERSION_LESS 3.12) - include(FindPythonInterp) - if(NOT PYTHONINTERP_FOUND) - message(FATAL_ERROR - "Unable to find Python interpreter, required for testing. - - Please install Python or specify the PYTHON_EXECUTABLE CMake variable.") - endif() - - if(${PYTHON_VERSION_STRING} VERSION_LESS 2.7) - message(FATAL_ERROR "Python 2.7 or newer is required") + find_package(Python3 COMPONENTS Interpreter) + if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") endif() - add_executable(Python3::Interpeter IMPORTED) + # Treat python2 as python3 + add_executable(Python3::Interpreter IMPORTED) set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) - set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) - else() - find_package(Python3 COMPONENTS Interpreter) - if(NOT Python3_Interpreter_FOUND) - message(WARNING "Python3 not found, using python2 as a fallback") - find_package(Python2 COMPONENTS Interpreter REQUIRED) - if(Python2_VERSION VERSION_LESS 2.7) - message(SEND_ERROR "Python 2.7 or newer is required") - endif() - - # Treat python2 as python3 - add_executable(Python3::Interpreter IMPORTED) - set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${Python2_EXECUTABLE}) - set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) - endif() + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) endif() # Check prebuilt llvm/utils. diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index 28e13e8c1234bf..af6c08c2158165 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -152,9 +152,9 @@ void MarkLive::scanEhFrameSection(EhInputSection &eh, // a LSDA. We only need to keep the LSDA alive, so ignore anything that // points to executable sections. 
uint64_t pieceEnd = piece.inputOff + piece.size; - for (size_t j = firstRelI, end2 = rels.size(); j < end2; ++j) - if (rels[j].r_offset < pieceEnd) - resolveReloc(eh, rels[j], true); + for (size_t j = firstRelI, end2 = rels.size(); + j < end2 && rels[j].r_offset < pieceEnd; ++j) + resolveReloc(eh, rels[j], true); } } diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index b26817b66e2711..5ef37e9ecb895f 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1346,9 +1346,11 @@ static DenseMap buildSectionOrder() { addSym(*sym); for (InputFile *file : objectFiles) - for (Symbol *sym : file->getSymbols()) - if (sym->isLocal()) - addSym(*sym); + for (Symbol *sym : file->getSymbols()) { + if (!sym->isLocal()) + break; + addSym(*sym); + } if (config->warnSymbolOrdering) for (auto orderEntry : symbolOrder) diff --git a/lldb/docs/resources/build.rst b/lldb/docs/resources/build.rst index c1cb6ec1a9343a..b5c1fb8cb00124 100644 --- a/lldb/docs/resources/build.rst +++ b/lldb/docs/resources/build.rst @@ -71,7 +71,7 @@ commands below. :: > yum install libedit-devel libxml2-devel ncurses-devel python-devel swig - > sudo apt-get install build-essential subversion swig python2.7-dev libedit-dev libncurses5-dev + > sudo apt-get install build-essential subversion swig python3-dev libedit-dev libncurses5-dev > pkg install swig python > pkgin install swig python27 cmake ninja-build > brew install swig cmake ninja diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h b/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h index 769b18d54cedd6..b70ec223df4dff 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h @@ -359,15 +359,12 @@ class SemaSourceWithPriorities : public clang::ExternalSemaSource { } void CompleteType(clang::TagDecl *Tag) override { - while (!Tag->isCompleteDefinition()) - for (size_t i = 0; i < Sources.size(); ++i) { - // FIXME: We are technically supposed to loop here too until - // Tag->isCompleteDefinition() is true, but if our low quality source - // is failing to complete the tag this code will deadlock. - Sources[i]->CompleteType(Tag); - if (Tag->isCompleteDefinition()) - break; - } + for (clang::ExternalSemaSource *S : Sources) { + S->CompleteType(Tag); + // Stop after the first source completed the type. + if (Tag->isCompleteDefinition()) + break; + } } void CompleteType(clang::ObjCInterfaceDecl *Class) override { diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp index 73042c205a5ae7..e2601a059bb77f 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp @@ -216,7 +216,12 @@ namespace { /// imported while completing the original Decls). class CompleteTagDeclsScope : public ClangASTImporter::NewDeclListener { ClangASTImporter::ImporterDelegateSP m_delegate; - llvm::SmallVector m_decls_to_complete; + /// List of declarations in the target context that need to be completed. + /// Every declaration should only be completed once and therefore should only + /// be once in this list. + llvm::SetVector m_decls_to_complete; + /// Set of declarations that already were successfully completed (not just + /// added to m_decls_to_complete). 
llvm::SmallPtrSet m_decls_already_completed; clang::ASTContext *m_dst_ctx; clang::ASTContext *m_src_ctx; @@ -244,6 +249,9 @@ class CompleteTagDeclsScope : public ClangASTImporter::NewDeclListener { NamedDecl *decl = m_decls_to_complete.pop_back_val(); m_decls_already_completed.insert(decl); + // The decl that should be completed has to be imported into the target + // context from some other context. + assert(to_context_md->hasOrigin(decl)); // We should only complete decls coming from the source context. assert(to_context_md->getOrigin(decl).ctx == m_src_ctx); @@ -287,7 +295,8 @@ class CompleteTagDeclsScope : public ClangASTImporter::NewDeclListener { // Check if we already completed this type. if (m_decls_already_completed.count(to_named_decl) != 0) return; - m_decls_to_complete.push_back(to_named_decl); + // Queue this type to be completed. + m_decls_to_complete.insert(to_named_decl); } }; } // namespace diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp b/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp index 2f8cf1846ee774..38d9f8d1e4b805 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp @@ -34,6 +34,7 @@ CxxModuleHandler::CxxModuleHandler(ASTImporter &importer, ASTContext *target) "weak_ptr", // utility "allocator", + "pair", }; m_supported_templates.insert(supported_names.begin(), supported_names.end()); } diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/Makefile b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/Makefile new file mode 100644 index 00000000000000..4915cdae876413 --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/Makefile @@ -0,0 +1,9 @@ +# We don't have any standard include directories, so we can't +# parse the test_common.h header we usually inject as it includes +# system headers. +NO_TEST_COMMON_H := 1 + +CXXFLAGS_EXTRAS = -I $(SRCDIR)/root/usr/include/c++/v1/ -I $(SRCDIR)/root/usr/include/ -nostdinc -nostdinc++ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/TestForwardDeclFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/TestForwardDeclFromStdModule.py new file mode 100644 index 00000000000000..48459abb926686 --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/TestForwardDeclFromStdModule.py @@ -0,0 +1,39 @@ +""" +Tests forward declarations coming from the `std` module. +""" + +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +import os + +class TestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + # We only emulate a fake libc++ in this test and don't use the real libc++, + # but we still add the libc++ category so that this test is only run in + # test configurations where libc++ is actually supposed to be tested. + @add_test_categories(["libc++"]) + @skipIfRemote + @skipIf(compiler=no_match("clang")) + def test(self): + self.build() + + sysroot = os.path.join(os.getcwd(), "root") + + # Set the sysroot where our dummy libc++ exists. 
+ self.runCmd("platform select --sysroot '" + sysroot + "' host", CURRENT_EXECUTABLE_SET) + + lldbutil.run_to_source_breakpoint(self, + "// Set break point at this line.", lldb.SBFileSpec("main.cpp")) + + self.runCmd("settings set target.import-std-module true") + + # Print the dummy `std::vector`. It only has the dummy member in it + # so the standard `std::vector` formatter can't format it. Instead use + # the raw output so LLDB has to show the member variable. + # Both `std::vector` and the type of the member have forward + # declarations before their definitions. + self.expect("expr --raw -- v", + substrs=['(std::__1::vector) $0 = {', 'f = 0x', '}']) diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/main.cpp b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/main.cpp new file mode 100644 index 00000000000000..a0b02d5c68141b --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/main.cpp @@ -0,0 +1,8 @@ +#include + +int main(int argc, char **argv) { + // Makes sure we have the mock libc headers in the debug information. + libc_struct s; + std::vector v; + return 0; // Set break point at this line. +} diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/module.modulemap b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/module.modulemap new file mode 100644 index 00000000000000..f149be7b7d21ac --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/module.modulemap @@ -0,0 +1,3 @@ +module std { + module "vector" { header "vector" export * } +} diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/vector b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/vector new file mode 100644 index 00000000000000..c2d77aab071109 --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/vector @@ -0,0 +1,14 @@ +#include "libc_header.h" + +namespace std { + inline namespace __1 { + // A forward decl of `vector`. + template class vector; + // Pretend to be a std::vector template we need to instantiate in LLDB + // when import-std-module is enabled. + template + struct vector { class F; F *f; }; + // The definition of our forward declared nested class. 
+ template class vector::F { int x; }; + } +} diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/libc_header.h b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/libc_header.h new file mode 100644 index 00000000000000..47525c9db3467f --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/libc_header.h @@ -0,0 +1 @@ +struct libc_struct {}; diff --git a/lldb/test/API/commands/expression/import-std-module/pair/Makefile b/lldb/test/API/commands/expression/import-std-module/pair/Makefile new file mode 100644 index 00000000000000..f938f7428468ab --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/pair/Makefile @@ -0,0 +1,3 @@ +USE_LIBCPP := 1 +CXX_SOURCES := main.cpp +include Makefile.rules diff --git a/lldb/test/API/commands/expression/import-std-module/pair/TestPairFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/pair/TestPairFromStdModule.py new file mode 100644 index 00000000000000..4f5b1ea8028b07 --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/pair/TestPairFromStdModule.py @@ -0,0 +1,25 @@ +""" +Test basic std::pair functionality. +""" + +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class TestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @add_test_categories(["libc++"]) + @skipIf(compiler=no_match("clang")) + def test(self): + self.build() + + lldbutil.run_to_source_breakpoint(self, + "// Set break point at this line.", lldb.SBFileSpec("main.cpp")) + + self.runCmd("settings set target.import-std-module true") + + self.expect_expr("pair_int.first", result_type="int", result_value="1234") + self.expect_expr("pair_int.second", result_type="int", result_value="5678") + self.expect("expr pair_int", substrs=['first = 1234, second = 5678']) \ No newline at end of file diff --git a/lldb/test/API/commands/expression/import-std-module/pair/main.cpp b/lldb/test/API/commands/expression/import-std-module/pair/main.cpp new file mode 100644 index 00000000000000..1363698f1fc7f6 --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/pair/main.cpp @@ -0,0 +1,6 @@ +#include + +int main(int argc, char **argv) { + std::pair pair_int(1234, 5678); + return 0; // Set break point at this line. +} diff --git a/lldb/test/API/lang/c/record_decl_in_expr/TestRecordDeclInExpr.py b/lldb/test/API/lang/c/record_decl_in_expr/TestRecordDeclInExpr.py new file mode 100644 index 00000000000000..16bf098dce8f31 --- /dev/null +++ b/lldb/test/API/lang/c/record_decl_in_expr/TestRecordDeclInExpr.py @@ -0,0 +1,34 @@ +""" +Tests declaring RecordDecls in non-top-level expressions. +""" + +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class TestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @no_debug_info_test + def test_fwd_decl(self): + # Declare a forward decl and import it to the scratch AST. + self.expect_expr("struct S; S *s = nullptr; s", result_type="S *") + + @no_debug_info_test + def test_struct(self): + # Declare a struct and import it to the scratch AST. + self.expect("expr struct S {}; S s; s", substrs=["= {}"]) + + @no_debug_info_test + def test_struct_with_fwd_decl(self): + # Import the forward decl to the scratch AST. 
+ self.expect_expr("struct S; S *s = nullptr; s", result_type="S *") + # Merge the definition into the scratch AST. + self.expect("expr struct S {}; S s; s", substrs=["= {}"]) + + @no_debug_info_test + def test_struct_with_fwd_decl_same_expr(self): + # Test both a forward decl and a definition in one expression and + # import them into the scratch AST. + self.expect("expr struct S; struct S{}; S s; s", substrs=["= {}"]) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 4a7639c51121d9..410103b0bfd687 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -696,38 +696,19 @@ option(LLVM_ENABLE_PLUGINS "Enable plugin support" ${LLVM_ENABLE_PLUGINS_default include(HandleLLVMOptions) -if(CMAKE_VERSION VERSION_LESS 3.12) - include(FindPythonInterp) - if( NOT PYTHONINTERP_FOUND ) - message(FATAL_ERROR - "Unable to find Python interpreter, required for builds and testing. - - Please install Python or specify the PYTHON_EXECUTABLE CMake variable.") - endif() - - if( ${PYTHON_VERSION_STRING} VERSION_LESS 2.7 ) - message(FATAL_ERROR "Python 2.7 or newer is required") +find_package(Python3 COMPONENTS Interpreter) +if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") endif() + # Treat python2 as python3 add_executable(Python3::Interpreter IMPORTED) set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) - set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) -else() - find_package(Python3 COMPONENTS Interpreter) - if(NOT Python3_Interpreter_FOUND) - message(WARNING "Python3 not found, using python2 as a fallback") - find_package(Python2 COMPONENTS Interpreter REQUIRED) - if(Python2_VERSION VERSION_LESS 2.7) - message(SEND_ERROR "Python 2.7 or newer is required") - endif() - - # Treat python2 as python3 - add_executable(Python3::Interpreter IMPORTED) - set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${Python2_EXECUTABLE}) - set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) - endif() + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) endif() ###### diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 59897806c37a5b..47ce9fa10d908a 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -69,10 +69,13 @@ Changes to building LLVM Changes to TableGen ------------------- +* The new "TableGen Programmer's Reference" replaces the "TableGen Language + Introduction" and "TableGen Language Reference" documents. + * The syntax for specifying an integer range in a range list has changed. The old syntax used a hyphen in the range (e.g., ``{0-9}``). The new syntax - uses the "`...`" range punctuator (e.g., ``{0...9}``). The hyphen syntax - is deprecated. The "TableGen Language Reference" document has been updated. + uses the "`...`" range punctuation (e.g., ``{0...9}``). The hyphen syntax + is deprecated. Changes to the ARM Backend -------------------------- diff --git a/llvm/docs/TableGen/BackEnds.rst b/llvm/docs/TableGen/BackEnds.rst index 8b313383566894..a93f2ace78808e 100644 --- a/llvm/docs/TableGen/BackEnds.rst +++ b/llvm/docs/TableGen/BackEnds.rst @@ -226,16 +226,14 @@ SearchableTables **Purpose**: Generate custom searchable tables. -**Output**: Enums, global tables and lookup helper functions. +**Output**: Enums, global tables, and lookup helper functions. 
**Usage**: This backend allows generating free-form, target-specific tables
from TableGen records. The ARM and AArch64 targets use this backend to
generate tables of system registers; the AMDGPU target uses it to generate
meta-data about complex image and memory buffer instructions.

-More documentation is available in ``include/llvm/TableGen/SearchableTable.td``,
-which also contains the definitions of TableGen classes which must be
-instantiated in order to define the enums and tables emitted by this backend.
+See `SearchableTables Reference`_ for a detailed description.

CTags
-----
@@ -438,6 +436,381 @@ used for documenting user-facing attributes.
General BackEnds
================

+SearchableTables Reference
+--------------------------
+
+A TableGen include file, ``SearchableTable.td``, provides classes for
+generating C++ searchable tables. These tables are described in the
+following sections. To generate the C++ code, run ``llvm-tblgen`` with the
+``--gen-searchable-tables`` option, which invokes the backend that generates
+the tables from the records you provide.
+
+Each of the data structures generated for searchable tables is guarded by an
+``#ifdef``. This allows you to include the generated ``.inc`` file and select only
+certain data structures for inclusion. The examples below show the macro
+names used in these guards.
+
+Generic Enumerated Types
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``GenericEnum`` class makes it easy to define a C++ enumerated type and
+the enumerated *elements* of that type. To define the type, define a record
+whose parent class is ``GenericEnum`` and whose name is the desired enum
+type. This class provides three fields, which you can set in the record
+using the ``let`` statement.
+
+* ``string FilterClass``. The enum type will have one element for each record
+  that derives from this class. These records are collected to assemble the
+  complete set of elements.
+
+* ``string NameField``. The name of a field *in the collected records* that specifies
+  the name of the element. If a record has no such field, the record's
+  name will be used.
+
+* ``string ValueField``. The name of a field *in the collected records* that
+  specifies the numerical value of the element. If a record has no such
+  field, it will be assigned an integer value. Values are assigned in
+  alphabetical order starting with 0.
+
+Here is an example where the values of the elements are specified
+explicitly, as a template argument to the ``BEntry`` class. The resulting
+C++ code is shown.
+
+.. code-block:: text
+
+  def BValues : GenericEnum {
+    let FilterClass = "BEntry";
+    let NameField = "Name";
+    let ValueField = "Encoding";
+  }
+
+  class BEntry<bits<16> enc> {
+    string Name = NAME;
+    bits<16> Encoding = enc;
+  }
+
+  def BFoo : BEntry<0xac>;
+  def BBar : BEntry<0x14>;
+  def BZoo : BEntry<0x80>;
+  def BSnork : BEntry<0x4c>;
+
+.. code-block:: text
+
+  #ifdef GET_BValues_DECL
+  enum BValues {
+    BBar = 20,
+    BFoo = 172,
+    BSnork = 76,
+    BZoo = 128,
+  };
+  #endif
+
+In the following example, the values of the elements are assigned
+automatically. Note that values are assigned from 0, in alphabetical order
+by element name.
+
+.. code-block:: text
+
+  def CEnum : GenericEnum {
+    let FilterClass = "CEnum";
+  }
+
+  class CEnum;
+
+  def CFoo : CEnum;
+  def CBar : CEnum;
+  def CBaz : CEnum;
+
+.. code-block:: text
+
+  #ifdef GET_CEnum_DECL
+  enum CEnum {
+    CBar = 0,
+    CBaz = 1,
+    CFoo = 2,
+  };
+  #endif
+
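+For illustration, here is a minimal sketch of how client code might pull in
+just these enum declarations by defining the guard macros before including
+the generated file. The ``.inc`` file name below is hypothetical; it depends
+on how your build invokes ``llvm-tblgen --gen-searchable-tables``.
+
+.. code-block:: text
+
+  // Hypothetical client code; "MyTargetTables.inc" stands in for the file
+  // produced by llvm-tblgen --gen-searchable-tables.
+  #define GET_BValues_DECL
+  #define GET_CEnum_DECL
+  #include "MyTargetTables.inc" // Defines enums BValues and CEnum.
+  #undef GET_BValues_DECL
+  #undef GET_CEnum_DECL
+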
+Generic Tables
+~~~~~~~~~~~~~~
+
+The ``GenericTable`` class is used to define a searchable generic table.
+TableGen produces C++ code to define the table entries and also produces
+the declaration and definition of a function to search the table based on a
+primary key. To define the table, define a record whose parent class is
+``GenericTable`` and whose name is the name of the global table of entries.
+This class provides six fields.
+
+* ``string FilterClass``. The table will have one entry for each record
+  that derives from this class.
+
+* ``string CppTypeName``. The name of the C++ struct/class type of the
+  table that holds the entries. If unspecified, the ``FilterClass`` name is
+  used.
+
+* ``list<string> Fields``. A list of the names of the fields in the
+  collected records that contain the data for the table entries. The order of
+  this list determines the order of the values in the C++ initializers. See
+  below for information about the types of these fields.
+
+* ``list<string> PrimaryKey``. The list of fields that make up the
+  primary key.
+
+* ``string PrimaryKeyName``. The name of the generated C++ function
+  that performs a lookup on the primary key.
+
+* ``bit PrimaryKeyEarlyOut``. See the third example below.
+
+TableGen attempts to deduce the type of each of the table fields. It can
+deduce ``bit``, ``bits<n>``, ``string``, ``Intrinsic``, and ``Instruction``.
+These can be used in the primary key. TableGen also deduces ``code``, but it
+cannot be used in the primary key. Any other field types must be specified
+explicitly; this is done as shown in the second example below. Such fields
+cannot be used in the primary key.
+
+Here is an example where TableGen can deduce the field types. Note that the
+table entry records are anonymous; the names of entry records are
+irrelevant.
+
+.. code-block:: text
+
+  def ATable : GenericTable {
+    let FilterClass = "AEntry";
+    let Fields = ["Str", "Val1", "Val2"];
+    let PrimaryKey = ["Val1", "Val2"];
+    let PrimaryKeyName = "lookupATableByValues";
+  }
+
+  class AEntry<string str, int val1, int val2> {
+    string Str = str;
+    bits<8> Val1 = val1;
+    bits<10> Val2 = val2;
+  }
+
+  def : AEntry<"Bob", 5, 3>;
+  def : AEntry<"Carol", 2, 6>;
+  def : AEntry<"Ted", 4, 4>;
+  def : AEntry<"Alice", 4, 5>;
+  def : AEntry<"Costa", 2, 1>;
+
+Here is the generated C++ code. The declaration of ``lookupATableByValues``
+is guarded by ``GET_ATable_DECL``, while the definitions are guarded by
+``GET_ATable_IMPL``.
+
+.. code-block:: text
+
+  #ifdef GET_ATable_DECL
+  const AEntry *lookupATableByValues(uint8_t Val1, uint16_t Val2);
+  #endif
+
+  #ifdef GET_ATable_IMPL
+  constexpr AEntry ATable[] = {
+    { "Costa", 0x2, 0x1 }, // 0
+    { "Carol", 0x2, 0x6 }, // 1
+    { "Ted", 0x4, 0x4 }, // 2
+    { "Alice", 0x4, 0x5 }, // 3
+    { "Bob", 0x5, 0x3 }, // 4
+  };
+
+  const AEntry *lookupATableByValues(uint8_t Val1, uint16_t Val2) {
+    struct KeyType {
+      uint8_t Val1;
+      uint16_t Val2;
+    };
+    KeyType Key = { Val1, Val2 };
+    auto Table = makeArrayRef(ATable);
+    auto Idx = std::lower_bound(Table.begin(), Table.end(), Key,
+      [](const AEntry &LHS, const KeyType &RHS) {
+        if (LHS.Val1 < RHS.Val1)
+          return true;
+        if (LHS.Val1 > RHS.Val1)
+          return false;
+        if (LHS.Val2 < RHS.Val2)
+          return true;
+        if (LHS.Val2 > RHS.Val2)
+          return false;
+        return false;
+      });
+
+    if (Idx == Table.end() ||
+        Key.Val1 != Idx->Val1 ||
+        Key.Val2 != Idx->Val2)
+      return nullptr;
+    return &*Idx;
+  }
+  #endif
+
+The table entries in ``ATable`` are sorted in order by ``Val1``, and within
+each of those values, by ``Val2``. This allows a binary search of the table,
+which is performed in the lookup function by ``std::lower_bound``. The
+lookup function returns a reference to the found table entry, or the null
+pointer if no entry is found.
+
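+As a usage sketch (this is illustrative client code, not output of the
+backend), a caller might invoke the lookup function like this:
+
+.. code-block:: text
+
+  // Hypothetical client code.
+  if (const AEntry *Entry = lookupATableByValues(4, 5)) {
+    // Entry->Str is "Alice" for this key; a failed lookup yields nullptr.
+    ...
+  }
+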
+The following example includes a field whose type TableGen cannot deduce.
+The ``Kind`` field uses the enumerated type ``CEnum`` defined above. To
+inform TableGen of the type, the class derived from ``GenericTable`` must
+include a field named ``TypeOf_``\ *field*, where *field* is the name of the
+field whose type is required.
+
+.. code-block:: text
+
+  def CTable : GenericTable {
+    let FilterClass = "CEntry";
+    let Fields = ["Name", "Kind", "Encoding"];
+    GenericEnum TypeOf_Kind = CEnum;
+    let PrimaryKey = ["Encoding"];
+    let PrimaryKeyName = "lookupCEntryByEncoding";
+  }
+
+  class CEntry<string name, CEnum kind, bits<16> enc> {
+    string Name = name;
+    CEnum Kind = kind;
+    bits<16> Encoding = enc;
+  }
+
+  def : CEntry<"Apple", CFoo, 10>;
+  def : CEntry<"Pear", CBaz, 15>;
+  def : CEntry<"Apple", CBar, 13>;
+
+Here is the generated C++ code.
+
+.. code-block:: text
+
+  #ifdef GET_CTable_DECL
+  const CEntry *lookupCEntryByEncoding(uint16_t Encoding);
+  #endif
+
+  #ifdef GET_CTable_IMPL
+  constexpr CEntry CTable[] = {
+    { "Apple", CFoo, 0xA }, // 0
+    { "Apple", CBar, 0xD }, // 1
+    { "Pear", CBaz, 0xF }, // 2
+  };
+
+  const CEntry *lookupCEntryByEncoding(uint16_t Encoding) {
+    struct KeyType {
+      uint16_t Encoding;
+    };
+    KeyType Key = { Encoding };
+    auto Table = makeArrayRef(CTable);
+    auto Idx = std::lower_bound(Table.begin(), Table.end(), Key,
+      [](const CEntry &LHS, const KeyType &RHS) {
+        if (LHS.Encoding < RHS.Encoding)
+          return true;
+        if (LHS.Encoding > RHS.Encoding)
+          return false;
+        return false;
+      });
+
+    if (Idx == Table.end() ||
+        Key.Encoding != Idx->Encoding)
+      return nullptr;
+    return &*Idx;
+  }
+  #endif
+
+The ``PrimaryKeyEarlyOut`` field, when set to 1, modifies the lookup
+function so that it tests the first field of the primary key to determine
+whether it is within the range of the collected records' primary keys. If
+not, the function returns the null pointer without performing the binary
+search. This is useful for tables that provide data for only some of the
+elements of a larger enum-based space. The first field of the primary key
+must be an integral type; it cannot be a string.
+
+Adding ``let PrimaryKeyEarlyOut = 1`` to the ``ATable`` above:
+
+.. code-block:: text
+
+  def ATable : GenericTable {
+    let FilterClass = "AEntry";
+    let Fields = ["Str", "Val1", "Val2"];
+    let PrimaryKey = ["Val1", "Val2"];
+    let PrimaryKeyName = "lookupATableByValues";
+    let PrimaryKeyEarlyOut = 1;
+  }
+
+causes the lookup function to change as follows:
+
+.. code-block:: text
+
+  const AEntry *lookupATableByValues(uint8_t Val1, uint16_t Val2) {
+    if ((Val1 < 0x2) ||
+        (Val1 > 0x5))
+      return nullptr;
+
+    struct KeyType {
+    ...
+
+Search Indexes
+~~~~~~~~~~~~~~
+
+The ``SearchIndex`` class is used to define additional lookup functions for
+generic tables. To define an additional function, define a record whose parent
+class is ``SearchIndex`` and whose name is the name of the desired lookup
+function. This class provides three fields.
+
+* ``GenericTable Table``. The name of the table that is to receive another
+  lookup function.
+
+* ``list<string> Key``. The list of fields that make up the secondary key.
+
+* ``bit EarlyOut``. See the third example in `Generic Tables`_.
+
+Here is an example of a secondary key added to the ``CTable`` above. The
+generated function looks up entries based on the ``Name`` and ``Kind`` fields.
+
+.. code-block:: text
+
+  def lookupCEntry : SearchIndex {
+    let Table = CTable;
+    let Key = ["Name", "Kind"];
+  }
+
+This use of ``SearchIndex`` generates the following additional C++ code.
+
+.. code-block:: text
+
+  const CEntry *lookupCEntry(StringRef Name, unsigned Kind);
+
+  ...
+
+  const CEntry *lookupCEntry(StringRef Name, unsigned Kind) {
+    struct IndexType {
+      const char * Name;
+      unsigned Kind;
+      unsigned _index;
+    };
+    static const struct IndexType Index[] = {
+      { "APPLE", CBar, 1 },
+      { "APPLE", CFoo, 0 },
+      { "PEAR", CBaz, 2 },
+    };
+
+    struct KeyType {
+      std::string Name;
+      unsigned Kind;
+    };
+    KeyType Key = { Name.upper(), Kind };
+    auto Table = makeArrayRef(Index);
+    auto Idx = std::lower_bound(Table.begin(), Table.end(), Key,
+      [](const IndexType &LHS, const KeyType &RHS) {
+        int CmpName = StringRef(LHS.Name).compare(RHS.Name);
+        if (CmpName < 0) return true;
+        if (CmpName > 0) return false;
+        if ((unsigned)LHS.Kind < (unsigned)RHS.Kind)
+          return true;
+        if ((unsigned)LHS.Kind > (unsigned)RHS.Kind)
+          return false;
+        return false;
+      });
+
+    if (Idx == Table.end() ||
+        Key.Name != Idx->Name ||
+        Key.Kind != Idx->Kind)
+      return nullptr;
+    return &CTable[Idx->_index];
+  }
+
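+As with the primary key, a caller might use this secondary lookup as
+sketched below (hypothetical client code). Because the generated index
+stores upper-cased names and upper-cases the query via ``Name.upper()``,
+the string component of the key is matched case-insensitively:
+
+.. code-block:: text
+
+  // Hypothetical client code.
+  if (const CEntry *Entry = lookupCEntry("apple", CFoo))
+    // Entry->Encoding is 0xA here.
+    ...
+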
JSON
----

diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst
index 83684ab41c2802..07f0ba8a54dd05 100644
--- a/llvm/docs/TableGen/ProgRef.rst
+++ b/llvm/docs/TableGen/ProgRef.rst
@@ -140,7 +140,7 @@ the file is printed for review.

The following are the basic punctuation tokens::

-   - + [ ] { } ( ) < > : ; . = ? #
+   - + [ ] { } ( ) < > : ; . ... = ? #

Literals
--------
@@ -328,8 +328,8 @@ to an entity of type ``bits<4>``.
.. warning::
  The peculiar last form of :token:`RangePiece` is due to the fact that the
  "``-``" is included in the :token:`TokInteger`, hence ``1-5`` gets lexed as
-  two consecutive tokens, with values ``1`` and ``-5``,
-  instead of "1", "-", and "5".
+  two consecutive tokens, with values ``1`` and ``-5``, instead of "1", "-",
+  and "5". The use of hyphen as the range punctuation is deprecated.

Simple values
-------------
@@ -431,7 +431,7 @@ sense after reading the remainder of this guide.

* The iteration variable of a ``foreach``, such as the use of ``i`` in::

-    foreach i = 0..5 in
+    foreach i = 0...5 in
      def Foo#i;

.. productionlist::
@@ -466,11 +466,11 @@ primary value.
Here are the possible suffixes for some primary *value*. *value*\ ``{17}`` The final value is bit 17 of the integer *value* (note the braces). -*value*\ ``{8..15}`` +*value*\ ``{8...15}`` The final value is bits 8--15 of the integer *value*. The order of the - bits can be reversed by specifying ``{15..8}``. + bits can be reversed by specifying ``{15...8}``. -*value*\ ``[4..7,17,2..3,4]`` +*value*\ ``[4...7,17,2...3,4]`` The final value is a new list that is a slice of the list *value* (note the brackets). The new list contains elements 4, 5, 6, 7, 17, 2, 3, and 4. Elements may be @@ -827,10 +827,13 @@ template that expands into multiple records. MultiClassID: `TokIdentifier` As with regular classes, the multiclass has a name and can accept template -arguments. The body of the multiclass contains a series of statements that -define records, using :token:`Def` and :token:`Defm`. In addition, -:token:`Defvar`, :token:`Foreach`, and :token:`Let` -statements can be used to factor out even more common elements. +arguments. A multiclass can inherit from other multiclasses, which causes +the other multiclasses to be expanded and contribute to the record +definitions in the inheriting multiclass. The body of the multiclass +contains a series of statements that define records, using :token:`Def` and +:token:`Defm`. In addition, :token:`Defvar`, :token:`Foreach`, and +:token:`Let` statements can be used to factor out even more common elements. +The :token:`If` statement can also be used. Also as with regular classes, the multiclass has the implicit template argument ``NAME`` (see NAME_). When a named (non-anonymous) record is @@ -1128,8 +1131,8 @@ the next iteration. The following ``defvar`` will not work:: Variables can also be defined with ``defvar`` in a record body. See `Defvar in Record Body`_ for more details. -``foreach`` --- iterate over a sequence ---------------------------------------- +``foreach`` --- iterate over a sequence of statements +----------------------------------------------------- The ``foreach`` statement iterates over a series of statements, varying a variable over a sequence of values. @@ -1529,7 +1532,7 @@ and non-0 as true. ``!shl(``\ *a*\ ``,`` *count*\ ``)`` This operator shifts *a* left logically by *count* bits and produces the resulting value. The operation is performed on a 64-bit integer; the result - is undefined for shift counts outside 0..63. + is undefined for shift counts outside 0...63. ``!size(``\ *a*\ ``)`` This operator produces the number of elements in the list *a*. @@ -1537,12 +1540,12 @@ and non-0 as true. ``!sra(``\ *a*\ ``,`` *count*\ ``)`` This operator shifts *a* right arithmetically by *count* bits and produces the resulting value. The operation is performed on a 64-bit integer; the result - is undefined for shift counts outside 0..63. + is undefined for shift counts outside 0...63. ``!srl(``\ *a*\ ``,`` *count*\ ``)`` This operator shifts *a* right logically by *count* bits and produces the resulting value. The operation is performed on a 64-bit integer; the result - is undefined for shift counts outside 0..63. + is undefined for shift counts outside 0...63. 
``!strconcat(``\ *str1*\ ``,`` *str2*\ ``, ...)`` This operator concatenates the string arguments *str1*, *str2*, etc., and diff --git a/llvm/docs/TransformMetadata.rst b/llvm/docs/TransformMetadata.rst index 817b41b43711d1..3c0e10b3eb7a52 100644 --- a/llvm/docs/TransformMetadata.rst +++ b/llvm/docs/TransformMetadata.rst @@ -196,7 +196,7 @@ is transformed into (using an unroll factor of 4): .. code-block:: c int i = 0; - for (; i + 3 < n; i+=4) // unrolled loop + for (; i + 3 < n; i+=4) { // unrolled loop Stmt(i); Stmt(i+1); Stmt(i+2); diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 81c5fc9325884d..ea841440e18034 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -768,6 +768,11 @@ class ScalarEvolution { return getBackedgeTakenCount(L, ConstantMaximum); } + /// Return a symbolic upper bound for the backedge taken count of the loop. + /// This is more general than getConstantMaxBackedgeTakenCount as it returns + /// an arbitrary expression as opposed to only constants. + const SCEV* computeMaxBackedgeTakenCount(const Loop *L); + /// Return true if the backedge taken count is either the value returned by /// getConstantMaxBackedgeTakenCount or zero. bool isBackedgeTakenCountMaxOrZero(const Loop *L); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 8607ad02d50637..cff6b496cca279 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -321,6 +321,9 @@ class CombinerHelper { /// Check if operand \p OpIdx is zero. bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx); + /// Check if operand \p OpIdx is undef. + bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx); + /// Erase \p MI bool eraseInst(MachineInstr &MI); diff --git a/llvm/include/llvm/CodeGen/LiveRegUnits.h b/llvm/include/llvm/CodeGen/LiveRegUnits.h index 1ed091e3bb5e9d..e20e04cad35cc8 100644 --- a/llvm/include/llvm/CodeGen/LiveRegUnits.h +++ b/llvm/include/llvm/CodeGen/LiveRegUnits.h @@ -15,7 +15,7 @@ #define LLVM_CODEGEN_LIVEREGUNITS_H #include "llvm/ADT/BitVector.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h index 4a1b04ab3e8866..a5dbbdb4fdcd22 100644 --- a/llvm/include/llvm/CodeGen/MachineOutliner.h +++ b/llvm/include/llvm/CodeGen/MachineOutliner.h @@ -15,10 +15,11 @@ #ifndef LLVM_MACHINEOUTLINER_H #define LLVM_MACHINEOUTLINER_H +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/LivePhysRegs.h" namespace llvm { namespace outliner { diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 5607e785e349a2..8db5249743064f 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1049,8 +1049,8 @@ class SelectionDAG { /// Helper function to make it easier to build SetCC's if you just have an /// ISD::CondCode instead of an SDValue. 
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, - ISD::CondCode Cond, SDValue Chain = SDValue(), - bool IsSignaling = false) { + ISD::CondCode Cond, SDNodeFlags Flags = SDNodeFlags(), + SDValue Chain = SDValue(), bool IsSignaling = false) { assert(LHS.getValueType().isVector() == RHS.getValueType().isVector() && "Cannot compare scalars to vectors"); assert(LHS.getValueType().isVector() == VT.isVector() && @@ -1060,7 +1060,7 @@ class SelectionDAG { if (Chain) return getNode(IsSignaling ? ISD::STRICT_FSETCCS : ISD::STRICT_FSETCC, DL, {VT, MVT::Other}, {Chain, LHS, RHS, getCondCode(Cond)}); - return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond)); + return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond), Flags); } /// Helper function to make it easier to build Select's if you just have diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 6eef79162f8a79..fa150831bdbd03 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -357,10 +357,6 @@ template<> struct simplify_type { /// the backend. struct SDNodeFlags { private: - // This bit is used to determine if the flags are in a defined state. It is - // only used by SelectionDAGBuilder. - bool AnyDefined : 1; - bool NoUnsignedWrap : 1; bool NoSignedWrap : 1; bool Exact : 1; @@ -382,9 +378,8 @@ struct SDNodeFlags { public: /// Default constructor turns off all optimization flags. SDNodeFlags() - : AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false), - Exact(false), NoNaNs(false), NoInfs(false), - NoSignedZeros(false), AllowReciprocal(false), + : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false), + NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), AllowContract(false), ApproximateFuncs(false), AllowReassociation(false), NoFPExcept(false) {} @@ -399,56 +394,18 @@ struct SDNodeFlags { setAllowReassociation(FPMO.hasAllowReassoc()); } - /// Sets the state of the flags to the defined state. - void setDefined() { AnyDefined = true; } - /// Returns true if the flags are in a defined state. - bool isDefined() const { return AnyDefined; } - // These are mutators for each flag. 
-  void setNoUnsignedWrap(bool b) {
-    setDefined();
-    NoUnsignedWrap = b;
-  }
-  void setNoSignedWrap(bool b) {
-    setDefined();
-    NoSignedWrap = b;
-  }
-  void setExact(bool b) {
-    setDefined();
-    Exact = b;
-  }
-  void setNoNaNs(bool b) {
-    setDefined();
-    NoNaNs = b;
-  }
-  void setNoInfs(bool b) {
-    setDefined();
-    NoInfs = b;
-  }
-  void setNoSignedZeros(bool b) {
-    setDefined();
-    NoSignedZeros = b;
-  }
-  void setAllowReciprocal(bool b) {
-    setDefined();
-    AllowReciprocal = b;
-  }
-  void setAllowContract(bool b) {
-    setDefined();
-    AllowContract = b;
-  }
-  void setApproximateFuncs(bool b) {
-    setDefined();
-    ApproximateFuncs = b;
-  }
-  void setAllowReassociation(bool b) {
-    setDefined();
-    AllowReassociation = b;
-  }
-  void setNoFPExcept(bool b) {
-    setDefined();
-    NoFPExcept = b;
-  }
+  void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
+  void setNoSignedWrap(bool b) { NoSignedWrap = b; }
+  void setExact(bool b) { Exact = b; }
+  void setNoNaNs(bool b) { NoNaNs = b; }
+  void setNoInfs(bool b) { NoInfs = b; }
+  void setNoSignedZeros(bool b) { NoSignedZeros = b; }
+  void setAllowReciprocal(bool b) { AllowReciprocal = b; }
+  void setAllowContract(bool b) { AllowContract = b; }
+  void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
+  void setAllowReassociation(bool b) { AllowReassociation = b; }
+  void setNoFPExcept(bool b) { NoFPExcept = b; }

  // These are accessors for each flag.
  bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
diff --git a/llvm/include/llvm/CodeGen/StableHashing.h b/llvm/include/llvm/CodeGen/StableHashing.h
index c6113aa93c8001..caf27e152e78f6 100644
--- a/llvm/include/llvm/CodeGen/StableHashing.h
+++ b/llvm/include/llvm/CodeGen/StableHashing.h
@@ -40,7 +40,7 @@ inline void stable_hash_append(stable_hash &Hash, const char Value) {

inline void stable_hash_append(stable_hash &Hash, stable_hash Value) {
  for (unsigned I = 0; I < 8; ++I) {
-    stable_hash_append(Hash, (const char)Value);
+    stable_hash_append(Hash, static_cast<const char>(Value));
    Value >>= 8;
  }
}
diff --git a/llvm/include/llvm/CodeGen/StackMaps.h b/llvm/include/llvm/CodeGen/StackMaps.h
index ce4eb85d645251..578bc0e161a64a 100644
--- a/llvm/include/llvm/CodeGen/StackMaps.h
+++ b/llvm/include/llvm/CodeGen/StackMaps.h
@@ -261,6 +261,10 @@ class StackMaps {

  StackMaps(AsmPrinter &AP);

+  /// Get index of next meta operand.
+  /// Similar to parseOperand, but does not actually parse the operand meaning.
+  static unsigned getNextMetaArgIdx(MachineInstr *MI, unsigned CurIdx);
+
  void reset() {
    CSInfos.clear();
    ConstPool.clear();
diff --git a/llvm/include/llvm/DebugInfo/DIContext.h b/llvm/include/llvm/DebugInfo/DIContext.h
index 661d30d04c94e1..ae78fe912188de 100644
--- a/llvm/include/llvm/DebugInfo/DIContext.h
+++ b/llvm/include/llvm/DebugInfo/DIContext.h
@@ -35,6 +35,7 @@ struct DILineInfo {
  static constexpr const char *const Addr2LineBadString = "??";
  std::string FileName;
  std::string FunctionName;
+  std::string StartFileName;
  Optional<StringRef> Source;
  uint32_t Line = 0;
  uint32_t Column = 0;
@@ -43,12 +44,15 @@
  // DWARF-specific.
uint32_t Discriminator = 0; - DILineInfo() : FileName(BadString), FunctionName(BadString) {} + DILineInfo() + : FileName(BadString), FunctionName(BadString), StartFileName(BadString) { + } bool operator==(const DILineInfo &RHS) const { return Line == RHS.Line && Column == RHS.Column && FileName == RHS.FileName && FunctionName == RHS.FunctionName && - StartLine == RHS.StartLine && Discriminator == RHS.Discriminator; + StartFileName == RHS.StartFileName && StartLine == RHS.StartLine && + Discriminator == RHS.Discriminator; } bool operator!=(const DILineInfo &RHS) const { @@ -56,10 +60,10 @@ struct DILineInfo { } bool operator<(const DILineInfo &RHS) const { - return std::tie(FileName, FunctionName, Line, Column, StartLine, - Discriminator) < - std::tie(RHS.FileName, RHS.FunctionName, RHS.Line, RHS.Column, - RHS.StartLine, RHS.Discriminator); + return std::tie(FileName, FunctionName, StartFileName, Line, Column, + StartLine, Discriminator) < + std::tie(RHS.FileName, RHS.FunctionName, RHS.StartFileName, RHS.Line, + RHS.Column, RHS.StartLine, RHS.Discriminator); } explicit operator bool() const { return *this != DILineInfo(); } @@ -72,6 +76,8 @@ struct DILineInfo { OS << "function '" << FunctionName << "', "; OS << "line " << Line << ", "; OS << "column " << Column << ", "; + if (StartFileName != BadString) + OS << "start file '" << StartFileName << "', "; OS << "start line " << StartLine << '\n'; } }; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h index 05a6056e8e21f2..5789421e530440 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -262,6 +262,7 @@ class DWARFDie { /// for this subprogram by resolving DW_AT_sepcification or /// DW_AT_abstract_origin references if necessary. uint64_t getDeclLine() const; + std::string getDeclFile(DILineInfoSpecifier::FileLineInfoKind Kind) const; /// Retrieves values of DW_AT_call_file, DW_AT_call_line and DW_AT_call_column /// from DIE (or zeroes if they are missing). 
This function looks for
diff --git a/llvm/include/llvm/ObjectYAML/DWARFYAML.h b/llvm/include/llvm/ObjectYAML/DWARFYAML.h
index 99a7af87d2c78d..3e5be41b8fa3bb 100644
--- a/llvm/include/llvm/ObjectYAML/DWARFYAML.h
+++ b/llvm/include/llvm/ObjectYAML/DWARFYAML.h
@@ -214,7 +214,7 @@ struct Data {
  Optional<std::vector<StringRef>> DebugStrings;
  Optional<std::vector<StringOffsetsTable>> DebugStrOffsets;
  Optional<std::vector<ARange>> DebugAranges;
-  std::vector<Ranges> DebugRanges;
+  Optional<std::vector<Ranges>> DebugRanges;
  Optional<std::vector<AddrTableEntry>> DebugAddr;
  Optional<PubSection> PubNames;
  Optional<PubSection> PubTypes;
diff --git a/llvm/include/llvm/ProfileData/GCOV.h b/llvm/include/llvm/ProfileData/GCOV.h
index 7b9ba4410b654a..f87eab6d3ead2f 100644
--- a/llvm/include/llvm/ProfileData/GCOV.h
+++ b/llvm/include/llvm/ProfileData/GCOV.h
@@ -212,12 +212,13 @@ class GCOVFile {
};

struct GCOVArc {
-  GCOVArc(GCOVBlock &src, GCOVBlock &dst, bool fallthrough)
-      : src(src), dst(dst), fallthrough(fallthrough) {}
+  GCOVArc(GCOVBlock &src, GCOVBlock &dst, uint32_t flags)
+      : src(src), dst(dst), flags(flags) {}
+  bool onTree() const;

  GCOVBlock &src;
  GCOVBlock &dst;
-  bool fallthrough;
+  uint32_t flags;
  uint64_t Count = 0;
  uint64_t CyclesCount = 0;
};
@@ -234,7 +235,7 @@ class GCOVFunction {
  StringRef getFilename() const;
  size_t getNumBlocks() const { return Blocks.size(); }
  uint64_t getEntryCount() const;
-  uint64_t getExitCount() const;
+  GCOVBlock &getExitBlock() const;

  BlockIterator block_begin() const { return Blocks.begin(); }
  BlockIterator block_end() const { return Blocks.end(); }
@@ -242,6 +243,7 @@ class GCOVFunction {
    return make_range(block_begin(), block_end());
  }

+  uint64_t propagateCounts(const GCOVBlock &v, GCOVArc *arc);
  void print(raw_ostream &OS) const;
  void dump() const;
  void collectLineCounts(FileInfo &FI);
diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index d1c2147536a721..48a09ac48005df 100644
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -162,39 +162,49 @@ struct kernel_descriptor_t {
  uint8_t reserved2[6];
};

+enum : uint32_t {
+  GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0,
+  PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4,
+  RESERVED0_OFFSET = 8,
+  KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16,
+  RESERVED1_OFFSET = 24,
+  COMPUTE_PGM_RSRC3_OFFSET = 44,
+  COMPUTE_PGM_RSRC1_OFFSET = 48,
+  COMPUTE_PGM_RSRC2_OFFSET = 52,
+  KERNEL_CODE_PROPERTIES_OFFSET = 56,
+  RESERVED2_OFFSET = 58,
+};
+
static_assert(
    sizeof(kernel_descriptor_t) == 64,
    "invalid size for kernel_descriptor_t");
-static_assert(
-    offsetof(kernel_descriptor_t, group_segment_fixed_size) == 0,
-    "invalid offset for group_segment_fixed_size");
-static_assert(
-    offsetof(kernel_descriptor_t, private_segment_fixed_size) == 4,
-    "invalid offset for private_segment_fixed_size");
-static_assert(
-    offsetof(kernel_descriptor_t, reserved0) == 8,
-    "invalid offset for reserved0");
-static_assert(
-    offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == 16,
-    "invalid offset for kernel_code_entry_byte_offset");
-static_assert(
-    offsetof(kernel_descriptor_t, reserved1) == 24,
-    "invalid offset for reserved1");
-static_assert(
-    offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == 44,
-    "invalid offset for compute_pgm_rsrc3");
-static_assert(
-    offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48,
-    "invalid offset for compute_pgm_rsrc1");
-static_assert(
-    offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == 52,
-    "invalid offset for compute_pgm_rsrc2");
-static_assert(
-    offsetof(kernel_descriptor_t, kernel_code_properties) == 56,
-    "invalid offset
for kernel_code_properties"); -static_assert( - offsetof(kernel_descriptor_t, reserved2) == 58, - "invalid offset for reserved2"); +static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) == + GROUP_SEGMENT_FIXED_SIZE_OFFSET, + "invalid offset for group_segment_fixed_size"); +static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) == + PRIVATE_SEGMENT_FIXED_SIZE_OFFSET, + "invalid offset for private_segment_fixed_size"); +static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET, + "invalid offset for reserved0"); +static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == + KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET, + "invalid offset for kernel_code_entry_byte_offset"); +static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET, + "invalid offset for reserved1"); +static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == + COMPUTE_PGM_RSRC3_OFFSET, + "invalid offset for compute_pgm_rsrc3"); +static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == + COMPUTE_PGM_RSRC1_OFFSET, + "invalid offset for compute_pgm_rsrc1"); +static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == + COMPUTE_PGM_RSRC2_OFFSET, + "invalid offset for compute_pgm_rsrc2"); +static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) == + KERNEL_CODE_PROPERTIES_OFFSET, + "invalid offset for kernel_code_properties"); +static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET, + "invalid offset for reserved2"); } // end namespace amdhsa } // end namespace llvm diff --git a/llvm/include/llvm/Support/CFGUpdate.h b/llvm/include/llvm/Support/CFGUpdate.h index af4cd6ed1f1df4..3a12b9d86c18a8 100644 --- a/llvm/include/llvm/Support/CFGUpdate.h +++ b/llvm/include/llvm/Support/CFGUpdate.h @@ -14,7 +14,6 @@ #ifndef LLVM_SUPPORT_CFGUPDATE_H #define LLVM_SUPPORT_CFGUPDATE_H -#include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/Compiler.h" diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 6a6f97ae78b04d..5b940551dad595 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -194,6 +194,12 @@ def undef_to_negative_one: GICombineRule< [{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]), (apply [{ Helper.replaceInstWithConstant(*${root}, -1); }])>; +def binop_left_undef_to_zero: GICombineRule< + (defs root:$root), + (match (wip_match_opcode G_SHL):$root, + [{ return Helper.matchOperandIsUndef(*${root}, 1); }]), + (apply [{ Helper.replaceInstWithConstant(*${root}, 0); }])>; + // Instructions where if any source operand is undef, the instruction can be // replaced with undef. def propagate_undef_any_op: GICombineRule< @@ -384,6 +390,7 @@ def not_cmp_fold : GICombineRule< // FIXME: These should use the custom predicate feature once it lands. def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, + binop_left_undef_to_zero, propagate_undef_any_op, propagate_undef_all_ops, propagate_undef_shuffle_mask, diff --git a/llvm/include/llvm/Testing/Support/SupportHelpers.h b/llvm/include/llvm/Testing/Support/SupportHelpers.h index 3517361041b940..2419fc95d8178c 100644 --- a/llvm/include/llvm/Testing/Support/SupportHelpers.h +++ b/llvm/include/llvm/Testing/Support/SupportHelpers.h @@ -152,6 +152,9 @@ class TempDir { /// The path to the temporary directory. 
StringRef path() const { return Path; }

+  /// The null-terminated C string pointing to the path.
+  const char *c_str() { return Path.c_str(); }
+
  /// Creates a new path by appending the argument to the path of the managed
  /// directory using the native path separator.
  SmallString<128> path(StringRef component) const {
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 75e7ccde4dba75..5c0a90339150fb 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -133,8 +133,10 @@ struct AAIsDead;

class Function;

-/// Simple enum classes that forces properties to be spelled out explicitly.
-///
+/// The value passed to the command line option that defines the maximal
+/// initialization chain length.
+extern unsigned MaxInitializationChainLength;
+
///{
enum class ChangeStatus {
  CHANGED,
@@ -337,8 +339,14 @@ struct IRPosition {

  /// Return the associated function, if any.
  Function *getAssociatedFunction() const {
-    if (auto *CB = dyn_cast<CallBase>(&getAnchorValue()))
+    if (auto *CB = dyn_cast<CallBase>(&getAnchorValue())) {
+      // We reuse the logic that associates callback callees to arguments of a
+      // call site here to identify the callback callee as the associated
+      // function.
+      if (Argument *Arg = getAssociatedArgument())
+        return Arg->getParent();
      return CB->getCalledFunction();
+    }
    return getAnchorScope();
  }

@@ -386,10 +394,11 @@ struct IRPosition {

  /// Return the value this abstract attribute is associated with.
  Value &getAssociatedValue() const {
-    if (getArgNo() < 0 || isa<CallBase>(&getAnchorValue()))
+    if (getCallSiteArgNo() < 0 || isa<CallBase>(&getAnchorValue()))
      return getAnchorValue();
    assert(isa<CallBase>(&getAnchorValue()) && "Expected a call base!");
-    return *cast<CallBase>(&getAnchorValue())->getArgOperand(getArgNo());
+    return *cast<CallBase>(&getAnchorValue())
+                ->getArgOperand(getCallSiteArgNo());
  }

  /// Return the type this abstract attribute is associated with.
@@ -399,19 +408,22 @@
    return getAssociatedValue().getType();
  }

-  /// Return the argument number of the associated value if it is an argument or
-  /// call site argument, otherwise a negative value.
-  int getArgNo() const {
-    switch (getPositionKind()) {
-    case IRPosition::IRP_ARGUMENT:
-      return cast<Argument>(getAsValuePtr())->getArgNo();
-    case IRPosition::IRP_CALL_SITE_ARGUMENT: {
-      Use &U = *getAsUsePtr();
-      return cast<CallBase>(U.getUser())->getArgOperandNo(&U);
-    }
-    default:
-      return -1;
-    }
+  /// Return the callee argument number of the associated value if it is an
+  /// argument or call site argument, otherwise a negative value. In contrast to
+  /// `getCallSiteArgNo` this method will always return the "argument number"
+  /// from the perspective of the callee. This may not be the same as the call
+  /// site if this is a callback call.
+  int getCalleeArgNo() const {
+    return getArgNo(/* CallbackCalleeArgIfApplicable */ true);
+  }
+
+  /// Return the call site argument number of the associated value if it is an
+  /// argument or call site argument, otherwise a negative value. In contrast to
+  /// `getCalleeArgNo` this method will always return the "operand number" from
+  /// the perspective of the call site. This may not be the same as the callee
+  /// perspective if this is a callback call.
+  int getCallSiteArgNo() const {
+    return getArgNo(/* CallbackCalleeArgIfApplicable */ false);
  }

  /// Return the index in the attribute list for this position.
@@ -428,7 +440,7 @@ struct IRPosition {
      return AttributeList::ReturnIndex;
    case IRPosition::IRP_ARGUMENT:
    case IRPosition::IRP_CALL_SITE_ARGUMENT:
-      return getArgNo() + AttributeList::FirstArgIndex;
+      return getCallSiteArgNo() + AttributeList::FirstArgIndex;
    }
    llvm_unreachable(
        "There is no attribute index for a floating or invalid position!");
@@ -513,6 +525,17 @@ struct IRPosition {
    }
  }

+  /// Return true if the position is an argument or call site argument.
+  bool isArgumentPosition() const {
+    switch (getPositionKind()) {
+    case IRPosition::IRP_ARGUMENT:
+    case IRPosition::IRP_CALL_SITE_ARGUMENT:
+      return true;
+    default:
+      return false;
+    }
+  }
+
  /// Special DenseMap key values.
  ///
  ///{
@@ -559,6 +582,25 @@ struct IRPosition {
    verify();
  }

+  /// Return the callee argument number of the associated value if it is an
+  /// argument or call site argument. See also `getCalleeArgNo` and
+  /// `getCallSiteArgNo`.
+  int getArgNo(bool CallbackCalleeArgIfApplicable) const {
+    if (CallbackCalleeArgIfApplicable)
+      if (Argument *Arg = getAssociatedArgument())
+        return Arg->getArgNo();
+    switch (getPositionKind()) {
+    case IRPosition::IRP_ARGUMENT:
+      return cast<Argument>(getAsValuePtr())->getArgNo();
+    case IRPosition::IRP_CALL_SITE_ARGUMENT: {
+      Use &U = *getAsUsePtr();
+      return cast<CallBase>(U.getUser())->getArgOperandNo(&U);
+    }
+    default:
+      return -1;
+    }
+  }
+
  /// IRPosition for the use \p U. The position kind \p PK needs to be
  /// IRP_CALL_SITE_ARGUMENT, the anchor value is the user, the associated value
  /// the used value.
@@ -1071,6 +1113,9 @@ struct Attributor {
    Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) ||
                  FnScope->hasFnAttribute(Attribute::OptimizeNone);

+    // Avoid too many nested initializations to prevent a stack overflow.
+    Invalidate |= InitializationChainLength > MaxInitializationChainLength;
+
    // Bootstrap the new attribute with an initial update to propagate
    // information, e.g., function -> call site. If it is not on a given
    // Allowed we will not perform updates at all.
@@ -1081,7 +1126,9 @@ struct Attributor {

    {
      TimeTraceScope TimeScope(AA.getName() + "::initialize");
+      ++InitializationChainLength;
      AA.initialize(*this);
+      --InitializationChainLength;
    }

    // Initialize and update is allowed for code outside of the current function
@@ -1615,6 +1662,9 @@ struct Attributor {
    CLEANUP,
  } Phase = AttributorPhase::SEEDING;

+  /// The current initialization chain length. Tracked to avoid stack overflows.
+  unsigned InitializationChainLength = 0;
+
  /// Functions, blocks, and instructions we delete after manifest is done.
  ///
  ///{
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
index 46f6ca0462f8b2..fb3a7490346f40 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
@@ -25,7 +25,7 @@ struct SimplifyCFGOptions {
  bool ForwardSwitchCondToPhi = false;
  bool ConvertSwitchToLookupTable = false;
  bool NeedCanonicalLoop = true;
-  bool HoistCommonInsts = true;
+  bool HoistCommonInsts = false;
  bool SinkCommonInsts = false;
  bool SimplifyCondBranch = true;
  bool FoldTwoEntryPHINode = true;
diff --git a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
index ff70446e163d48..ce7cb16b3886d2 100644
--- a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
+++ b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
@@ -7,10 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This pass is used to ensure that functions have at most one return and one
-// unwind instruction in them. Additionally, it keeps track of which node is
-// the new exit node of the CFG. If there are no return or unwind instructions
-// in the function, the getReturnBlock/getUnwindBlock methods will return a null
-// pointer.
+// unreachable instruction in them.
//
//===----------------------------------------------------------------------===//

@@ -24,10 +21,6 @@ namespace llvm {
class BasicBlock;

struct UnifyFunctionExitNodes : public FunctionPass {
-  BasicBlock *ReturnBlock = nullptr;
-  BasicBlock *UnwindBlock = nullptr;
-  BasicBlock *UnreachableBlock;
-
public:
  static char ID; // Pass identification, replacement for typeid
  UnifyFunctionExitNodes();
@@ -35,13 +28,6 @@ struct UnifyFunctionExitNodes : public FunctionPass {
  // We can preserve non-critical-edgeness when we unify function exit nodes
  void getAnalysisUsage(AnalysisUsage &AU) const override;

-  // getReturn|Unwind|UnreachableBlock - Return the new single (or nonexistent)
-  // return, unwind, or unreachable basic blocks in the CFG.
-  //
-  BasicBlock *getReturnBlock() const { return ReturnBlock; }
-  BasicBlock *getUnwindBlock() const { return UnwindBlock; }
-  BasicBlock *getUnreachableBlock() const { return UnreachableBlock; }
-
  bool runOnFunction(Function &F) override;
};

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 7c13b41bc7e648..e59c0a84044aae 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5274,9 +5274,6 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
    // on the outer abs.
    if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(), m_Value())))
      return Op0;
-    // If the sign bit is clear already, then abs does not do anything.
-    if (isKnownNonNegative(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
-      return Op0;
    break;

  case Intrinsic::smax:
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 2428d57d2809fa..a19c1d78526b23 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -166,6 +166,12 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
      // These intrinsics don't really modify the memory, but returning Mod
      // will allow them to be handled conservatively.
      return ModRefInfo::Mod;
+    case Intrinsic::masked_load:
+      Loc = MemoryLocation::getForArgument(II, 0, TLI);
+      return ModRefInfo::Ref;
+    case Intrinsic::masked_store:
+      Loc = MemoryLocation::getForArgument(II, 1, TLI);
+      return ModRefInfo::Mod;
    default:
      break;
    }
@@ -442,7 +448,9 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
      // If we reach a lifetime begin or end marker, then the query ends here
      // because the value is undefined.
-      if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+      Intrinsic::ID ID = II->getIntrinsicID();
+      switch (ID) {
+      case Intrinsic::lifetime_start:
        // FIXME: This only considers queries directly on the invariant-tagged
        // pointer, not on query pointers that are indexed off of them. It'd
        // be nice to handle that at some point (the right approach is to use
@@ -450,6 +458,19 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
        if (BatchAA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
          return MemDepResult::getDef(II);
        continue;
+      case Intrinsic::masked_load:
+      case Intrinsic::masked_store: {
+        MemoryLocation Loc;
+        /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
+        AliasResult R = BatchAA.alias(Loc, MemLoc);
+        if (R == NoAlias)
+          continue;
+        if (R == MustAlias)
+          return MemDepResult::getDef(II);
+        if (ID == Intrinsic::masked_load)
+          continue;
+        return MemDepResult::getClobber(II);
+      }
      }
    }
diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp
index 9694036ce4767c..fcea03a118bfc7 100644
--- a/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/llvm/lib/Analysis/MemoryLocation.cpp
@@ -176,6 +176,21 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
            cast<ConstantInt>(II->getArgOperand(0))->getZExtValue()),
        AATags);

+  case Intrinsic::masked_load:
+    assert(ArgIdx == 0 && "Invalid argument index");
+    return MemoryLocation(
+        Arg,
+        LocationSize::upperBound(DL.getTypeStoreSize(II->getType())),
+        AATags);
+
+  case Intrinsic::masked_store:
+    assert(ArgIdx == 1 && "Invalid argument index");
+    return MemoryLocation(
+        Arg,
+        LocationSize::upperBound(
+            DL.getTypeStoreSize(II->getArgOperand(0)->getType())),
+        AATags);
+
  case Intrinsic::invariant_end:
    // The first argument to an invariant.end is a "descriptor" type (e.g. a
    // pointer to a empty struct) which is never actually dereferenced.
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 40d89fff045878..11d92bc816e9f8 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -12506,3 +12506,28 @@ bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,
        MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0)));
  return false;
}
+
+const SCEV* ScalarEvolution::computeMaxBackedgeTakenCount(const Loop *L) {
+  SmallVector<BasicBlock *, 8> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+
+  // Form an expression for the maximum exit count possible for this loop. We
+  // merge the max and exact information to approximate a version of
+  // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
+  SmallVector<const SCEV *, 4> ExitCounts;
+  for (BasicBlock *ExitingBB : ExitingBlocks) {
+    const SCEV *ExitCount = getExitCount(L, ExitingBB);
+    if (isa<SCEVCouldNotCompute>(ExitCount))
+      ExitCount = getExitCount(L, ExitingBB,
+                               ScalarEvolution::ConstantMaximum);
+    if (!isa<SCEVCouldNotCompute>(ExitCount)) {
+      assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
+             "We should only have known counts for exiting blocks that "
+             "dominate latch!");
+      ExitCounts.push_back(ExitCount);
+    }
+  }
+  if (ExitCounts.empty())
+    return getCouldNotCompute();
+  return getUMinFromMismatchedTypes(ExitCounts);
+}
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 6e5a7195bb1943..5eb66e96e1d858 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1739,6 +1739,26 @@ static void computeKnownBitsFromOperator(const Operator *I,
        }
        break;
      }
+      case Intrinsic::umin:
+        computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+        computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+        Known = KnownBits::umin(Known, Known2);
+        break;
+      case Intrinsic::umax:
+        computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+        computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+        Known = KnownBits::umax(Known, Known2);
+        break;
+      case Intrinsic::smin:
+        computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+        computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+        Known = KnownBits::smin(Known, Known2);
+        break;
+      case Intrinsic::smax:
+        computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+        computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+        Known = KnownBits::smax(Known, Known2);
+        break;
      case Intrinsic::x86_sse42_crc32_64_64:
        Known.Zero.setBitsFrom(32);
        break;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 538107cecd8b3b..57bf500ba89235 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//

#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -547,22 +548,23 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
    EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS);

  // Emit warnings if we use reserved registers on the clobber list, as
-  // that might give surprising results.
-  std::vector<std::string> RestrRegs;
+  // that might lead to undefined behaviour.
+  SmallVector<Register, 8> RestrRegs;
+  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  // Start with the first operand descriptor, and iterate over them.
  for (unsigned I = InlineAsm::MIOp_FirstOperand, NumOps = MI->getNumOperands();
       I < NumOps; ++I) {
    const MachineOperand &MO = MI->getOperand(I);
-    if (MO.isImm()) {
-      unsigned Flags = MO.getImm();
-      const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
-      if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber &&
-          !TRI->isAsmClobberable(*MF, MI->getOperand(I + 1).getReg())) {
-        RestrRegs.push_back(TRI->getName(MI->getOperand(I + 1).getReg()));
-      }
-      // Skip to one before the next operand descriptor, if it exists.
- I += InlineAsm::getNumOperandRegisters(Flags); + if (!MO.isImm()) + continue; + unsigned Flags = MO.getImm(); + if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber) { + Register Reg = MI->getOperand(I + 1).getReg(); + if (!TRI->isAsmClobberable(*MF, Reg)) + RestrRegs.push_back(Reg); } + // Skip to one before the next operand descriptor, if it exists. + I += InlineAsm::getNumOperandRegisters(Flags); } if (!RestrRegs.empty()) { @@ -572,14 +574,15 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { SrcMgr.getMemoryBuffer(BufNum)->getBuffer().begin()); std::string Msg = "inline asm clobber list contains reserved registers: "; - for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; I++) { + for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; ++I) { if(I != RestrRegs.begin()) Msg += ", "; - Msg += *I; + Msg += TRI->getName(*I); } - std::string Note = "Reserved registers on the clobber list may not be " - "preserved across the asm statement, and clobbering them may " - "lead to undefined behaviour."; + const char *Note = + "Reserved registers on the clobber list may not be " + "preserved across the asm statement, and clobbering them may " + "lead to undefined behaviour."; SrcMgr.PrintMessage(Loc, SourceMgr::DK_Warning, Msg); SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note); } diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 10cd58f17e9aaa..d58ba7cf5a8c65 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1989,6 +1989,12 @@ bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) { MRI); } +bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + return MO.isReg() && + getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); +} + bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) { assert(MI.getNumDefs() == 1 && "Expected only one def?"); Builder.setInstr(MI); diff --git a/llvm/lib/CodeGen/LiveRegUnits.cpp b/llvm/lib/CodeGen/LiveRegUnits.cpp index b2731aa0e7dbca..ea2075bc139dfd 100644 --- a/llvm/lib/CodeGen/LiveRegUnits.cpp +++ b/llvm/lib/CodeGen/LiveRegUnits.cpp @@ -11,15 +11,11 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveRegUnits.h" - #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/MC/MCRegisterInfo.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index f9d099e029956d..715a2ba4667d23 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -59,10 +59,8 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 
286d54386357f7..e5c5e5341a6801 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7011,12 +7011,15 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { // Check if the offsets line up for the native data layout of this target. bool NeedBswap = false; + bool NeedRotate = false; if (!checkOffsets(Layout.isLittleEndian())) { // Special-case: check if byte offsets line up for the opposite endian. - // TODO: We could use rotates for 16/32-bit merge pairs. - if (NarrowNumBits != 8 || !checkOffsets(Layout.isBigEndian())) + if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian())) + NeedBswap = true; + else if (NumStores == 2 && checkOffsets(Layout.isBigEndian())) + NeedRotate = true; + else return SDValue(); - NeedBswap = true; } SDLoc DL(N); @@ -7026,11 +7029,16 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue); } - // Before legalize we can introduce illegal bswaps which will be later + // Before legalize we can introduce illegal bswaps/rotates which will be later // converted to an explicit bswap sequence. This way we end up with a single // store and byte shuffling instead of several stores and byte shuffling. - if (NeedBswap) + if (NeedBswap) { SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue); + } else if (NeedRotate) { + assert(WideNumBits % 2 == 0 && "Unexpected type for rotate"); + SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT); + SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt); + } SDValue NewStore = DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(), @@ -7390,9 +7398,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (N0.hasOneUse()) { // FIXME Can we handle multiple uses? Could we token factor the chain // results from the new/old setcc? - SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC, - N0.getOperand(0), - N0Opcode == ISD::STRICT_FSETCCS); + SDValue SetCC = + DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC, SDNodeFlags(), + N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS); CombineTo(N, SetCC); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1)); recursivelyDeleteUnusedNodes(N0.getNode()); diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index ff84fdd62075cc..e2da367cfe3f6a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -89,18 +89,9 @@ static unsigned getStatepointGCArgStartIdx(MachineInstr *MI) { "STATEPOINT node expected"); unsigned OperIdx = StatepointOpers(MI).getNumDeoptArgsIdx(); unsigned NumDeopts = MI->getOperand(OperIdx).getImm(); - // At this point stack references has not been lowered yet, so they - // take single operand. 
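Why the two-store case in mergeTruncStores above is a rotate rather than a bswap: storing the high half at the lower address and the low half above it writes exactly the bytes of one wide store of the value rotated by half its width. A standalone sketch (not from the patch) for the 2 x 16-bit case on a little-endian host:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xAABBCCDD;
  uint32_t Rot = (X >> 16) | (X << 16); // ROTR by WideNumBits / 2
  assert(Rot == 0xCCDDAABB);
  // Little-endian bytes of Rot are BB AA DD CC: the high half of X first,
  // then the low half -- the same bytes the two narrow stores produced.
}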
++OperIdx; - while (NumDeopts--) { - MachineOperand &MO = MI->getOperand(OperIdx); - if (MO.isImm() && MO.getImm() == StackMaps::ConstantOp) { - ++OperIdx; - assert(MI->getOperand(OperIdx).isImm() && - "Unexpected statepoint operand"); - } - ++OperIdx; - } + while (NumDeopts--) + OperIdx = StackMaps::getNextMetaArgIdx(MI, OperIdx); return OperIdx; } @@ -1002,11 +993,14 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, assert(!HasPhysRegOuts && "STATEPOINT mishandled"); MachineInstr *MI = MIB; unsigned Def = 0; - unsigned Use = getStatepointGCArgStartIdx(MI) + 1; + unsigned Use = getStatepointGCArgStartIdx(MI); + Use = StackMaps::getNextMetaArgIdx(MI, Use); // first derived + assert(Use < MI->getNumOperands()); while (Def < NumDefs) { if (MI->getOperand(Use).isReg()) MI->tieOperands(Def++, Use); - Use += 2; + Use = StackMaps::getNextMetaArgIdx(MI, Use); // next base + Use = StackMaps::getNextMetaArgIdx(MI, Use); // next derived } } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f6e4b9363d1a13..7751ebb7705a3f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1735,12 +1735,16 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode( if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unorder operation, // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). - SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling); + SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, SDNodeFlags(), Chain, + IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, SDNodeFlags(), Chain, + IsSignaling); } else { // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) - SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling); + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, SDNodeFlags(), Chain, + IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, SDNodeFlags(), Chain, + IsSignaling); } if (Chain) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1), diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 09b5f14bdb7b45..2399525de66591 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1777,17 +1777,18 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, // The following can be improved, but not that much. SDValue Tmp1, Tmp2, Tmp3, OutputChain; Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, - RHSHi, ISD::SETOEQ, Chain, IsSignaling); + RHSHi, ISD::SETOEQ, SDNodeFlags(), Chain, IsSignaling); OutputChain = Tmp1->getNumValues() > 1 ? Tmp1.getValue(1) : SDValue(); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, - RHSLo, CCCode, OutputChain, IsSignaling); + RHSLo, CCCode, SDNodeFlags(), OutputChain, IsSignaling); OutputChain = Tmp2->getNumValues() > 1 ? Tmp2.getValue(1) : SDValue(); Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); - Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, - RHSHi, ISD::SETUNE, OutputChain, IsSignaling); + Tmp1 = + DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, + ISD::SETUNE, SDNodeFlags(), OutputChain, IsSignaling); OutputChain = Tmp1->getNumValues() > 1 ? 
Tmp1.getValue(1) : SDValue(); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, - RHSHi, CCCode, OutputChain, IsSignaling); + RHSHi, CCCode, SDNodeFlags(), OutputChain, IsSignaling); OutputChain = Tmp2->getNumValues() > 1 ? Tmp2.getValue(1) : SDValue(); Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 77a79a0479ef76..e1881c20e5b3b5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2789,16 +2789,38 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); + SDValue N0 = N->getOperand(0); + GetExpandedInteger(N0, Lo, Hi); + EVT NVT = Lo.getValueType(); + + // If we have ADDCARRY, use the expanded form of the sra+add+xor sequence we + // use in LegalizeDAG. The ADD part of the expansion is based on + // ExpandIntRes_ADDSUB which also uses ADDCARRY/UADDO after checking that + // ADDCARRY is LegalOrCustom. Each of the pieces here can be further expanded + // if needed. Shift expansion has a special case for filling with sign bits + // so that we will only end up with one SRA. + bool HasAddCarry = TLI.isOperationLegalOrCustom( + ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + if (HasAddCarry) { + EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG); + SDValue Sign = + DAG.getNode(ISD::SRA, dl, NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy)); + SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT)); + Lo = DAG.getNode(ISD::UADDO, dl, VTList, Lo, Sign); + Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Hi, Sign, Lo.getValue(1)); + Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign); + Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign); + return; + } + // abs(HiLo) -> (Hi < 0 ? 
-HiLo : HiLo) EVT VT = N->getValueType(0); - SDValue N0 = N->getOperand(0); SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), N0); SDValue NegLo, NegHi; SplitInteger(Neg, NegLo, NegHi); - GetExpandedInteger(N0, Lo, Hi); - EVT NVT = Lo.getValueType(); SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT), DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT); Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 093f7b1680edd6..764472e570c047 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3307,19 +3307,34 @@ SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) { } SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { + LLVMContext &Ctx = *DAG.getContext(); SDValue InOp = N->getOperand(0); SDLoc DL(N); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); EVT InVT = InOp.getValueType(); - EVT InEltVT = InVT.getVectorElementType(); - EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); unsigned Opcode = N->getOpcode(); - unsigned InVTNumElts = InVT.getVectorNumElements(); const SDNodeFlags Flags = N->getFlags(); + + // Handle the case of ZERO_EXTEND where the promoted InVT element size does + // not equal that of WidenVT. + if (N->getOpcode() == ISD::ZERO_EXTEND && + getTypeAction(InVT) == TargetLowering::TypePromoteInteger && + TLI.getTypeToTransformTo(Ctx, InVT).getScalarSizeInBits() != + WidenVT.getScalarSizeInBits()) { + InOp = ZExtPromotedInteger(InOp); + InVT = InOp.getValueType(); + if (WidenVT.getScalarSizeInBits() < InVT.getScalarSizeInBits()) + Opcode = ISD::TRUNCATE; + } + + EVT InEltVT = InVT.getVectorElementType(); + EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts); + unsigned InVTNumElts = InVT.getVectorNumElements(); + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 5e6cb03f3839c8..7bcbb7ccddc8d9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1120,27 +1120,6 @@ void SelectionDAGBuilder::visit(const Instruction &I) { visit(I.getOpcode(), I); - if (auto *FPMO = dyn_cast(&I)) { - // ConstrainedFPIntrinsics handle their own FMF. - if (!isa(&I)) { - // Propagate the fast-math-flags of this IR instruction to the DAG node that - // maps to this instruction. - // TODO: We could handle all flags (nsw, etc) here. - // TODO: If an IR instruction maps to >1 node, only the final node will have - // flags set. 
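The ADDCARRY-based ABS expansion above leans on a standard identity; a scalar sketch (not from the patch) that can be checked directly:

// With Sign = X >> (N-1) (arithmetic shift: all-ones when X is negative,
// zero otherwise), abs(X) == (X + Sign) ^ Sign. The patch evaluates the add
// across the Lo/Hi halves with UADDO/ADDCARRY and XORs each half with Sign.
#include <cassert>
#include <cstdint>

int main() {
  for (int64_t X : {int64_t(-42), int64_t(42)}) {
    int64_t Sign = X >> 63; // arithmetic shift on mainstream compilers
    assert(((X + Sign) ^ Sign) == 42);
  }
}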
-      // TODO: The handling of flags should be improved, see
-      // https://reviews.llvm.org/D86871
-      if (SDNode *Node = getNodeForIRValue(&I)) {
-        SDNodeFlags IncomingFlags;
-        IncomingFlags.copyFMF(*FPMO);
-        if (!Node->getFlags().isDefined())
-          Node->setFlags(IncomingFlags);
-        else
-          Node->intersectFlagsWith(IncomingFlags);
-      }
-    }
-  }
-
   if (!I.isTerminator() && !HasTailCall &&
       !isa<GCStatepointInst>(I)) // statepoints handle their exports internally
     CopyToExportRegsIfNeeded(&I);
@@ -3023,9 +3002,10 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
     Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
     Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
   }
-  if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) {
+  if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
     Flags.setExact(ExactOp->isExact());
-  }
+  if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+    Flags.copyFMF(*FPOp);
 
   SDValue Op1 = getValue(I.getOperand(0));
   SDValue Op2 = getValue(I.getOperand(1));
@@ -3135,13 +3115,16 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
   SDValue Op2 = getValue(I.getOperand(1));
 
   ISD::CondCode Condition = getFCmpCondCode(predicate);
-  auto *FPMO = dyn_cast<FPMathOperator>(&I);
-  if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath)
+  auto *FPMO = cast<FPMathOperator>(&I);
+  if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath)
     Condition = getFCmpCodeWithoutNaN(Condition);
 
+  SDNodeFlags Flags;
+  Flags.copyFMF(*FPMO);
+
   EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                         I.getType());
-  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
+  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition, Flags));
 }
 
 // Check if the condition of the select has one use or two users that are both
@@ -3169,6 +3152,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
 
   bool IsUnaryAbs = false;
 
+  SDNodeFlags Flags;
+  if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+    Flags.copyFMF(*FPOp);
+
   // Min/max matching is only viable if all output VTs are the same.
   if (is_splat(ValueVTs)) {
     EVT VT = ValueVTs[0];
@@ -3272,7 +3259,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
         Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
       Values[i] = DAG.getNode(
           OpCode, getCurSDLoc(),
-          LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops);
+          LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops, Flags);
     }
   }
 
@@ -4876,7 +4863,7 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
 /// expandExp - Lower an exp intrinsic. Handles the special sequences for
 /// limited-precision mode.
 static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
-                         const TargetLowering &TLI) {
+                         const TargetLowering &TLI, SDNodeFlags Flags) {
   if (Op.getValueType() == MVT::f32 &&
       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
@@ -4892,13 +4879,13 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
   }
 
   // No special expansion.
-  return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
+  return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op, Flags);
 }
 
 /// expandLog - Lower a log intrinsic. Handles the special sequences for
 /// limited-precision mode.
 static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
-                         const TargetLowering &TLI) {
+                         const TargetLowering &TLI, SDNodeFlags Flags) {
   // TODO: What fast-math-flags should be set on the floating-point nodes?
 
   if (Op.getValueType() == MVT::f32 &&
@@ -4991,13 +4978,13 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
   }
 
   // No special expansion.
- return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op, Flags); } /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -5088,13 +5075,13 @@ static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. - return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op, Flags); } /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -5178,25 +5165,26 @@ static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. - return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op, Flags); } /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) return getLimitedPrecisionExp2(Op, dl, DAG); // No special expansion. - return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op, Flags); } /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, - SelectionDAG &DAG, const TargetLowering &TLI) { + SelectionDAG &DAG, const TargetLowering &TLI, + SDNodeFlags Flags) { bool IsExp10 = false; if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -5219,7 +5207,7 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, } // No special expansion. - return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); + return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS, Flags); } /// ExpandPowI - Expand a llvm.powi intrinsic. @@ -5640,6 +5628,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DebugLoc dl = getCurDebugLoc(); SDValue Res; + SDNodeFlags Flags; + if (auto *FPOp = dyn_cast(&I)) + Flags.copyFMF(*FPOp); + switch (Intrinsic) { default: // By default, turn this into a target intrinsic node. 
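The recurring pattern in the SelectionDAGBuilder changes above: fast-math flags now travel from the IR operator to the node at creation time, instead of being patched onto whatever node visit() last produced. A hedged sketch of the idiom (the helper name is ours, not part of the patch):

#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"
using namespace llvm;

static SDNodeFlags flagsForInst(const Instruction &I) {
  SDNodeFlags Flags;
  if (const auto *FPOp = dyn_cast<FPMathOperator>(&I))
    Flags.copyFMF(*FPOp); // nnan, ninf, nsz, arcp, contract, afn, reassoc
  return Flags; // passed as the trailing argument of DAG.getNode(...)
}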
@@ -6054,23 +6046,26 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(1)), DAG)); return; case Intrinsic::log: - setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::log2: - setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, + expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::log10: - setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, + expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::exp: - setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::exp2: - setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, + expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), DAG, TLI)); + getValue(I.getArgOperand(1)), DAG, TLI, Flags)); return; case Intrinsic::sqrt: case Intrinsic::fabs: @@ -6103,7 +6098,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)))); + getValue(I.getArgOperand(0)), Flags)); return; } case Intrinsic::lround: @@ -6128,38 +6123,37 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::maxnum: setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::minimum: setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::maximum: setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::fma: - setValue(&I, DAG.getNode(ISD::FMA, sdl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), - getValue(I.getArgOperand(2)))); + setValue(&I, DAG.getNode( + ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), Flags)); return; #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: @@ -6174,17 +6168,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - getValue(I.getArgOperand(2)))); + getValue(I.getArgOperand(2)), Flags)); } else { // TODO: Intrinsic calls 
should have fast-math-flags. - SDValue Mul = DAG.getNode(ISD::FMUL, sdl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1))); + SDValue Mul = DAG.getNode( + ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags); SDValue Add = DAG.getNode(ISD::FADD, sdl, getValue(I.getArgOperand(0)).getValueType(), - Mul, - getValue(I.getArgOperand(2))); + Mul, getValue(I.getArgOperand(2)), Flags); setValue(&I, Add); } return; @@ -7532,8 +7524,12 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, if (!I.onlyReadsMemory()) return false; + SDNodeFlags Flags; + Flags.copyFMF(cast(I)); + SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp)); + setValue(&I, + DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp, Flags)); return true; } @@ -7548,10 +7544,13 @@ bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, if (!I.onlyReadsMemory()) return false; + SDNodeFlags Flags; + Flags.copyFMF(cast(I)); + SDValue Tmp0 = getValue(I.getArgOperand(0)); SDValue Tmp1 = getValue(I.getArgOperand(1)); EVT VT = Tmp0.getValueType(); - setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1)); + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1, Flags)); return true; } @@ -8951,24 +8950,26 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, SDLoc dl = getCurSDLoc(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Res; - FastMathFlags FMF; - if (isa(I)) - FMF = I.getFastMathFlags(); + SDNodeFlags SDFlags; + if (auto *FPMO = dyn_cast(&I)) + SDFlags.copyFMF(*FPMO); switch (Intrinsic) { case Intrinsic::experimental_vector_reduce_v2_fadd: - if (FMF.allowReassoc()) + if (SDFlags.hasAllowReassociation()) Res = DAG.getNode(ISD::FADD, dl, VT, Op1, - DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2)); + DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags), + SDFlags); else - Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); + Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2, SDFlags); break; case Intrinsic::experimental_vector_reduce_v2_fmul: - if (FMF.allowReassoc()) + if (SDFlags.hasAllowReassociation()) Res = DAG.getNode(ISD::FMUL, dl, VT, Op1, - DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2)); + DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags), + SDFlags); else - Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); + Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2, SDFlags); break; case Intrinsic::experimental_vector_reduce_add: Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1); @@ -8998,10 +8999,10 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); break; case Intrinsic::experimental_vector_reduce_fmax: - Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1); + Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags); break; case Intrinsic::experimental_vector_reduce_fmin: - Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1); + Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags); break; default: llvm_unreachable("Unhandled vector reduce intrinsic"); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 7bad0551981408..e51e7bf89f8e7b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -518,13 +518,6 @@ class SelectionDAGBuilder { SDValue getValue(const Value *V); - /// Return the SDNode for the specified IR value if it exists. - SDNode *getNodeForIRValue(const Value *V) { - if (NodeMap.find(V) == NodeMap.end()) - return nullptr; - return NodeMap[V].getNode(); - } - SDValue getNonRegisterValue(const Value *V); SDValue getValueImpl(const Value *V); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index ae98edb74466da..cbdd027f55fef3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6409,7 +6409,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, SDValue Sel; if (Node->isStrictFPOpcode()) { - Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, + Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, SDNodeFlags(), Node->getOperand(0), /*IsSignaling*/ true); Chain = Sel.getValue(1); } else { diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp index 113d477ec80a72..806ba1aa982261 100644 --- a/llvm/lib/CodeGen/StackMaps.cpp +++ b/llvm/lib/CodeGen/StackMaps.cpp @@ -88,6 +88,29 @@ StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { llvm_unreachable("Unsupported stackmap version!"); } +unsigned StackMaps::getNextMetaArgIdx(MachineInstr *MI, unsigned CurIdx) { + assert(CurIdx < MI->getNumOperands() && "Bad meta arg index"); + const auto &MO = MI->getOperand(CurIdx); + if (MO.isImm()) { + switch (MO.getImm()) { + default: + llvm_unreachable("Unrecognized operand type."); + case StackMaps::DirectMemRefOp: + CurIdx += 2; + break; + case StackMaps::IndirectMemRefOp: + CurIdx += 3; + break; + case StackMaps::ConstantOp: + ++CurIdx; + break; + } + } + ++CurIdx; + assert(CurIdx < MI->getNumOperands() && "points past operand list"); + return CurIdx; +} + /// Go up the super-register chain until we hit a valid dwarf register number. 
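For reference, the operand footprint that getNextMetaArgIdx above walks over, expressed as a small illustrative helper (the marker values are the StackMaps operand markers used by the function; the helper itself is not part of the patch):

#include "llvm/CodeGen/StackMaps.h"
using namespace llvm;

unsigned metaArgSpan(int64_t Marker) {
  switch (Marker) {
  case StackMaps::DirectMemRefOp:   return 3; // marker, register, offset
  case StackMaps::IndirectMemRefOp: return 4; // marker, size, register, offset
  case StackMaps::ConstantOp:       return 2; // marker, immediate value
  default:                          return 1; // a non-marker operand spans itself
  }
}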
static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { int RegNum = TRI->getDwarfRegNum(Reg, false); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index d31c358798211a..47eba48c279dd8 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -1036,7 +1036,9 @@ DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) { static bool getFunctionNameAndStartLineForAddress(DWARFCompileUnit *CU, uint64_t Address, FunctionNameKind Kind, + DILineInfoSpecifier::FileLineInfoKind FileNameKind, std::string &FunctionName, + std::string &StartFile, uint32_t &StartLine) { // The address may correspond to instruction in some inlined function, // so we have to build the chain of inlined functions and take the @@ -1053,6 +1055,11 @@ static bool getFunctionNameAndStartLineForAddress(DWARFCompileUnit *CU, FunctionName = Name; FoundResult = true; } + std::string DeclFile = DIE.getDeclFile(FileNameKind); + if (!DeclFile.empty()) { + StartFile = DeclFile; + FoundResult = true; + } if (auto DeclLineResult = DIE.getDeclLine()) { StartLine = DeclLineResult; FoundResult = true; @@ -1224,8 +1231,9 @@ DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, if (!CU) return Result; - getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind, - Result.FunctionName, Result.StartLine); + getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind, Spec.FLIKind, + Result.FunctionName, + Result.StartFileName, Result.StartLine); if (Spec.FLIKind != FileLineInfoKind::None) { if (const DWARFLineTable *LineTable = getLineTableForUnit(CU)) { LineTable->getFileLineInfoForAddress( @@ -1244,15 +1252,17 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange( return Lines; uint32_t StartLine = 0; + std::string StartFileName; std::string FunctionName(DILineInfo::BadString); - getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind, - FunctionName, StartLine); + getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind, Spec.FLIKind, + FunctionName, StartFileName, StartLine); // If the Specifier says we don't need FileLineInfo, just // return the top-most function at the starting address. 
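A hedged usage sketch for the accessor wired through here (FunctionDIE stands for any DW_TAG_subprogram DIE already in hand; the enumerator is from DILineInfoSpecifier):

std::string StartFile = FunctionDIE.getDeclFile(
    DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
// Resolves DW_AT_decl_file through the unit's line table, letting callers
// report the file a function was declared in, not just the file of the
// queried address.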
if (Spec.FLIKind == FileLineInfoKind::None) { DILineInfo Result; Result.FunctionName = FunctionName; + Result.StartFileName = StartFileName; Result.StartLine = StartLine; Lines.push_back(std::make_pair(Address.Address, Result)); return Lines; @@ -1276,6 +1286,7 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange( Result.FunctionName = FunctionName; Result.Line = Row.Line; Result.Column = Row.Column; + Result.StartFileName = StartFileName; Result.StartLine = StartLine; Lines.push_back(std::make_pair(Row.Address.Address, Result)); } @@ -1318,6 +1329,7 @@ DWARFContext::getInliningInfoForAddress(object::SectionedAddress Address, Frame.FunctionName = Name; if (auto DeclLineResult = FunctionDIE.getDeclLine()) Frame.StartLine = DeclLineResult; + Frame.StartFileName = FunctionDIE.getDeclFile(Spec.FLIKind); if (Spec.FLIKind != FileLineInfoKind::None) { if (i == 0) { // For the topmost frame, initialize the line table of this diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 116f72a1d58baa..31340077a126d7 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -557,6 +557,17 @@ uint64_t DWARFDie::getDeclLine() const { return toUnsigned(findRecursively(DW_AT_decl_line), 0); } +std::string +DWARFDie::getDeclFile(DILineInfoSpecifier::FileLineInfoKind Kind) const { + std::string FileName; + if (auto DeclFile = toUnsigned(findRecursively(DW_AT_decl_file))) { + if (const auto *LT = U->getContext().getLineTableForUnit(U)) { + LT->getFileNameByIndex(*DeclFile, U->getCompilationDir(), Kind, FileName); + } + } + return FileName; +} + void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine, uint32_t &CallColumn, uint32_t &CallDiscriminator) const { diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp index 10352237763c9f..01dc31d8496571 100644 --- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -84,8 +84,10 @@ void DIPrinter::print(const DILineInfo &Info, bool Inlined) { return; } OS << " Filename: " << Filename << "\n"; - if (Info.StartLine) - OS << "Function start line: " << Info.StartLine << "\n"; + if (Info.StartLine) { + OS << " Function start filename: " << Info.StartFileName << "\n"; + OS << " Function start line: " << Info.StartLine << "\n"; + } OS << " Line: " << Info.Line << "\n"; OS << " Column: " << Info.Column << "\n"; if (Info.Discriminator) diff --git a/llvm/lib/Extensions/Extensions.cpp b/llvm/lib/Extensions/Extensions.cpp index e69de29bb2d1d6..0d25cbda38e004 100644 --- a/llvm/lib/Extensions/Extensions.cpp +++ b/llvm/lib/Extensions/Extensions.cpp @@ -0,0 +1,15 @@ +#include "llvm/Passes/PassPlugin.h" +#define HANDLE_EXTENSION(Ext) \ + llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); +#include "llvm/Support/Extension.def" + + +namespace llvm { + namespace details { + void extensions_anchor() { +#define HANDLE_EXTENSION(Ext) \ + get##Ext##PluginInfo(); +#include "llvm/Support/Extension.def" + } + } +} diff --git a/llvm/lib/Extensions/LLVMBuild.txt b/llvm/lib/Extensions/LLVMBuild.txt index 2005830a4dd7ad..7a98c8f680513a 100644 --- a/llvm/lib/Extensions/LLVMBuild.txt +++ b/llvm/lib/Extensions/LLVMBuild.txt @@ -18,4 +18,4 @@ type = Library name = Extensions parent = Libraries -required_libraries = +required_libraries = Support diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index ca29548a4d7ca5..65d8669604950e 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ 
b/llvm/lib/LTO/LTOBackend.cpp @@ -350,7 +350,7 @@ static cl::opt EmbedBitcode( "lto-embed-bitcode", cl::init(false), cl::desc("Embed LLVM bitcode in object files produced by LTO")); -static void EmitBitcodeSection(Module &M, const Config &Conf) { +static void EmitBitcodeSection(Module &M) { if (!EmbedBitcode) return; SmallVector Buffer; @@ -369,7 +369,7 @@ void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream, if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod)) return; - EmitBitcodeSection(Mod, Conf); + EmitBitcodeSection(Mod); std::unique_ptr DwoOut; SmallString<1024> DwoFile(Conf.SplitDwarfOutput); diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 45165ffe3cac00..4d62174f7e5e46 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -127,7 +127,7 @@ struct StructInfo { std::vector Fields; StringMap FieldsByName; - FieldInfo &addField(StringRef FieldName, FieldType FT); + FieldInfo &addField(StringRef FieldName, FieldType FT, size_t FieldSize); StructInfo() = default; @@ -330,7 +330,8 @@ struct FieldInfo { FieldInfo(FieldType FT) : Contents(FT) {} }; -FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT) { +FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT, + size_t FieldSize) { if (!FieldName.empty()) FieldsByName[FieldName] = Fields.size(); Fields.emplace_back(FT); @@ -338,7 +339,7 @@ FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT) { if (IsUnion) { Field.Offset = 0; } else { - Size = llvm::alignTo(Size, Alignment); + Size = llvm::alignTo(Size, std::min(Alignment, FieldSize)); Field.Offset = Size; } return Field; @@ -759,13 +760,14 @@ class MasmParser : public MCAsmParser { // "real4", "real8" bool emitRealValues(const fltSemantics &Semantics); - bool addRealField(StringRef Name, const fltSemantics &Semantics); - bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics); + bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size); + bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics, + size_t Size); bool parseRealInstList( const fltSemantics &Semantics, SmallVectorImpl &Values, const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement); bool parseDirectiveNamedRealValue(StringRef IDVal, - const fltSemantics &Semantics, + const fltSemantics &Semantics, size_t Size, StringRef Name, SMLoc NameLoc); bool parseOptionalAngleBracketOpen(); @@ -1314,7 +1316,7 @@ bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) { /// primaryexpr ::= symbol /// primaryexpr ::= number /// primaryexpr ::= '.' -/// primaryexpr ::= ~,+,- primaryexpr +/// primaryexpr ::= ~,+,-,'not' primaryexpr bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { SMLoc FirstTokenLoc = getLexer().getLoc(); AsmToken::TokenKind FirstTokenKind = Lexer.getKind(); @@ -1352,6 +1354,13 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { return Error(FirstTokenLoc, "invalid token in expression"); } } + // Parse named bitwise negation. + if (Identifier.equals_lower("not")) { + if (parsePrimaryExpr(Res, EndLoc)) + return true; + Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc); + return false; + } // Parse symbol variant. 
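The packing rule the new FieldSize parameter above implements: MASM aligns a field to min(structure alignment, field size), so a REAL4 following a single BYTE inside a `STRUCT 8` lands at offset 4, not 8. An illustrative helper (not part of the parser):

#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cstddef>

size_t placeField(size_t CurSize, size_t Alignment, size_t FieldSize) {
  return llvm::alignTo(CurSize, std::min(Alignment, FieldSize));
}
// placeField(1, 8, 4) == 4: the 4-byte field only needs 4-byte alignment.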
std::pair Split; if (!MAI.useParensForSymbolVariant()) { @@ -1772,8 +1781,18 @@ bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc) { SMLoc StartLoc = Lexer.getLoc(); while (true) { + AsmToken::TokenKind TokKind = Lexer.getKind(); + if (Lexer.getKind() == AsmToken::Identifier) { + StringRef Identifier = Lexer.getTok().getString(); + if (Identifier.equals_lower("and")) + TokKind = AsmToken::Amp; + else if (Identifier.equals_lower("not")) + TokKind = AsmToken::Exclaim; + else if (Identifier.equals_lower("or")) + TokKind = AsmToken::Pipe; + } MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add; - unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind); + unsigned TokPrec = getBinOpPrecedence(TokKind, Kind); // If the next token is lower precedence than we are allowed to eat, return // successfully with what we ate already. @@ -2101,9 +2120,9 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, case DK_DQ: return parseDirectiveValue(IDVal, 8); case DK_REAL4: - return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle()); + return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4); case DK_REAL8: - return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble()); + return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8); case DK_STRUCT: case DK_UNION: return parseDirectiveNestedStruct(IDVal, DirKind); @@ -2326,12 +2345,12 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc); case DK_REAL4: Lex(); - return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), IDVal, - IDLoc); + return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4, + IDVal, IDLoc); case DK_REAL8: Lex(); - return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), IDVal, - IDLoc); + return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8, + IDVal, IDLoc); case DK_STRUCT: case DK_UNION: Lex(); @@ -3057,6 +3076,11 @@ bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name, SMLoc EndLoc, StartLoc = Lexer.getLoc(); if (parseExpression(Expr, EndLoc)) return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name); + Sym->setRedefinable(Var.Redefinable); + Sym->setVariableValue(Expr); + Sym->setExternal(false); + if (Expr->evaluateAsAbsolute(Var.NumericValue, getStreamer().getAssemblerPtr())) return false; @@ -3229,7 +3253,7 @@ bool MasmParser::parseScalarInitializer(unsigned Size, Lex(); } else { const MCExpr *Value; - if (checkForValidSection() || parseExpression(Value)) + if (parseExpression(Value)) return true; if (getTok().is(AsmToken::Identifier) && getTok().getString().equals_lower("dup")) { @@ -3289,7 +3313,7 @@ bool MasmParser::emitIntegralValues(unsigned Size) { // Add a field to the current structure. bool MasmParser::addIntegralField(StringRef Name, unsigned Size) { StructInfo &Struct = StructInProgress.back(); - FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL); + FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size); IntFieldInfo &IntInfo = Field.Contents.IntInfo; Field.Type = Size; @@ -3449,6 +3473,9 @@ bool MasmParser::parseRealInstList(const fltSemantics &Semantics, // Initialize real data values. 
bool MasmParser::emitRealValues(const fltSemantics &Semantics) { + if (checkForValidSection()) + return true; + SmallVector ValuesAsInt; if (parseRealInstList(Semantics, ValuesAsInt)) return true; @@ -3461,15 +3488,15 @@ bool MasmParser::emitRealValues(const fltSemantics &Semantics) { } // Add a real field to the current struct. -bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics) { +bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics, + size_t Size) { StructInfo &Struct = StructInProgress.back(); - FieldInfo &Field = Struct.addField(Name, FT_REAL); + FieldInfo &Field = Struct.addField(Name, FT_REAL, Size); RealFieldInfo &RealInfo = Field.Contents.RealInfo; Field.SizeOf = 0; - if (checkForValidSection() || - parseRealInstList(Semantics, RealInfo.AsIntValues)) + if (parseRealInstList(Semantics, RealInfo.AsIntValues)) return true; Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8; @@ -3485,15 +3512,13 @@ bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics) { /// parseDirectiveRealValue /// ::= (real4 | real8) [ expression (, expression)* ] bool MasmParser::parseDirectiveRealValue(StringRef IDVal, - const fltSemantics &Semantics) { - if (checkForValidSection()) - return true; - + const fltSemantics &Semantics, + size_t Size) { if (StructInProgress.empty()) { // Initialize data value. if (emitRealValues(Semantics)) return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); - } else if (addRealField("", Semantics)) { + } else if (addRealField("", Semantics, Size)) { return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); } return false; @@ -3503,17 +3528,15 @@ bool MasmParser::parseDirectiveRealValue(StringRef IDVal, /// ::= name (real4 | real8) [ expression (, expression)* ] bool MasmParser::parseDirectiveNamedRealValue(StringRef IDVal, const fltSemantics &Semantics, - StringRef Name, SMLoc NameLoc) { - if (checkForValidSection()) - return true; - + size_t Size, StringRef Name, + SMLoc NameLoc) { if (StructInProgress.empty()) { // Initialize named data value. MCSymbol *Sym = getContext().getOrCreateSymbol(Name); getStreamer().emitLabel(Sym); if (emitRealValues(Semantics)) return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); - } else if (addRealField(Name, Semantics)) { + } else if (addRealField(Name, Semantics, Size)) { return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); } return false; @@ -3943,7 +3966,7 @@ bool MasmParser::emitStructValues(const StructInfo &Structure) { // Declare a field in the current struct. 
bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) { StructInfo &OwningStruct = StructInProgress.back(); - FieldInfo &Field = OwningStruct.addField(Name, FT_STRUCT); + FieldInfo &Field = OwningStruct.addField(Name, FT_STRUCT, Structure.Size); StructFieldInfo &StructInfo = Field.Contents.StructInfo; StructInfo.Structure = Structure; @@ -4117,7 +4140,8 @@ bool MasmParser::parseDirectiveNestedEnds() { else ParentStruct.Size += Structure.Size; } else { - FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT); + FieldInfo &Field = + ParentStruct.addField(Structure.Name, FT_STRUCT, Structure.Size); StructFieldInfo &StructInfo = Field.Contents.StructInfo; Field.Type = Structure.Size; Field.LengthOf = 1; diff --git a/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/llvm/lib/ObjectYAML/DWARFEmitter.cpp index bf29f40579ceb2..b634f7c123e8d5 100644 --- a/llvm/lib/ObjectYAML/DWARFEmitter.cpp +++ b/llvm/lib/ObjectYAML/DWARFEmitter.cpp @@ -190,7 +190,7 @@ Error DWARFYAML::emitDebugAranges(raw_ostream &OS, const DWARFYAML::Data &DI) { Error DWARFYAML::emitDebugRanges(raw_ostream &OS, const DWARFYAML::Data &DI) { const size_t RangesOffset = OS.tell(); uint64_t EntryIndex = 0; - for (auto DebugRanges : DI.DebugRanges) { + for (auto DebugRanges : *DI.DebugRanges) { const size_t CurrOffset = OS.tell() - RangesOffset; if (DebugRanges.Offset && (uint64_t)*DebugRanges.Offset < CurrOffset) return createStringError(errc::invalid_argument, diff --git a/llvm/lib/ObjectYAML/DWARFYAML.cpp b/llvm/lib/ObjectYAML/DWARFYAML.cpp index 353e5058a0e5df..975b9b40b6b188 100644 --- a/llvm/lib/ObjectYAML/DWARFYAML.cpp +++ b/llvm/lib/ObjectYAML/DWARFYAML.cpp @@ -28,7 +28,7 @@ SetVector DWARFYAML::Data::getNonEmptySectionNames() const { SecNames.insert("debug_str"); if (DebugAranges) SecNames.insert("debug_aranges"); - if (!DebugRanges.empty()) + if (DebugRanges) SecNames.insert("debug_ranges"); if (!DebugLines.empty()) SecNames.insert("debug_line"); @@ -95,8 +95,7 @@ void MappingTraits::mapping(IO &IO, DWARFYAML::Data &DWARF) { IO.mapOptional("debug_str", DWARF.DebugStrings); IO.mapOptional("debug_abbrev", DWARF.DebugAbbrev); IO.mapOptional("debug_aranges", DWARF.DebugAranges); - if (!DWARF.DebugRanges.empty() || !IO.outputting()) - IO.mapOptional("debug_ranges", DWARF.DebugRanges); + IO.mapOptional("debug_ranges", DWARF.DebugRanges); IO.mapOptional("debug_pubnames", DWARF.PubNames); IO.mapOptional("debug_pubtypes", DWARF.PubTypes); DWARFCtx.IsGNUPubSec = true; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 9df6a985789eaa..9a2e895d7b7176 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1160,11 +1160,14 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline( // convert to more optimized IR using more aggressive simplify CFG options. // The extra sinking transform can create larger basic blocks, so do this // before SLP vectorization. - OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions(). - forwardSwitchCondToPhi(true). - convertSwitchToLookupTable(true). - needCanonicalLoops(false). - sinkCommonInsts(true))); + // FIXME: study whether hoisting and/or sinking of common instructions should + // be delayed until after SLP vectorizer. + OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions() + .forwardSwitchCondToPhi(true) + .convertSwitchToLookupTable(true) + .needCanonicalLoops(false) + .hoistCommonInsts(true) + .sinkCommonInsts(true))); // Optimize parallel scalar instruction chains into SIMD instructions. 
   if (PTO.SLPVectorization)
diff --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp
index 7b97723da60cc3..f8c576d305f05e 100644
--- a/llvm/lib/ProfileData/GCOV.cpp
+++ b/llvm/lib/ProfileData/GCOV.cpp
@@ -108,11 +108,10 @@ bool GCOVFile::readGCNO(GCOVBuffer &buf) {
       for (uint32_t i = 0, e = (length - 1) / 2; i != e; ++i) {
         uint32_t dstNo = buf.getWord(), flags = buf.getWord();
         GCOVBlock *dst = fn->Blocks[dstNo].get();
-        auto arc =
-            std::make_unique<GCOVArc>(*src, *dst, flags & GCOV_ARC_FALLTHROUGH);
+        auto arc = std::make_unique<GCOVArc>(*src, *dst, flags);
         src->addDstEdge(arc.get());
         dst->addSrcEdge(arc.get());
-        if (flags & GCOV_ARC_ON_TREE)
+        if (arc->onTree())
           fn->treeArcs.push_back(std::move(arc));
         else
           fn->arcs.push_back(std::move(arc));
@@ -221,10 +220,18 @@ bool GCOVFile::readGCDA(GCOVBuffer &buf) {
       for (std::unique_ptr<GCOVArc> &arc : fn->arcs) {
         if (!buf.readInt64(arc->Count))
           return false;
-        // FIXME Fix counters
         arc->src.Counter += arc->Count;
-        if (arc->dst.succ.empty())
-          arc->dst.Counter += arc->Count;
+      }
+
+      if (fn->Blocks.size() >= 2) {
+        GCOVBlock &src = *fn->Blocks[0];
+        GCOVBlock &sink =
+            Version < GCOV::V408 ? *fn->Blocks.back() : *fn->Blocks[1];
+        auto arc = std::make_unique<GCOVArc>(sink, src, GCOV_ARC_ON_TREE);
+        sink.addDstEdge(arc.get());
+        src.addSrcEdge(arc.get());
+        fn->treeArcs.push_back(std::move(arc));
+        fn->propagateCounts(src, nullptr);
       }
     }
     pos += 4 * length;
@@ -260,6 +267,8 @@ void GCOVFile::collectLineCounts(FileInfo &fi) {
   fi.setProgramCount(ProgramCount);
 }
 
+bool GCOVArc::onTree() const { return flags & GCOV_ARC_ON_TREE; }
+
 //===----------------------------------------------------------------------===//
 // GCOVFunction implementation.
 
@@ -271,10 +280,27 @@ uint64_t GCOVFunction::getEntryCount() const {
   return Blocks.front()->getCount();
 }
 
-/// getExitCount - Get the number of times the function returned by retrieving
-/// the exit block's count.
-uint64_t GCOVFunction::getExitCount() const {
-  return Blocks.back()->getCount();
+GCOVBlock &GCOVFunction::getExitBlock() const {
+  return file.getVersion() < GCOV::V408 ? *Blocks.back() : *Blocks[1];
+}
+
+// For each basic block, the sum of incoming edge counts equals the sum of
+// outgoing edge counts by Kirchhoff's circuit law. If the unmeasured arcs
+// form a spanning tree, the count for each unmeasured arc (GCOV_ARC_ON_TREE)
+// can be uniquely identified.
+uint64_t GCOVFunction::propagateCounts(const GCOVBlock &v, GCOVArc *pred) {
+  uint64_t excess = 0;
+  for (GCOVArc *e : v.srcs())
+    if (e != pred)
+      excess += e->onTree() ? propagateCounts(e->src, e) : e->Count;
+  for (GCOVArc *e : v.dsts())
+    if (e != pred)
+      excess -= e->onTree() ? propagateCounts(e->dst, e) : e->Count;
+  if (int64_t(excess) < 0)
+    excess = -excess;
+  if (pred)
+    pred->Count = excess;
+  return excess;
 }
 
 void GCOVFunction::print(raw_ostream &OS) const {
@@ -322,8 +348,11 @@ void GCOVBlock::print(raw_ostream &OS) const {
   }
   if (!succ.empty()) {
     OS << "\tDestination Edges : ";
-    for (const GCOVArc *Edge : succ)
+    for (const GCOVArc *Edge : succ) {
+      if (Edge->flags & GCOV_ARC_ON_TREE)
+        OS << '*';
       OS << Edge->dst.Number << " (" << Edge->Count << "), ";
+    }
     OS << "\n";
   }
   if (!Lines.empty()) {
@@ -437,41 +466,40 @@ void GCOVBlock::getCyclesCount(const BlockVector &Blocks, uint64_t &Count) {
 }
 
 /// Get the count for the list of blocks which lie on the same line.
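A worked instance of the propagation above, with illustrative numbers. Take a diamond CFG where entry->A and A->exit were instrumented (count 5), entry->B and B->exit are on the spanning tree, and the synthetic (exit->entry) arc carries the 7 recorded invocations:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t total = 7, entryToA = 5;
  // Kirchhoff at entry: in (7) == out (5 + entryToB)  =>  entryToB == 2.
  uint64_t entryToB = total - entryToA;
  // Kirchhoff at B: in (2) == out (bToExit)  =>  bToExit == 2.
  uint64_t bToExit = entryToB;
  assert(entryToB == 2 && bToExit == 2);
}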
-uint64_t GCOVBlock::getLineCount(const BlockVector &Blocks) {
-  uint64_t Count = 0;
-
-  for (auto Block : Blocks) {
-    if (Block->getNumSrcEdges() == 0) {
-      // The block has no predecessors and a non-null counter
-      // (can be the case with entry block in functions).
-      Count += Block->getCount();
+uint64_t GCOVBlock::getLineCount(const BlockVector &blocks) {
+  uint64_t count = 0;
+  for (const GCOVBlock *block : blocks) {
+    if (block->Number == 0) {
+      // For nonstandard control flows, arcs into the exit block may be
+      // counted twice (fork) or not counted at all (abnormal exit), and
+      // thus the (exit,entry) counter may be inaccurate. Count the entry
+      // block with the outgoing arcs.
+      for (const GCOVArc *arc : block->succ)
+        count += arc->Count;
     } else {
       // Add counts from predecessors that are not on the same line.
-      for (auto E : Block->srcs()) {
-        const GCOVBlock *W = &E->src;
-        if (find(Blocks, W) == Blocks.end()) {
-          Count += E->Count;
-        }
-      }
-    }
-    for (auto E : Block->dsts()) {
-      E->CyclesCount = E->Count;
+      for (const GCOVArc *arc : block->pred)
+        if (!llvm::is_contained(blocks, &arc->src))
+          count += arc->Count;
     }
+    for (GCOVArc *arc : block->succ)
+      arc->CyclesCount = arc->Count;
   }
-  GCOVBlock::getCyclesCount(Blocks, Count);
-
-  return Count;
+  GCOVBlock::getCyclesCount(blocks, count);
+  return count;
 }
 
 //===----------------------------------------------------------------------===//
 // FileInfo implementation.
 
-// Safe integer division, returns 0 if numerator is 0.
-static uint32_t safeDiv(uint64_t Numerator, uint64_t Divisor) {
-  if (!Numerator)
+// Format dividend/divisor as a percentage. Return 1 if the result is greater
+// than 0% and less than 1%.
+static uint32_t formatPercentage(uint64_t dividend, uint64_t divisor) {
+  if (!dividend || !divisor)
     return 0;
-  return Numerator / Divisor;
+  dividend *= 100;
+  return dividend < divisor ?
1 : dividend / divisor; } // This custom division function mimics gcov's branch ouputs: @@ -794,14 +822,18 @@ void FileInfo::printFunctionSummary(raw_ostream &OS, for (const GCOVFunction *Func : Funcs) { uint64_t EntryCount = Func->getEntryCount(); uint32_t BlocksExec = 0; + const GCOVBlock &ExitBlock = Func->getExitBlock(); + uint64_t exitCount = 0; + for (const GCOVArc *arc : ExitBlock.pred) + exitCount += arc->Count; for (const GCOVBlock &Block : Func->blocks()) - if (Block.getNumDstEdges() && Block.getCount()) + if (Block.Number != 0 && &Block != &ExitBlock && Block.getCount()) ++BlocksExec; OS << "function " << Func->getName() << " called " << EntryCount - << " returned " << safeDiv(Func->getExitCount() * 100, EntryCount) + << " returned " << formatPercentage(exitCount, EntryCount) << "% blocks executed " - << safeDiv(BlocksExec * 100, Func->getNumBlocks() - 1) << "%\n"; + << formatPercentage(BlocksExec, Func->getNumBlocks() - 2) << "%\n"; } } diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index aad50e1240341d..03843687c10a49 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -115,13 +115,13 @@ KnownBits KnownBits::umax(const KnownBits &LHS, const KnownBits &RHS) { KnownBits KnownBits::umin(const KnownBits &LHS, const KnownBits &RHS) { // Flip the range of values: [0, 0xFFFFFFFF] <-> [0xFFFFFFFF, 0] - auto Flip = [](KnownBits Val) { return KnownBits(Val.One, Val.Zero); }; + auto Flip = [](const KnownBits &Val) { return KnownBits(Val.One, Val.Zero); }; return Flip(umax(Flip(LHS), Flip(RHS))); } KnownBits KnownBits::smax(const KnownBits &LHS, const KnownBits &RHS) { // Flip the range of values: [-0x80000000, 0x7FFFFFFF] <-> [0, 0xFFFFFFFF] - auto Flip = [](KnownBits Val) { + auto Flip = [](const KnownBits &Val) { unsigned SignBitPosition = Val.getBitWidth() - 1; APInt Zero = Val.Zero; APInt One = Val.One; @@ -134,7 +134,7 @@ KnownBits KnownBits::smax(const KnownBits &LHS, const KnownBits &RHS) { KnownBits KnownBits::smin(const KnownBits &LHS, const KnownBits &RHS) { // Flip the range of values: [-0x80000000, 0x7FFFFFFF] <-> [0xFFFFFFFF, 0] - auto Flip = [](KnownBits Val) { + auto Flip = [](const KnownBits &Val) { unsigned SignBitPosition = Val.getBitWidth() - 1; APInt Zero = Val.One; APInt One = Val.Zero; diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp index a5af98582452b3..b7d9bd4f865c90 100644 --- a/llvm/lib/Support/X86TargetParser.cpp +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -529,7 +529,7 @@ static constexpr FeatureBitset ImpliedFeaturesAVX5124FMAPS = {}; static constexpr FeatureBitset ImpliedFeaturesAVX5124VNNIW = {}; // SSE4_A->FMA4->XOP chain. 
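Worked values for the formatPercentage helper above, which replaces safeDiv so that a tiny nonzero ratio is reported as 1% rather than rounding down to 0%:

//   formatPercentage(0, 1000)   == 0   (nothing executed)
//   formatPercentage(1, 1000)   == 1   (0.1% is clamped up to 1%)
//   formatPercentage(999, 1000) == 99  (integer division truncates)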
-static constexpr FeatureBitset ImpliedFeaturesSSE4_A = FeatureSSSE3; +static constexpr FeatureBitset ImpliedFeaturesSSE4_A = FeatureSSE3; static constexpr FeatureBitset ImpliedFeaturesFMA4 = FeatureAVX | FeatureSSE4_A; static constexpr FeatureBitset ImpliedFeaturesXOP = FeatureFMA4; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 2f1317d8f1ea88..b3694411966b56 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -734,3 +734,19 @@ unsigned AArch64RegisterInfo::getLocalAddressRegister( return getBaseRegister(); return getFrameRegister(MF); } + +/// SrcRC and DstRC will be morphed into NewRC if this returns true +bool AArch64RegisterInfo::shouldCoalesce( + MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, + const TargetRegisterClass *DstRC, unsigned DstSubReg, + const TargetRegisterClass *NewRC, LiveIntervals &LIS) const { + if (MI->isCopy() && + ((DstRC->getID() == AArch64::GPR64RegClassID) || + (DstRC->getID() == AArch64::GPR64commonRegClassID)) && + MI->getOperand(0).getSubReg() && MI->getOperand(1).getSubReg()) + // Do not coalesce in the case of a 32-bit subregister copy + // which implements a 32 to 64 bit zero extension + // which relies on the upper 32 bits being zeroed. + return false; + return true; +} diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h index e3c8a77f433f84..d7580d7b683303 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -129,6 +129,12 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo { unsigned getLocalAddressRegister(const MachineFunction &MF) const; bool regNeedsCFI(unsigned Reg, unsigned &RegToUseForCFI) const; + + /// SrcRC and DstRC will be morphed into NewRC if this returns true + bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, + unsigned SubReg, const TargetRegisterClass *DstRC, + unsigned DstSubReg, const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const override; }; } // end namespace llvm diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 8b15898c1c1408..d7a14a3dc77281 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -455,6 +455,7 @@ void AArch64PassConfig::addIRPasses() { .forwardSwitchCondToPhi(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) + .hoistCommonInsts(true) .sinkCommonInsts(true))); // Run LoopDataPrefetch diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 9c2f2e7eecd14f..840208169168e4 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -34,6 +34,7 @@ #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/AMDHSAKernelDescriptor.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -1215,6 +1216,350 @@ bool AMDGPUDisassembler::isGFX10() const { return STI.getFeatureBits()[AMDGPU::FeatureGFX10]; } +//===----------------------------------------------------------------------===// +// AMDGPU specific symbol handling 
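A scalar analogue (not from the patch) of the hazard the new AArch64 shouldCoalesce hook avoids: a 32-bit register write zeroes bits [63:32], so the 32-bit subregister COPY is what implements the zero-extension, and coalescing it away would leave stale upper bits:

#include <cstdint>

uint64_t zext32(uint64_t X) {
  uint32_t W = static_cast<uint32_t>(X); // like a 32-bit def: upper half zeroed
  return W; // the widening "copy" is the entire zero-extension
}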
+//===----------------------------------------------------------------------===// +#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ + do { \ + KdStream << Indent << DIRECTIVE " " \ + << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ + } while (0) + +// NOLINTNEXTLINE(readability-identifier-naming) +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( + uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { + using namespace amdhsa; + StringRef Indent = "\t"; + + // We cannot accurately backward compute #VGPRs used from + // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same + // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we + // simply calculate the inverse of what the assembler does. + + uint32_t GranulatedWorkitemVGPRCount = + (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >> + COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT; + + uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) * + AMDGPU::IsaInfo::getVGPREncodingGranule(&STI); + + KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n'; + + // We cannot backward compute values used to calculate + // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following + // directives can't be computed: + // .amdhsa_reserve_vcc + // .amdhsa_reserve_flat_scratch + // .amdhsa_reserve_xnack_mask + // They take their respective default values if not specified in the assembly. + // + // GRANULATED_WAVEFRONT_SGPR_COUNT + // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK) + // + // We compute the inverse as though all directives apart from NEXT_FREE_SGPR + // are set to 0. So while disassembling we consider that: + // + // GRANULATED_WAVEFRONT_SGPR_COUNT + // = f(NEXT_FREE_SGPR + 0 + 0 + 0) + // + // The disassembler cannot recover the original values of those 3 directives. 
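To make the round-trip concrete, here is a minimal sketch of the inverse computation (illustration only, assuming an encoding granule of 4; the real granule comes from AMDGPU::IsaInfo::getVGPREncodingGranule and varies by subtarget):

// Sketch: what the assembler stores for a given .amdhsa_next_free_vgpr,
// assuming a granule of 4.
unsigned encodeGranulatedCount(unsigned NextFree, unsigned Granule = 4) {
  return (NextFree + Granule - 1) / Granule - 1;
}
// The inverse computed by this disassembler.
unsigned decodeNextFree(unsigned GranulatedCount, unsigned Granule = 4) {
  return (GranulatedCount + 1) * Granule;
}
// encodeGranulatedCount(9) == 2 and decodeNextFree(2) == 12: the printed 12
// differs from the original 9, but encodeGranulatedCount(12) == 2 again, so
// the reassembled descriptor carries an identical field value.

The same reasoning applies to the granulated SGPR count handled next.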
+ + uint32_t GranulatedWavefrontSGPRCount = + (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >> + COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT; + + if (isGFX10() && GranulatedWavefrontSGPRCount) + return MCDisassembler::Fail; + + uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) * + AMDGPU::IsaInfo::getSGPREncodingGranule(&STI); + + KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n'; + KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n'; + KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n'; + KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n"; + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_float_round_mode_32", + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); + PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64", + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64); + PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32", + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32); + PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64", + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY) + return MCDisassembler::Fail; + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0) + return MCDisassembler::Fail; + + if (isGFX10()) { + PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode", + COMPUTE_PGM_RSRC1_WGP_MODE); + PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED); + PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS); + } + return MCDisassembler::Success; +} + +// NOLINTNEXTLINE(readability-identifier-naming) +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2( + uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { + using namespace amdhsa; + StringRef Indent = "\t"; + PRINT_DIRECTIVE( + ".amdhsa_system_sgpr_private_segment_wavefront_offset", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO); + PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id", + COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID); + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH) + return MCDisassembler::Fail; + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY) + return MCDisassembler::Fail; + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE( + ".amdhsa_exception_fp_ieee_invalid_op", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION); + PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE); + PRINT_DIRECTIVE( + 
".amdhsa_exception_fp_ieee_div_zero", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO); + PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW); + PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW); + PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT); + PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0) + return MCDisassembler::Fail; + + return MCDisassembler::Success; +} + +#undef PRINT_DIRECTIVE + +MCDisassembler::DecodeStatus +AMDGPUDisassembler::decodeKernelDescriptorDirective( + DataExtractor::Cursor &Cursor, ArrayRef Bytes, + raw_string_ostream &KdStream) const { +#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ + do { \ + KdStream << Indent << DIRECTIVE " " \ + << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ + } while (0) + + uint16_t TwoByteBuffer = 0; + uint32_t FourByteBuffer = 0; + uint64_t EightByteBuffer = 0; + + StringRef ReservedBytes; + StringRef Indent = "\t"; + + assert(Bytes.size() == 64); + DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8); + + switch (Cursor.tell()) { + case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer + << '\n'; + return MCDisassembler::Success; + + case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + KdStream << Indent << ".amdhsa_private_segment_fixed_size " + << FourByteBuffer << '\n'; + return MCDisassembler::Success; + + case amdhsa::RESERVED0_OFFSET: + // 8 reserved bytes, must be 0. + EightByteBuffer = DE.getU64(Cursor); + if (EightByteBuffer) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET: + // KERNEL_CODE_ENTRY_BYTE_OFFSET + // So far no directive controls this for Code Object V3, so simply skip for + // disassembly. + DE.skip(Cursor, 8); + return MCDisassembler::Success; + + case amdhsa::RESERVED1_OFFSET: + // 20 reserved bytes, must be 0. + ReservedBytes = DE.getBytes(Cursor, 20); + for (int I = 0; I < 20; ++I) { + if (ReservedBytes[I] != 0) { + return MCDisassembler::Fail; + } + } + return MCDisassembler::Success; + + case amdhsa::COMPUTE_PGM_RSRC3_OFFSET: + // COMPUTE_PGM_RSRC3 + // - Only set for GFX10, GFX6-9 have this to be 0. + // - Currently no directives directly control this. 
+ FourByteBuffer = DE.getU32(Cursor); + if (!isGFX10() && FourByteBuffer) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::COMPUTE_PGM_RSRC1_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) == + MCDisassembler::Fail) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::COMPUTE_PGM_RSRC2_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) == + MCDisassembler::Fail) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET: + using namespace amdhsa; + TwoByteBuffer = DE.getU16(Cursor); + + PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); + + if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0) + return MCDisassembler::Fail; + + // Reserved for GFX9 + if (isGFX9() && + (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) { + return MCDisassembler::Fail; + } else if (isGFX10()) { + PRINT_DIRECTIVE(".amdhsa_wavefront_size32", + KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); + } + + if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) + return MCDisassembler::Fail; + + return MCDisassembler::Success; + + case amdhsa::RESERVED2_OFFSET: + // 6 bytes from here are reserved, must be 0. + ReservedBytes = DE.getBytes(Cursor, 6); + for (int I = 0; I < 6; ++I) { + if (ReservedBytes[I] != 0) + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + default: + llvm_unreachable("Unhandled index. Case statements cover everything."); + return MCDisassembler::Fail; + } +#undef PRINT_DIRECTIVE +} + +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor( + StringRef KdName, ArrayRef Bytes, uint64_t KdAddress) const { + // CP microcode requires the kernel descriptor to be 64 aligned. + if (Bytes.size() != 64 || KdAddress % 64 != 0) + return MCDisassembler::Fail; + + std::string Kd; + raw_string_ostream KdStream(Kd); + KdStream << ".amdhsa_kernel " << KdName << '\n'; + + DataExtractor::Cursor C(0); + while (C && C.tell() < Bytes.size()) { + MCDisassembler::DecodeStatus Status = + decodeKernelDescriptorDirective(C, Bytes, KdStream); + + cantFail(C.takeError()); + + if (Status == MCDisassembler::Fail) + return MCDisassembler::Fail; + } + KdStream << ".end_amdhsa_kernel\n"; + outs() << KdStream.str(); + return MCDisassembler::Success; +} + +Optional +AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, + ArrayRef Bytes, uint64_t Address, + raw_ostream &CStream) const { + // Right now only kernel descriptor needs to be handled. + // We ignore all other symbols for target specific handling. + // TODO: + // Fix the spurious symbol issue for AMDGPU kernels. 
Exists for both Code + // Object V2 and V3 when symbols are marked protected. + + // amd_kernel_code_t for Code Object V2. + if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) { + Size = 256; + return MCDisassembler::Fail; + } + + // Code Object V3 kernel descriptors. + StringRef Name = Symbol.Name; + if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) { + Size = 64; // Size = 64 regardless of success or failure. + return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address); + } + return None; +} + //===----------------------------------------------------------------------===// // AMDGPUSymbolizer //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index f975af409a096c..315602c35288c2 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -17,10 +17,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/DataExtractor.h" #include #include @@ -66,6 +67,33 @@ class AMDGPUDisassembler : public MCDisassembler { DecodeStatus tryDecodeInst(const uint8_t* Table, MCInst &MI, uint64_t Inst, uint64_t Address) const; + Optional onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, + ArrayRef Bytes, + uint64_t Address, + raw_ostream &CStream) const override; + + DecodeStatus decodeKernelDescriptor(StringRef KdName, ArrayRef Bytes, + uint64_t KdAddress) const; + + DecodeStatus + decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, + ArrayRef Bytes, + raw_string_ostream &KdStream) const; + + /// Decode as directives that handle COMPUTE_PGM_RSRC1. + /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC1. + /// \param KdStream - Stream to write the disassembled directives to. + // NOLINTNEXTLINE(readability-identifier-naming) + DecodeStatus decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, + raw_string_ostream &KdStream) const; + + /// Decode as directives that handle COMPUTE_PGM_RSRC2. + /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC2. + /// \param KdStream - Stream to write the disassembled directives to. 
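+  /// \returns MCDisassembler::Fail if a reserved or unsupported bit is set,
+  /// MCDisassembler::Success otherwise.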
+ // NOLINTNEXTLINE(readability-identifier-naming) + DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, + raw_string_ostream &KdStream) const; + DecodeStatus convertSDWAInst(MCInst &MI) const; DecodeStatus convertDPP8Inst(MCInst &MI) const; DecodeStatus convertMIMGInst(MCInst &MI) const; diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 0246c6508e9f20..914668f2b68a21 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -113,6 +113,8 @@ class SILowerControlFlow : public MachineFunctionPass { void combineMasks(MachineInstr &MI); + bool removeMBBifRedundant(MachineBasicBlock &MBB); + void process(MachineInstr &MI); // Skip to the next instruction, ignoring debug instructions, and trivial @@ -154,9 +156,6 @@ class SILowerControlFlow : public MachineFunctionPass { AU.addPreserved(); AU.addPreserved(); AU.addPreservedID(LiveVariablesID); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); - AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -604,6 +603,7 @@ void SILowerControlFlow::optimizeEndCf() { if (LIS) LIS->RemoveMachineInstrFromMaps(*MI); MI->eraseFromParent(); + removeMBBifRedundant(MBB); } } } @@ -658,6 +658,47 @@ void SILowerControlFlow::process(MachineInstr &MI) { } } +bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) { + bool Redundant = true; + for (auto &I : MBB.instrs()) { + if (!I.isDebugInstr() && !I.isUnconditionalBranch()) + Redundant = false; + } + if (Redundant) { + MachineBasicBlock *Succ = *MBB.succ_begin(); + SmallVector Preds(MBB.predecessors()); + for (auto P : Preds) { + P->replaceSuccessor(&MBB, Succ); + MachineBasicBlock::iterator I(P->getFirstInstrTerminator()); + while (I != P->end()) { + if (I->isBranch()) { + if (TII->getBranchDestBlock(*I) == &MBB) { + I->getOperand(0).setMBB(Succ); + break; + } + } + I++; + } + if (I == P->end()) { + MachineFunction *MF = P->getParent(); + MachineFunction::iterator InsertPt = + P->getNextNode() ? MachineFunction::iterator(P->getNextNode()) + : MF->end(); + MF->splice(InsertPt, Succ); + } + } + MBB.removeSuccessor(Succ); + if (LIS) { + for (auto &I : MBB.instrs()) + LIS->RemoveMachineInstrFromMaps(I); + } + MBB.clear(); + MBB.eraseFromParent(); + return true; + } + return false; +} + bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget(); TII = ST.getInstrInfo(); diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index dd7b520effa86c..d7d51fdd29ca88 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -6134,3 +6134,12 @@ bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault( MachineFunction &MF) const { return Subtarget.isMClass() && MF.getFunction().hasMinSize(); } + +bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, + AAResults *AA) const { + // Try hard to rematerialize any VCTPs because if we spill P0, it will block + // the tail predication conversion. This means that the element count + // register has to be live for longer, but that has to be better than + // spill/restore and VPT predication. 
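+  // A VCTP has no side effects and depends only on its element-count
+  // operand, so recomputing it at the point of use is safe as long as the
+  // instruction is not predicated.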
+ return isVCTP(&MI) && !isPredicated(MI); +} diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 53c627c2093433..5bf6e880056def 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -452,6 +452,9 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { MachineInstr *canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII) const; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI, + AAResults *AA) const override; + private: /// Modeling special VFP / NEON fp MLA / MLS hazards. @@ -635,8 +638,7 @@ static inline unsigned getTailPredVectorWidth(unsigned Opcode) { return 0; } -static inline -bool isVCTP(MachineInstr *MI) { +static inline bool isVCTP(const MachineInstr *MI) { switch (MI->getOpcode()) { default: break; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 75543093bcbfe6..2287edeef7662f 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -5710,6 +5710,7 @@ def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>; def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>; def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>; +let isReMaterializable = 1 in class MVE_VCTPInst size, list pattern=[]> : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix, "$Rn", vpred_n, "", pattern> { diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index a98590fd79c685..69e188fe5f888c 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -527,7 +527,12 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) { }; MBB = VCTP->getParent(); - if (auto *Def = RDA.getUniqueReachingMIDef(&MBB->back(), NumElements)) { + // Remove modifications to the element count since they have no purpose in a + // tail predicated loop. Explicitly refer to the vctp operand no matter which + // register NumElements has been assigned to, since that is what the + // modifications will be using + if (auto *Def = RDA.getUniqueReachingMIDef(&MBB->back(), + VCTP->getOperand(1).getReg())) { SmallPtrSet ElementChain; SmallPtrSet Ignore = { VCTP }; unsigned ExpectedVectorWidth = getTailPredVectorWidth(VCTP->getOpcode()); diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 55ac332e2c6a61..5068f9b5a0f46e 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -407,7 +407,8 @@ void ARMPassConfig::addIRPasses() { // ldrex/strex loops to simplify this, but it needs tidying up. 
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) addPass(createCFGSimplificationPass( - SimplifyCFGOptions().sinkCommonInsts(true), [this](const Function &F) { + SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true), + [this](const Function &F) { const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F); return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); })); diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h index db5b5633f6d908..1bad0d11fee4ba 100644 --- a/llvm/lib/Target/BPF/BTFDebug.h +++ b/llvm/lib/Target/BPF/BTFDebug.h @@ -16,7 +16,8 @@ #include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/DebugHandlerBase.h" -#include "llvm/CodeGen/MachineInstr.h" +#include +#include #include #include #include "BTF.h" @@ -27,9 +28,12 @@ class AsmPrinter; class BTFDebug; class DIType; class GlobalVariable; +class MachineFunction; +class MachineInstr; +class MachineOperand; +class MCInst; class MCStreamer; class MCSymbol; -class MachineFunction; /// The base class for BTF type generation. class BTFTypeBase { diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 8473515b3c758f..9e7176cd94218c 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -487,6 +487,7 @@ class HexagonTargetLowering : public TargetLowering { findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override; + bool shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const; bool isHvxOperation(SDNode *N, SelectionDAG &DAG) const; SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const; void LowerHvxOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index e5d05cfe64c47b..22561691f0e027 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -1939,16 +1939,36 @@ HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const { SDValue Op0 = Op.getOperand(0); MVT ResTy = ty(Op); MVT OpTy = ty(Op0); + + // .-res, op-> Scalar Illegal HVX + // Scalar ok extract(widen) - + // Illegal - widen widen + // HVX - - ok + if (Subtarget.isHVXVectorType(OpTy)) return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Op0); + assert(!isTypeLegal(OpTy) && "HVX-widening a truncate of scalar?"); + MVT WideOpTy = getWideTy(OpTy); SmallVector Concats = {Op0}; for (int i = 0, e = getFactor(OpTy) - 1; i != e; ++i) Concats.push_back(DAG.getUNDEF(OpTy)); SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideOpTy, Concats); - return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Cat); + SDValue V = DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Cat); + // If the original result wasn't legal and was supposed to be widened, + // we're done. + if (shouldWidenToHvx(ResTy, DAG)) + return V; + + // The original result type wasn't meant to be widened to HVX, so + // leave it as it is. Standard legalization should be able to deal + // with it (since now it's a result of a target-independent ISD + // node).
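+  // Keep only the low ResTy-sized portion of the widened result.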
+ assert(ResTy.isVector()); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy, + {V, getZero(dl, MVT::i32, DAG)}); } SDValue @@ -2029,11 +2049,15 @@ HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N, SDValue Op(N, 0); switch (Opc) { + case ISD::TRUNCATE: { + assert(shouldWidenToHvx(ty(Op.getOperand(0)), DAG) && "Not widening?"); + SDValue T = WidenHvxTruncate(Op, DAG); + Results.push_back(T); + break; + } case ISD::STORE: { - assert( - getPreferredHvxVectorAction(ty(cast(N)->getValue())) == - TargetLoweringBase::TypeWidenVector && - "Not widening?"); + assert(shouldWidenToHvx(ty(cast(N)->getValue()), DAG) && + "Not widening?"); SDValue Store = WidenHvxStore(SDValue(N, 0), DAG); Results.push_back(Store); break; @@ -2061,12 +2085,12 @@ HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N, unsigned Opc = N->getOpcode(); SDValue Op(N, 0); switch (Opc) { - case ISD::TRUNCATE: - if (!Subtarget.isHVXVectorType(ty(Op), false)) { - SDValue T = WidenHvxTruncate(Op, DAG); - Results.push_back(T); - } + case ISD::TRUNCATE: { + assert(shouldWidenToHvx(ty(Op), DAG) && "Not widening?"); + SDValue T = WidenHvxTruncate(Op, DAG); + Results.push_back(T); break; + } case ISD::BITCAST: if (isHvxBoolTy(ty(N->getOperand(0)))) { SDValue Op(N, 0); @@ -2103,8 +2127,22 @@ HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return SDValue(); } +bool +HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const { + assert(!Subtarget.isHVXVectorType(Ty, true)); + auto Action = getPreferredHvxVectorAction(Ty); + if (Action == TargetLoweringBase::TypeWidenVector) { + EVT WideTy = getTypeToTransformTo(*DAG.getContext(), Ty); + assert(WideTy.isSimple()); + return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true); + } + return false; +} + bool HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const { + if (!Subtarget.useHVXOps()) + return false; // If the type of any result, or any operand type are HVX vector types, // this is an HVX operation. 
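  // A type that is not an HVX vector yet, but would be widened into one by
  // type legalization, counts as well; the operand check below relies on
  // shouldWidenToHvx for exactly that case.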
auto IsHvxTy = [this](EVT Ty) { @@ -2122,15 +2160,7 @@ HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const { if (!Op.getValueType().isSimple()) return false; MVT ValTy = ty(Op); - if (ValTy.isVector()) { - auto Action = getPreferredVectorAction(ValTy); - if (Action == TargetLoweringBase::TypeWidenVector) { - EVT WideTy = getTypeToTransformTo(*DAG.getContext(), ValTy); - assert(WideTy.isSimple()); - return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true); - } - } - return false; + return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG); }; for (int i = 0, e = N->getNumValues(); i != e; ++i) { diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index b656a845b1526c..c9435cd21c2e0b 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -407,7 +407,7 @@ let Predicates = [UseHVX] in { def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(VecI8 (vpackl HVI16:$Vs)), (V6_vdealb HvxVR:$Vs)>; - def: Pat<(VecI8 (vpackl HVI32:$Vs)), (V6_vdealb4w HvxVR:$Vs, (IMPLICIT_DEF))>; + def: Pat<(VecI8 (vpackl HVI32:$Vs)), (V6_vdealb4w (IMPLICIT_DEF), HvxVR:$Vs)>; def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>; def: Pat<(VecI16 (bswap HVI16:$Vs)), diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 6728306db3d571..37cf391c99838c 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -327,6 +327,7 @@ void HexagonPassConfig::addIRPasses() { .forwardSwitchCondToPhi(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) + .hoistCommonInsts(true) .sinkCommonInsts(true))); if (EnableLoopPrefetch) addPass(createLoopDataPrefetchPass()); diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index 0c5df4ba1bade7..03933d82057663 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -40,8 +40,6 @@ namespace llvm { class Argument; -class CCState; -class CCValAssign; class FastISel; class FunctionLoweringInfo; class MachineBasicBlock; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index b213abb57aa833..f542a8018b4f0d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1199,6 +1199,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLibcallName(RTLIB::SRA_I128, nullptr); } + if (!isPPC64) + setMaxAtomicSizeInBitsSupported(32); + setStackPointerRegisterToSaveRestore(isPPC64 ? 
PPC::X1 : PPC::R1); // We have target-specific dag combine patterns for the following nodes: @@ -8219,8 +8222,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); EVT DstSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT); - SDValue Sel = - DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, Chain, true); + SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, + SDNodeFlags(), Chain, true); Chain = Sel.getValue(1); SDValue FltOfs = DAG.getSelect( diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 0732e0f0ace362..2423bca42e8052 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -4660,7 +4660,7 @@ MachineInstr *PPCInstrInfo::findLoopInstr( bool PPCInstrInfo::getMemOperandWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const { - if (!LdSt.mayLoadOrStore()) + if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3) return false; // Handle only loads/stores with base register followed by immediate offset. diff --git a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp index f390ddb89e3c9a..1f3dead6101121 100644 --- a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp +++ b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp @@ -8,10 +8,8 @@ #include "RISCVMatInt.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/MachineValueType.h" +#include "llvm/ADT/APInt.h" #include "llvm/Support/MathExtras.h" -#include namespace llvm { diff --git a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h index b12ae2eade9993..17ca57458b4938 100644 --- a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h +++ b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h @@ -9,12 +9,11 @@ #ifndef LLVM_LIB_TARGET_RISCV_MATINT_H #define LLVM_LIB_TARGET_RISCV_MATINT_H -#include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/MachineValueType.h" #include namespace llvm { +class APInt; namespace RISCVMatInt { struct Inst { diff --git a/llvm/lib/Target/Sparc/LeonPasses.cpp b/llvm/lib/Target/Sparc/LeonPasses.cpp index e9d3aaeb9cfe23..6ad6940c6b51b2 100644 --- a/llvm/lib/Target/Sparc/LeonPasses.cpp +++ b/llvm/lib/Target/Sparc/LeonPasses.cpp @@ -10,14 +10,13 @@ //===----------------------------------------------------------------------===// #include "LeonPasses.h" -#include "llvm/CodeGen/ISDOpcodes.h" +#include "SparcSubtarget.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; LEONMachineFunctionPass::LEONMachineFunctionPass(char &ID) diff --git a/llvm/lib/Target/Sparc/LeonPasses.h b/llvm/lib/Target/Sparc/LeonPasses.h index b165bc93780f65..9bc4569a12984a 100644 --- a/llvm/lib/Target/Sparc/LeonPasses.h +++ b/llvm/lib/Target/Sparc/LeonPasses.h @@ -12,14 +12,11 @@ #ifndef LLVM_LIB_TARGET_SPARC_LEON_PASSES_H #define LLVM_LIB_TARGET_SPARC_LEON_PASSES_H -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" - -#include "Sparc.h" 
-#include "SparcSubtarget.h" namespace llvm { +class SparcSubtarget; + class LLVM_LIBRARY_VISIBILITY LEONMachineFunctionPass : public MachineFunctionPass { protected: @@ -33,13 +30,11 @@ class LLVM_LIBRARY_VISIBILITY LEONMachineFunctionPass protected: LEONMachineFunctionPass(char &ID); - int GetRegIndexForOperand(MachineInstr &MI, int OperandIndex); void clearUsedRegisterList() { UsedRegisters.clear(); } void markRegisterUsed(int registerIndex) { UsedRegisters.push_back(registerIndex); } - int getUnusedFPRegister(MachineRegisterInfo &MRI); }; class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index 02330a2dd4afae..d5ee4b3b9440e5 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -178,6 +178,28 @@ getLatestInsertPos(MachineBasicBlock *MBB, return InsertPos; } +// Find a catch instruction and its destination register within an EH pad. +static MachineInstr *findCatch(MachineBasicBlock *EHPad, Register &ExnReg) { + assert(EHPad->isEHPad()); + MachineInstr *Catch = nullptr; + for (auto &MI : *EHPad) { + switch (MI.getOpcode()) { + case WebAssembly::CATCH: + Catch = &MI; + ExnReg = Catch->getOperand(0).getReg(); + break; + } + } + assert(Catch && "EH pad does not have a catch"); + assert(ExnReg != 0 && "Invalid register"); + return Catch; +} + +static MachineInstr *findCatch(MachineBasicBlock *EHPad) { + Register Dummy; + return findCatch(EHPad, Dummy); +} + void WebAssemblyCFGStackify::registerScope(MachineInstr *Begin, MachineInstr *End) { BeginToEnd[Begin] = End; @@ -1101,25 +1123,8 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { continue; MachineBasicBlock *EHPad = P.first; - - // Find 'catch' and 'local.set' or 'drop' instruction that follows the - // 'catch'. If -wasm-disable-explicit-locals is not set, 'catch' should be - // always followed by either 'local.set' or a 'drop', because 'br_on_exn' is - // generated after 'catch' in LateEHPrepare and we don't support blocks - // taking values yet. - MachineInstr *Catch = nullptr; - unsigned ExnReg = 0; - for (auto &MI : *EHPad) { - switch (MI.getOpcode()) { - case WebAssembly::CATCH: - Catch = &MI; - ExnReg = Catch->getOperand(0).getReg(); - break; - } - } - assert(Catch && "EH pad does not have a catch"); - assert(ExnReg != 0 && "Invalid register"); - + Register ExnReg = 0; + MachineInstr *Catch = findCatch(EHPad, ExnReg); auto SplitPos = std::next(Catch->getIterator()); // Create a new BB that's gonna be the destination for branches from the @@ -1371,22 +1376,41 @@ void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) { : WebAssembly::BlockType( WebAssembly::toValType(MFI.getResults().front())); - for (MachineBasicBlock &MBB : reverse(MF)) { - for (MachineInstr &MI : reverse(MBB)) { + SmallVector Worklist; + Worklist.push_back(MF.rbegin()->rbegin()); + + auto Process = [&](MachineBasicBlock::reverse_iterator It) { + auto *MBB = It->getParent(); + while (It != MBB->rend()) { + MachineInstr &MI = *It++; if (MI.isPosition() || MI.isDebugInstr()) continue; switch (MI.getOpcode()) { + case WebAssembly::END_TRY: { + // If a 'try''s return type is fixed, both its try body and catch body + // should satisfy the return type, so we need to search 'end' + // instructions before its corresponding 'catch' too. 
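+          // The pushed iterator resumes just in front of the 'catch', i.e. at
+          // the end of the try body, which is visited later off the worklist.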
+ auto *EHPad = TryToEHPad.lookup(EndToBegin[&MI]); + assert(EHPad); + Worklist.push_back(std::next(findCatch(EHPad)->getReverseIterator())); + LLVM_FALLTHROUGH; + } case WebAssembly::END_BLOCK: case WebAssembly::END_LOOP: - case WebAssembly::END_TRY: EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType)); continue; default: - // Something other than an `end`. We're done. + // Something other than an `end`. We're done for this BB. return; } } - } + // We've reached the beginning of a BB. Continue the search in the previous + // BB. + Worklist.push_back(MBB->getPrevNode()->rbegin()); + }; + + while (!Worklist.empty()) + Process(Worklist.pop_back_val()); } // WebAssembly functions end with an end instruction, as if the function body diff --git a/llvm/lib/Target/X86/X86CallLowering.cpp b/llvm/lib/Target/X86/X86CallLowering.cpp index 0286482ac9af8e..8342cad45dfd0f 100644 --- a/llvm/lib/Target/X86/X86CallLowering.cpp +++ b/llvm/lib/Target/X86/X86CallLowering.cpp @@ -148,9 +148,9 @@ struct X86OutgoingValueHandler : public CallLowering::IncomingValueHandler { MachineFunction &MF = MIRBuilder.getMF(); Register ExtReg = extendRegister(ValVReg, VA); - auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, - VA.getLocVT().getStoreSize(), - inferAlignFromPtrInfo(MF, MPO)); + auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, + VA.getLocVT().getStoreSize(), + inferAlignFromPtrInfo(MF, MPO)); MIRBuilder.buildStore(ExtReg, Addr, *MMO); } @@ -194,7 +194,7 @@ bool X86CallLowering::lowerReturn( MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); - auto &DL = MF.getDataLayout(); + const DataLayout &DL = MF.getDataLayout(); LLVMContext &Ctx = Val->getType()->getContext(); const X86TargetLowering &TLI = *getTLI(); @@ -245,7 +245,7 @@ struct X86IncomingValueHandler : public CallLowering::IncomingValueHandler { void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size, MachinePointerInfo &MPO, CCValAssign &VA) override { MachineFunction &MF = MIRBuilder.getMF(); - auto MMO = MF.getMachineMemOperand( + auto *MMO = MF.getMachineMemOperand( MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size, inferAlignFromPtrInfo(MF, MPO)); MIRBuilder.buildLoad(ValVReg, Addr, *MMO); @@ -337,8 +337,7 @@ bool X86CallLowering::lowerFormalArguments( SmallVector SplitArgs; unsigned Idx = 0; - for (auto &Arg : F.args()) { - + for (const auto &Arg : F.args()) { // TODO: handle not simple cases. if (Arg.hasAttribute(Attribute::ByVal) || Arg.hasAttribute(Attribute::InReg) || @@ -377,10 +376,10 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); - auto &DL = F.getParent()->getDataLayout(); + const DataLayout &DL = F.getParent()->getDataLayout(); const X86Subtarget &STI = MF.getSubtarget(); const TargetInstrInfo &TII = *STI.getInstrInfo(); - auto TRI = STI.getRegisterInfo(); + const X86RegisterInfo *TRI = STI.getRegisterInfo(); // Handle only Linux C, X86_64_SysV calling conventions for now. 
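  // Anything else is rejected here, so GlobalISel falls back to the DAG-based
  // call lowering for those cases.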
if (!STI.isTargetLinux() || !(Info.CallConv == CallingConv::C || diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp index 488ee51f1d89bb..3a0d6a52ef4636 100644 --- a/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -141,7 +141,7 @@ class InstrReplacer : public InstrConverterBase { return false; // It's illegal to replace an instruction that implicitly defines a register // with an instruction that doesn't, unless that register dead. - for (auto &MO : MI->implicit_operands()) + for (const auto &MO : MI->implicit_operands()) if (MO.isReg() && MO.isDef() && !MO.isDead() && !TII->get(DstOpcode).hasImplicitDefOfPhysReg(MO.getReg())) return false; @@ -180,7 +180,7 @@ class InstrReplacerDstCOPY : public InstrConverterBase { MachineRegisterInfo *MRI) const override { assert(isLegal(MI, TII) && "Cannot convert instruction"); MachineBasicBlock *MBB = MI->getParent(); - auto &DL = MI->getDebugLoc(); + const DebugLoc &DL = MI->getDebugLoc(); Register Reg = MRI->createVirtualRegister( TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo(), @@ -237,7 +237,7 @@ class InstrCOPYReplacer : public InstrReplacer { MachineRegisterInfo *MRI) const override { assert(MI->getOpcode() == TargetOpcode::COPY && "Expected a COPY"); - for (auto &MO : MI->operands()) { + for (const auto &MO : MI->operands()) { // Physical registers will not be converted. Assume that converting the // COPY to the destination domain will eventually result in a actual // instruction. @@ -517,7 +517,7 @@ void X86DomainReassignment::reassign(const Closure &C, RegDomain Domain) const { } } - for (auto MI : ToErase) + for (auto *MI : ToErase) MI->eraseFromParent(); } @@ -537,7 +537,7 @@ static bool usedAsAddr(const MachineInstr &MI, unsigned Reg, for (unsigned MemOpIdx = MemOpStart, MemOpEnd = MemOpStart + X86::AddrNumOperands; MemOpIdx < MemOpEnd; ++MemOpIdx) { - auto &Op = MI.getOperand(MemOpIdx); + const MachineOperand &Op = MI.getOperand(MemOpIdx); if (Op.isReg() && Op.getReg() == Reg) return true; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1395db57b57a0d..1cd928c1de1204 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -193,8 +193,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (Subtarget.hasCMov()) { setOperationAction(ISD::ABS , MVT::i16 , Custom); setOperationAction(ISD::ABS , MVT::i32 , Custom); + if (Subtarget.is64Bit()) + setOperationAction(ISD::ABS , MVT::i64 , Custom); } - setOperationAction(ISD::ABS , MVT::i64 , Custom); // Funnel shifts. 
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) { @@ -20344,7 +20345,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, *DAG.getContext(), TheVT); SDValue Cmp; if (IsStrict) { - Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETLT, + Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETLT, SDNodeFlags(), Chain, /*IsSignaling*/ true); Chain = Cmp.getValue(1); } else { @@ -29718,28 +29719,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(Res); return; } - case ISD::ABS: { - assert(N->getValueType(0) == MVT::i64 && - "Unexpected type (!= i64) on ABS."); - MVT HalfT = MVT::i32; - SDValue Lo, Hi, Tmp; - SDVTList VTList = DAG.getVTList(HalfT, MVT::i1); - - Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), - DAG.getConstant(0, dl, HalfT)); - Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), - DAG.getConstant(1, dl, HalfT)); - Tmp = DAG.getNode( - ISD::SRA, dl, HalfT, Hi, - DAG.getShiftAmountConstant(HalfT.getSizeInBits() - 1, HalfT, dl)); - Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo); - Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi, - SDValue(Lo.getNode(), 1)); - Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi); - Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo); - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi)); - return; - } // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32. case X86ISD::FMINC: case X86ISD::FMIN: diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index b2677b4572e47c..acb14b11aba9ef 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -625,7 +625,22 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, // We use a pointer use visitor to discover if there are any writes into an // alloca that dominates CoroBegin. If that is the case, insertSpills will copy // the value from the alloca into the coroutine frame spill slot corresponding -// to that alloca. +// to that alloca. We also collect any alias pointing to the alloca created +// before CoroBegin but used after CoroBegin. These aliases will be recreated +// after CoroBegin from the frame address so that later references point +// to the frame instead of the stack. +// Note: We are repurposing PtrUseVisitor's isEscaped() to mean whether the +// pointer is potentially written into. +// TODO: If the pointer is really escaped, we are in big trouble because we +// will be escaping a pointer to a stack address that would no longer exist +// soon. However most escape analysis isn't good enough to precisely tell, +// so we assume that if a pointer escapes, it is written into. +// TODO: Another potential issue is if we are creating an alias through +// a function call, e.g.: +// %a = AllocaInst ... +// %b = call @computeAddress(... %a) +// If %b is an alias of %a and will be used after CoroBegin, this will be broken +// and there is nothing we can do about it. namespace { struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> { using Base = PtrUseVisitor<AllocaUseVisitor>; @@ -633,49 +648,83 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> { const CoroBeginInst &CB) : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {} - // We are only interested in uses that dominate coro.begin. + // We are only interested in uses that are not dominated by coro.begin.
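+  // Those are the uses that can execute before the coroutine frame is
+  // populated; uses dominated by coro.begin will instead be redirected to
+  // the frame.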
void visit(Instruction &I) { - if (DT.dominates(&I, &CoroBegin)) + if (!DT.dominates(&CoroBegin, &I)) Base::visit(I); } // We need to provide this overload as PtrUseVisitor uses a pointer based // visiting function. void visit(Instruction *I) { return visit(*I); } - void visitLoadInst(LoadInst &) {} // Good. Nothing to do. + // We cannot handle PHI node and SelectInst because they could be selecting + // between two addresses that point to different Allocas. + void visitPHINode(PHINode &I) { + assert(!usedAfterCoroBegin(I) && + "Unable to handle PHI node of aliases created before CoroBegin but " + "used after CoroBegin"); + } + + void visitSelectInst(SelectInst &I) { + assert(!usedAfterCoroBegin(I) && + "Unable to handle Select of aliases created before CoroBegin but " + "used after CoroBegin"); + } + + void visitLoadInst(LoadInst &) {} // If the use is an operand, the pointer escaped and anything can write into // that memory. If the use is the pointer, we are definitely writing into the // alloca and therefore we need to copy. - void visitStoreInst(StoreInst &SI) { PI.setAborted(&SI); } + void visitStoreInst(StoreInst &SI) { PI.setEscaped(&SI); } - // Any other instruction that is not filtered out by PtrUseVisitor, will - // result in the copy. - void visitInstruction(Instruction &I) { PI.setAborted(&I); } + // All mem intrinsics modify the data. + void visitMemIntrinsic(MemIntrinsic &MI) { PI.setEscaped(&MI); } + + void visitBitCastInst(BitCastInst &BC) { + Base::visitBitCastInst(BC); + handleAlias(BC); + } + + void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { + Base::visitAddrSpaceCastInst(ASC); + handleAlias(ASC); + } + + void visitGetElementPtrInst(GetElementPtrInst &GEPI) { + // The base visitor will adjust Offset accordingly. + Base::visitGetElementPtrInst(GEPI); + handleAlias(GEPI); + } + + const SmallVector, 1> &getAliases() const { + return Aliases; + } private: const DominatorTree &DT; const CoroBeginInst &CoroBegin; + // All alias to the original AllocaInst, and are used after CoroBegin. + // Each entry contains the instruction and the offset in the original Alloca. + SmallVector, 1> Aliases{}; + + bool usedAfterCoroBegin(Instruction &I) { + for (auto &U : I.uses()) + if (DT.dominates(&CoroBegin, U)) + return true; + return false; + } + + void handleAlias(Instruction &I) { + if (!usedAfterCoroBegin(I)) + return; + + assert(IsOffsetKnown && "Can only handle alias with known offset created " + "before CoroBegin and used after"); + Aliases.emplace_back(&I, Offset); + } }; } // namespace -static bool mightWriteIntoAllocaPtr(AllocaInst &A, const DominatorTree &DT, - const CoroBeginInst &CB) { - const DataLayout &DL = A.getModule()->getDataLayout(); - AllocaUseVisitor Visitor(DL, DT, CB); - auto PtrI = Visitor.visitPtr(A); - if (PtrI.isEscaped() || PtrI.isAborted()) { - auto *PointerEscapingInstr = PtrI.getEscapingInst() - ? PtrI.getEscapingInst() - : PtrI.getAbortingInst(); - if (PointerEscapingInstr) { - LLVM_DEBUG( - dbgs() << "AllocaInst copy was triggered by instruction: " - << *PointerEscapingInstr << "\n"); - } - return true; - } - return false; -} // We need to make room to insert a spill after initial PHIs, but before // catchswitch instruction. 
Placing it before violates the requirement that @@ -955,7 +1004,11 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { for (auto &P : Allocas) { AllocaInst *const A = P.first; - if (mightWriteIntoAllocaPtr(*A, DT, *CB)) { + AllocaUseVisitor Visitor(A->getModule()->getDataLayout(), DT, *CB); + auto PtrI = Visitor.visitPtr(*A); + assert(!PtrI.isAborted()); + if (PtrI.isEscaped()) { + // isEscaped really means potentially modified before CoroBegin. if (A->isArrayAllocation()) report_fatal_error( "Coroutines cannot handle copying of array allocas yet"); @@ -964,6 +1017,20 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { auto *Value = Builder.CreateLoad(A->getAllocatedType(), A); Builder.CreateStore(Value, G); } + // For each alias to the Alloca created before CoroBegin but used after + // CoroBegin, we recreate it after CoroBegin by applying the offset + // to the pointer in the frame. + for (const auto &Alias : Visitor.getAliases()) { + auto *FramePtr = GetFramePointer(P.second, A); + auto *FramePtrRaw = + Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C)); + auto *AliasPtr = Builder.CreateGEP( + FramePtrRaw, ConstantInt::get(Type::getInt64Ty(C), Alias.second)); + auto *AliasPtrTyped = + Builder.CreateBitCast(AliasPtr, Alias.first->getType()); + Alias.first->replaceUsesWithIf( + AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); }); + } } } return FramePtr; diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index ea285b51982c13..9927bca9955524 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -73,6 +73,14 @@ static cl::opt<unsigned> MaxFixpointIterations("attributor-max-iterations", cl::Hidden, cl::desc("Maximal number of fixpoint iterations."), cl::init(32)); + +static cl::opt<unsigned, true> MaxInitializationChainLengthX( + "attributor-max-initialization-chain-length", cl::Hidden, + cl::desc( + "Maximal number of chained initializations (to avoid stack overflows)"), + cl::location(MaxInitializationChainLength), cl::init(1024)); +unsigned llvm::MaxInitializationChainLength; + static cl::opt<bool> VerifyMaxFixpointIterations( "attributor-max-iterations-verify", cl::Hidden, cl::desc("Verify that max-iterations is a tight bound for a fixpoint"), @@ -132,11 +140,11 @@ static cl::opt<bool> PrintDependencies("attributor-print-dep", cl::Hidden, /// Logic operators for the change status enum class. /// ///{ -ChangeStatus llvm::operator|(ChangeStatus l, ChangeStatus r) { - return l == ChangeStatus::CHANGED ? l : r; +ChangeStatus llvm::operator|(ChangeStatus L, ChangeStatus R) { + return L == ChangeStatus::CHANGED ? L : R; } -ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) { - return l == ChangeStatus::UNCHANGED ? l : r; +ChangeStatus llvm::operator&(ChangeStatus L, ChangeStatus R) { + return L == ChangeStatus::UNCHANGED ? L : R; } ///} @@ -189,7 +197,7 @@ Argument *IRPosition::getAssociatedArgument() const { // Not an Argument and no argument number means this is not a call site // argument, thus we cannot find a callback argument to return. - int ArgNo = getArgNo(); + int ArgNo = getCallSiteArgNo(); if (ArgNo < 0) return nullptr; @@ -317,6 +325,13 @@ const IRPosition SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { IRPositions.emplace_back(IRP); + // Helper to determine if operand bundles on a call site are benign or + // potentially problematic. We handle only llvm.assume for now.
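+  // An llvm.assume's bundle operands only record known facts about the
+  // values they reference; they do not redirect information flow, so they
+  // are safe to look through here.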
+ auto CanIgnoreOperandBundles = [](const CallBase &CB) { + return (isa(CB) && + cast(CB).getIntrinsicID() == Intrinsic ::assume); + }; + const auto *CB = dyn_cast(&IRP.getAnchorValue()); switch (IRP.getPositionKind()) { case IRPosition::IRP_INVALID: @@ -331,7 +346,7 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { assert(CB && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. - if (!CB->hasOperandBundles()) + if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) if (const Function *Callee = CB->getCalledFunction()) IRPositions.emplace_back(IRPosition::function(*Callee)); return; @@ -339,7 +354,7 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { assert(CB && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. - if (!CB->hasOperandBundles()) { + if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) { if (const Function *Callee = CB->getCalledFunction()) { IRPositions.emplace_back(IRPosition::returned(*Callee)); IRPositions.emplace_back(IRPosition::function(*Callee)); @@ -356,17 +371,17 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { IRPositions.emplace_back(IRPosition::callsite_function(*CB)); return; case IRPosition::IRP_CALL_SITE_ARGUMENT: { - int ArgNo = IRP.getArgNo(); - assert(CB && ArgNo >= 0 && "Expected call site!"); + assert(CB && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. - if (!CB->hasOperandBundles()) { + if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) { const Function *Callee = CB->getCalledFunction(); - if (Callee && Callee->arg_size() > unsigned(ArgNo)) - IRPositions.emplace_back(IRPosition::argument(*Callee->getArg(ArgNo))); - if (Callee) + if (Callee) { + if (Argument *Arg = IRP.getAssociatedArgument()) + IRPositions.emplace_back(IRPosition::argument(*Arg)); IRPositions.emplace_back(IRPosition::function(*Callee)); } + } IRPositions.emplace_back(IRPosition::value(IRP.getAssociatedValue())); return; } @@ -503,7 +518,7 @@ void IRPosition::verify() { "Expected call base argument operand for a 'call site argument' " "position"); assert(cast(U->getUser())->getArgOperandNo(U) == - unsigned(getArgNo()) && + unsigned(getCallSiteArgNo()) && "Argument number mismatch!"); assert(U->get() == &getAssociatedValue() && "Associated value mismatch!"); return; @@ -1306,9 +1321,27 @@ ChangeStatus Attributor::cleanupIR() { CGUpdater.removeFunction(*Fn); } + if (!ToBeChangedUses.empty()) + ManifestChange = ChangeStatus::CHANGED; + + if (!ToBeChangedToUnreachableInsts.empty()) + ManifestChange = ChangeStatus::CHANGED; + if (!ToBeDeletedFunctions.empty()) ManifestChange = ChangeStatus::CHANGED; + if (!ToBeDeletedBlocks.empty()) + ManifestChange = ChangeStatus::CHANGED; + + if (!ToBeDeletedInsts.empty()) + ManifestChange = ChangeStatus::CHANGED; + + if (!InvokeWithDeadSuccessor.empty()) + ManifestChange = ChangeStatus::CHANGED; + + if (!DeadInsts.empty()) + ManifestChange = ChangeStatus::CHANGED; + NumFnDeleted += ToBeDeletedFunctions.size(); LLVM_DEBUG(dbgs() << "[Attributor] Deleted " << NumFnDeleted @@ -1431,7 +1464,7 @@ static void createShallowWrapper(Function &F) { BasicBlock *EntryBB = BasicBlock::Create(Ctx, "entry", Wrapper); SmallVector Args; - auto FArgIt = F.arg_begin(); + Argument *FArgIt = F.arg_begin(); for (Argument &Arg : Wrapper->args()) { Args.push_back(&Arg); 
Arg.setName((FArgIt++)->getName()); @@ -1463,9 +1496,8 @@ static Function *internalizeFunction(Function &F) { FunctionType *FnTy = F.getFunctionType(); // create a copy of the current function - Function *Copied = - Function::Create(FnTy, GlobalValue::PrivateLinkage, F.getAddressSpace(), - F.getName() + ".internalized"); + Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(), + F.getName() + ".internalized"); ValueToValueMapTy VMap; auto *NewFArgIt = Copied->arg_begin(); for (auto &Arg : F.args()) { @@ -1478,6 +1510,11 @@ static Function *internalizeFunction(Function &F) { // Copy the body of the original function to the new one CloneFunctionInto(Copied, &F, VMap, /* ModuleLevelChanges */ false, Returns); + // Set the linkage and visibility late as CloneFunctionInto has some implicit + // requirements. + Copied->setVisibility(GlobalValue::DefaultVisibility); + Copied->setLinkage(GlobalValue::PrivateLinkage); + // Copy metadata SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; F.getAllMetadata(MDs); @@ -1755,8 +1792,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures( assert(Success && "Assumed call site replacement to succeed!"); // Rewire the arguments. - auto OldFnArgIt = OldFn->arg_begin(); - auto NewFnArgIt = NewFn->arg_begin(); + Argument *OldFnArgIt = OldFn->arg_begin(); + Argument *NewFnArgIt = NewFn->arg_begin(); for (unsigned OldArgNum = 0; OldArgNum < ARIs.size(); ++OldArgNum, ++OldFnArgIt) { if (const std::unique_ptr &ARI = @@ -2152,7 +2189,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, IRPosition::Kind AP) { raw_ostream &llvm::operator<<(raw_ostream &OS, const IRPosition &Pos) { const Value &AV = Pos.getAssociatedValue(); return OS << "{" << Pos.getPositionKind() << ":" << AV.getName() << " [" - << Pos.getAnchorValue().getName() << "@" << Pos.getArgNo() << "]}"; + << Pos.getAnchorValue().getName() << "@" << Pos.getCallSiteArgNo() + << "]}"; } raw_ostream &llvm::operator<<(raw_ostream &OS, const IntegerRangeState &S) { diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index b76e83def6e803..7bec9705970385 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -500,7 +500,7 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA, Optional<StateType> T; // The argument number which is also the call site argument number. - unsigned ArgNo = QueryingAA.getIRPosition().getArgNo(); + unsigned ArgNo = QueryingAA.getIRPosition().getCallSiteArgNo(); auto CallSiteCheck = [&](AbstractCallSite ACS) { const IRPosition &ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo); @@ -736,7 +736,7 @@ struct AANoUnwindCallSite final : AANoUnwindImpl { void initialize(Attributor &A) override { AANoUnwindImpl::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); } @@ -795,7 +795,7 @@ class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState { ReturnedValues.clear(); Function *F = getAssociatedFunction(); - if (!F) { + if (!F || F->isDeclaration()) { indicatePessimisticFixpoint(); return; } @@ -1141,11 +1141,13 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) { RVState RVS({NewRVsMap, Unused, RetValAAIt.second}); VisitReturnedValue(*CB->getArgOperand(Arg->getArgNo()), RVS, CB); continue; - } else if (isa<CallBase>(RetVal)) { + } + if (isa<CallBase>(RetVal)) { // Call sites are resolved by the callee attribute over time, no need to // do anything for us.
       continue;
-    } else if (isa<Constant>(RetVal)) {
+    }
+    if (isa<Constant>(RetVal)) {
       // Constants are valid everywhere, we can simply take them.
       NewRVsMap[RetVal].insert(RIs.begin(), RIs.end());
       continue;
@@ -1386,7 +1388,7 @@ struct AANoSyncCallSite final : AANoSyncImpl {
   void initialize(Attributor &A) override {
     AANoSyncImpl::initialize(A);
     Function *F = getAssociatedFunction();
-    if (!F)
+    if (!F || F->isDeclaration())
       indicatePessimisticFixpoint();
   }

@@ -1451,7 +1453,7 @@ struct AANoFreeCallSite final : AANoFreeImpl {
   void initialize(Attributor &A) override {
     AANoFreeImpl::initialize(A);
     Function *F = getAssociatedFunction();
-    if (!F)
+    if (!F || F->isDeclaration())
       indicatePessimisticFixpoint();
   }

@@ -1898,7 +1900,7 @@ struct AANoRecurseCallSite final : AANoRecurseImpl {
   void initialize(Attributor &A) override {
     AANoRecurseImpl::initialize(A);
     Function *F = getAssociatedFunction();
-    if (!F)
+    if (!F || F->isDeclaration())
       indicatePessimisticFixpoint();
   }

@@ -2274,7 +2276,7 @@ struct AAWillReturnImpl : public AAWillReturn {
     AAWillReturn::initialize(A);

     Function *F = getAnchorScope();
-    if (!F || !A.isFunctionIPOAmendable(*F) || mayContainUnboundedCycle(*F, A))
+    if (!F || F->isDeclaration() || mayContainUnboundedCycle(*F, A))
       indicatePessimisticFixpoint();
   }

@@ -2318,9 +2320,9 @@ struct AAWillReturnCallSite final : AAWillReturnImpl {

   /// See AbstractAttribute::initialize(...).
   void initialize(Attributor &A) override {
-    AAWillReturnImpl::initialize(A);
+    AAWillReturn::initialize(A);
     Function *F = getAssociatedFunction();
-    if (!F)
+    if (!F || !A.isFunctionIPOAmendable(*F))
       indicatePessimisticFixpoint();
   }

@@ -2493,7 +2495,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
   void initialize(Attributor &A) override {
     // See callsite argument attribute and callee argument attribute.
     const auto &CB = cast<CallBase>(getAnchorValue());
-    if (CB.paramHasAttr(getArgNo(), Attribute::NoAlias))
+    if (CB.paramHasAttr(getCallSiteArgNo(), Attribute::NoAlias))
       indicateOptimisticFixpoint();
     Value &Val = getAssociatedValue();
     if (isa<ConstantPointerNull>(Val) &&
@@ -2508,7 +2510,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
                             const AAMemoryBehavior &MemBehaviorAA,
                             const CallBase &CB, unsigned OtherArgNo) {
     // We do not need to worry about aliasing with the underlying IRP.
-    if (this->getArgNo() == (int)OtherArgNo)
+    if (this->getCalleeArgNo() == (int)OtherArgNo)
       return false;

     // If it is not a pointer or pointer vector we do not alias.
@@ -2673,6 +2675,14 @@ struct AANoAliasReturned final : AANoAliasImpl {
   AANoAliasReturned(const IRPosition &IRP, Attributor &A)
       : AANoAliasImpl(IRP, A) {}

+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    AANoAliasImpl::initialize(A);
+    Function *F = getAssociatedFunction();
+    if (!F || F->isDeclaration())
+      indicatePessimisticFixpoint();
+  }
+
   /// See AbstractAttribute::updateImpl(...).
   virtual ChangeStatus updateImpl(Attributor &A) override {

@@ -2714,7 +2724,7 @@ struct AANoAliasCallSiteReturned final : AANoAliasImpl {
   void initialize(Attributor &A) override {
     AANoAliasImpl::initialize(A);
     Function *F = getAssociatedFunction();
-    if (!F)
+    if (!F || F->isDeclaration())
       indicatePessimisticFixpoint();
   }

@@ -2923,7 +2933,7 @@ struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl {

   /// See AbstractAttribute::manifest(...).
   ChangeStatus manifest(Attributor &A) override {
     CallBase &CB = cast<CallBase>(getAnchorValue());
-    Use &U = CB.getArgOperandUse(getArgNo());
+    Use &U = CB.getArgOperandUse(getCallSiteArgNo());
     assert(!isa<UndefValue>(U.get()) &&
            "Expected undef values to be filtered out!");
     UndefValue &UV = *UndefValue::get(U->getType());
@@ -3863,8 +3873,16 @@ struct AAAlignFloating : AAAlignImpl {

 /// Align attribute for function return value.
 struct AAAlignReturned final
     : AAReturnedFromReturnedValues<AAAlign, AAAlignImpl> {
-  AAAlignReturned(const IRPosition &IRP, Attributor &A)
-      : AAReturnedFromReturnedValues(IRP, A) {}
+  using Base = AAReturnedFromReturnedValues<AAAlign, AAAlignImpl>;
+  AAAlignReturned(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    Base::initialize(A);
+    Function *F = getAssociatedFunction();
+    if (!F || F->isDeclaration())
+      indicatePessimisticFixpoint();
+  }

   /// See AbstractAttribute::trackStatistics()
   void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(aligned) }
@@ -3938,7 +3956,7 @@ struct AAAlignCallSiteReturned final
   void initialize(Attributor &A) override {
     Base::initialize(A);
     Function *F = getAssociatedFunction();
-    if (!F)
+    if (!F || F->isDeclaration())
       indicatePessimisticFixpoint();
   }

@@ -3954,7 +3972,7 @@ struct AANoReturnImpl : public AANoReturn {
   void initialize(Attributor &A) override {
     AANoReturn::initialize(A);
     Function *F = getAssociatedFunction();
-    if (!F)
+    if (!F || F->isDeclaration())
       indicatePessimisticFixpoint();
   }

@@ -4028,7 +4046,7 @@ struct AANoCaptureImpl : public AANoCapture {
       return;
     }

-    const Function *F = getArgNo() >= 0 ? getAssociatedFunction() : AnchorScope;
+    const Function *F = isArgumentPosition() ? getAssociatedFunction() : AnchorScope;

     // Check what state the associated function can actually capture.
     if (F)
@@ -4047,7 +4065,7 @@ struct AANoCaptureImpl : public AANoCapture {
     if (!isAssumedNoCaptureMaybeReturned())
       return;

-    if (getArgNo() >= 0) {
+    if (isArgumentPosition()) {
       if (isAssumedNoCapture())
         Attrs.emplace_back(Attribute::get(Ctx, Attribute::NoCapture));
       else if (ManifestInternal)
@@ -4083,7 +4101,7 @@ struct AANoCaptureImpl : public AANoCapture {
     State.addKnownBits(NOT_CAPTURED_IN_RET);

     // Check existing "returned" attributes.
-    int ArgNo = IRP.getArgNo();
+    int ArgNo = IRP.getCalleeArgNo();
     if (F.doesNotThrow() && ArgNo >= 0) {
       for (unsigned u = 0, e = F.arg_size(); u < e; ++u)
         if (F.hasParamAttribute(u, Attribute::Returned)) {
@@ -4260,12 +4278,12 @@ struct AACaptureUseTracker final : public CaptureTracker {
 ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
   const IRPosition &IRP = getIRPosition();
   const Value *V =
-      getArgNo() >= 0 ? IRP.getAssociatedArgument() : &IRP.getAssociatedValue();
+      isArgumentPosition() ? IRP.getAssociatedArgument() : &IRP.getAssociatedValue();
   if (!V)
     return indicatePessimisticFixpoint();

   const Function *F =
-      getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
+      isArgumentPosition() ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
   assert(F && "Expected a function!");
   const IRPosition &FnPos = IRPosition::function(*F);
   const auto &IsDeadAA =
@@ -4611,7 +4629,7 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {

     auto PredForCallSite = [&](AbstractCallSite ACS) {
       const IRPosition &ACSArgPos =
-          IRPosition::callsite_argument(ACS, getArgNo());
+          IRPosition::callsite_argument(ACS, getCallSiteArgNo());
       // Check if a corresponding argument was found or if it is not
       // associated (which can happen for callback calls).
       if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
@@ -4892,7 +4910,8 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
             ? dyn_cast<Constant>(SimplifiedAssociatedValue.getValue())
             : UndefValue::get(V.getType());
     if (C) {
-      Use &U = cast<CallBase>(&getAnchorValue())->getArgOperandUse(getArgNo());
+      Use &U = cast<CallBase>(&getAnchorValue())
+                   ->getArgOperandUse(getCallSiteArgNo());
       // We can replace the AssociatedValue with the constant.
       if (&V != C && V.getType() == C->getType()) {
         if (A.changeUseAfterManifest(U, *C))
@@ -5211,7 +5230,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
       return getAssociatedValue().getType()->getPointerElementType();

     Optional<Type *> Ty;
-    unsigned ArgNo = getIRPosition().getArgNo();
+    unsigned ArgNo = getIRPosition().getCallSiteArgNo();

     // Make sure the associated call site argument has the same type at all call
     // sites and it is an allocation we know is safe to privatize, for now that
@@ -5747,7 +5766,7 @@ struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
   void initialize(Attributor &A) override {
     intersectAssumedBits(BEST_STATE);
     getKnownStateFromValue(getIRPosition(), getState());
-    IRAttribute::initialize(A);
+    AAMemoryBehavior::initialize(A);
   }

   /// Return the memory behavior information encoded in the IR for \p IRP.
@@ -5933,14 +5952,21 @@ struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {

   /// See AbstractAttribute::initialize(...).
   void initialize(Attributor &A) override {
-    if (Argument *Arg = getAssociatedArgument()) {
-      if (Arg->hasByValAttr()) {
-        addKnownBits(NO_WRITES);
-        removeKnownBits(NO_READS);
-        removeAssumedBits(NO_READS);
-      }
+    // If we don't have an associated argument this is either a variadic call
+    // or an indirect call, either way, nothing to do here.
+    Argument *Arg = getAssociatedArgument();
+    if (!Arg) {
+      indicatePessimisticFixpoint();
+      return;
+    }
+    if (Arg->hasByValAttr()) {
+      addKnownBits(NO_WRITES);
+      removeKnownBits(NO_READS);
+      removeAssumedBits(NO_READS);
     }
     AAMemoryBehaviorArgument::initialize(A);
+    if (getAssociatedFunction()->isDeclaration())
+      indicatePessimisticFixpoint();
   }

   /// See AbstractAttribute::updateImpl(...).
@@ -5971,6 +5997,14 @@ struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating {
   AAMemoryBehaviorCallSiteReturned(const IRPosition &IRP, Attributor &A)
       : AAMemoryBehaviorFloating(IRP, A) {}

+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    AAMemoryBehaviorImpl::initialize(A);
+    Function *F = getAssociatedFunction();
+    if (!F || F->isDeclaration())
+      indicatePessimisticFixpoint();
+  }
+
   /// See AbstractAttribute::manifest(...).
   ChangeStatus manifest(Attributor &A) override {
     // We do not annotate returned values.
@@ -6020,10 +6054,8 @@ struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
   void initialize(Attributor &A) override {
     AAMemoryBehaviorImpl::initialize(A);
     Function *F = getAssociatedFunction();
-    if (!F || !A.isFunctionIPOAmendable(*F)) {
+    if (!F || F->isDeclaration())
       indicatePessimisticFixpoint();
-      return;
-    }
   }

   /// See AbstractAttribute::updateImpl(...).
@@ -6300,7 +6332,7 @@ struct AAMemoryLocationImpl : public AAMemoryLocation {
   void initialize(Attributor &A) override {
     intersectAssumedBits(BEST_STATE);
     getKnownStateFromValue(A, getIRPosition(), getState());
-    IRAttribute::initialize(A);
+    AAMemoryLocation::initialize(A);
   }

   /// Return the memory behavior information encoded in the IR for \p IRP.
@@ -6763,10 +6795,8 @@ struct AAMemoryLocationCallSite final : AAMemoryLocationImpl {
   void initialize(Attributor &A) override {
     AAMemoryLocationImpl::initialize(A);
     Function *F = getAssociatedFunction();
-    if (!F || !A.isFunctionIPOAmendable(*F)) {
+    if (!F || F->isDeclaration())
       indicatePessimisticFixpoint();
-      return;
-    }
   }

   /// See AbstractAttribute::updateImpl(...).
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 326d1ab28b60a2..caa9a98ecb0749 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -784,10 +784,13 @@ void PassManagerBuilder::populateModulePassManager(
   // convert to more optimized IR using more aggressive simplify CFG options.
   // The extra sinking transform can create larger basic blocks, so do this
   // before SLP vectorization.
+  // FIXME: study whether hoisting and/or sinking of common instructions should
+  //        be delayed until after SLP vectorizer.
   MPM.add(createCFGSimplificationPass(SimplifyCFGOptions()
                                           .forwardSwitchCondToPhi(true)
                                           .convertSwitchToLookupTable(true)
                                           .needCanonicalLoops(false)
+                                          .hoistCommonInsts(true)
                                           .sinkCommonInsts(true)));

   if (SLPVectorize) {
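Enabling hoistCommonInsts here lets SimplifyCFG merge instructions that are identical in both branch successors before SLP vectorization runs. A hand-written IR illustration of the transform, not taken from this patch:

define i32 @hoist_example(i1 %c, i32 %x) {
entry:
  br i1 %c, label %then, label %else
then:                      ; the two adds below are identical...
  %a = add i32 %x, 1
  br label %join
else:
  %b = add i32 %x, 1
  br label %join
join:                      ; ...so SimplifyCFG may hoist one copy into %entry
  %r = phi i32 [ %a, %then ], [ %b, %else ]
  ret i32 %r
}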
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 5cf6eb2a885a60..5ce32bc592d052 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1615,43 +1615,27 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
   // this.
   bool Swapped = false;
   GEPOperator *GEP1 = nullptr, *GEP2 = nullptr;
+  if (!isa<GEPOperator>(LHS) && isa<GEPOperator>(RHS)) {
+    std::swap(LHS, RHS);
+    Swapped = true;
+  }

-  // For now we require one side to be the base pointer "A" or a constant
-  // GEP derived from it.
-  if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
+  // Require at least one GEP with a common base pointer on both sides.
+  if (auto *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
     // (gep X, ...) - X
     if (LHSGEP->getOperand(0) == RHS) {
       GEP1 = LHSGEP;
-      Swapped = false;
-    } else if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
+    } else if (auto *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
       // (gep X, ...) - (gep X, ...)
       if (LHSGEP->getOperand(0)->stripPointerCasts() ==
-          RHSGEP->getOperand(0)->stripPointerCasts()) {
-        GEP2 = RHSGEP;
+          RHSGEP->getOperand(0)->stripPointerCasts()) {
         GEP1 = LHSGEP;
-        Swapped = false;
-      }
-    }
-  }
-
-  if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
-    // X - (gep X, ...)
-    if (RHSGEP->getOperand(0) == LHS) {
-      GEP1 = RHSGEP;
-      Swapped = true;
-    } else if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
-      // (gep X, ...) - (gep X, ...)
-      if (RHSGEP->getOperand(0)->stripPointerCasts() ==
-          LHSGEP->getOperand(0)->stripPointerCasts()) {
-        GEP2 = LHSGEP;
-        GEP1 = RHSGEP;
-        Swapped = true;
+        GEP2 = RHSGEP;
       }
     }
   }

   if (!GEP1)
-    // No GEP found.
     return nullptr;

   if (GEP2) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 40f6e9e147d768..11c2367d1608e9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -657,6 +657,19 @@ InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
   return nullptr;
 }

+static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
+                                   const DataLayout &DL, AssumptionCache *AC,
+                                   DominatorTree *DT) {
+  KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT);
+  if (Known.isNonNegative())
+    return false;
+  if (Known.isNegative())
+    return true;
+
+  return isImpliedByDomCondition(
+      ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
+}
+
 /// CallInst simplification. This mostly only handles folding of intrinsic
 /// instructions. For normal calls, it allows visitCallBase to do the heavy
 /// lifting.
@@ -791,11 +804,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X))))
       return replaceOperand(*II, 0, X);

-    if (Optional<bool> Imp = isImpliedByDomCondition(
-            ICmpInst::ICMP_SGE, IIOperand,
-            Constant::getNullValue(IIOperand->getType()), II, DL)) {
+    if (Optional<bool> Sign = getKnownSign(IIOperand, II, DL, &AC, &DT)) {
       // abs(x) -> x if x >= 0
-      if (*Imp)
+      if (!*Sign)
         return replaceInstUsesWith(*II, IIOperand);

       // abs(x) -> -x if x < 0
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 350d00095c6f19..74e9525e8ed46e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3090,9 +3090,10 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
   switch (II->getIntrinsicID()) {
   case Intrinsic::abs:
     // abs(A) == 0 -> A == 0
-    if (C.isNullValue())
+    // abs(A) == INT_MIN -> A == INT_MIN
+    if (C.isNullValue() || C.isMinSignedValue())
       return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0),
-                          Constant::getNullValue(Ty));
+                          ConstantInt::get(Ty, C));
     break;

   case Intrinsic::bswap:
@@ -3983,6 +3984,19 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
                           ConstantExpr::getNeg(RHSC));
   }

+  {
+    // Try to remove shared constant multiplier from equality comparison:
+    // X * C == Y * C (with no overflowing/aliasing) --> X == Y
+    Value *X, *Y;
+    const APInt *C;
+    if (match(Op0, m_Mul(m_Value(X), m_APInt(C))) && *C != 0 &&
+        match(Op1, m_Mul(m_Value(Y), m_SpecificInt(*C))) && I.isEquality())
+      if (!C->countTrailingZeros() ||
+          (BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap()) ||
+          (BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap()))
+        return new ICmpInst(Pred, X, Y);
+  }
+
   BinaryOperator *SRem = nullptr;
   // icmp (srem X, Y), Y
   if (BO0 && BO0->getOpcode() == Instruction::SRem &&
       Op1 == BO0->getOperand(1))
@@ -4059,10 +4073,6 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
         Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask);
         return new ICmpInst(Pred, And1, And2);
       }
-      // If there are no trailing zeros in the multiplier, just eliminate
-      // the multiplies (no masking is needed):
-      // icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y
-      return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
     }
     break;
   }
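The three InstCombine changes above are easiest to see on concrete IR. The following hand-written examples (not part of the patch) show inputs each new or rewritten fold applies to:

define i64 @gep_difference(i8* %base, i64 %i, i64 %j) {
  ; (gep %base, %i) - (gep %base, %j) --> %i - %j; the up-front swap now
  ; handles the commuted forms the deleted second if-block used to cover.
  %p = getelementptr inbounds i8, i8* %base, i64 %i
  %q = getelementptr inbounds i8, i8* %base, i64 %j
  %pi = ptrtoint i8* %p to i64
  %qi = ptrtoint i8* %q to i64
  %d = sub i64 %pi, %qi
  ret i64 %d
}

declare i32 @llvm.abs.i32(i32, i1)

define i1 @abs_eq_intmin(i32 %a) {
  ; abs wraps on INT_MIN when the poison flag is false, so INT_MIN is the
  ; only input with abs(A) == INT_MIN: folds to icmp eq i32 %a, -2147483648.
  %abs = call i32 @llvm.abs.i32(i32 %a, i1 false)
  %cmp = icmp eq i32 %abs, -2147483648
  ret i1 %cmp
}

define i1 @mul_eq_mul(i8 %x, i8 %y) {
  ; X * 5 == Y * 5 --> X == Y: an odd multiplier is invertible modulo 2^8;
  ; an even multiplier would additionally need matching nsw or nuw flags.
  %lhs = mul nsw i8 %x, 5
  %rhs = mul nsw i8 %y, 5
  %cmp = icmp eq i8 %lhs, %rhs
  ret i1 %cmp
}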
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 109e15d6d7cfc5..892ba559e7903c 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -114,9 +114,9 @@ static cl::opt<unsigned>
              cl::desc("The number of memory instructions to scan for "
                       "dead store elimination (default = 100)"));

 static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
-    "dse-memoryssa-walklimit", cl::init(70), cl::Hidden,
+    "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
     cl::desc("The maximum number of steps while walking upwards to find "
-             "MemoryDefs that may be killed (default = 70)"));
+             "MemoryDefs that may be killed (default = 90)"));

 static cl::opt<unsigned> MemorySSAPartialStoreLimit(
     "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
@@ -1901,6 +1901,18 @@ struct DSEState {
       return None;
     }

+    // Quick check if there are direct uses that are read-clobbers.
+    if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) {
+          if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
+            return !MSSA.dominates(StartAccess, UseOrDef) &&
+                   isReadClobber(DefLoc, UseOrDef->getMemoryInst());
+          return false;
+        })) {
+      Cache.KnownReads.insert(Current);
+      LLVM_DEBUG(dbgs() << "   ... found a read clobber\n");
+      return None;
+    }
+
     // If Current cannot be analyzed or is not removable, check the next
     // candidate.
     if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI)) {
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 51da10fc48790d..b655204d26dd24 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -1463,6 +1463,7 @@ class EarlyCSELegacyCommonPass : public FunctionPass {
     AU.addRequired<TargetLibraryInfoWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
     if (UseMemorySSA) {
+      AU.addRequired<AAResultsWrapperPass>();
       AU.addRequired<MemorySSAWrapperPass>();
       AU.addPreserved<MemorySSAWrapperPass>();
     }
@@ -1504,6 +1505,7 @@ INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                       "Early CSE w/ MemorySSA", false, false)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index c71038d66f9956..036ca1d1054fee 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -2850,7 +2850,6 @@ class llvm::gvn::GVNLegacyPass : public FunctionPass {
     if (Impl.isMemDepEnabled())
       AU.addRequired<MemoryDependenceWrapperPass>();
     AU.addRequired<AAResultsWrapperPass>();
-    AU.addPreserved();
     AU.addPreserved();
     AU.addPreserved();
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 51d12faf712ad8..20b85626dced94 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -2329,36 +2329,6 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
   return MadeAnyChanges;
 }

-/// Return a symbolic upper bound for the backedge taken count of the loop.
-/// This is more general than getConstantMaxBackedgeTakenCount as it returns
-/// an arbitrary expression as opposed to only constants.
-/// TODO: Move into the ScalarEvolution class.
-static const SCEV *getMaxBackedgeTakenCount(ScalarEvolution &SE,
-                                            DominatorTree &DT, Loop *L) {
-  SmallVector<BasicBlock *, 16> ExitingBlocks;
-  L->getExitingBlocks(ExitingBlocks);
-
-  // Form an expression for the maximum exit count possible for this loop. We
-  // merge the max and exact information to approximate a version of
-  // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
-  SmallVector<const SCEV *, 4> ExitCounts;
-  for (BasicBlock *ExitingBB : ExitingBlocks) {
-    const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
-    if (isa<SCEVCouldNotCompute>(ExitCount))
-      ExitCount = SE.getExitCount(L, ExitingBB,
-                                  ScalarEvolution::ConstantMaximum);
-    if (!isa<SCEVCouldNotCompute>(ExitCount)) {
-      assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
-             "We should only have known counts for exiting blocks that "
-             "dominate latch!");
-      ExitCounts.push_back(ExitCount);
-    }
-  }
-  if (ExitCounts.empty())
-    return SE.getCouldNotCompute();
-  return SE.getUMinFromMismatchedTypes(ExitCounts);
-}
-
 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
   SmallVector<BasicBlock *, 16> ExitingBlocks;
   L->getExitingBlocks(ExitingBlocks);
@@ -2391,7 +2361,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
     return false;

   // Get a symbolic upper bound on the loop backedge taken count.
-  const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
+  const SCEV *MaxExitCount = SE->computeMaxBackedgeTakenCount(L);
   if (isa<SCEVCouldNotCompute>(MaxExitCount))
     return false;

diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index c3e46c1fadef31..47329fa1f043e2 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3834,10 +3834,14 @@ void LSRInstance::GenerateConstantOffsetsImpl(
     F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
     if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
       return;
-    if (IsScaledReg)
+    if (IsScaledReg) {
       F.ScaledReg = G;
-    else
+    } else {
       F.BaseRegs[Idx] = G;
+      // We may generate non canonical Formula if G is a recurrent expr reg
+      // related with current loop while F.ScaledReg is not.
+      F.canonicalize(*L);
+    }

     (void)InsertFormula(LU, LUIdx, F);
   }
diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp
index 2afc778ed82141..33ab2907906e05 100644
--- a/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -1350,6 +1350,25 @@ void SCCPSolver::handleCallResult(CallBase &CB) {

       return (void)mergeInValue(IV, &CB, CopyOfVal);
     }
+
+    if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) {
+      // Compute result range for intrinsics supported by ConstantRange.
+      // Do this even if we don't know a range for all operands, as we may
+      // still know something about the result range, e.g. of abs(x).
+      SmallVector<ConstantRange, 2> OpRanges;
+      for (Value *Op : II->args()) {
+        const ValueLatticeElement &State = getValueState(Op);
+        if (State.isConstantRange())
+          OpRanges.push_back(State.getConstantRange());
+        else
+          OpRanges.push_back(
+              ConstantRange::getFull(Op->getType()->getScalarSizeInBits()));
+      }
+
+      ConstantRange Result =
+          ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges);
+      return (void)mergeInValue(II, ValueLatticeElement::getRange(Result));
+    }
   }

   // The common case is that we aren't tracking the callee, either because we
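With this, SCCP can bound an intrinsic's result even when its operands are unconstrained. A sketch of the kind of fact it may now derive, assuming abs is among the ConstantRange-supported intrinsics (hand-written, not from the patch):

declare i32 @llvm.abs.i32(i32, i1)

define i1 @abs_is_nonnegative(i32 %x) {
  ; With nothing known about %x, the non-poison result range of abs with
  ; int-min-is-poison set is [0, INT_MAX], so this compare may fold to false.
  %a = call i32 @llvm.abs.i32(i32 %x, i1 true)
  %neg = icmp slt i32 %a, 0
  ret i1 %neg
}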
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index db5211df397a87..b0435bf6e4eac6 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -63,8 +63,8 @@ static cl::opt<bool> UserForwardSwitchCond(
     cl::desc("Forward switch condition to phi ops (default = false)"));

 static cl::opt<bool> UserHoistCommonInsts(
-    "hoist-common-insts", cl::Hidden, cl::init(true),
-    cl::desc("hoist common instructions (default = true)"));
+    "hoist-common-insts", cl::Hidden, cl::init(false),
+    cl::desc("hoist common instructions (default = false)"));

 static cl::opt<bool> UserSinkCommonInsts(
     "sink-common-insts", cl::Hidden, cl::init(false),
diff --git a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 9af39d9a0dd1c8..b124d0536254be 100644
--- a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -6,10 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This pass is used to ensure that functions have at most one return
-// instruction in them. Additionally, it keeps track of which node is the new
-// exit node of the CFG. If there are no exit nodes in the CFG, the getExitNode
-// method will return a null pointer.
+// This pass is used to ensure that functions have at most one return and one
+// unreachable instruction in them.
 //
 //===----------------------------------------------------------------------===//

@@ -61,12 +59,8 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
       UnreachableBlocks.push_back(&I);

   // Then unreachable blocks.
-  if (UnreachableBlocks.empty()) {
-    UnreachableBlock = nullptr;
-  } else if (UnreachableBlocks.size() == 1) {
-    UnreachableBlock = UnreachableBlocks.front();
-  } else {
-    UnreachableBlock = BasicBlock::Create(F.getContext(),
+  if (UnreachableBlocks.size() > 1) {
+    BasicBlock *UnreachableBlock = BasicBlock::Create(F.getContext(),
                                           "UnifiedUnreachableBlock", &F);
     new UnreachableInst(F.getContext(), UnreachableBlock);

@@ -76,14 +70,9 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
     }
   }

-  // Now handle return blocks.
-  if (ReturningBlocks.empty()) {
-    ReturnBlock = nullptr;
-    return false; // No blocks return
-  } else if (ReturningBlocks.size() == 1) {
-    ReturnBlock = ReturningBlocks.front(); // Already has a single return block
+  // There is nothing more to do if we do not have multiple return blocks.
+  if (ReturningBlocks.size() <= 1)
     return false;
-  }

   // Otherwise, we need to insert a new basic block into the function, add PHI
   // nodes (if the function returns values), and convert all of the return
@@ -115,6 +104,5 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
     BB->getInstList().pop_back(); // Remove the return insn
     BranchInst::Create(NewRetBlock, BB);
   }
-  ReturnBlock = NewRetBlock;
   return true;
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 078b2ba1c70ac3..9c9e2ec8222d10 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -51,14 +51,12 @@ namespace llvm {
 class BasicBlock;
 class DominatorTree;
 class InnerLoopVectorizer;
-template <typename InstTy> class InterleaveGroup;
 class LoopInfo;
 class raw_ostream;
 class RecurrenceDescriptor;
 class Value;
 class VPBasicBlock;
 class VPRegionBlock;
-class VPSlotTracker;
 class VPlan;
 class VPlanSlp;

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shl.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shl.mir
new file mode 100644
index 00000000000000..fe75f9965bc908
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shl.mir
@@ -0,0 +1,29 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+---
+name:            test_combine_shl_undef_x_s32
+body:             |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: test_combine_shl_undef_x_s32
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: $w0 = COPY [[C]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_IMPLICIT_DEF
+    %2:_(s32) = G_SHL %1(s32), %0(s32)
+    $w0 = COPY %2(s32)
+...
+---
+name:            test_combine_shl_undef_x_v2s32
+body:             |
+  bb.1:
+    liveins: $d0
+    ; CHECK-LABEL: name: test_combine_shl_undef_x_v2s32
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+    ; CHECK: $d0 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = G_IMPLICIT_DEF
+    %2:_(<2 x s32>) = G_SHL %1(<2 x s32>), %0(<2 x s32>)
+    $d0 = COPY %2(<2 x s32>)
+...
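The new MIR test above exercises the prelegalizer combine of G_SHL with an undefined operand. The IR-level analogue of the reasoning, shown for illustration only: every result bit is either a freely chosen undef bit or a zero shifted in from the right, so the whole shift may be folded to zero.

define i32 @shl_undef(i32 %amt) {
  ; InstSimplify performs the equivalent middle-end fold: shl undef, %amt --> 0.
  %r = shl i32 undef, %amt
  ret i32 %r
}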
diff --git a/llvm/test/CodeGen/AArch64/fp-const-fold.ll b/llvm/test/CodeGen/AArch64/fp-const-fold.ll index b282c8719ff637..dc3f71001d610b 100644 --- a/llvm/test/CodeGen/AArch64/fp-const-fold.ll +++ b/llvm/test/CodeGen/AArch64/fp-const-fold.ll @@ -161,49 +161,33 @@ define double @fmul_nnan_inf_op1(double %x) { ret double %r } -; TODO: Should simplify to undef - define double @fdiv_nnan_undef_op0(double %x) { ; CHECK-LABEL: fdiv_nnan_undef_op0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9221120237041090560 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %r = fdiv nnan double undef, %x ret double %r } -; TODO: Should simplify to undef - define double @fdiv_nnan_undef_op1(double %x) { ; CHECK-LABEL: fdiv_nnan_undef_op1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9221120237041090560 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %r = fdiv nnan double %x, undef ret double %r } -; TODO: Should simplify to undef - define double @fdiv_ninf_undef_op0(double %x) { ; CHECK-LABEL: fdiv_ninf_undef_op0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9221120237041090560 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %r = fdiv ninf double undef, %x ret double %r } -; TODO: Should simplify to undef - define double @fdiv_ninf_undef_op1(double %x) { ; CHECK-LABEL: fdiv_ninf_undef_op1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9221120237041090560 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %r = fdiv ninf double %x, undef ret double %r diff --git a/llvm/test/CodeGen/AArch64/merge-trunc-store.ll b/llvm/test/CodeGen/AArch64/merge-trunc-store.ll index 3f8fa3e9e38379..b4c6e7736837a2 100644 --- a/llvm/test/CodeGen/AArch64/merge-trunc-store.ll +++ b/llvm/test/CodeGen/AArch64/merge-trunc-store.ll @@ -207,9 +207,8 @@ define void @le_i32_to_i16(i32 %x, i16* %p0) { ; ; BE-LABEL: le_i32_to_i16: ; BE: // %bb.0: -; BE-NEXT: lsr w8, w0, #16 -; BE-NEXT: strh w0, [x1] -; BE-NEXT: strh w8, [x1, #2] +; BE-NEXT: ror w8, w0, #16 +; BE-NEXT: str w8, [x1] ; BE-NEXT: ret %sh1 = lshr i32 %x, 16 %t0 = trunc i32 %x to i16 @@ -228,9 +227,8 @@ define void @le_i32_to_i16_order(i32 %x, i16* %p0) { ; ; BE-LABEL: le_i32_to_i16_order: ; BE: // %bb.0: -; BE-NEXT: lsr w8, w0, #16 -; BE-NEXT: strh w8, [x1, #2] -; BE-NEXT: strh w0, [x1] +; BE-NEXT: ror w8, w0, #16 +; BE-NEXT: str w8, [x1] ; BE-NEXT: ret %sh1 = lshr i32 %x, 16 %t0 = trunc i32 %x to i16 @@ -244,9 +242,8 @@ define void @le_i32_to_i16_order(i32 %x, i16* %p0) { define void @be_i32_to_i16(i32 %x, i16* %p0) { ; LE-LABEL: be_i32_to_i16: ; LE: // %bb.0: -; LE-NEXT: lsr w8, w0, #16 -; LE-NEXT: strh w0, [x1, #2] -; LE-NEXT: strh w8, [x1] +; LE-NEXT: ror w8, w0, #16 +; LE-NEXT: str w8, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i32_to_i16: @@ -265,9 +262,8 @@ define void @be_i32_to_i16(i32 %x, i16* %p0) { define void @be_i32_to_i16_order(i32 %x, i16* %p0) { ; LE-LABEL: be_i32_to_i16_order: ; LE: // %bb.0: -; LE-NEXT: lsr w8, w0, #16 -; LE-NEXT: strh w8, [x1] -; LE-NEXT: strh w0, [x1, #2] +; LE-NEXT: ror w8, w0, #16 +; LE-NEXT: str w8, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i32_to_i16_order: @@ -528,13 +524,12 @@ define void @le_i64_to_i16_order(i64 %x, i16* %p0) { define void @be_i64_to_i16(i64 %x, i16* %p0) { ; LE-LABEL: be_i64_to_i16: ; LE: // %bb.0: -; LE-NEXT: lsr x8, x0, #16 -; LE-NEXT: lsr x9, x0, #32 -; LE-NEXT: lsr x10, x0, #48 -; LE-NEXT: strh w0, [x1, #6] -; LE-NEXT: strh w8, [x1, #4] -; LE-NEXT: strh w9, [x1, #2] -; LE-NEXT: strh w10, [x1] +; LE-NEXT: lsr x8, x0, #32 +; LE-NEXT: lsr x9, x0, #48 +; LE-NEXT: ror w10, w0, #16 +; LE-NEXT: str w10, [x1, #4] +; LE-NEXT: strh w8, [x1, #2] +; LE-NEXT: strh 
w9, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i64_to_i16: @@ -599,8 +594,8 @@ define void @le_i64_to_i32(i64 %x, i32* %p0) { ; ; BE-LABEL: le_i64_to_i32: ; BE: // %bb.0: -; BE-NEXT: lsr x8, x0, #32 -; BE-NEXT: stp w0, w8, [x1] +; BE-NEXT: ror x8, x0, #32 +; BE-NEXT: str x8, [x1] ; BE-NEXT: ret %sh1 = lshr i64 %x, 32 %t0 = trunc i64 %x to i32 @@ -619,8 +614,8 @@ define void @le_i64_to_i32_order(i64 %x, i32* %p0) { ; ; BE-LABEL: le_i64_to_i32_order: ; BE: // %bb.0: -; BE-NEXT: lsr x8, x0, #32 -; BE-NEXT: stp w0, w8, [x1] +; BE-NEXT: ror x8, x0, #32 +; BE-NEXT: str x8, [x1] ; BE-NEXT: ret %sh1 = lshr i64 %x, 32 %t0 = trunc i64 %x to i32 @@ -634,8 +629,8 @@ define void @le_i64_to_i32_order(i64 %x, i32* %p0) { define void @be_i64_to_i32(i64 %x, i32* %p0) { ; LE-LABEL: be_i64_to_i32: ; LE: // %bb.0: -; LE-NEXT: lsr x8, x0, #32 -; LE-NEXT: stp w8, w0, [x1] +; LE-NEXT: ror x8, x0, #32 +; LE-NEXT: str x8, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i64_to_i32: @@ -654,8 +649,8 @@ define void @be_i64_to_i32(i64 %x, i32* %p0) { define void @be_i64_to_i32_order(i64 %x, i32* %p0) { ; LE-LABEL: be_i64_to_i32_order: ; LE: // %bb.0: -; LE-NEXT: lsr x8, x0, #32 -; LE-NEXT: stp w8, w0, [x1] +; LE-NEXT: ror x8, x0, #32 +; LE-NEXT: str x8, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i64_to_i32_order: diff --git a/llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir b/llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir new file mode 100644 index 00000000000000..b31144b409fca7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir @@ -0,0 +1,33 @@ +# RUN: llc -mtriple=aarch64-arm-none-eabi -o - %s \ +# RUN: -run-pass simple-register-coalescing | FileCheck %s + +# In this test case, the 32-bit copy implements a 32 to 64 bit zero extension +# and relies on the upper 32 bits being zeroed. +# Coalescing to the result of the 64-bit load meant overwriting +# the upper 32 bits incorrectly when the loaded byte was negative. + +--- | + @c = local_unnamed_addr global i8 -1, align 4 + + define i64 @bug_e(i32 %i32) local_unnamed_addr { + ret i64 0 + } +... +--- +name: bug_e +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + %1:gpr32 = COPY $w0 + %2:gpr64common = ADRP target-flags(aarch64-page) @c + %3:gpr64 = LDRSBXui %2, target-flags(aarch64-pageoff, aarch64-nc) @c :: (dereferenceable load 1 from @c, align 4) + %0:gpr32 = COPY %3.sub_32 + ; CHECK: {{.*}}.sub_32:gpr64 = COPY {{.*}}.sub_32 + STRBBui %1, %2, target-flags(aarch64-pageoff, aarch64-nc) @c :: (store 1 into @c, align 4) + %8:gpr64all = SUBREG_TO_REG 0, %0, %subreg.sub_32 + $x0 = COPY %8 + ; CHECK: $x0 = COPY + RET_ReallyLR implicit $x0 +... 
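The MIR test above pins down a subtle register-coalescing contract. In IR terms (a hand-written sketch, not from the patch): the 32-bit copy of the sign-extended load implements a zero extension, so its upper 32 bits must be zero even when the loaded byte is negative and the 64-bit register's upper half is all ones.

define i64 @zext_contract(i8 %byte) {
  %wide = sext i8 %byte to i64      ; like LDRSBXui: upper bits copy the sign
  %lo = trunc i64 %wide to i32
  %res = zext i32 %lo to i64        ; like SUBREG_TO_REG: upper bits must be 0
  ret i64 %res
}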
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir index d50973c9abf99e..e87f1e7dc8dd03 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir @@ -16,16 +16,13 @@ body: | ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.2: - ; GCN: successors: %bb.3(0x80000000) - ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) - ; GCN: DBG_VALUE ; GCN: bb.4: ; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc ; GCN: DBG_VALUE @@ -68,14 +65,12 @@ body: | ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.2: - ; GCN: successors: %bb.3(0x80000000) - ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: bb.4: ; GCN: successors: %bb.5(0x80000000) @@ -118,14 +113,12 @@ body: | ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.2: - ; GCN: successors: %bb.3(0x80000000) - ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: bb.4: ; GCN: successors: %bb.5(0x80000000) @@ -387,22 +380,19 @@ body: | ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN: successors: %bb.2(0x40000000), %bb.5(0x40000000) ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN: bb.2: - ; GCN: successors: %bb.3(0x80000000) - ; GCN: bb.3: ; GCN: successors: %bb.5(0x80000000) - ; GCN: S_BRANCH %bb.5 - ; GCN: bb.4: - ; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc - ; GCN: S_ENDPGM 0 ; GCN: bb.5: ; GCN: successors: %bb.4(0x80000000) ; GCN: S_BRANCH %bb.4 + ; GCN: bb.4: + ; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 diff --git a/llvm/test/CodeGen/AMDGPU/nop-data.ll 
b/llvm/test/CodeGen/AMDGPU/nop-data.ll
index 7b6853acce2854..e21ca97e8ffca1 100644
--- a/llvm/test/CodeGen/AMDGPU/nop-data.ll
+++ b/llvm/test/CodeGen/AMDGPU/nop-data.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-code-object-v3 -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - --mcpu=fiji | FileCheck %s

 ; CHECK: <kernel0>:
-; CHECK-NEXT: s_endpgm
+; CHECK: s_endpgm
 define amdgpu_kernel void @kernel0() align 256 {
 entry:
   ret void
@@ -80,7 +80,7 @@ entry:

 ; CHECK-EMPTY:
 ; CHECK-NEXT: <kernel1>:
-; CHECK-NEXT: s_endpgm
+; CHECK: s_endpgm
 define amdgpu_kernel void @kernel1(i32 addrspace(1)* addrspace(4)* %ptr.out) align 256 {
 entry:
   ret void
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate-legal.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate-legal.ll
new file mode 100644
index 00000000000000..e9c7f9cce771e0
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate-legal.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s
+
+; Truncating a type-to-be-widened to a legal type (v8i8).
+; Check that this compiles successfully.
+; CHECK-LABEL: f0:
+; CHECK: dealloc_return
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+define dllexport void @f0(i8* %a0) local_unnamed_addr #0 {
+b0:
+  %v0 = load i8, i8* undef, align 1
+  %v1 = zext i8 %v0 to i16
+  %v2 = add i16 0, %v1
+  %v3 = icmp sgt i16 %v2, 1
+  %v4 = select i1 %v3, i16 %v2, i16 1
+  %v5 = udiv i16 -32768, %v4
+  %v6 = zext i16 %v5 to i32
+  %v7 = insertelement <8 x i32> undef, i32 %v6, i32 0
+  %v8 = shufflevector <8 x i32> %v7, <8 x i32> undef, <8 x i32> zeroinitializer
+  %v9 = load <8 x i16>, <8 x i16>* undef, align 2
+  %v10 = sext <8 x i16> %v9 to <8 x i32>
+  %v11 = mul nsw <8 x i32> %v8, %v10
+  %v12 = add nsw <8 x i32> %v11,
+  %v13 = lshr <8 x i32> %v12,
+  %v14 = trunc <8 x i32> %v13 to <8 x i8>
+  %v15 = getelementptr inbounds i8, i8* %a0, i32 undef
+  %v16 = bitcast i8* %v15 to <8 x i8>*
+  store <8 x i8> %v14, <8 x i8>* %v16, align 1
+  ret void
+}
+
+attributes #0 = { "target-features"="+hvx,+hvx-length128b" }
diff --git a/llvm/test/CodeGen/PowerPC/atomics-indexed.ll b/llvm/test/CodeGen/PowerPC/atomics-indexed.ll
index b4790adfd90889..cf7225a5fc2005 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-indexed.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-indexed.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32
 ; FIXME: -verify-machineinstrs currently fail on ppc64 (mismatched register/instruction).
; This is already checked for in Atomics-64.ll @@ -8,9 +9,25 @@ ; Indexed version of loads define i8 @load_x_i8_seq_cst([100000 x i8]* %mem) { -; CHECK-LABEL: load_x_i8_seq_cst -; CHECK: sync -; CHECK: lbzx [[VAL:r[0-9]+]] +; PPC32-LABEL: load_x_i8_seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: lis r4, 1 +; PPC32-NEXT: sync +; PPC32-NEXT: ori r4, r4, 24464 +; PPC32-NEXT: lbzx r3, r3, r4 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_x_i8_seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: lis r4, 1 +; PPC64-NEXT: sync +; PPC64-NEXT: ori r4, r4, 24464 +; PPC64-NEXT: lbzx r3, r3, r4 +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: blr ; CHECK-PPC32: lwsync ; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]] ; CHECK-PPC64: bne- [[CR]], .+4 @@ -20,8 +37,23 @@ define i8 @load_x_i8_seq_cst([100000 x i8]* %mem) { ret i8 %val } define i16 @load_x_i16_acquire([100000 x i16]* %mem) { -; CHECK-LABEL: load_x_i16_acquire -; CHECK: lhzx [[VAL:r[0-9]+]] +; PPC32-LABEL: load_x_i16_acquire: +; PPC32: # %bb.0: +; PPC32-NEXT: lis r4, 2 +; PPC32-NEXT: ori r4, r4, 48928 +; PPC32-NEXT: lhzx r3, r3, r4 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_x_i16_acquire: +; PPC64: # %bb.0: +; PPC64-NEXT: lis r4, 2 +; PPC64-NEXT: ori r4, r4, 48928 +; PPC64-NEXT: lhzx r3, r3, r4 +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: blr ; CHECK-PPC32: lwsync ; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]] ; CHECK-PPC64: bne- [[CR]], .+4 @@ -31,19 +63,39 @@ define i16 @load_x_i16_acquire([100000 x i16]* %mem) { ret i16 %val } define i32 @load_x_i32_monotonic([100000 x i32]* %mem) { -; CHECK-LABEL: load_x_i32_monotonic -; CHECK: lwzx -; CHECK-NOT: sync +; CHECK-LABEL: load_x_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: lis r4, 5 +; CHECK-NEXT: ori r4, r4, 32320 +; CHECK-NEXT: lwzx r3, r3, r4 +; CHECK-NEXT: blr %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %mem, i64 0, i64 90000 %val = load atomic i32, i32* %ptr monotonic, align 4 ret i32 %val } define i64 @load_x_i64_unordered([100000 x i64]* %mem) { -; CHECK-LABEL: load_x_i64_unordered -; PPC32: __sync_ -; PPC64-NOT: __sync_ -; PPC64: ldx -; CHECK-NOT: sync +; PPC32-LABEL: load_x_i64_unordered: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: addi r3, r3, -896 +; PPC32-NEXT: addis r3, r3, 11 +; PPC32-NEXT: li r4, 0 +; PPC32-NEXT: bl __atomic_load_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_x_i64_unordered: +; PPC64: # %bb.0: +; PPC64-NEXT: lis r4, 10 +; PPC64-NEXT: ori r4, r4, 64640 +; PPC64-NEXT: ldx r3, r3, r4 +; PPC64-NEXT: blr %ptr = getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000 %val = load atomic i64, i64* %ptr unordered, align 8 ret i64 %val @@ -51,35 +103,69 @@ define i64 @load_x_i64_unordered([100000 x i64]* %mem) { ; Indexed version of stores define void @store_x_i8_seq_cst([100000 x i8]* %mem) { -; CHECK-LABEL: store_x_i8_seq_cst -; CHECK: sync -; CHECK: stbx +; CHECK-LABEL: store_x_i8_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: lis r4, 1 +; CHECK-NEXT: ori r4, r4, 24464 +; CHECK-NEXT: li r5, 42 +; CHECK-NEXT: sync +; CHECK-NEXT: stbx r5, r3, r4 +; CHECK-NEXT: blr %ptr = getelementptr inbounds [100000 x i8], [100000 x i8]* %mem, i64 0, i64 90000 store atomic i8 42, i8* %ptr 
seq_cst, align 1 ret void } define void @store_x_i16_release([100000 x i16]* %mem) { -; CHECK-LABEL: store_x_i16_release -; CHECK: lwsync -; CHECK: sthx +; CHECK-LABEL: store_x_i16_release: +; CHECK: # %bb.0: +; CHECK-NEXT: lis r4, 2 +; CHECK-NEXT: ori r4, r4, 48928 +; CHECK-NEXT: li r5, 42 +; CHECK-NEXT: lwsync +; CHECK-NEXT: sthx r5, r3, r4 +; CHECK-NEXT: blr %ptr = getelementptr inbounds [100000 x i16], [100000 x i16]* %mem, i64 0, i64 90000 store atomic i16 42, i16* %ptr release, align 2 ret void } define void @store_x_i32_monotonic([100000 x i32]* %mem) { -; CHECK-LABEL: store_x_i32_monotonic -; CHECK-NOT: sync -; CHECK: stwx +; CHECK-LABEL: store_x_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: lis r4, 5 +; CHECK-NEXT: ori r4, r4, 32320 +; CHECK-NEXT: li r5, 42 +; CHECK-NEXT: stwx r5, r3, r4 +; CHECK-NEXT: blr %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %mem, i64 0, i64 90000 store atomic i32 42, i32* %ptr monotonic, align 4 ret void } define void @store_x_i64_unordered([100000 x i64]* %mem) { -; CHECK-LABEL: store_x_i64_unordered -; CHECK-NOT: sync -; PPC32: __sync_ -; PPC64-NOT: __sync_ -; PPC64: stdx +; PPC32-LABEL: store_x_i64_unordered: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: addi r3, r3, -896 +; PPC32-NEXT: addis r3, r3, 11 +; PPC32-NEXT: li r5, 0 +; PPC32-NEXT: li r6, 42 +; PPC32-NEXT: li r7, 0 +; PPC32-NEXT: bl __atomic_store_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: store_x_i64_unordered: +; PPC64: # %bb.0: +; PPC64-NEXT: lis r4, 10 +; PPC64-NEXT: ori r4, r4, 64640 +; PPC64-NEXT: li r5, 42 +; PPC64-NEXT: stdx r5, r3, r4 +; PPC64-NEXT: blr %ptr = getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000 store atomic i64 42, i64* %ptr unordered, align 8 ret void diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll index c964218cb60bf6..008cd4c7157c1f 100644 --- a/llvm/test/CodeGen/PowerPC/atomics.ll +++ b/llvm/test/CodeGen/PowerPC/atomics.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32 ; This is already checked for in Atomics-64.ll ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64 @@ -9,22 +10,35 @@ ; We first check loads, for all sizes from i8 to i64. ; We also vary orderings to check for barriers. 
define i8 @load_i8_unordered(i8* %mem) { -; CHECK-LABEL: load_i8_unordered -; CHECK: lbz -; CHECK-NOT: sync +; CHECK-LABEL: load_i8_unordered: +; CHECK: # %bb.0: +; CHECK-NEXT: lbz r3, 0(r3) +; CHECK-NEXT: blr %val = load atomic i8, i8* %mem unordered, align 1 ret i8 %val } define i16 @load_i16_monotonic(i16* %mem) { -; CHECK-LABEL: load_i16_monotonic -; CHECK: lhz -; CHECK-NOT: sync +; CHECK-LABEL: load_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: blr %val = load atomic i16, i16* %mem monotonic, align 2 ret i16 %val } define i32 @load_i32_acquire(i32* %mem) { -; CHECK-LABEL: load_i32_acquire -; CHECK: lwz [[VAL:r[0-9]+]] +; PPC32-LABEL: load_i32_acquire: +; PPC32: # %bb.0: +; PPC32-NEXT: lwz r3, 0(r3) +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_i32_acquire: +; PPC64: # %bb.0: +; PPC64-NEXT: lwz r3, 0(r3) +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: blr %val = load atomic i32, i32* %mem acquire, align 4 ; CHECK-PPC32: lwsync ; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]] @@ -33,11 +47,28 @@ define i32 @load_i32_acquire(i32* %mem) { ret i32 %val } define i64 @load_i64_seq_cst(i64* %mem) { -; CHECK-LABEL: load_i64_seq_cst -; CHECK: sync -; PPC32: __sync_ -; PPC64-NOT: __sync_ -; PPC64: ld [[VAL:r[0-9]+]] +; PPC32-LABEL: load_i64_seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r4, 5 +; PPC32-NEXT: bl __atomic_load_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_i64_seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: sync +; PPC64-NEXT: ld r3, 0(r3) +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: blr %val = load atomic i64, i64* %mem seq_cst, align 8 ; CHECK-PPC32: lwsync ; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]] @@ -48,95 +79,401 @@ define i64 @load_i64_seq_cst(i64* %mem) { ; Stores define void @store_i8_unordered(i8* %mem) { -; CHECK-LABEL: store_i8_unordered -; CHECK-NOT: sync -; CHECK: stb +; CHECK-LABEL: store_i8_unordered: +; CHECK: # %bb.0: +; CHECK-NEXT: li r4, 42 +; CHECK-NEXT: stb r4, 0(r3) +; CHECK-NEXT: blr store atomic i8 42, i8* %mem unordered, align 1 ret void } define void @store_i16_monotonic(i16* %mem) { -; CHECK-LABEL: store_i16_monotonic -; CHECK-NOT: sync -; CHECK: sth +; CHECK-LABEL: store_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: li r4, 42 +; CHECK-NEXT: sth r4, 0(r3) +; CHECK-NEXT: blr store atomic i16 42, i16* %mem monotonic, align 2 ret void } define void @store_i32_release(i32* %mem) { -; CHECK-LABEL: store_i32_release -; CHECK: lwsync -; CHECK: stw +; CHECK-LABEL: store_i32_release: +; CHECK: # %bb.0: +; CHECK-NEXT: li r4, 42 +; CHECK-NEXT: lwsync +; CHECK-NEXT: stw r4, 0(r3) +; CHECK-NEXT: blr store atomic i32 42, i32* %mem release, align 4 ret void } define void @store_i64_seq_cst(i64* %mem) { -; CHECK-LABEL: store_i64_seq_cst -; CHECK: sync -; PPC32: __sync_ -; PPC64-NOT: __sync_ -; PPC64: std +; PPC32-LABEL: store_i64_seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r5, 0 +; PPC32-NEXT: li r6, 42 +; PPC32-NEXT: li r7, 5 +; PPC32-NEXT: bl __atomic_store_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 
+; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: store_i64_seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: li r4, 42 +; PPC64-NEXT: sync +; PPC64-NEXT: std r4, 0(r3) +; PPC64-NEXT: blr store atomic i64 42, i64* %mem seq_cst, align 8 ret void } ; Atomic CmpXchg define i8 @cas_strong_i8_sc_sc(i8* %mem) { -; CHECK-LABEL: cas_strong_i8_sc_sc -; CHECK: sync +; PPC32-LABEL: cas_strong_i8_sc_sc: +; PPC32: # %bb.0: +; PPC32-NEXT: rlwinm r8, r3, 3, 27, 28 +; PPC32-NEXT: li r5, 1 +; PPC32-NEXT: li r6, 0 +; PPC32-NEXT: li r7, 255 +; PPC32-NEXT: rlwinm r4, r3, 0, 0, 29 +; PPC32-NEXT: xori r3, r8, 24 +; PPC32-NEXT: slw r5, r5, r3 +; PPC32-NEXT: slw r8, r6, r3 +; PPC32-NEXT: slw r6, r7, r3 +; PPC32-NEXT: and r7, r5, r6 +; PPC32-NEXT: and r8, r8, r6 +; PPC32-NEXT: sync +; PPC32-NEXT: .LBB8_1: +; PPC32-NEXT: lwarx r9, 0, r4 +; PPC32-NEXT: and r5, r9, r6 +; PPC32-NEXT: cmpw r5, r8 +; PPC32-NEXT: bne cr0, .LBB8_3 +; PPC32-NEXT: # %bb.2: +; PPC32-NEXT: andc r9, r9, r6 +; PPC32-NEXT: or r9, r9, r7 +; PPC32-NEXT: stwcx. r9, 0, r4 +; PPC32-NEXT: bne cr0, .LBB8_1 +; PPC32-NEXT: b .LBB8_4 +; PPC32-NEXT: .LBB8_3: +; PPC32-NEXT: stwcx. r9, 0, r4 +; PPC32-NEXT: .LBB8_4: +; PPC32-NEXT: srw r3, r5, r3 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: cas_strong_i8_sc_sc: +; PPC64: # %bb.0: +; PPC64-NEXT: rlwinm r8, r3, 3, 27, 28 +; PPC64-NEXT: li r5, 1 +; PPC64-NEXT: li r6, 0 +; PPC64-NEXT: li r7, 255 +; PPC64-NEXT: rldicr r4, r3, 0, 61 +; PPC64-NEXT: xori r3, r8, 24 +; PPC64-NEXT: slw r5, r5, r3 +; PPC64-NEXT: slw r8, r6, r3 +; PPC64-NEXT: slw r6, r7, r3 +; PPC64-NEXT: and r7, r5, r6 +; PPC64-NEXT: and r8, r8, r6 +; PPC64-NEXT: sync +; PPC64-NEXT: .LBB8_1: +; PPC64-NEXT: lwarx r9, 0, r4 +; PPC64-NEXT: and r5, r9, r6 +; PPC64-NEXT: cmpw r5, r8 +; PPC64-NEXT: bne cr0, .LBB8_3 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: andc r9, r9, r6 +; PPC64-NEXT: or r9, r9, r7 +; PPC64-NEXT: stwcx. r9, 0, r4 +; PPC64-NEXT: bne cr0, .LBB8_1 +; PPC64-NEXT: b .LBB8_4 +; PPC64-NEXT: .LBB8_3: +; PPC64-NEXT: stwcx. r9, 0, r4 +; PPC64-NEXT: .LBB8_4: +; PPC64-NEXT: srw r3, r5, r3 +; PPC64-NEXT: lwsync +; PPC64-NEXT: blr %val = cmpxchg i8* %mem, i8 0, i8 1 seq_cst seq_cst -; CHECK: lwsync %loaded = extractvalue { i8, i1} %val, 0 ret i8 %loaded } define i16 @cas_weak_i16_acquire_acquire(i16* %mem) { -; CHECK-LABEL: cas_weak_i16_acquire_acquire -;CHECK-NOT: sync +; PPC32-LABEL: cas_weak_i16_acquire_acquire: +; PPC32: # %bb.0: +; PPC32-NEXT: li r6, 0 +; PPC32-NEXT: rlwinm r4, r3, 3, 27, 27 +; PPC32-NEXT: li r5, 1 +; PPC32-NEXT: ori r7, r6, 65535 +; PPC32-NEXT: xori r4, r4, 16 +; PPC32-NEXT: slw r8, r5, r4 +; PPC32-NEXT: slw r9, r6, r4 +; PPC32-NEXT: slw r5, r7, r4 +; PPC32-NEXT: rlwinm r3, r3, 0, 0, 29 +; PPC32-NEXT: and r6, r8, r5 +; PPC32-NEXT: and r8, r9, r5 +; PPC32-NEXT: .LBB9_1: +; PPC32-NEXT: lwarx r9, 0, r3 +; PPC32-NEXT: and r7, r9, r5 +; PPC32-NEXT: cmpw r7, r8 +; PPC32-NEXT: bne cr0, .LBB9_3 +; PPC32-NEXT: # %bb.2: +; PPC32-NEXT: andc r9, r9, r5 +; PPC32-NEXT: or r9, r9, r6 +; PPC32-NEXT: stwcx. r9, 0, r3 +; PPC32-NEXT: bne cr0, .LBB9_1 +; PPC32-NEXT: b .LBB9_4 +; PPC32-NEXT: .LBB9_3: +; PPC32-NEXT: stwcx. 
r9, 0, r3 +; PPC32-NEXT: .LBB9_4: +; PPC32-NEXT: srw r3, r7, r4 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: cas_weak_i16_acquire_acquire: +; PPC64: # %bb.0: +; PPC64-NEXT: li r6, 0 +; PPC64-NEXT: rlwinm r4, r3, 3, 27, 27 +; PPC64-NEXT: li r5, 1 +; PPC64-NEXT: ori r7, r6, 65535 +; PPC64-NEXT: xori r4, r4, 16 +; PPC64-NEXT: slw r8, r5, r4 +; PPC64-NEXT: slw r9, r6, r4 +; PPC64-NEXT: slw r5, r7, r4 +; PPC64-NEXT: rldicr r3, r3, 0, 61 +; PPC64-NEXT: and r6, r8, r5 +; PPC64-NEXT: and r8, r9, r5 +; PPC64-NEXT: .LBB9_1: +; PPC64-NEXT: lwarx r9, 0, r3 +; PPC64-NEXT: and r7, r9, r5 +; PPC64-NEXT: cmpw r7, r8 +; PPC64-NEXT: bne cr0, .LBB9_3 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: andc r9, r9, r5 +; PPC64-NEXT: or r9, r9, r6 +; PPC64-NEXT: stwcx. r9, 0, r3 +; PPC64-NEXT: bne cr0, .LBB9_1 +; PPC64-NEXT: b .LBB9_4 +; PPC64-NEXT: .LBB9_3: +; PPC64-NEXT: stwcx. r9, 0, r3 +; PPC64-NEXT: .LBB9_4: +; PPC64-NEXT: srw r3, r7, r4 +; PPC64-NEXT: lwsync +; PPC64-NEXT: blr %val = cmpxchg weak i16* %mem, i16 0, i16 1 acquire acquire -; CHECK: lwsync %loaded = extractvalue { i16, i1} %val, 0 ret i16 %loaded } define i32 @cas_strong_i32_acqrel_acquire(i32* %mem) { -; CHECK-LABEL: cas_strong_i32_acqrel_acquire -; CHECK: lwsync +; CHECK-LABEL: cas_strong_i32_acqrel_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: li r6, 0 +; CHECK-NEXT: lwsync +; CHECK-NEXT: .LBB10_1: +; CHECK-NEXT: lwarx r4, 0, r3 +; CHECK-NEXT: cmpw r6, r4 +; CHECK-NEXT: bne cr0, .LBB10_3 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: stwcx. r5, 0, r3 +; CHECK-NEXT: bne cr0, .LBB10_1 +; CHECK-NEXT: b .LBB10_4 +; CHECK-NEXT: .LBB10_3: +; CHECK-NEXT: stwcx. r4, 0, r3 +; CHECK-NEXT: .LBB10_4: +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: lwsync +; CHECK-NEXT: blr %val = cmpxchg i32* %mem, i32 0, i32 1 acq_rel acquire -; CHECK: lwsync %loaded = extractvalue { i32, i1} %val, 0 ret i32 %loaded } define i64 @cas_weak_i64_release_monotonic(i64* %mem) { -; CHECK-LABEL: cas_weak_i64_release_monotonic -; CHECK: lwsync +; PPC32-LABEL: cas_weak_i64_release_monotonic: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r4, 0 +; PPC32-NEXT: stw r4, 12(r1) +; PPC32-NEXT: li r5, 0 +; PPC32-NEXT: stw r4, 8(r1) +; PPC32-NEXT: addi r4, r1, 8 +; PPC32-NEXT: li r6, 1 +; PPC32-NEXT: li r7, 3 +; PPC32-NEXT: li r8, 0 +; PPC32-NEXT: bl __atomic_compare_exchange_8 +; PPC32-NEXT: lwz r4, 12(r1) +; PPC32-NEXT: lwz r3, 8(r1) +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: cas_weak_i64_release_monotonic: +; PPC64: # %bb.0: +; PPC64-NEXT: li r5, 1 +; PPC64-NEXT: li r6, 0 +; PPC64-NEXT: lwsync +; PPC64-NEXT: .LBB11_1: +; PPC64-NEXT: ldarx r4, 0, r3 +; PPC64-NEXT: cmpd r6, r4 +; PPC64-NEXT: bne cr0, .LBB11_4 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: stdcx. r5, 0, r3 +; PPC64-NEXT: bne cr0, .LBB11_1 +; PPC64-NEXT: # %bb.3: +; PPC64-NEXT: mr r3, r4 +; PPC64-NEXT: blr +; PPC64-NEXT: .LBB11_4: +; PPC64-NEXT: stdcx. 
r4, 0, r3 +; PPC64-NEXT: mr r3, r4 +; PPC64-NEXT: blr %val = cmpxchg weak i64* %mem, i64 0, i64 1 release monotonic -; CHECK-NOT: [sync ] %loaded = extractvalue { i64, i1} %val, 0 ret i64 %loaded } ; AtomicRMW define i8 @add_i8_monotonic(i8* %mem, i8 %operand) { -; CHECK-LABEL: add_i8_monotonic -; CHECK-NOT: sync +; PPC32-LABEL: add_i8_monotonic: +; PPC32: # %bb.0: +; PPC32-NEXT: rlwinm r7, r3, 3, 27, 28 +; PPC32-NEXT: li r6, 255 +; PPC32-NEXT: rlwinm r5, r3, 0, 0, 29 +; PPC32-NEXT: xori r3, r7, 24 +; PPC32-NEXT: slw r4, r4, r3 +; PPC32-NEXT: slw r6, r6, r3 +; PPC32-NEXT: .LBB12_1: +; PPC32-NEXT: lwarx r7, 0, r5 +; PPC32-NEXT: add r8, r4, r7 +; PPC32-NEXT: andc r9, r7, r6 +; PPC32-NEXT: and r8, r8, r6 +; PPC32-NEXT: or r8, r8, r9 +; PPC32-NEXT: stwcx. r8, 0, r5 +; PPC32-NEXT: bne cr0, .LBB12_1 +; PPC32-NEXT: # %bb.2: +; PPC32-NEXT: srw r3, r7, r3 +; PPC32-NEXT: blr +; +; PPC64-LABEL: add_i8_monotonic: +; PPC64: # %bb.0: +; PPC64-NEXT: rlwinm r7, r3, 3, 27, 28 +; PPC64-NEXT: li r6, 255 +; PPC64-NEXT: rldicr r5, r3, 0, 61 +; PPC64-NEXT: xori r3, r7, 24 +; PPC64-NEXT: slw r4, r4, r3 +; PPC64-NEXT: slw r6, r6, r3 +; PPC64-NEXT: .LBB12_1: +; PPC64-NEXT: lwarx r7, 0, r5 +; PPC64-NEXT: add r8, r4, r7 +; PPC64-NEXT: andc r9, r7, r6 +; PPC64-NEXT: and r8, r8, r6 +; PPC64-NEXT: or r8, r8, r9 +; PPC64-NEXT: stwcx. r8, 0, r5 +; PPC64-NEXT: bne cr0, .LBB12_1 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: srw r3, r7, r3 +; PPC64-NEXT: blr %val = atomicrmw add i8* %mem, i8 %operand monotonic ret i8 %val } define i16 @xor_i16_seq_cst(i16* %mem, i16 %operand) { -; CHECK-LABEL: xor_i16_seq_cst -; CHECK: sync +; PPC32-LABEL: xor_i16_seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: li r6, 0 +; PPC32-NEXT: rlwinm r7, r3, 3, 27, 27 +; PPC32-NEXT: rlwinm r5, r3, 0, 0, 29 +; PPC32-NEXT: ori r6, r6, 65535 +; PPC32-NEXT: xori r3, r7, 16 +; PPC32-NEXT: slw r4, r4, r3 +; PPC32-NEXT: slw r6, r6, r3 +; PPC32-NEXT: sync +; PPC32-NEXT: .LBB13_1: +; PPC32-NEXT: lwarx r7, 0, r5 +; PPC32-NEXT: xor r8, r4, r7 +; PPC32-NEXT: andc r9, r7, r6 +; PPC32-NEXT: and r8, r8, r6 +; PPC32-NEXT: or r8, r8, r9 +; PPC32-NEXT: stwcx. r8, 0, r5 +; PPC32-NEXT: bne cr0, .LBB13_1 +; PPC32-NEXT: # %bb.2: +; PPC32-NEXT: srw r3, r7, r3 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: xor_i16_seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: li r6, 0 +; PPC64-NEXT: rlwinm r7, r3, 3, 27, 27 +; PPC64-NEXT: rldicr r5, r3, 0, 61 +; PPC64-NEXT: ori r6, r6, 65535 +; PPC64-NEXT: xori r3, r7, 16 +; PPC64-NEXT: slw r4, r4, r3 +; PPC64-NEXT: slw r6, r6, r3 +; PPC64-NEXT: sync +; PPC64-NEXT: .LBB13_1: +; PPC64-NEXT: lwarx r7, 0, r5 +; PPC64-NEXT: xor r8, r4, r7 +; PPC64-NEXT: andc r9, r7, r6 +; PPC64-NEXT: and r8, r8, r6 +; PPC64-NEXT: or r8, r8, r9 +; PPC64-NEXT: stwcx. r8, 0, r5 +; PPC64-NEXT: bne cr0, .LBB13_1 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: srw r3, r7, r3 +; PPC64-NEXT: lwsync +; PPC64-NEXT: blr %val = atomicrmw xor i16* %mem, i16 %operand seq_cst -; CHECK: lwsync ret i16 %val } define i32 @xchg_i32_acq_rel(i32* %mem, i32 %operand) { -; CHECK-LABEL: xchg_i32_acq_rel -; CHECK: lwsync +; CHECK-LABEL: xchg_i32_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: lwsync +; CHECK-NEXT: .LBB14_1: +; CHECK-NEXT: lwarx r5, 0, r3 +; CHECK-NEXT: stwcx. 
r4, 0, r3 +; CHECK-NEXT: bne cr0, .LBB14_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: mr r3, r5 +; CHECK-NEXT: lwsync +; CHECK-NEXT: blr %val = atomicrmw xchg i32* %mem, i32 %operand acq_rel -; CHECK: lwsync ret i32 %val } define i64 @and_i64_release(i64* %mem, i64 %operand) { -; CHECK-LABEL: and_i64_release -; CHECK: lwsync +; PPC32-LABEL: and_i64_release: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r7, 3 +; PPC32-NEXT: bl __atomic_fetch_and_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: and_i64_release: +; PPC64: # %bb.0: +; PPC64-NEXT: lwsync +; PPC64-NEXT: .LBB15_1: +; PPC64-NEXT: ldarx r5, 0, r3 +; PPC64-NEXT: and r6, r4, r5 +; PPC64-NEXT: stdcx. r6, 0, r3 +; PPC64-NEXT: bne cr0, .LBB15_1 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: mr r3, r5 +; PPC64-NEXT: blr %val = atomicrmw and i64* %mem, i64 %operand release -; CHECK-NOT: [sync ] ret i64 %val } diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll index 90ea31b26916e1..91745b4b3ea215 100644 --- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -557,13 +557,13 @@ define double @fcmp_nnan(double %a, double %y, double %z) { ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:' ; FMFDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64 ; FMFDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1 -; FMFDEBUG: f64,ch,glue = CopyFromReg afn t16, Register:f64 $f1, t16:1 +; FMFDEBUG: f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:' ; GLOBALDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64 ; GLOBALDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1 -; GLOBALDEBUG: f64,ch,glue = CopyFromReg afn t16, Register:f64 $f1, t16:1 +; GLOBALDEBUG: f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:' declare double @log2(double) diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-14.ll b/llvm/test/CodeGen/SystemZ/fp-mul-14.ll index 8bab2135739c44..363511655ad916 100644 --- a/llvm/test/CodeGen/SystemZ/fp-mul-14.ll +++ b/llvm/test/CodeGen/SystemZ/fp-mul-14.ll @@ -2,9 +2,6 @@ ; ; Check that a multiply-and-add results. 
-; FIXME: This test is xfailed temporarily -; XFAIL: * - define void @f1(float %arg, float* %Dst) { ; CHECK-LABEL: f1: ; CHECK: maeb diff --git a/llvm/test/CodeGen/SystemZ/vec-zext.ll b/llvm/test/CodeGen/SystemZ/vec-zext.ll index b4c8f2307b0b7a..cb61d31e5ebe31 100644 --- a/llvm/test/CodeGen/SystemZ/vec-zext.ll +++ b/llvm/test/CodeGen/SystemZ/vec-zext.ll @@ -92,3 +92,19 @@ define <8 x i16> @fun10(<8 x i8> %val1) { ret <8 x i16> %z } +define <2 x i32> @fun11(<2 x i64> %Arg1, <2 x i64> %Arg2) { +; CHECK-LABEL: fun11: +; CHECK: vgbm %v0, 0 +; CHECK-NEXT: vceqg %v1, %v24, %v0 +; CHECK-NEXT: vceqg %v0, %v26, %v0 +; CHECK-NEXT: vo %v0, %v1, %v0 +; CHECK-NEXT: vrepig %v1, 1 +; CHECK-NEXT: vn %v0, %v0, %v1 +; CHECK-NEXT: vpkg %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %i3 = icmp eq <2 x i64> %Arg1, zeroinitializer + %i5 = icmp eq <2 x i64> %Arg2, zeroinitializer + %i6 = or <2 x i1> %i3, %i5 + %i7 = zext <2 x i1> %i6 to <2 x i32> + ret <2 x i32> %i7 +} diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll index 2fa8a4d8ed7eff..459e2c8395997d 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -10,7 +10,6 @@ define dso_local i32 @vpsel_mul_reduce_add(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: add.w r12, r3, #3 ; CHECK-NEXT: mov.w lr, #1 ; CHECK-NEXT: bic r12, r12, #3 @@ -21,28 +20,26 @@ define dso_local i32 @vpsel_mul_reduce_add(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: and r4, r12, #15 -; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill ; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vldrwt.u32 q1, [r2], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r1], #16 ; CHECK-NEXT: vdup.32 q3, r4 ; CHECK-NEXT: vpt.i32 eq, q3, zr ; CHECK-NEXT: vmovt q1, q2 -; CHECK-NEXT: add.w r12, r12, #4 -; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload +; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 ; CHECK-NEXT: vmul.i32 q1, q1, q2 +; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB0_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u32 r0, q0 -; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop {r4, pc} entry: %cmp8 = icmp eq i32 %N, 0 @@ -101,8 +98,7 @@ define dso_local i32 @vpsel_mul_reduce_add_2(i32* noalias nocapture readonly %a, ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: ldr.w r12, [sp, #40] +; CHECK-NEXT: ldr.w r12, [sp, #32] ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: beq .LBB1_4 ; CHECK-NEXT: @ %bb.1: @ %vector.ph @@ -116,10 +112,9 @@ define dso_local i32 @vpsel_mul_reduce_add_2(i32* noalias nocapture readonly %a, ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: and r5, r4, #15 -; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: vpsttt ; CHECK-NEXT: vldrwt.u32 q1, [r1], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r3], #16 @@ -127,22 
+122,21 @@ define dso_local i32 @vpsel_mul_reduce_add_2(i32* noalias nocapture readonly %a, ; CHECK-NEXT: vdup.32 q4, r5 ; CHECK-NEXT: vpt.i32 eq, q4, zr ; CHECK-NEXT: vsubt.i32 q1, q3, q2 -; CHECK-NEXT: adds r4, #4 -; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 ; CHECK-NEXT: vmul.i32 q1, q1, q2 +; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB1_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u32 r0, q0 -; CHECK-NEXT: b .LBB1_5 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r4, r5, r7, pc} ; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: .LBB1_5: @ %for.cond.cleanup -; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r7, pc} i32* noalias nocapture readonly %c, i32* noalias nocapture readonly %d, i32 %N) { diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir index 9a5856335dfc67..210eae9e643509 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir @@ -173,11 +173,10 @@ body: | ; CHECK: renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 - ; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg + ; CHECK: dead $r3 = tMOVr $r1, 14 /* CC::al */, $noreg ; CHECK: bb.3.do.body: ; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000) - ; CHECK: liveins: $lr, $q0, $q1, $r0, $r1, $r2, $r3 - ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg + ; CHECK: liveins: $lr, $q0, $q1, $r0, $r1, $r2 ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.01, align 4) ; CHECK: renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VSUBf32 killed renamable $q2, renamable $q1, 0, $noreg, undef renamable $q2 ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 0, killed $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll index 5a370e5f96e76c..1cf101ea5d5f1f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -27,7 +27,6 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float ; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: .LBB0_3: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vldrw.u32 q2, [r0], #16 ; CHECK-NEXT: vsub.f32 q2, q2, q1 ; CHECK-NEXT: vfma.f32 q0, q2, q2 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll new file mode 100644 index 00000000000000..6ce2b9f5f1c026 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll @@ -0,0 +1,223 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp %s -o - | FileCheck %s + +define void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg4, i16 zeroext 
%arg5) { +; CHECK-LABEL: remat_vctp: +; CHECK: @ %bb.0: @ %bb +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: ldrd r5, r12, [sp, #80] +; CHECK-NEXT: cmp.w r12, #4 +; CHECK-NEXT: mov r4, r12 +; CHECK-NEXT: vmvn.i32 q0, #0x80000000 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge r4, #4 +; CHECK-NEXT: vmov.i32 q1, #0x3f +; CHECK-NEXT: sub.w r4, r12, r4 +; CHECK-NEXT: vmov.i32 q2, #0x1 +; CHECK-NEXT: add.w lr, r4, #3 +; CHECK-NEXT: movs r4, #1 +; CHECK-NEXT: add.w lr, r4, lr, lsr #2 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: .LBB0_1: @ %bb6 +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vctp.32 r12 +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrwt.u32 q4, [r1], #16 +; CHECK-NEXT: vabs.s32 q5, q4 +; CHECK-NEXT: vcls.s32 q3, q5 +; CHECK-NEXT: vshl.u32 q5, q5, q3 +; CHECK-NEXT: vadd.i32 q3, q3, q2 +; CHECK-NEXT: vshr.u32 q6, q5, #24 +; CHECK-NEXT: vand q6, q6, q1 +; CHECK-NEXT: vldrw.u32 q7, [r5, q6, uxtw #2] +; CHECK-NEXT: vqrdmulh.s32 q6, q7, q5 +; CHECK-NEXT: vqsub.s32 q6, q0, q6 +; CHECK-NEXT: vqrdmulh.s32 q6, q7, q6 +; CHECK-NEXT: vqshl.s32 q6, q6, #1 +; CHECK-NEXT: vqrdmulh.s32 q5, q6, q5 +; CHECK-NEXT: vqsub.s32 q5, q0, q5 +; CHECK-NEXT: vqrdmulh.s32 q5, q6, q5 +; CHECK-NEXT: vqshl.s32 q5, q5, #1 +; CHECK-NEXT: vpt.s32 lt, q4, zr +; CHECK-NEXT: vnegt.s32 q5, q5 +; CHECK-NEXT: vctp.32 r12 +; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 +; CHECK-NEXT: vqrdmulh.s32 q4, q4, q5 +; CHECK-NEXT: vpstt +; CHECK-NEXT: vstrwt.32 q4, [r2], #16 +; CHECK-NEXT: vstrwt.32 q3, [r3], #16 +; CHECK-NEXT: le lr, .LBB0_1 +; CHECK-NEXT: @ %bb.2: @ %bb44 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: pop {r4, r5, r7, pc} +bb: + %i = zext i16 %arg5 to i32 + br label %bb6 + +bb6: ; preds = %bb6, %bb + %i7 = phi i32* [ %arg3, %bb ], [ %i38, %bb6 ] + %i8 = phi i32 [ %i, %bb ], [ %i42, %bb6 ] + %i9 = phi i32* [ %arg2, %bb ], [ %i41, %bb6 ] + %i10 = phi i32* [ %arg1, %bb ], [ %i40, %bb6 ] + %i11 = phi i32* [ %arg, %bb ], [ %i39, %bb6 ] + %i12 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i8) + %i13 = bitcast i32* %i11 to <4 x i32>* + %i14 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %i13, i32 4, <4 x i1> %i12, <4 x i32> zeroinitializer) + %i15 = bitcast i32* %i10 to <4 x i32>* + %i16 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %i15, i32 4, <4 x i1> %i12, <4 x i32> zeroinitializer) + %i17 = icmp slt <4 x i32> %i16, zeroinitializer + %i18 = sub <4 x i32> zeroinitializer, %i16 + %i19 = select <4 x i1> %i17, <4 x i32> %i18, <4 x i32> %i16 + %i20 = tail call <4 x i32> @llvm.arm.mve.vcls.v4i32(<4 x i32> %i19) + %i21 = shl <4 x i32> %i19, %i20 + %i22 = add <4 x i32> %i20, <i32 1, i32 1, i32 1, i32 1> + %i23 = lshr <4 x i32> %i21, <i32 24, i32 24, i32 24, i32 24> + %i24 = and <4 x i32> %i23, <i32 63, i32 63, i32 63, i32 63> + %i25 = tail call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %arg4, <4 x i32> %i24, i32 32, i32 2, i32 0) + %i26 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i25, <4 x i32> %i21) + %i27 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> %i26) + %i28 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i25, <4 x i32> %i27) + %i29 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %i28, i32 1, i32 0) + %i30 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i29, <4 x i32> %i21) + %i31 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> %i30) + %i32 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i29, <4 x i32>
%i31) + %i33 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %i32, i32 1, i32 0) + %i34 = tail call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> %i33, <4 x i1> %i17, <4 x i32> %i33) + %i35 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i14, <4 x i32> %i34) + %i36 = bitcast i32* %i9 to <4 x i32>* + %i37 = bitcast i32* %i7 to <4 x i32>* + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %i35, <4 x i32>* %i36, i32 4, <4 x i1> %i12) + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %i22, <4 x i32>* %i37, i32 4, <4 x i1> %i12) + %i38 = getelementptr inbounds i32, i32* %i7, i32 4 + %i39 = getelementptr inbounds i32, i32* %i11, i32 4 + %i40 = getelementptr inbounds i32, i32* %i10, i32 4 + %i41 = getelementptr inbounds i32, i32* %i9, i32 4 + %i42 = add nsw i32 %i8, -4 + %i43 = icmp sgt i32 %i8, 4 + br i1 %i43, label %bb6, label %bb44 + +bb44: ; preds = %bb6 + ret void +} + +define void @dont_remat_predicated_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg4, i16 zeroext %arg5, i32 %conv.mask) { +; CHECK-LABEL: dont_remat_predicated_vctp: +; CHECK: @ %bb.0: @ %bb +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: ldrd r6, r12, [sp, #88] +; CHECK-NEXT: movs r4, #4 +; CHECK-NEXT: cmp.w r12, #4 +; CHECK-NEXT: vmvn.i32 q0, #0x80000000 +; CHECK-NEXT: csel r5, r12, r4, lt +; CHECK-NEXT: vmov.i32 q1, #0x3f +; CHECK-NEXT: sub.w r5, r12, r5 +; CHECK-NEXT: vmov.i32 q2, #0x1 +; CHECK-NEXT: add.w lr, r5, #3 +; CHECK-NEXT: movs r5, #1 +; CHECK-NEXT: add.w lr, r5, lr, lsr #2 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: .LBB1_1: @ %bb6 +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vctp.32 r12 +; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vpst +; CHECK-NEXT: vctpt.32 r4 +; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrwt.u32 q4, [r1], #16 +; CHECK-NEXT: vabs.s32 q5, q4 +; CHECK-NEXT: vcls.s32 q3, q5 +; CHECK-NEXT: vshl.u32 q5, q5, q3 +; CHECK-NEXT: vadd.i32 q3, q3, q2 +; CHECK-NEXT: vshr.u32 q6, q5, #24 +; CHECK-NEXT: vand q6, q6, q1 +; CHECK-NEXT: vldrw.u32 q7, [r6, q6, uxtw #2] +; CHECK-NEXT: vqrdmulh.s32 q6, q7, q5 +; CHECK-NEXT: vqsub.s32 q6, q0, q6 +; CHECK-NEXT: vqrdmulh.s32 q6, q7, q6 +; CHECK-NEXT: vqshl.s32 q6, q6, #1 +; CHECK-NEXT: vqrdmulh.s32 q5, q6, q5 +; CHECK-NEXT: vqsub.s32 q5, q0, q5 +; CHECK-NEXT: vqrdmulh.s32 q5, q6, q5 +; CHECK-NEXT: vqshl.s32 q5, q5, #1 +; CHECK-NEXT: vpt.s32 lt, q4, zr +; CHECK-NEXT: vnegt.s32 q5, q5 +; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 +; CHECK-NEXT: vqrdmulh.s32 q4, q4, q5 +; CHECK-NEXT: vpstt +; CHECK-NEXT: vstrwt.32 q4, [r2], #16 +; CHECK-NEXT: vstrwt.32 q3, [r3], #16 +; CHECK-NEXT: le lr, .LBB1_1 +; CHECK-NEXT: @ %bb.2: @ %bb44 +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: pop {r4, r5, r6, pc} +bb: + %i = zext i16 %arg5 to i32 + br label %bb6 + +bb6: ; preds = %bb6, %bb + %i7 = phi i32* [ %arg3, %bb ], [ %i38, %bb6 ] + %i8 = phi i32 [ %i, %bb ], [ %i42, %bb6 ] + %i9 = phi i32* [ %arg2, %bb ], [ %i41, %bb6 ] + %i10 = phi i32* [ %arg1, %bb ], [ %i40, %bb6 ] + %i11 = phi i32* [ %arg, %bb ], [ %i39, %bb6 ] + %i12 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 4) + %mask = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i8) + %pred = and <4 x i1> %i12, %mask + %i13 = bitcast i32* %i11 to <4 x i32>* + %i14 = tail call <4 x i32> 
@llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %i13, i32 4, <4 x i1> %pred, <4 x i32> zeroinitializer) + %i15 = bitcast i32* %i10 to <4 x i32>* + %i16 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %i15, i32 4, <4 x i1> %pred, <4 x i32> zeroinitializer) + %i17 = icmp slt <4 x i32> %i16, zeroinitializer + %i18 = sub <4 x i32> zeroinitializer, %i16 + %i19 = select <4 x i1> %i17, <4 x i32> %i18, <4 x i32> %i16 + %i20 = tail call <4 x i32> @llvm.arm.mve.vcls.v4i32(<4 x i32> %i19) + %i21 = shl <4 x i32> %i19, %i20 + %i22 = add <4 x i32> %i20, <i32 1, i32 1, i32 1, i32 1> + %i23 = lshr <4 x i32> %i21, <i32 24, i32 24, i32 24, i32 24> + %i24 = and <4 x i32> %i23, <i32 63, i32 63, i32 63, i32 63> + %i25 = tail call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %arg4, <4 x i32> %i24, i32 32, i32 2, i32 0) + %i26 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i25, <4 x i32> %i21) + %i27 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> %i26) + %i28 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i25, <4 x i32> %i27) + %i29 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %i28, i32 1, i32 0) + %i30 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i29, <4 x i32> %i21) + %i31 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> %i30) + %i32 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i29, <4 x i32> %i31) + %i33 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %i32, i32 1, i32 0) + %i34 = tail call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> %i33, <4 x i1> %i17, <4 x i32> %i33) + %i35 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i14, <4 x i32> %i34) + %i36 = bitcast i32* %i9 to <4 x i32>* + %i37 = bitcast i32* %i7 to <4 x i32>* + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %i35, <4 x i32>* %i36, i32 4, <4 x i1> %pred) + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %i22, <4 x i32>* %i37, i32 4, <4 x i1> %pred) + %i38 = getelementptr inbounds i32, i32* %i7, i32 4 + %i39 = getelementptr inbounds i32, i32* %i11, i32 4 + %i40 = getelementptr inbounds i32, i32* %i10, i32 4 + %i41 = getelementptr inbounds i32, i32* %i9, i32 4 + %i42 = add nsw i32 %i8, -4 + %i43 = icmp sgt i32 %i8, 4 + br i1 %i43, label %bb6, label %bb44 + +bb44: ; preds = %bb6 + ret void +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) +declare <4 x i1> @llvm.arm.mve.vctp32(i32) +declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) +declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) +declare <4 x i32> @llvm.arm.mve.vcls.v4i32(<4 x i32>) +declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32*, <4 x i32>, i32, i32, i32) +declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32>, i32, i32) +declare <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) diff --git a/llvm/test/CodeGen/Thumb2/mve-abs.ll b/llvm/test/CodeGen/Thumb2/mve-abs.ll index 0b5dcbced1a562..8a9b8814ef2ec8 100644 --- a/llvm/test/CodeGen/Thumb2/mve-abs.ll +++ b/llvm/test/CodeGen/Thumb2/mve-abs.ll @@ -40,33 +40,24 @@ entry: define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) { ; CHECK-LABEL: abs_v2i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: rsbs.w lr, r1, #0 -;
CHECK-NEXT: sbc.w r2, r12, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: cset r3, mi -; CHECK-NEXT: ands r3, r3, #1 -; CHECK-NEXT: csel r1, lr, r1, ne -; CHECK-NEXT: csel r0, r2, r0, ne -; CHECK-NEXT: vmov.32 q1[0], r1 -; CHECK-NEXT: vmov r1, s2 -; CHECK-NEXT: vmov.32 q1[1], r0 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: adds.w r1, r1, r0, asr #31 +; CHECK-NEXT: adc.w r2, r0, r0, asr #31 +; CHECK-NEXT: eor.w r2, r2, r0, asr #31 +; CHECK-NEXT: eor.w r0, r1, r0, asr #31 +; CHECK-NEXT: vmov.32 q1[0], r0 ; CHECK-NEXT: vmov r0, s3 -; CHECK-NEXT: rsbs r2, r1, #0 -; CHECK-NEXT: sbc.w r12, r12, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: cset r3, mi -; CHECK-NEXT: ands r3, r3, #1 -; CHECK-NEXT: csel r1, r2, r1, ne -; CHECK-NEXT: csel r0, r12, r0, ne +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: adds.w r1, r1, r0, asr #31 +; CHECK-NEXT: eor.w r1, r1, r0, asr #31 ; CHECK-NEXT: vmov.32 q1[2], r1 +; CHECK-NEXT: adc.w r1, r0, r0, asr #31 +; CHECK-NEXT: eor.w r0, r1, r0, asr #31 ; CHECK-NEXT: vmov.32 q1[3], r0 ; CHECK-NEXT: vmov q0, q1 -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: bx lr entry: %0 = icmp slt <2 x i64> %s1, zeroinitializer %1 = sub nsw <2 x i64> zeroinitializer, %s1 diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll index a1f25e0f33342e..77f0c77033f95b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll @@ -3,30 +3,11 @@ ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16,+fp64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP define arm_aapcs_vfpcc float @fadd_v2f32(<2 x float> %x, float %y) { -; CHECK-FP-LABEL: fadd_v2f32: -; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vadd.f32 s0, s0, s1 -; CHECK-FP-NEXT: vldr s2, .LCPI0_0 -; CHECK-FP-NEXT: vadd.f32 s0, s0, s2 -; CHECK-FP-NEXT: vadd.f32 s0, s4, s0 -; CHECK-FP-NEXT: bx lr -; CHECK-FP-NEXT: .p2align 2 -; CHECK-FP-NEXT: @ %bb.1: -; CHECK-FP-NEXT: .LCPI0_0: -; CHECK-FP-NEXT: .long 0x00000000 @ float 0 -; -; CHECK-NOFP-LABEL: fadd_v2f32: -; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vadd.f32 s0, s0, s1 -; CHECK-NOFP-NEXT: vldr s2, .LCPI0_0 -; CHECK-NOFP-NEXT: vadd.f32 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f32 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f32 s0, s4, s0 -; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 2 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI0_0: -; CHECK-NOFP-NEXT: .long 0x00000000 @ float 0 +; CHECK-LABEL: fadd_v2f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vadd.f32 s0, s0, s1 +; CHECK-NEXT: vadd.f32 s0, s4, s0 +; CHECK-NEXT: bx lr entry: %z = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float %y, <2 x float> %x) ret float %z @@ -80,34 +61,14 @@ entry: } define arm_aapcs_vfpcc void @fadd_v2f16(<2 x half> %x, half* %yy) { -; CHECK-FP-LABEL: fadd_v2f16: -; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmovx.f16 s4, s0 -; CHECK-FP-NEXT: vadd.f16 s0, s0, s4 -; CHECK-FP-NEXT: vldr.16 s2, [r0] -; CHECK-FP-NEXT: vadd.f16 s0, s2, s0 -; CHECK-FP-NEXT: vstr.16 s0, [r0] -; CHECK-FP-NEXT: bx lr -; -; CHECK-NOFP-LABEL: fadd_v2f16: -; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s4 -; CHECK-NOFP-NEXT: vldr.16 s2, .LCPI3_0 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; 
CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vldr.16 s2, [r0] -; CHECK-NOFP-NEXT: vadd.f16 s0, s2, s0 -; CHECK-NOFP-NEXT: vstr.16 s0, [r0] -; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 1 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI3_0: -; CHECK-NOFP-NEXT: .short 0x0000 @ half 0 +; CHECK-LABEL: fadd_v2f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovx.f16 s4, s0 +; CHECK-NEXT: vadd.f16 s0, s0, s4 +; CHECK-NEXT: vldr.16 s2, [r0] +; CHECK-NEXT: vadd.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %y = load half, half* %yy %z = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v2f16(half %y, <2 x half> %x) @@ -134,20 +95,11 @@ define arm_aapcs_vfpcc void @fadd_v4f16(<4 x half> %x, half* %yy) { ; CHECK-NOFP-NEXT: vadd.f16 s4, s0, s4 ; CHECK-NOFP-NEXT: vmovx.f16 s0, s1 ; CHECK-NOFP-NEXT: vadd.f16 s4, s4, s1 -; CHECK-NOFP-NEXT: vldr.16 s2, .LCPI4_0 -; CHECK-NOFP-NEXT: vadd.f16 s0, s4, s0 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 ; CHECK-NOFP-NEXT: vldr.16 s2, [r0] +; CHECK-NOFP-NEXT: vadd.f16 s0, s4, s0 ; CHECK-NOFP-NEXT: vadd.f16 s0, s2, s0 ; CHECK-NOFP-NEXT: vstr.16 s0, [r0] ; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 1 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI4_0: -; CHECK-NOFP-NEXT: .short 0x0000 @ half 0 entry: %y = load half, half* %yy %z = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half %y, <4 x half> %x) diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll index 887dc470b3bc8b..f78d56ca0b9620 100644 --- a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll +++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll @@ -1023,6 +1023,54 @@ while.end: ; preds = %while.body, %while. ret void } +; When the function return type is non-void and 'end' instructions are at the +; very end of a function, CFGStackify's fixEndsAtEndOfFunction function fixes +; the corresponding block/loop/try's type to match the function's return type. +; But when a `try`'s type is fixed, we should also check `end` instructions +; before its corresponding `catch`, because both the `try` and `catch` bodies +; should satisfy the return type requirements.
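+; For example, in test19 below the function returns i32 and the loop body falls
+; through into the try's final `end`, so the `end_loop` before the `catch` must
+; also be typed i32; the NOSORT lines check exactly that shape.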
+ +; NOSORT-LABEL: test19 +; NOSORT: try i32 +; NOSORT: loop i32 +; NOSORT: end_loop +; NOSORT: catch +; NOSORT: end_try +; NOSORT-NEXT: end_function +define i32 @test19(i32 %n) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +entry: + %t = alloca %class.Object, align 1 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, %n + br label %for.body + +for.body: ; preds = %for.cond + %div = sdiv i32 %n, 2 + %cmp1 = icmp eq i32 %i.0, %div + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %call = invoke i32 @baz() + to label %invoke.cont unwind label %ehcleanup + +invoke.cont: ; preds = %if.then + %call2 = call %class.Object* @_ZN6ObjectD2Ev(%class.Object* %t) #4 + ret i32 %call + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +ehcleanup: ; preds = %if.then + %0 = cleanuppad within none [] + %call3 = call %class.Object* @_ZN6ObjectD2Ev(%class.Object* %t) #4 [ "funclet"(token %0) ] + cleanupret from %0 unwind to caller +} + + ; Check if the unwind destination mismatch stats are correct ; NOSORT-STAT: 17 wasm-cfg-stackify - Number of EH pad unwind mismatches found diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll index 345830676abaaa..8e20b001cc3e87 100644 --- a/llvm/test/CodeGen/X86/abs.ll +++ b/llvm/test/CodeGen/X86/abs.ll @@ -132,46 +132,43 @@ define i64 @test_i64(i64 %a) nounwind { define i128 @test_i128(i128 %a) nounwind { ; X64-LABEL: test_i128: ; X64: # %bb.0: -; X64-NEXT: xorl %edx, %edx +; X64-NEXT: movq %rsi, %rdx ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: negq %rax -; X64-NEXT: sbbq %rsi, %rdx -; X64-NEXT: testq %rsi, %rsi -; X64-NEXT: cmovnsq %rdi, %rax -; X64-NEXT: cmovnsq %rsi, %rdx +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: sarq $63, %rcx +; X64-NEXT: addq %rcx, %rax +; X64-NEXT: adcq %rcx, %rdx +; X64-NEXT: xorq %rcx, %rax +; X64-NEXT: xorq %rcx, %rdx ; X64-NEXT: retq ; ; X86-LABEL: test_i128: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: sarl $31, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: addl %edx, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: negl %edi -; X86-NEXT: movl $0, %ebx -; X86-NEXT: sbbl %edx, %ebx -; X86-NEXT: movl $0, %ebp -; X86-NEXT: sbbl %ecx, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: sbbl %eax, %esi -; X86-NEXT: testl %eax, %eax -; X86-NEXT: cmovnsl %eax, %esi -; X86-NEXT: cmovnsl %ecx, %ebp -; X86-NEXT: cmovnsl %edx, %ebx -; X86-NEXT: cmovnsl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, (%eax) -; X86-NEXT: movl %ebx, 4(%eax) -; X86-NEXT: movl %ebp, 8(%eax) -; X86-NEXT: movl %esi, 12(%eax) +; X86-NEXT: adcl %edx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: adcl %edx, %ebx +; X86-NEXT: adcl %edx, %ecx +; X86-NEXT: xorl %edx, %ecx +; X86-NEXT: xorl %edx, %ebx +; X86-NEXT: xorl %edx, %edi +; X86-NEXT: xorl %edx, %esi +; X86-NEXT: movl %esi, (%eax) +; X86-NEXT: movl %edi, 4(%eax) +; X86-NEXT: movl %ebx, 8(%eax) +; X86-NEXT: movl %ecx, 12(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 %r = call i128 @llvm.abs.i128(i128 %a, i1 false) ret i128 %r diff --git 
a/llvm/test/CodeGen/X86/fp-undef.ll b/llvm/test/CodeGen/X86/fp-undef.ll index d46bea703fdf0b..95049d16a7bf44 100644 --- a/llvm/test/CodeGen/X86/fp-undef.ll +++ b/llvm/test/CodeGen/X86/fp-undef.ll @@ -100,7 +100,6 @@ define float @frem_undef_op1(float %x) { define float @fadd_undef_op0_nnan(float %x) { ; ANY-LABEL: fadd_undef_op0_nnan: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fadd nnan float undef, %x ret float %r @@ -109,7 +108,6 @@ define float @fadd_undef_op0_nnan(float %x) { define float @fadd_undef_op1_fast(float %x) { ; ANY-LABEL: fadd_undef_op1_fast: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fadd fast float %x, undef ret float %r @@ -118,7 +116,6 @@ define float @fadd_undef_op1_fast(float %x) { define float @fsub_undef_op0_fast(float %x) { ; ANY-LABEL: fsub_undef_op0_fast: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fsub fast float undef, %x ret float %r @@ -127,7 +124,6 @@ define float @fsub_undef_op0_fast(float %x) { define float @fsub_undef_op1_nnan(float %x) { ; ANY-LABEL: fsub_undef_op1_nnan: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fsub nnan float %x, undef ret float %r @@ -136,7 +132,6 @@ define float @fsub_undef_op1_nnan(float %x) { define float @fmul_undef_op0_nnan(float %x) { ; ANY-LABEL: fmul_undef_op0_nnan: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fmul nnan float undef, %x ret float %r @@ -145,7 +140,6 @@ define float @fmul_undef_op0_nnan(float %x) { define float @fmul_undef_op1_fast(float %x) { ; ANY-LABEL: fmul_undef_op1_fast: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fmul fast float %x, undef ret float %r @@ -154,7 +148,6 @@ define float @fmul_undef_op1_fast(float %x) { define float @fdiv_undef_op0_fast(float %x) { ; ANY-LABEL: fdiv_undef_op0_fast: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fdiv fast float undef, %x ret float %r @@ -163,7 +156,6 @@ define float @fdiv_undef_op0_fast(float %x) { define float @fdiv_undef_op1_nnan(float %x) { ; ANY-LABEL: fdiv_undef_op1_nnan: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fdiv nnan float %x, undef ret float %r @@ -172,7 +164,6 @@ define float @fdiv_undef_op1_nnan(float %x) { define float @frem_undef_op0_nnan(float %x) { ; ANY-LABEL: frem_undef_op0_nnan: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = frem nnan float undef, %x ret float %r @@ -181,7 +172,6 @@ define float @frem_undef_op0_nnan(float %x) { define float @frem_undef_op1_fast(float %x) { ; ANY-LABEL: frem_undef_op1_fast: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = frem fast float %x, undef ret float %r @@ -234,7 +224,6 @@ define double @frem_undef_undef(double %x) { define float @fadd_undef_op0_nnan_constant(float %x) { ; ANY-LABEL: fadd_undef_op0_nnan_constant: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fadd nnan float undef, 1.0 ret float %r @@ -252,7 +241,6 @@ define float @fadd_undef_op1_constant(float %x) { define float @fsub_undef_op0_fast_constant(float %x) { ; ANY-LABEL: fsub_undef_op0_fast_constant: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = 
fsub fast float undef, 3.0 ret float %r @@ -270,7 +258,6 @@ define float @fsub_undef_op1_constant(float %x) { define float @fmul_undef_op0_nnan_constant(float %x) { ; ANY-LABEL: fmul_undef_op0_nnan_constant: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fmul nnan float undef, 5.0 ret float %r @@ -288,7 +275,6 @@ define float @fmul_undef_op1_constant(float %x) { define float @fdiv_undef_op0_fast_constant(float %x) { ; ANY-LABEL: fdiv_undef_op0_fast_constant: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fdiv fast float undef, 7.0 ret float %r @@ -306,7 +292,6 @@ define float @fdiv_undef_op1_constant(float %x) { define float @frem_undef_op0_nnan_constant(float %x) { ; ANY-LABEL: frem_undef_op0_nnan_constant: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = frem nnan float undef, 9.0 ret float %r @@ -335,7 +320,6 @@ define double @fadd_undef_op0_constant_nan(double %x) { define double @fadd_undef_op1_fast_constant_nan(double %x) { ; ANY-LABEL: fadd_undef_op1_fast_constant_nan: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fadd fast double 0xFFF0000000000001, undef ret double %r @@ -353,7 +337,6 @@ define double @fsub_undef_op0_constant_nan(double %x) { define double @fsub_undef_op1_nnan_constant_nan(double %x) { ; ANY-LABEL: fsub_undef_op1_nnan_constant_nan: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fsub nnan double 0x7FF0000000000011, undef ret double %r @@ -371,7 +354,6 @@ define double @fmul_undef_op0_constant_nan(double %x) { define double @fmul_undef_op1_fast_constant_nan(double %x) { ; ANY-LABEL: fmul_undef_op1_fast_constant_nan: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fmul fast double 0xFFF0000000000101, undef ret double %r @@ -389,7 +371,6 @@ define double @fdiv_undef_op0_constant_nan(double %x) { define double @fdiv_undef_op1_nnan_constant_nan(double %x) { ; ANY-LABEL: fdiv_undef_op1_nnan_constant_nan: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fdiv nnan double 0x7FF0000000000111, undef ret double %r @@ -407,7 +388,6 @@ define double @frem_undef_op0_constant_nan(double %x) { define double @frem_undef_op1_fast_constant_nan(double %x) { ; ANY-LABEL: frem_undef_op1_fast_constant_nan: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = frem fast double 0xFFF0000000001001, undef ret double %r @@ -427,7 +407,6 @@ define double @fadd_undef_op0_constant_inf(double %x) { define double @fadd_undef_op1_fast_constant_inf(double %x) { ; ANY-LABEL: fadd_undef_op1_fast_constant_inf: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fadd fast double 0xFFF0000000000000, undef ret double %r @@ -445,7 +424,6 @@ define double @fsub_undef_op0_constant_inf(double %x) { define double @fsub_undef_op1_ninf_constant_inf(double %x) { ; ANY-LABEL: fsub_undef_op1_ninf_constant_inf: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fsub ninf double 0x7FF0000000000000, undef ret double %r @@ -463,7 +441,6 @@ define double @fmul_undef_op0_constant_inf(double %x) { define double @fmul_undef_op1_fast_constant_inf(double %x) { ; ANY-LABEL: fmul_undef_op1_fast_constant_inf: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fmul fast double 0xFFF0000000000000, undef ret double 
%r @@ -481,7 +458,6 @@ define double @fdiv_undef_op0_constant_inf(double %x) { define double @fdiv_undef_op1_ninf_constant_inf(double %x) { ; ANY-LABEL: fdiv_undef_op1_ninf_constant_inf: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fdiv ninf double 0x7FF0000000000000, undef ret double %r @@ -499,7 +475,6 @@ define double @frem_undef_op0_constant_inf(double %x) { define double @frem_undef_op1_fast_constant_inf(double %x) { ; ANY-LABEL: frem_undef_op1_fast_constant_inf: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = frem fast double 0xFFF0000000000000, undef ret double %r diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll index 338e66622dcd95..319eb6f5edc328 100644 --- a/llvm/test/CodeGen/X86/iabs.ll +++ b/llvm/test/CodeGen/X86/iabs.ll @@ -120,3 +120,50 @@ define i64 @test_i64(i64 %a) nounwind { ret i64 %abs } +define i128 @test_i128(i128 %a) nounwind { +; X86-LABEL: test_i128: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: sarl $31, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: addl %edx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: adcl %edx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: adcl %edx, %ebx +; X86-NEXT: adcl %edx, %ecx +; X86-NEXT: xorl %edx, %ecx +; X86-NEXT: xorl %edx, %ebx +; X86-NEXT: xorl %edx, %edi +; X86-NEXT: xorl %edx, %esi +; X86-NEXT: movl %esi, (%eax) +; X86-NEXT: movl %edi, 4(%eax) +; X86-NEXT: movl %ebx, 8(%eax) +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl $4 +; +; X64-LABEL: test_i128: +; X64: # %bb.0: +; X64-NEXT: movq %rsi, %rdx +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: sarq $63, %rcx +; X64-NEXT: addq %rcx, %rax +; X64-NEXT: adcq %rcx, %rdx +; X64-NEXT: xorq %rcx, %rax +; X64-NEXT: xorq %rcx, %rdx +; X64-NEXT: retq + %tmp1neg = sub i128 0, %a + %b = icmp sgt i128 %a, -1 + %abs = select i1 %b, i128 %a, i128 %tmp1neg + ret i128 %abs +} + diff --git a/llvm/test/CodeGen/X86/statepoint-vreg.ll b/llvm/test/CodeGen/X86/statepoint-vreg.ll index b613a949c273d9..66b984b905364a 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg.ll @@ -47,6 +47,7 @@ entry: call void @consume(i32 addrspace(1)* %rel1) ret i1 %res1 } + ; test pointer variables intermixed with pointer constants define void @test_mixed(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) gc "statepoint-example" { ; CHECK-LABEL: test_mixed: @@ -567,6 +568,28 @@ exceptional_return.right: ret i64 addrspace(1)* %val.relocated3 } +; test ISEL for constant base pointer - must properly tie operands +define void @test_const_base(i32 addrspace(1)* %a) gc "statepoint-example" { +; CHECK-LABEL: test_const_base: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: callq func +; CHECK-NEXT: .Ltmp24: +; CHECK-NEXT: movq %rbx, %rdi +; CHECK-NEXT: callq consume +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + %token1 = tail call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["deopt" (i32 0, i32 1, i32 7, i32 addrspace(1)* null, i32 9), "gc-live" (i32 addrspace(1)* null, i32 addrspace(1)* %a)] + %rel = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token1, i32 0, i32 1) + call void @consume(i32 addrspace(1)* %rel) + ret void +} + declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...) declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) diff --git a/llvm/test/CodeGen/X86/stores-merging.ll b/llvm/test/CodeGen/X86/stores-merging.ll index 6d6796d1c902dc..14dd43ed71a463 100644 --- a/llvm/test/CodeGen/X86/stores-merging.ll +++ b/llvm/test/CodeGen/X86/stores-merging.ll @@ -482,9 +482,8 @@ define void @trunc_i32_to_i16(i32 %x, i16* %p) { define void @be_i32_to_i16(i32 %x, i16* %p0) { ; CHECK-LABEL: be_i32_to_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: movw %di, 2(%rsi) -; CHECK-NEXT: shrl $16, %edi -; CHECK-NEXT: movw %di, (%rsi) +; CHECK-NEXT: rorl $16, %edi +; CHECK-NEXT: movl %edi, (%rsi) ; CHECK-NEXT: retq %sh1 = lshr i32 %x, 16 %t0 = trunc i32 %x to i16 @@ -498,10 +497,8 @@ define void @be_i32_to_i16(i32 %x, i16* %p0) { define void @be_i32_to_i16_order(i32 %x, i16* %p0) { ; CHECK-LABEL: be_i32_to_i16_order: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shrl $16, %eax -; CHECK-NEXT: movw %ax, (%rsi) -; CHECK-NEXT: movw %di, 2(%rsi) +; CHECK-NEXT: rorl $16, %edi +; CHECK-NEXT: movl %edi, (%rsi) ; CHECK-NEXT: retq %sh1 = lshr i32 %x, 16 %t0 = trunc i32 %x to i16 @@ -589,9 +586,8 @@ define void @trunc_i64_to_i32(i64 %x, i32* %p) { define void @be_i64_to_i32(i64 %x, i32* %p0) { ; CHECK-LABEL: be_i64_to_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, 4(%rsi) -; CHECK-NEXT: shrq $32, %rdi -; CHECK-NEXT: movl %edi, (%rsi) +; CHECK-NEXT: rorq $32, %rdi +; CHECK-NEXT: movq %rdi, (%rsi) ; CHECK-NEXT: retq %sh1 = lshr i64 %x, 32 %t0 = trunc i64 %x to i32 @@ -605,10 +601,8 @@ define void @be_i64_to_i32(i64 %x, i32* %p0) { define void @be_i64_to_i32_order(i64 %x, i32* %p0) { ; CHECK-LABEL: be_i64_to_i32_order: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: shrq $32, %rax -; CHECK-NEXT: movl %eax, (%rsi) -; CHECK-NEXT: movl %edi, 4(%rsi) +; CHECK-NEXT: rorq $32, %rdi +; CHECK-NEXT: movq %rdi, (%rsi) ; CHECK-NEXT: retq %sh1 = lshr i64 %x, 32 %t0 = trunc i64 %x to i32 diff --git a/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml b/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml index 8948bf92b7d76a..5aea820145cf73 100644 --- a/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml +++ b/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml @@ -239,3 +239,72 @@ DWARF: - AbbrCode: 0x00000000 Values: [] ... + +## Test generating and dumping an empty __debug_ranges section. + +# RUN: yaml2obj --docnum=2 %s | obj2yaml | FileCheck %s --check-prefix=EMPTY + +# EMPTY: DWARF: +# EMPTY-NEXT: debug_ranges: [] +# EMPTY-NEXT: ... 
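+## The mach-o document below is shared by both RUN lines: SIZE and CONTENT are
+## yaml2obj macros defaulting to an empty section, and the second RUN line
+## overrides them (-DSIZE=3 -DCONTENT='010203') to produce the truncated case.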
+ +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x0000000A + ncmds: 1 + sizeofcmds: 232 + flags: 0x00000000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DWARF + vmaddr: 0x00 + vmsize: 0x00 + fileoff: 0x00 + filesize: 0x00 + maxprot: 0 + initprot: 0 + nsects: 1 + flags: 0 + Sections: + - sectname: __debug_ranges + segname: __DWARF + addr: 0x00 + size: [[SIZE=0]] + offset: 0x210 + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: [[CONTENT=]] + +## Test generating and dumping a __debug_ranges section whose size isn't a +## multiple of the address size. This test case is to ensure that when the +## parser fails, the content of the __debug_ranges section will be dumped into +## the 'content' entry and the 'debug_ranges' entry will not exist. + +# RUN: yaml2obj --docnum=2 -DSIZE=3 -DCONTENT='010203' %s | obj2yaml | FileCheck %s --check-prefix=FAILS + +# FAILS-NOT: DWARF: +# FAILS: Sections: +# FAILS-NEXT: - sectname: __debug_ranges +# FAILS-NEXT: segname: __DWARF +# FAILS-NEXT: addr: 0x0000000000000000 +# FAILS-NEXT: size: 3 +# FAILS-NEXT: offset: 0x00000210 +# FAILS-NEXT: align: 0 +# FAILS-NEXT: reloff: 0x00000000 +# FAILS-NEXT: nreloc: 0 +# FAILS-NEXT: flags: 0x00000000 +# FAILS-NEXT: reserved1: 0x00000000 +# FAILS-NEXT: reserved2: 0x00000000 +# FAILS-NEXT: reserved3: 0x00000000 +# FAILS-NEXT: content: '010203' +# FAILS-NEXT: ... diff --git a/llvm/test/ObjectYAML/MachO/DWARF-debug_str.yaml b/llvm/test/ObjectYAML/MachO/DWARF-debug_str.yaml index 29247b334a1a91..9bb55ea3509118 100644 --- a/llvm/test/ObjectYAML/MachO/DWARF-debug_str.yaml +++ b/llvm/test/ObjectYAML/MachO/DWARF-debug_str.yaml @@ -321,3 +321,61 @@ DWARF: # EMPTY-STRING-NEXT: debug_str: # EMPTY-STRING-NEXT: - '' # EMPTY-STRING-NEXT: ... + +## d) Test generating and dumping a __debug_str section which contains a string without a null terminator. + +# RUN: yaml2obj --docnum=3 %s | obj2yaml | FileCheck %s --check-prefix=NO-TERMINATOR + +# NO-TERMINATOR-NOT: DWARF: +# NO-TERMINATOR: Sections: +# NO-TERMINATOR-NEXT: - sectname: __debug_str +# NO-TERMINATOR-NEXT: segname: __DWARF +# NO-TERMINATOR-NEXT: addr: 0x0000000000000000 +# NO-TERMINATOR-NEXT: size: 7 +# NO-TERMINATOR-NEXT: offset: 0x00000210 +# NO-TERMINATOR-NEXT: align: 0 +# NO-TERMINATOR-NEXT: reloff: 0x00000000 +# NO-TERMINATOR-NEXT: nreloc: 0 +# NO-TERMINATOR-NEXT: flags: 0x00000000 +# NO-TERMINATOR-NEXT: reserved1: 0x00000000 +# NO-TERMINATOR-NEXT: reserved2: 0x00000000 +# NO-TERMINATOR-NEXT: reserved3: 0x00000000 +# NO-TERMINATOR-NEXT: content: '61626300616263' +# NO-TERMINATOR-NEXT: ... 
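+## The mach-o document below (docnum=3) defines a 7-byte __debug_str section
+## whose content '61626300616263' is "abc\0abc"; the second string lacks a null
+## terminator, so obj2yaml dumps the raw 'content' instead of 'debug_str'.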
+ +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x0000000A + ncmds: 1 + sizeofcmds: 232 + flags: 0x00000000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DWARF + vmaddr: 0x00 + vmsize: 0x00 + fileoff: 0x00 + filesize: 0x00 + maxprot: 0 + initprot: 0 + nsects: 1 + flags: 0 + Sections: + - sectname: __debug_str + segname: __DWARF + addr: 0x00 + size: 7 + offset: 0x210 + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: '61626300616263' ## "abc\0abc" diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll index 421ddc2bdd3967..a50017ac733152 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll index 50d318198e1493..310abfba58d55c 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s 
--check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM @@ -44,7 +44,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11:#.*]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11:#.*]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12:#.*]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -57,7 +57,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11:#.*]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11:#.*]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12:#.*]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -138,7 +138,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 
dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -151,7 +151,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -232,7 +232,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -245,7 +245,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull 
writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12]]
; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
@@ -326,7 +326,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>*
; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
+; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]]
; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -339,7 +339,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>*
; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
+; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12]]
; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
@@ -418,7 +418,7 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
+; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]]
; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -431,7 +431,7 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
+; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]]
; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -508,7 +508,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>*
; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
+; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]]
; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -521,7 +521,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>*
; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
+; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]]
; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -600,7 +600,7 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar
; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
+; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]]
; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -613,7 +613,7 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar
; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
+; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12]]
; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
@@ -694,7 +694,7 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar
; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
+; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]]
; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -707,7 +707,7 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar
; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
+; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]]
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12]]
; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll
index 25729fb8933353..29f6a1bf6d3f5f 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll
@@ -4,8 +4,8 @@
; we don't do that anymore. It also verifies that the combination of
; globalopt and argpromotion is able to optimize the call safely.
;
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll
index 5e40294cdb27bc..64d5adaa75020c 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
index 3584172b242daa..932f9197e9ce16 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll
index ee411ec0c857ec..91bf46ca2148fc 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
;
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
index 4d8b20cb1cf3f4..5afeb2071d192a 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
;
diff --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll
index 03ca89fd1b08ae..26e4ce2679cccd 100644
--- a/llvm/test/Transforms/Attributor/callbacks.ll
+++ b/llvm/test/Transforms/Attributor/callbacks.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
@@ -115,6 +115,7 @@ declare !callback !0 void @t0_callback_broker(i32*, i32*, void (i32*, i32*, ...)
; we deduce and propagate noalias and others properly.
define void @t1_caller(i32* noalias %a) {
+;
; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@t1_caller
; IS__TUNIT_OPM-SAME: (i32* noalias nocapture align 256 [[A:%.*]]) {
; IS__TUNIT_OPM-NEXT: entry:
@@ -136,7 +137,7 @@ define void @t1_caller(i32* noalias %a) {
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32
; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64
-; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
; IS__TUNIT_NPM-NEXT: ret void
;
; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t1_caller
@@ -160,7 +161,7 @@ define void @t1_caller(i32* noalias %a) {
; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32
; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64
-; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
; IS__CGSCC_NPM-NEXT: ret void
;
entry:
@@ -190,7 +191,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
;
; IS________NPM: Function Attrs: nosync
; IS________NPM-LABEL: define {{[^@]+}}@t1_callback_callee
-; IS________NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* noalias nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) [[ATTR0:#.*]] {
+; IS________NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) [[ATTR0:#.*]] {
; IS________NPM-NEXT: entry:
; IS________NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
; IS________NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
@@ -236,7 +237,7 @@ define void @t2_caller(i32* noalias %a) {
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32
; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64
-; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
; IS__TUNIT_NPM-NEXT: ret void
;
; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t2_caller
@@ -260,7 +261,7 @@ define void @t2_caller(i32* noalias %a) {
; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32
; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64
-; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
; IS__CGSCC_NPM-NEXT: ret void
;
entry:
@@ -337,8 +338,8 @@ define void @t3_caller(i32* noalias %a) {
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32
; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64
-; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
-; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
; IS__TUNIT_NPM-NEXT: ret void
;
; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t3_caller
@@ -363,8 +364,8 @@ define void @t3_caller(i32* noalias %a) {
; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32
; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64
-; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
-; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
; IS__CGSCC_NPM-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/Attributor/chain.ll b/llvm/test/Transforms/Attributor/chain.ll
new file mode 100644
index 00000000000000..0306fe22c0b3c4
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/chain.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-max-initialization-chain-length=1 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_1
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-max-initialization-chain-length=1 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_1
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-max-initialization-chain-length=1024 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_5
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-max-initialization-chain-length=1024 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_5
+
+declare void @foo(i8* dereferenceable(8) %arg)
+
+define dso_local i32 @bar(i32* %arg) {
+; CHECK_1-LABEL: define {{[^@]+}}@bar
+; CHECK_1-SAME: (i32* dereferenceable_or_null(8) [[ARG:%.*]]) {
+; CHECK_1-NEXT: entry:
+; CHECK_1-NEXT: [[BC1:%.*]] = bitcast i32* [[ARG]] to i8*
+; CHECK_1-NEXT: call void @foo(i8* dereferenceable_or_null(8) [[BC1]])
+; CHECK_1-NEXT: [[LD:%.*]] = load i32, i32* [[ARG]], align 4
+; CHECK_1-NEXT: ret i32 [[LD]]
+;
+; CHECK_5-LABEL: define {{[^@]+}}@bar
+; CHECK_5-SAME: (i32* nonnull dereferenceable(8) [[ARG:%.*]]) {
+; CHECK_5-NEXT: entry:
+; CHECK_5-NEXT: [[BC1:%.*]] = bitcast i32* [[ARG]] to i8*
+; CHECK_5-NEXT: call void @foo(i8* nonnull dereferenceable(8) [[BC1]])
+; CHECK_5-NEXT: [[LD:%.*]] = load i32, i32* [[ARG]], align 4
+; CHECK_5-NEXT: ret i32 [[LD]]
+;
+entry:
+ %bc1 = bitcast i32* %arg to i8*
+ call void @foo(i8* %bc1)
+ %ld = load i32, i32* %arg
+ ret i32 %ld
+}
diff --git a/llvm/test/Transforms/Attributor/depgraph.ll b/llvm/test/Transforms/Attributor/depgraph.ll
index 791af581b22a0d..d7dc9d42f49b2b 100644
--- a/llvm/test/Transforms/Attributor/depgraph.ll
+++ b/llvm/test/Transforms/Attributor/depgraph.ll
@@ -51,88 +51,214 @@ define i32* @checkAndAdvance(i32* align 16 %0) {
; Check for graph
;
-; GRAPH: [AANoUnwind] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nounwind
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind
-; GRAPH: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind
-; GRAPH: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: [AANoSync] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nosync
-; GRAPH: updates [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync
-; GRAPH: updates [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync
-; GRAPH: [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nofree
-; GRAPH: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree
-; GRAPH: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree
-; GRAPH: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree
-; GRAPH: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree
-; GRAPH: [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state readonly
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly
-; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
-; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
-; GRAPH: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly
-; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
-; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument
-; GRAPH: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
-; GRAPH: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
-; GRAPH: [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state align<0-16>
-; GRAPH: updates [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16>
-; GRAPH: updates [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16>
-; GRAPH: [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull
-; GRAPH: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull
-; GRAPH: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull
-; GRAPH: [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly
-; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly
-; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly
-; GRAPH: [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree
-; GRAPH: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nofree
-; GRAPH: [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind
-; GRAPH: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live
-; GRAPH: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live
-; GRAPH: updates [AANoUnwind] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nounwind
-; GRAPH: [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
-; GRAPH: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live
-; GRAPH: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state readonly
-; GRAPH: [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH: [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly
-; GRAPH: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument
-; GRAPH: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument
-; GRAPH: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument
-; GRAPH: [AANonNull] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state nonnull
-; GRAPH: updates [AANonNull] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state nonnull
-; GRAPH: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nonnull
-; GRAPH: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull
-; GRAPH: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull
-; GRAPH: [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync
-; GRAPH: updates [AANoSync] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nosync
-; GRAPH: [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree
-; GRAPH: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nofree
-; GRAPH: [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
-; GRAPH: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument
-; GRAPH: [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16>
-; GRAPH: updates [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state align<0-16>
-; GRAPH: [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull
-; GRAPH: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull
+; GRAPH: [AAIsDead] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state Live[#BB 4/4][#TBEP 0][#KDE 1]
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state simplified
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAWillReturn] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state may-noreturn
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAUndefinedBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state undefined-behavior
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state simplified
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoUndef] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state may-undef-or-poison
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAReturnedValues] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state returns(#3)[#UC: 1]
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoUnwind] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nounwind
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind
+; GRAPH-NEXT: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind
+; GRAPH-NEXT: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoSync] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nosync
+; GRAPH-NEXT: updates [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync
+; GRAPH-NEXT: updates [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAIsDead] for CtxI ' %2 = load i32, i32* %0, align 4' at position {flt: [@-1]} with state assumed-live
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %2 = load i32, i32* %0, align 4' at position {flt: [@-1]} with state simplified
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAValueConstantRange] for CtxI ' %2 = load i32, i32* %0, align 4' at position {flt: [@-1]} with state range(32)
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAPotentialValues] for CtxI ' %2 = load i32, i32* %0, align 4' at position {flt: [@-1]} with state set-state(< {full-set} >)
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAIsDead] for CtxI ' %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state assumed-live
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAIsDead] for CtxI ' br i1 %3, label %4, label %7' at position {flt: [@-1]} with state assumed-live
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nofree
+; GRAPH-NEXT: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree
+; GRAPH-NEXT: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree
+; GRAPH-NEXT: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree
+; GRAPH-NEXT: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoReturn] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state may-return
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoRecurse] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state may-recurse
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state readonly
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly
+; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
+; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
+; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly
+; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
+; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument
+; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
+; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAHeapToStack] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state [H2S] Mallocs: 0
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAIsDead] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state assumed-live
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state simplified
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state align<0-16>
+; GRAPH-NEXT: updates [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16>
+; GRAPH-NEXT: updates [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16>
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull
+; GRAPH-NEXT: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull
+; GRAPH-NEXT: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoAlias] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state may-alias
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AADereferenceable] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state unknown-dereferenceable
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAIsDead] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed-live
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoUndef] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state may-undef-or-poison
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nonnull
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoAlias] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state may-alias
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AADereferenceable] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state dereferenceable<4-4>
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AADereferenceable] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state unknown-dereferenceable
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nonnull
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state align<16-16>
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state align<16-16>
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly
+; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly
+; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree
+; GRAPH-NEXT: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nofree
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAPrivatizablePtr] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state [no-priv]
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind
+; GRAPH-NEXT: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live
+; GRAPH-NEXT: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live
+; GRAPH-NEXT: updates [AANoUnwind] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nounwind
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
+; GRAPH-NEXT: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live
+; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state readonly
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed-live
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state simplified
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoUndef] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state may-undef-or-poison
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoAlias] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state may-alias
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly
+; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument
+; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument
+; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nofree
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAValueConstantRange] for CtxI ' %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state range(1)
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAValueConstantRange] for CtxI <> at position {flt: [@-1]} with state range(32)<[0,1) / [0,1)>
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAPotentialValues] for CtxI ' %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state set-state(< {full-set} >)
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoReturn] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state may-return
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoAlias] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state may-alias
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state simplified
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoUndef] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state may-undef-or-poison
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state simplified
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAAlign] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state align<16-16>
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANonNull] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state nonnull
+; GRAPH-NEXT: updates [AANonNull] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state nonnull
+; GRAPH-NEXT: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nonnull
+; GRAPH-NEXT: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull
+; GRAPH-NEXT: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAIsDead] for CtxI ' ret i32* %.0' at position {flt: [@-1]} with state assumed-live
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAIsDead] for CtxI ' br label %8' at position {flt: [@-1]} with state assumed-live
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAIsDead] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state assumed-live
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAWillReturn] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state may-noreturn
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoRecurse] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state may-recurse
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync
+; GRAPH-NEXT: updates [AANoSync] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nosync
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree
+; GRAPH-NEXT: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nofree
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
+; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAIsDead] for CtxI ' br label %8' at position {flt: [@-1]} with state assumed-live
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16>
+; GRAPH-NEXT: updates [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state align<0-16>
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AADereferenceable] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state unknown-dereferenceable
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull
+; GRAPH-NEXT: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AADereferenceable] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state unknown-dereferenceable
+
; GRAPH-NOT: update
;
diff --git a/llvm/test/Transforms/Attributor/dereferenceable-2.ll b/llvm/test/Transforms/Attributor/dereferenceable-2.ll
index aa3130e4a3190a..816e5c47ef35ba 100644
--- a/llvm/test/Transforms/Attributor/dereferenceable-2.ll
+++ b/llvm/test/Transforms/Attributor/dereferenceable-2.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s
--check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll index 3c34419a960d43..27774c525c4e03 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll @@ -428,9 +428,8 @@ define void @test11() { ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test11() { -; IS________NPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) +; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 ; IS________NPM-NEXT: tail call void @sync_will_return(i8* [[TMP1]]) [[ATTR6]] -; IS________NPM-NEXT: tail call void @free(i8* nocapture [[TMP1]]) ; IS________NPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) @@ -739,10 +738,9 @@ define void @test16c(i8 %v, i8** %P) { ; ; IS________NPM-LABEL: define {{[^@]+}}@test16c ; IS________NPM-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) { -; IS________NPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) +; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 ; IS________NPM-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 ; IS________NPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) [[ATTR6]] -; IS________NPM-NEXT: tail call void @free(i8* nocapture [[TMP1]]) ; IS________NPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) diff --git a/llvm/test/Transforms/Attributor/internalize.ll b/llvm/test/Transforms/Attributor/internalize.ll index 8a244b5c998c39..3e485382e9be0f 100644 --- a/llvm/test/Transforms/Attributor/internalize.ll +++ b/llvm/test/Transforms/Attributor/internalize.ll @@ -12,16 +12,14 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=8 -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM,CHECK_ENABLED,NOT_CGSCC_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,IS__TUNIT_____ENABLED,IS________NPM_ENABLED,IS__TUNIT_NPM_ENABLED ; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM,CHECK_ENABLED,NOT_TUNIT_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,IS__CGSCC_____ENABLED,IS________OPM_ENABLED,IS__CGSCC_OPM_ENABLED ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc 
-attributor-manifest-internal -attributor-annotate-decl-cs -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM,CHECK_ENABLED,NOT_TUNIT_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,NOT_CGSCC_OPM_ENABLED,IS__CGSCC_____ENABLED,IS________NPM_ENABLED,IS__CGSCC_NPM_ENABLED
-; RUN: opt -attributor -attributor-cgscc -disable-inlining -attributor-allow-deep-wrappers -S < %s | FileCheck %s --check-prefix=DWRAPPER
; TEST 1: This function is of linkage `linkonce`, so we cannot internalize this
; function and use information derived from it
;
-; DWRAPPER-NOT: Function Attrs
-; DWRAPPER-NOT: inner1.internalized
+; CHECK-NOT: inner1.internalized
define linkonce i32 @inner1(i32 %a, i32 %b) {
; CHECK-LABEL: define {{[^@]+}}@inner1
-; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
; CHECK-NEXT: ret i32 [[C]]
@@ -34,11 +32,10 @@ entry:
; TEST 2: This function is of linkage `weak`, so we cannot internalize this function and
; use information derived from it
;
-; DWRAPPER-NOT: Function Attrs
-; DWRAPPER-NOT: inner2.internalized
+; CHECK-NOT: inner2.internalized
define weak i32 @inner2(i32 %a, i32 %b) {
; CHECK-LABEL: define {{[^@]+}}@inner2
-; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
; CHECK-NEXT: ret i32 [[C]]
@@ -51,17 +48,12 @@ entry:
; TEST 3: This function is of linkage `linkonce_odr`, which can be internalized using the
; deep wrapper, and the IP information derived from this function can be used
;
-; DWRAPPER: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; DWRAPPER: define private i32 @inner3.internalized(i32 %a, i32 %b)
-; DWRAPPER-NEXT: entry:
-; DWRAPPER-NEXT: %c = add i32 %a, %b
-; DWRAPPER-NEXT: ret i32 %c
define linkonce_odr i32 @inner3(i32 %a, i32 %b) {
-; CHECK-LABEL: define {{[^@]+}}@inner3
-; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
-; CHECK-NEXT: ret i32 [[C]]
+; CHECK_DISABLED-LABEL: define {{[^@]+}}@inner3
+; CHECK_DISABLED-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK_DISABLED-NEXT: entry:
+; CHECK_DISABLED-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK_DISABLED-NEXT: ret i32 [[C]]
;
entry:
%c = add i32 %a, %b
@@ -71,17 +63,12 @@ entry:
; TEST 4: This function is of linkage `weak_odr`, which can be internalized using the deep
; wrapper
;
-; DWRAPPER: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; DWRAPPER: define private i32 @inner4.internalized(i32 %a, i32 %b)
-; DWRAPPER-NEXT: entry:
-; DWRAPPER-NEXT: %c = add i32 %a, %b
-; DWRAPPER-NEXT: ret i32 %c
define weak_odr i32 @inner4(i32 %a, i32 %b) {
-; CHECK-LABEL: define {{[^@]+}}@inner4
-; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
-; CHECK-NEXT: ret i32 [[C]]
+; CHECK_DISABLED-LABEL: define {{[^@]+}}@inner4
+; CHECK_DISABLED-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK_DISABLED-NEXT: entry:
+; CHECK_DISABLED-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK_DISABLED-NEXT: ret i32 [[C]]
;
entry:
%c = add i32 %a, %b
@@ -91,10 +78,10 @@ entry:
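; A note on why only the two `_odr` cases above may be internalized: the ODR guarantees
; that every definition of such a symbol is equivalent, so the Attributor can analyze a
; private copy, previously checked via the DWRAPPER prefix as, e.g.,
;   define private i32 @inner3.internalized(i32 %a, i32 %b)
; without the risk that the linker selects a different definition. Plain `linkonce` and
; `weak` definitions carry exactly that risk, so their IP information cannot be trusted.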
; TEST 5: This function has linkage `linkonce_odr` but is never called (number of uses = 0), so there
; is no need to internalize it
;
-; DWRAPPER-NOT: inner5.internalized
+; CHECK-NOT: inner5.internalized
define linkonce_odr i32 @inner5(i32 %a, i32 %b) {
; CHECK-LABEL: define {{[^@]+}}@inner5
-; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
; CHECK-NEXT: ret i32 [[C]]
@@ -109,16 +96,8 @@ entry:
; Since inner3 is internalized, uses of the original function should be replaced by the
; copied one
;
-; DWRAPPER-NOT: call i32 @inner1.internalized
-; DWRAPPER: call i32 @inner1
-; DWRAPPER-NOT: call i32 @inner2.internalized
-; DWRAPPER: call i32 @inner2
-; DWRAPPER-NOT: call i32 @inner3
-; DWRAPPER: call i32 @inner3.internalized
-; DWRAPPER-NOT: call i32 @inner4
-; DWRAPPER: call i32 @inner4.internalized
define i32 @outer1() {
-; CHECK_DISABLED-LABEL: define {{[^@]+}}@outer1()
+; CHECK_DISABLED-LABEL: define {{[^@]+}}@outer1() {
; CHECK_DISABLED-NEXT: entry:
; CHECK_DISABLED-NEXT: [[RET1:%.*]] = call i32 @inner1(i32 noundef 1, i32 noundef 2)
; CHECK_DISABLED-NEXT: [[RET2:%.*]] = call i32 @inner2(i32 noundef 1, i32 noundef 2)
@@ -126,7 +105,7 @@ define i32 @outer1() {
; CHECK_DISABLED-NEXT: [[RET4:%.*]] = call i32 @inner4(i32 [[RET3]], i32 [[RET3]])
; CHECK_DISABLED-NEXT: ret i32 [[RET4]]
;
-; CHECK_ENABLED-LABEL: define {{[^@]+}}@outer1()
+; CHECK_ENABLED-LABEL: define {{[^@]+}}@outer1() {
; CHECK_ENABLED-NEXT: entry:
; CHECK_ENABLED-NEXT: [[RET1:%.*]] = call i32 @inner1(i32 noundef 1, i32 noundef 2)
; CHECK_ENABLED-NEXT: [[RET2:%.*]] = call i32 @inner2(i32 noundef 1, i32 noundef 2)
@@ -145,29 +124,38 @@ entry:
define linkonce_odr void @unused_arg(i8) {
; CHECK_DISABLED-LABEL: define {{[^@]+}}@unused_arg
-; CHECK_DISABLED-SAME: (i8 [[TMP0:%.*]])
+; CHECK_DISABLED-SAME: (i8 [[TMP0:%.*]]) {
; CHECK_DISABLED-NEXT: unreachable
;
unreachable
}
define void @unused_arg_caller() {
-; CHECK_DISABLED-LABEL: define {{[^@]+}}@unused_arg_caller()
+; CHECK_DISABLED-LABEL: define {{[^@]+}}@unused_arg_caller() {
; CHECK_DISABLED-NEXT: call void @unused_arg(i8 noundef 0)
; CHECK_DISABLED-NEXT: ret void
;
; IS__TUNIT_____ENABLED: Function Attrs: nofree noreturn nosync nounwind readnone willreturn
-; IS__TUNIT_____ENABLED-LABEL: define {{[^@]+}}@unused_arg_caller()
+; IS__TUNIT_____ENABLED-LABEL: define {{[^@]+}}@unused_arg_caller
+; IS__TUNIT_____ENABLED-SAME: () [[ATTR1:#.*]] {
; IS__TUNIT_____ENABLED-NEXT: unreachable
;
; IS__CGSCC_____ENABLED: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn
-; IS__CGSCC_____ENABLED-LABEL: define {{[^@]+}}@unused_arg_caller()
+; IS__CGSCC_____ENABLED-LABEL: define {{[^@]+}}@unused_arg_caller
+; IS__CGSCC_____ENABLED-SAME: () [[ATTR2:#.*]] {
; IS__CGSCC_____ENABLED-NEXT: unreachable
-;
-; DWRAPPER: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn
-; DWRAPPER-LABEL: define {{[^@]+}}@unused_arg_caller()
-; DWRAPPER-NEXT: unreachable
;
call void @unused_arg(i8 0)
ret void
}
+
+; Don't crash on linkonce_odr hidden functions
+define linkonce_odr hidden void @__clang_call_terminate() {
+; CHECK_DISABLED-LABEL: define {{[^@]+}}@__clang_call_terminate() {
+; CHECK_DISABLED-NEXT: call void @__clang_call_terminate()
+; CHECK_DISABLED-NEXT: unreachable
+;
+ call void @__clang_call_terminate()
+ unreachable
+}
+
diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll
index ea36bb5f66e8ca..8919cf66cbb9b1 100644
--- a/llvm/test/Transforms/Attributor/liveness.ll
+++ b/llvm/test/Transforms/Attributor/liveness.ll
@@ -854,22 +854,22 @@ define
internal void @middle() { ; NOT_CGSCC_NPM-NEXT: call void @non_dead_b3() [[ATTR11]] ; NOT_CGSCC_NPM-NEXT: br label [[BB1:%.*]] ; NOT_CGSCC_NPM: bb1: -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b4() [[ATTR2:#.*]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b5() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b6() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b7() [[ATTR2]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b4() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b5() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b6() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b7() [[ATTR11]] ; NOT_CGSCC_NPM-NEXT: br label [[BB2:%.*]] ; NOT_CGSCC_NPM: bb2: -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b8() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b9() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b10() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b11() [[ATTR2]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b8() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b9() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b10() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b11() [[ATTR11]] ; NOT_CGSCC_NPM-NEXT: br label [[BB3:%.*]] ; NOT_CGSCC_NPM: bb3: -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b12() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b13() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b14() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b15() [[ATTR2]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b12() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b13() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b14() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b15() [[ATTR11]] ; NOT_CGSCC_NPM-NEXT: br label [[BB4:%.*]] ; NOT_CGSCC_NPM: bb4: ; NOT_CGSCC_NPM-NEXT: call void @non_exact2() diff --git a/llvm/test/Transforms/Attributor/misc.ll b/llvm/test/Transforms/Attributor/misc.ll index 3fa65e07a5162e..a5c4556ac04173 100644 --- a/llvm/test/Transforms/Attributor/misc.ll +++ b/llvm/test/Transforms/Attributor/misc.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s 
--check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; diff --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll index e7e47d42f45664..a4c05fb4ca29d5 100644 --- a/llvm/test/Transforms/Attributor/noalias.ll +++ b/llvm/test/Transforms/Attributor/noalias.ll @@ -1,8 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM -; TODO: The old pass manager cgscc run is disabled as it causes a crash on windows which is under investigation: http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/23151 -; opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; TEST 1 - negative. 
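; NOTE: With the old-PM cgscc run re-enabled above, the set of RUN lines covered by each
; shared FileCheck prefix changes; this is why the regenerated assertions below move from
; the NOT_CGSCC_NPM prefix to the IS__TUNIT____ / IS__CGSCC____ prefixes.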
@@ -42,10 +41,10 @@ define i8* @return_noalias(){ } define void @nocapture(i8* %a){ -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@nocapture -; NOT_CGSCC_NPM-SAME: (i8* nocapture nofree readnone [[A:%.*]]) [[ATTR0:#.*]] { -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@nocapture +; IS__TUNIT____-SAME: (i8* nocapture nofree readnone [[A:%.*]]) [[ATTR0:#.*]] { +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@nocapture @@ -145,10 +144,10 @@ declare i8* @baz(...) nounwind uwtable ; Returning global pointer. Should not be noalias. define i8** @getter() { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@getter -; NOT_CGSCC_NPM-SAME: () [[ATTR0]] { -; NOT_CGSCC_NPM-NEXT: ret i8** @G +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@getter +; IS__TUNIT____-SAME: () [[ATTR0]] { +; IS__TUNIT____-NEXT: ret i8** @G ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@getter @@ -160,10 +159,10 @@ define i8** @getter() { ; Returning global pointer. Should not be noalias. define i8** @calle1(){ -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@calle1 -; NOT_CGSCC_NPM-SAME: () [[ATTR0]] { -; NOT_CGSCC_NPM-NEXT: ret i8** @G +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@calle1 +; IS__TUNIT____-SAME: () [[ATTR0]] { +; IS__TUNIT____-NEXT: ret i8** @G ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@calle1 @@ -410,6 +409,7 @@ define void @test12_3(){ } define void @test12_4(){ +; ; IS________OPM-LABEL: define {{[^@]+}}@test12_4() { ; IS________OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) ; IS________OPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) @@ -422,17 +422,17 @@ define void @test12_4(){ ; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) ; IS________OPM-NEXT: ret void ; -; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test12_4() { -; NOT_TUNIT_OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) -; NOT_TUNIT_OPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) -; NOT_TUNIT_OPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 -; NOT_TUNIT_OPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 -; NOT_TUNIT_OPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* noalias nocapture [[A]], i8* noalias nocapture [[B]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_0]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_1]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) -; NOT_TUNIT_OPM-NEXT: ret void +; IS________NPM-LABEL: define {{[^@]+}}@test12_4() { +; IS________NPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) +; IS________NPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) +; 
IS________NPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 +; IS________NPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 +; IS________NPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 +; IS________NPM-NEXT: tail call void @two_args(i8* noalias nocapture [[A]], i8* noalias nocapture [[B]]) +; IS________NPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_0]]) +; IS________NPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_1]]) +; IS________NPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) +; IS________NPM-NEXT: ret void ; %A = tail call noalias i8* @malloc(i64 4) %B = tail call noalias i8* @malloc(i64 4) @@ -470,12 +470,6 @@ define void @test13_use_noalias(){ ; CHECK-NEXT: call void @use_i8_internal(i8* noalias nocapture [[C2]]) ; CHECK-NEXT: ret void ; -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test13_use_noalias() -; IS__CGSCC_OPM-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS__CGSCC_OPM-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* -; IS__CGSCC_OPM-NEXT: [[C2:%.*]] = bitcast i16* [[C1]] to i8* -; IS__CGSCC_OPM-NEXT: call void @use_i8_internal(i8* noalias [[C2]]) -; IS__CGSCC_OPM-NEXT: ret void %m1 = tail call noalias i8* @malloc(i64 4) %c1 = bitcast i8* %m1 to i16* %c2 = bitcast i16* %c1 to i8* @@ -504,11 +498,11 @@ define void @test13_use_alias(){ ; TEST 14 i2p casts define internal i32 @p2i(i32* %arg) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@p2i -; NOT_CGSCC_NPM-SAME: (i32* noalias nofree readnone [[ARG:%.*]]) [[ATTR0]] { -; NOT_CGSCC_NPM-NEXT: [[P2I:%.*]] = ptrtoint i32* [[ARG]] to i32 -; NOT_CGSCC_NPM-NEXT: ret i32 [[P2I]] +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@p2i +; IS__TUNIT____-SAME: (i32* noalias nofree readnone [[ARG:%.*]]) [[ATTR0]] { +; IS__TUNIT____-NEXT: [[P2I:%.*]] = ptrtoint i32* [[ARG]] to i32 +; IS__TUNIT____-NEXT: ret i32 [[P2I]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@p2i @@ -521,14 +515,14 @@ define internal i32 @p2i(i32* %arg) { } define i32 @i2p(i32* %arg) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readonly willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@i2p -; NOT_CGSCC_NPM-SAME: (i32* nofree readonly [[ARG:%.*]]) [[ATTR4:#.*]] { -; NOT_CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @p2i(i32* noalias nofree readnone [[ARG]]) [[ATTR0]] -; NOT_CGSCC_NPM-NEXT: [[I2P:%.*]] = inttoptr i32 [[C]] to i8* -; NOT_CGSCC_NPM-NEXT: [[BC:%.*]] = bitcast i8* [[I2P]] to i32* -; NOT_CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree readonly align 4 [[BC]]) [[ATTR4]] -; NOT_CGSCC_NPM-NEXT: ret i32 [[CALL]] +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readonly willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@i2p +; IS__TUNIT____-SAME: (i32* nofree readonly [[ARG:%.*]]) [[ATTR4:#.*]] { +; IS__TUNIT____-NEXT: [[C:%.*]] = call i32 @p2i(i32* noalias nofree readnone [[ARG]]) [[ATTR0]] +; IS__TUNIT____-NEXT: [[I2P:%.*]] = inttoptr i32 [[C]] to i8* +; IS__TUNIT____-NEXT: [[BC:%.*]] = bitcast i8* [[I2P]] to i32* +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree readonly align 4 [[BC]]) [[ATTR4]] +; IS__TUNIT____-NEXT: ret i32 [[CALL]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@i2p @@ 
-546,11 +540,11 @@ define i32 @i2p(i32* %arg) { ret i32 %call } define internal i32 @ret(i32* %arg) { -; NOT_CGSCC_NPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@ret -; NOT_CGSCC_NPM-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) [[ATTR5:#.*]] { -; NOT_CGSCC_NPM-NEXT: [[L:%.*]] = load i32, i32* [[ARG]], align 4 -; NOT_CGSCC_NPM-NEXT: ret i32 [[L]] +; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@ret +; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) [[ATTR5:#.*]] { +; IS__TUNIT____-NEXT: [[L:%.*]] = load i32, i32* [[ARG]], align 4 +; IS__TUNIT____-NEXT: ret i32 [[L]] ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@ret @@ -572,29 +566,17 @@ define internal i32 @ret(i32* %arg) { ; Function Attrs: nounwind optsize define internal fastcc double @strtox(i8* %s, i8** %p, i32 %prec) unnamed_addr { -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@strtox -; NOT_CGSCC_NPM-SAME: (i8* [[S:%.*]]) unnamed_addr { -; NOT_CGSCC_NPM-NEXT: entry: -; NOT_CGSCC_NPM-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 -; NOT_CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* -; NOT_CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) [[ATTR10:#.*]] -; NOT_CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) -; NOT_CGSCC_NPM-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 noundef 0) -; NOT_CGSCC_NPM-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 noundef 1, i32 noundef 1) -; NOT_CGSCC_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) -; NOT_CGSCC_NPM-NEXT: ret double [[CALL1]] -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@strtox -; IS__CGSCC____-SAME: (i8* noalias [[S:%.*]]) unnamed_addr { -; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 -; IS__CGSCC____-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* -; IS__CGSCC____-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) [[ATTR10]] -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) -; IS__CGSCC____-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 noundef 0) -; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 noundef 1, i32 noundef 1) -; IS__CGSCC____-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) -; IS__CGSCC____-NEXT: ret double [[CALL1]] +; CHECK-LABEL: define {{[^@]+}}@strtox +; CHECK-SAME: (i8* noalias [[S:%.*]]) unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 +; 
CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) [[ATTR10:#.*]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) +; CHECK-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 noundef 0) +; CHECK-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 noundef 1, i32 noundef 1) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) +; CHECK-NEXT: ret double [[CALL1]] ; entry: %f = alloca %struct._IO_FILE, align 8 @@ -642,11 +624,11 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) @alias_of_p = external global i32* define void @make_alias(i32* %p) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@make_alias -; NOT_CGSCC_NPM-SAME: (i32* nofree writeonly [[P:%.*]]) [[ATTR7:#.*]] { -; NOT_CGSCC_NPM-NEXT: store i32* [[P]], i32** @alias_of_p, align 8 -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@make_alias +; IS__TUNIT____-SAME: (i32* nofree writeonly [[P:%.*]]) [[ATTR7:#.*]] { +; IS__TUNIT____-NEXT: store i32* [[P]], i32** @alias_of_p, align 8 +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@make_alias @@ -659,11 +641,11 @@ define void @make_alias(i32* %p) { } define void @only_store(i32* %p) { -; NOT_CGSCC_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@only_store -; NOT_CGSCC_NPM-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) [[ATTR8:#.*]] { -; NOT_CGSCC_NPM-NEXT: store i32 0, i32* [[P]], align 4 -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@only_store +; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) [[ATTR8:#.*]] { +; IS__TUNIT____-NEXT: store i32 0, i32* [[P]], align 4 +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@only_store @@ -676,17 +658,17 @@ define void @only_store(i32* %p) { } define void @test15_caller(i32* noalias %p, i32 %c) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test15_caller -; NOT_CGSCC_NPM-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { -; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 -; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; NOT_CGSCC_NPM: if.then: -; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: br label [[IF_END]] -; NOT_CGSCC_NPM: if.end: -; NOT_CGSCC_NPM-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: ret void +; 
IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test15_caller
+; IS__TUNIT____-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] {
+; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0
+; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; IS__TUNIT____: if.then:
+; IS__TUNIT____-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) [[ATTR7]]
+; IS__TUNIT____-NEXT: br label [[IF_END]]
+; IS__TUNIT____: if.end:
+; IS__TUNIT____-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]]
+; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly
; IS__CGSCC____-LABEL: define {{[^@]+}}@test15_caller
@@ -733,23 +715,23 @@ if.end:
; Therefore, only one of the two conditions of the if statements will be fulfilled.
define internal void @test16_sub(i32* noalias %p, i32 %c1, i32 %c2) {
-; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly
-; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test16_sub
-; NOT_CGSCC_NPM-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C1:%.*]], i32 [[C2:%.*]]) [[ATTR7]] {
-; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C1]], 0
-; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
-; NOT_CGSCC_NPM: if.then:
-; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) [[ATTR7]]
-; NOT_CGSCC_NPM-NEXT: tail call void @make_alias(i32* nofree writeonly align 4 [[P]]) [[ATTR7]]
-; NOT_CGSCC_NPM-NEXT: br label [[IF_END]]
-; NOT_CGSCC_NPM: if.end:
-; NOT_CGSCC_NPM-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C2]], 0
-; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END3:%.*]]
-; NOT_CGSCC_NPM: if.then2:
-; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]]
-; NOT_CGSCC_NPM-NEXT: br label [[IF_END3]]
-; NOT_CGSCC_NPM: if.end3:
-; NOT_CGSCC_NPM-NEXT: ret void
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test16_sub
+; IS__TUNIT____-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C1:%.*]], i32 [[C2:%.*]]) [[ATTR7]] {
+; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C1]], 0
+; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; IS__TUNIT____: if.then:
+; IS__TUNIT____-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) [[ATTR7]]
+; IS__TUNIT____-NEXT: tail call void @make_alias(i32* nofree writeonly align 4 [[P]]) [[ATTR7]]
+; IS__TUNIT____-NEXT: br label [[IF_END]]
+; IS__TUNIT____: if.end:
+; IS__TUNIT____-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C2]], 0
+; IS__TUNIT____-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END3:%.*]]
+; IS__TUNIT____: if.then2:
+; IS__TUNIT____-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]]
+; IS__TUNIT____-NEXT: br label [[IF_END3]]
+; IS__TUNIT____: if.end3:
+; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly
; IS__CGSCC____-LABEL: define {{[^@]+}}@test16_sub
@@ -790,11 +772,11 @@ if.end3:
}
define void @test16_caller(i32* %p, i32 %c) {
-; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly
-; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test16_caller
-;
NOT_CGSCC_NPM-SAME: (i32* nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { -; NOT_CGSCC_NPM-NEXT: tail call void @test16_sub(i32* noalias nofree writeonly [[P]], i32 [[C]], i32 [[C]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@test16_caller +; IS__TUNIT____-SAME: (i32* nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { +; IS__TUNIT____-NEXT: tail call void @test16_sub(i32* noalias nofree writeonly [[P]], i32 [[C]], i32 [[C]]) [[ATTR7]] +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test16_caller @@ -826,20 +808,20 @@ define void @test16_caller(i32* %p, i32 %c) { ; } define void @test17_caller(i32* noalias %p, i32 %c) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test17_caller -; NOT_CGSCC_NPM-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { -; NOT_CGSCC_NPM-NEXT: entry: -; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 -; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] -; NOT_CGSCC_NPM: l1: -; NOT_CGSCC_NPM-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: br label [[L3:%.*]] -; NOT_CGSCC_NPM: l2: -; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: br label [[L3]] -; NOT_CGSCC_NPM: l3: -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@test17_caller +; IS__TUNIT____-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] +; IS__TUNIT____: l1: +; IS__TUNIT____-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: br label [[L3:%.*]] +; IS__TUNIT____: l2: +; IS__TUNIT____-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: br label [[L3]] +; IS__TUNIT____: l3: +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test17_caller @@ -884,10 +866,10 @@ l3: ; } define void @noreturn() { -; NOT_CGSCC_NPM: Function Attrs: nofree noreturn nosync nounwind readnone willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@noreturn -; NOT_CGSCC_NPM-SAME: () [[ATTR9:#.*]] { -; NOT_CGSCC_NPM-NEXT: unreachable +; IS__TUNIT____: Function Attrs: nofree noreturn nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@noreturn +; IS__TUNIT____-SAME: () [[ATTR9:#.*]] { +; IS__TUNIT____-NEXT: unreachable ; ; IS__CGSCC____: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@noreturn @@ -899,18 +881,18 @@ define void @noreturn() { } define void @test18_caller(i32* noalias %p, i32 %c) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test18_caller -; NOT_CGSCC_NPM-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { -; NOT_CGSCC_NPM-NEXT: entry: -; 
NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 -; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] -; NOT_CGSCC_NPM: l1: -; NOT_CGSCC_NPM-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: unreachable -; NOT_CGSCC_NPM: l2: -; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@test18_caller +; IS__TUNIT____-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] +; IS__TUNIT____: l1: +; IS__TUNIT____-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: unreachable +; IS__TUNIT____: l2: +; IS__TUNIT____-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test18_caller diff --git a/llvm/test/Transforms/Attributor/nofree.ll b/llvm/test/Transforms/Attributor/nofree.ll index 6cbaf71a01e399..b459527fe2eda4 100644 --- a/llvm/test/Transforms/Attributor/nofree.ll +++ b/llvm/test/Transforms/Attributor/nofree.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/noundef.ll b/llvm/test/Transforms/Attributor/noundef.ll index 34142af9ef8cd8..211338eefa0b9f 100644 --- a/llvm/test/Transforms/Attributor/noundef.ll +++ 
b/llvm/test/Transforms/Attributor/noundef.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Coroutines/coro-param-copy.ll b/llvm/test/Transforms/Coroutines/coro-param-copy.ll index 5967a05226fdb2..da08c4f15e156f 100644 --- a/llvm/test/Transforms/Coroutines/coro-param-copy.ll +++ b/llvm/test/Transforms/Coroutines/coro-param-copy.ll @@ -5,22 +5,37 @@ define i8* @f() "coroutine.presplit"="1" { entry: + %a.addr = alloca i64 ; read-only before coro.begin + %a = load i64, i64* %a.addr ; cannot modify the value, don't need to copy + %x.addr = alloca i64 - call void @use(i64* %x.addr) ; might write to %x + call void @use(i64* %x.addr) ; uses %x.addr before coro.begin + %y.addr = alloca i64 - %y = load i64, i64* %y.addr ; cannot modify the value, don't need to copy - call void @print(i64 %y) + %y.cast = bitcast i64* %y.addr to i8* ; alias created and used after coro.begin + + %z.addr = alloca i64 + %flag = call i1 @check() + br i1 %flag, label %flag_true, label %flag_merge + +flag_true: + call void @use(i64* %z.addr) ; conditionally used %z.addr + br label %flag_merge +flag_merge: %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) %size = call i32 @llvm.coro.size.i32() - %alloc = call i8* @myAlloc(i64 %y, i32 %size) + %alloc = call i8* @myAlloc(i32 %size) %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + call void @llvm.memset.p0i8.i32(i8* %y.cast, i8 1, i32 4, i1 false) %0 = call i8 @llvm.coro.suspend(token none, i1 false) switch i8 %0, label %suspend [i8 0, label %resume i8 1, label %cleanup] resume: + call void @use(i64* %a.addr) call void @use(i64* %x.addr) call void @use(i64* %y.addr) + call void @use(i64* %z.addr) br label %cleanup cleanup: @@ -33,26 +48,36 @@ suspend: } ; See that we added both x 
and y to the frame, along with a and z.
-; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i1 }
+; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i64, i64, i1 }
; See that all of the uses prior to coro-begin stay put.
; CHECK-LABEL: define i8* @f() {
; CHECK-NEXT: entry:
+; CHECK-NEXT: %a.addr = alloca i64
; CHECK-NEXT: %x.addr = alloca i64
; CHECK-NEXT: call void @use(i64* %x.addr)
; CHECK-NEXT: %y.addr = alloca i64
-; CHECK-NEXT: %y = load i64, i64* %y.addr
-; CHECK-NEXT: call void @print(i64 %y)
+; CHECK-NEXT: %z.addr = alloca i64
; See that we only copy x and z, as a and y are not modified prior to coro.begin.
-; CHECK: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr
-; CHECK-NEXT: %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
-; CHECK-NEXT: %1 = load i64, i64* %x.addr
-; CHECK-NEXT: store i64 %1, i64* %0
-; CHECK-NEXT: %index.addr1 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
-; CHECK-NEXT: store i1 false, i1* %index.addr1
+; CHECK: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr
+; The next 3 instructions are to copy data in %x.addr from stack to frame.
+; CHECK-NEXT: %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
+; CHECK-NEXT: %1 = load i64, i64* %x.addr, align 4
+; CHECK-NEXT: store i64 %1, i64* %0, align 4
+; The next 2 instructions are to recreate %y.cast in the original IR.
+; CHECK-NEXT: %2 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+; CHECK-NEXT: %3 = bitcast i64* %2 to i8*
+; The next 3 instructions are to copy data in %z.addr from stack to frame.
+; CHECK-NEXT: %4 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5
+; CHECK-NEXT: %5 = load i64, i64* %z.addr, align 4
+; CHECK-NEXT: store i64 %5, i64* %4, align 4
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %3, i8 1, i32 4, i1 false)
+; CHECK-NEXT: %index.addr1 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 6
+; CHECK-NEXT: store i1 false, i1* %index.addr1, align 1
; CHECK-NEXT: ret i8* %hdl
+
declare i8* @llvm.coro.free(token, i8*)
declare i32 @llvm.coro.size.i32()
declare i8 @llvm.coro.suspend(token, i1)
@@ -64,7 +89,9 @@ declare i1 @llvm.coro.alloc(token)
declare i8* @llvm.coro.begin(token, i8*)
declare i1 @llvm.coro.end(i8*, i1)
-declare noalias i8* @myAlloc(i64, i32)
-declare void @print(i64)
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)
+
+declare noalias i8* @myAlloc(i32)
declare void @use(i64*)
declare void @free(i8*)
+declare i1 @check()
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/read-clobber-after-overwrite.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/read-clobber-after-overwrite.ll
new file mode 100644
index 00000000000000..4f704c35a90b10
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/read-clobber-after-overwrite.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -dse -enable-dse-memoryssa -S %s | FileCheck %s
+
+declare i1 @cond() readnone
+
+define i32 @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[M0:%.*]] = alloca [4 x i32], align 16
+; CHECK-NEXT: br label [[LOOP_1:%.*]]
+; CHECK: loop.1:
+; CHECK-NEXT: br label [[LOOP_2:%.*]]
+; CHECK: loop.2:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[LOOP_1]] ], [ [[IV_NEXT:%.*]], [[LOOP_2]] ]
+; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[M0]], i64 3, i64 [[IV]]
+; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr
inbounds [4 x i32], [4 x i32]* [[M0]], i64 0, i64 [[IV]] +; CHECK-NEXT: store i32 20, i32* [[PTR_2]], align 4 +; CHECK-NEXT: store i32 30, i32* [[PTR_1]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[C_3:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_1_LATCH:%.*]], label [[LOOP_2]] +; CHECK: loop.1.latch: +; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C_2]], label [[EXIT:%.*]], label [[LOOP_1]] +; CHECK: exit: +; CHECK-NEXT: [[PTR_3:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[M0]], i64 0, i64 1 +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR_3]], align 16 +; CHECK-NEXT: ret i32 [[LV]] +; +entry: + %M0 = alloca [4 x i32], align 16 + br label %loop.1 + +loop.1: + br label %loop.2 + +loop.2: + %iv = phi i64 [ 0, %loop.1 ], [ %iv.next, %loop.2 ] + %ptr.1 = getelementptr inbounds [4 x i32], [4 x i32]* %M0, i64 3, i64 %iv + store i32 10, i32* %ptr.1, align 4 + %ptr.2 = getelementptr inbounds [4 x i32], [4 x i32]* %M0, i64 0, i64 %iv + store i32 20, i32* %ptr.2, align 4 + store i32 30, i32* %ptr.1, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %c.3 = call i1 @cond() + br i1 %c.3, label %loop.1.latch, label %loop.2 + +loop.1.latch: + %c.2 = call i1 @cond() + br i1 %c.2, label %exit, label %loop.1 + +exit: + %ptr.3 = getelementptr inbounds [4 x i32], [4 x i32]* %M0, i64 0, i64 1 + %lv = load i32, i32* %ptr.3, align 16 + ret i32 %lv + + +} diff --git a/llvm/test/Transforms/EarlyCSE/masked-intrinsics.ll b/llvm/test/Transforms/EarlyCSE/masked-intrinsics.ll new file mode 100644 index 00000000000000..77183ab97a6b0f --- /dev/null +++ b/llvm/test/Transforms/EarlyCSE/masked-intrinsics.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -early-cse < %s | FileCheck %s + +define <128 x i8> @f0(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { +; CHECK-LABEL: @f0( +; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] +; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[A1]], <128 x i8>* [[A0:%.*]], i32 4, <128 x i1> [[V0]]) +; CHECK-NEXT: [[V1:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: ret <128 x i8> [[V1]] +; + %v0 = icmp eq <128 x i8> %a1, %a2 + call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %a1, <128 x i8>* %a0, i32 4, <128 x i1> %v0) + %v1 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + ret <128 x i8> %v1 +} + +define <128 x i8> @f1(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { +; CHECK-LABEL: @f1( +; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] +; CHECK-NEXT: [[V1:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0:%.*]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[V1]], <128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]]) +; CHECK-NEXT: ret <128 x i8> [[V1]] +; + %v0 = icmp eq <128 x i8> %a1, %a2 + %v1 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %v1, <128 x i8>* %a0, i32 4, <128 x i1> %v0) + ret <128 x i8> %v1 +} + +define <128 x i8> @f2(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { +; CHECK-LABEL: @f2( +; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] +; CHECK-NEXT: [[V1:%.*]] = call <128 x i8> 
@llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0:%.*]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: [[V3:%.*]] = add <128 x i8> [[V1]], [[V1]] +; CHECK-NEXT: ret <128 x i8> [[V3]] +; + %v0 = icmp eq <128 x i8> %a1, %a2 + %v1 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + %v2 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + %v3 = add <128 x i8> %v1, %v2 + ret <128 x i8> %v3 +} + +declare <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>*, i32, <128 x i1>, <128 x i8>) +declare void @llvm.masked.store.v128i8.p0v128i8(<128 x i8>, <128 x i8>*, i32, <128 x i1>) diff --git a/llvm/test/Transforms/EarlyCSE/reuse-preserved-memoryssa.ll b/llvm/test/Transforms/EarlyCSE/reuse-preserved-memoryssa.ll new file mode 100644 index 00000000000000..744389c24db284 --- /dev/null +++ b/llvm/test/Transforms/EarlyCSE/reuse-preserved-memoryssa.ll @@ -0,0 +1,7 @@ +; RUN: opt -memoryssa -gvn -early-cse-memssa %s -S | FileCheck %s + +; CHECK: define void @foo( + +define void @foo() { + ret void +} diff --git a/llvm/test/Transforms/GVN/masked-load-store.ll b/llvm/test/Transforms/GVN/masked-load-store.ll new file mode 100644 index 00000000000000..0b71a10a067db0 --- /dev/null +++ b/llvm/test/Transforms/GVN/masked-load-store.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -gvn -S < %s | FileCheck %s + +; Check that in both cases the second load is recognized as redundant +; and is removed. + +define <128 x i8> @f0(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { +; CHECK-LABEL: @f0( +; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] +; CHECK-NEXT: [[V1:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0:%.*]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: [[V3:%.*]] = add <128 x i8> [[V1]], [[V1]] +; CHECK-NEXT: ret <128 x i8> [[V3]] +; + %v0 = icmp eq <128 x i8> %a1, %a2 + %v1 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + %v2 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + %v3 = add <128 x i8> %v1, %v2 + ret <128 x i8> %v3 +} + +define <128 x i8> @f1(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { +; CHECK-LABEL: @f1( +; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] +; CHECK-NEXT: [[V1:%.*]] = getelementptr <128 x i8>, <128 x i8>* [[A0:%.*]], i32 1 +; CHECK-NEXT: [[V2:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[A2]], <128 x i8>* [[V1]], i32 4, <128 x i1> [[V0]]) +; CHECK-NEXT: [[V4:%.*]] = add <128 x i8> [[V2]], [[V2]] +; CHECK-NEXT: ret <128 x i8> [[V4]] +; + %v0 = icmp eq <128 x i8> %a1, %a2 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a0, i32 1 + %v2 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %a2, <128 x i8>* %v1, i32 4, <128 x i1> %v0) + %v3 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + %v4 = add <128 x i8> %v2, %v3 + ret <128 x i8> %v4 +} + +declare <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>*, i32, <128 x i1>, <128 x i8>) +declare void 
@llvm.masked.store.v128i8.p0v128i8(<128 x i8>, <128 x i8>*, i32, <128 x i1>) + diff --git a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll index 2b939767284a4c..7c4bad11a5ea58 100644 --- a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll +++ b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -indvars < %s | FileCheck %s ; Check that we replace signed comparisons between non-negative values with @@ -6,13 +7,35 @@ target datalayout = "n8:16:32:64" define i32 @test_01(i32 %a, i32 %b, i32* %p) { - ; CHECK-LABEL: @test_01( -; CHECK-NOT: icmp slt -; CHECK: %cmp1 = icmp ult i32 %iv, 100 -; CHECK: %cmp2 = icmp ult i32 %iv, 100 -; CHECK-NOT: %cmp3 -; CHECK: %exitcond = icmp ne i32 %iv.next, 1000 +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_ENTRY:%.*]] +; CHECK: loop.entry: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_BE:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[IV]], 100 +; CHECK-NEXT: br i1 [[CMP1]], label [[B1:%.*]], label [[B2:%.*]] +; CHECK: b1: +; CHECK-NEXT: store i32 [[IV]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: b2: +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IV]], 100 +; CHECK-NEXT: br i1 [[CMP2]], label [[B3:%.*]], label [[B4:%.*]] +; CHECK: b3: +; CHECK-NEXT: store i32 [[IV]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[LOOP_BE]] +; CHECK: b4: +; CHECK-NEXT: store i32 [[B:%.*]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[LOOP_BE]] +; CHECK: loop.be: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_ENTRY]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret i32 999 +; entry: br label %loop.entry @@ -52,13 +75,35 @@ exit: } define i32 @test_02(i32 %a, i32 %b, i32* %p) { - ; CHECK-LABEL: @test_02( -; CHECK-NOT: icmp sgt -; CHECK: %cmp1 = icmp ugt i32 100, %iv -; CHECK: %cmp2 = icmp ugt i32 100, %iv -; CHECK-NOT: %cmp3 -; CHECK: %exitcond = icmp ne i32 %iv.next, 1000 +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_ENTRY:%.*]] +; CHECK: loop.entry: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_BE:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 100, [[IV]] +; CHECK-NEXT: br i1 [[CMP1]], label [[B1:%.*]], label [[B2:%.*]] +; CHECK: b1: +; CHECK-NEXT: store i32 [[IV]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: b2: +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 100, [[IV]] +; CHECK-NEXT: br i1 [[CMP2]], label [[B3:%.*]], label [[B4:%.*]] +; CHECK: b3: +; CHECK-NEXT: store i32 [[IV]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[LOOP_BE]] +; CHECK: b4: +; CHECK-NEXT: store i32 [[B:%.*]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[LOOP_BE]] +; CHECK: loop.be: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_ENTRY]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret i32 999 +; entry: br label %loop.entry diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll 
b/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll index 66951eda7a575c..7dfd4ebc001583 100644 --- a/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll @@ -19,7 +19,7 @@ define void @analyzeable_early_exit(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store i32 [[IV]], i32* @A +; CHECK-NEXT: store i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -49,12 +49,12 @@ define void @unanalyzeable_early_exit() { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A +; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A, align 4 ; CHECK-NEXT: [[EARLYCND:%.*]] = icmp ne i32 [[VOL]], 0 ; CHECK-NEXT: br i1 [[EARLYCND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store i32 [[IV]], i32* @A +; CHECK-NEXT: store i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -89,12 +89,12 @@ define void @multiple_early_exits(i32 %n, i32 %m) { ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[CONTINUE:%.*]], label [[EXIT:%.*]] ; CHECK: continue: -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV]], [[M:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LATCH]], label [[EXIT]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -137,7 +137,7 @@ define void @compound_early_exit(i32 %n, i32 %m) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -174,8 +174,8 @@ define void @unanalyzeable_latch(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: store i32 [[IV]], i32* @A -; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A +; CHECK-NEXT: store i32 [[IV]], i32* @A, align 4 +; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A, align 4 ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[VOL]], 1000 ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -210,7 +210,7 @@ define void @single_exit_no_latch(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: store i32 [[IV]], i32* @A +; CHECK-NEXT: store i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: br label [[LOOP]] ; CHECK: exit: ; 
CHECK-NEXT: ret void @@ -243,11 +243,11 @@ define void @no_latch_exit(i32 %n, i32 %m) { ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[CONTINUE:%.*]], label [[EXIT:%.*]] ; CHECK: continue: -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV]], [[M:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LATCH]], label [[EXIT]] ; CHECK: latch: -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br label [[LOOP]] ; CHECK: exit: @@ -287,7 +287,7 @@ define void @combine_ivs(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_NEXT]], 999 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -324,7 +324,7 @@ define void @combine_ivs2(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -362,7 +362,7 @@ define void @simplify_exit_test(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_NEXT]], 65 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -396,13 +396,13 @@ define void @simplify_exit_test2(i32 %n) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A +; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A, align 4 ; CHECK-NEXT: [[EARLYCND:%.*]] = icmp ne i32 [[VOL]], 0 ; CHECK-NEXT: br i1 [[EARLYCND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[FX:%.*]] = udiv i32 [[IV]], 4 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[FX]], 1024 ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -442,12 +442,12 @@ define void @nested(i32 %n) { ; CHECK-NEXT: br label [[OUTER:%.*]] ; CHECK: outer: ; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV1_NEXT:%.*]], [[OUTER_LATCH:%.*]] ] -; CHECK-NEXT: store volatile i32 [[IV1]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV1]], i32* @A, align 4 ; CHECK-NEXT: [[IV1_NEXT]] = add nuw nsw i32 [[IV1]], 1 ; CHECK-NEXT: br label [[INNER:%.*]] ; CHECK: inner: ; CHECK-NEXT: [[IV2:%.*]] = phi i32 [ 0, [[OUTER]] ], [ [[IV2_NEXT:%.*]], [[INNER_LATCH:%.*]] ] -; CHECK-NEXT: store volatile i32 [[IV2]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV2]], i32* @A, align 4 ; CHECK-NEXT: [[IV2_NEXT]] = add nuw nsw i32 [[IV2]], 1 ; 
CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV2]], 20 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER_LATCH]], label [[EXIT_LOOPEXIT:%.*]] diff --git a/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll index 988b3923263f63..048254427c5fad 100644 --- a/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll +++ b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll @@ -83,8 +83,8 @@ exit: ret i32 0 } -; Monotonic incrementing iv. we should be able to prove that %iv.next <s len -; basing on its nsw and the fact that its starting value <s len. +; Monotonic decrementing iv. we should be able to prove that %iv.next >s len +; basing on its nsw and the fact that its starting value >s len. define i32 @test_02(i32* %p) { ; CHECK-LABEL: @test_02( ; CHECK-NEXT: entry: @@ -164,6 +164,84 @@ exit: ret i32 0 } +define i32 @test_03(i32* %p) { +; CHECK-LABEL: @test_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[P:%.*]], align 4, [[RNG2:!range !.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[RC:%.*]] = icmp ugt i32 [[IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 1000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: fail: +; CHECK-NEXT: ret i32 -1 +; CHECK: exit: +; CHECK-NEXT: ret i32 0 +; +entry: + %len = load i32, i32* %p, !range !2 + br label %loop + +loop: + %iv = phi i32 [%len, %entry], [%iv.next, %backedge] + %iv.next = add i32 %iv, 1 + %rc = icmp sgt i32 %iv.next, %len + br i1 %rc, label %backedge, label %fail + +backedge: + %loop.cond = icmp ne i32 %iv, 1000 + br i1 %loop.cond, label %loop, label %exit + +fail: + ret i32 -1 + +exit: + ret i32 0 +} + +define i32 @test_04(i32* %p) { +; CHECK-LABEL: @test_04( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[P:%.*]], align 4, [[RNG2]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], -1 +; CHECK-NEXT: [[RC:%.*]] = icmp slt i32 [[IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 0 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: fail: +; CHECK-NEXT: ret i32 -1 +; CHECK: exit: +; CHECK-NEXT: ret i32 0 +; +entry: + %len = load i32, i32* %p, !range !2 + br label %loop + +loop: + %iv = phi i32 [%len, %entry], [%iv.next, %backedge] + %iv.next = add i32 %iv, -1 + %rc = icmp slt i32 %iv.next, %len + br i1 %rc, label %backedge, label %fail + +backedge: + %loop.cond = icmp ne i32 %iv, 0 + br i1 %loop.cond, label %loop, label %exit + +fail: + ret i32 -1 + +exit: + ret i32 0 +} !0 = !{i32 0, i32 2147483647} !1 = !{i32 -2147483648, i32 0} +!2 = !{i32 0, i32 1000} diff --git a/llvm/test/Transforms/IndVarSimplify/pr18223.ll b/llvm/test/Transforms/IndVarSimplify/pr18223.ll index f922aa424a17e7..da620c80621989 100644 --- a/llvm/test/Transforms/IndVarSimplify/pr18223.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr18223.ll @@ -1,12 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -indvars -S < %s | FileCheck %s ; indvars should transform the phi node pair from the for-loop -; CHECK-LABEL: @main( -; CHECK: ret = phi i32 [ 0, %entry ], [ 0,
{{.*}} ] @c = common global i32 0, align 4 define i32 @main() #0 { +; CHECK-LABEL: @main( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @c, align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: br label [[FOR_INC:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[RET:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 0, [[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RET]] +; entry: %0 = load i32, i32* @c, align 4 %tobool = icmp eq i32 %0, 0 diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index b00681d44d26c9..30e5a9ddab3c61 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s +declare i8 @llvm.abs.i8(i8, i1) declare i32 @llvm.abs.i32(i32, i1) declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) declare <3 x i82> @llvm.abs.v3i82(<3 x i82>, i1) @@ -233,7 +234,7 @@ define i32 @abs_assume_neg(i32 %x) { ; CHECK-LABEL: @abs_assume_neg( ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-NEXT: [[ABS:%.*]] = sub i32 0, [[X]] ; CHECK-NEXT: ret i32 [[ABS]] ; %cmp = icmp slt i32 %x, 0 @@ -245,12 +246,49 @@ define i32 @abs_assume_neg(i32 %x) { define i32 @abs_known_neg(i16 %x) { ; CHECK-LABEL: @abs_known_neg( ; CHECK-NEXT: [[EXT:%.*]] = zext i16 [[X:%.*]] to i32 -; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[EXT]], -1 -; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[NEG]], i1 false) -; CHECK-NEXT: ret i32 [[ABS]] +; CHECK-NEXT: [[NEG_NEG:%.*]] = add nuw nsw i32 [[EXT]], 1 +; CHECK-NEXT: ret i32 [[NEG_NEG]] ; %ext = zext i16 %x to i32 %neg = sub nsw i32 -1, %ext %abs = call i32 @llvm.abs.i32(i32 %neg, i1 false) ret i32 %abs } + +define i1 @abs_eq_int_min_poison(i8 %x) { +; CHECK-LABEL: @abs_eq_int_min_poison( +; CHECK-NEXT: ret i1 false +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 true) + %cmp = icmp eq i8 %abs, -128 + ret i1 %cmp +} + +define i1 @abs_ne_int_min_poison(i8 %x) { +; CHECK-LABEL: @abs_ne_int_min_poison( +; CHECK-NEXT: ret i1 true +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 true) + %cmp = icmp ne i8 %abs, -128 + ret i1 %cmp +} + +define i1 @abs_eq_int_min_nopoison(i8 %x) { +; CHECK-LABEL: @abs_eq_int_min_nopoison( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], -128 +; CHECK-NEXT: ret i1 [[CMP]] +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) + %cmp = icmp eq i8 %abs, -128 + ret i1 %cmp +} + +define i1 @abs_ne_int_min_nopoison(i8 %x) { +; CHECK-LABEL: @abs_ne_int_min_nopoison( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[X:%.*]], -128 +; CHECK-NEXT: ret i1 [[CMP]] +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) + %cmp = icmp ne i8 %abs, -128 + ret i1 %cmp +} diff --git a/llvm/test/Transforms/InstCombine/icmp-mul.ll b/llvm/test/Transforms/InstCombine/icmp-mul.ll index 8e7d9056726e48..e2aff1c304adf0 100644 --- a/llvm/test/Transforms/InstCombine/icmp-mul.ll +++ b/llvm/test/Transforms/InstCombine/icmp-mul.ll @@ -365,3 +365,306 @@ define i1 
@ne_rem_zero_nonuw(i8 %x) { %b = icmp ne i8 %a, 30 ret i1 %b } + +define i1 @mul_constant_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_eq( +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 5 + %B = mul i32 %y, 5 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define <2 x i1> @mul_constant_ne_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @mul_constant_ne_splat( +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul <2 x i32> %x, <i32 5, i32 5> + %B = mul <2 x i32> %y, <i32 5, i32 5> + %C = icmp ne <2 x i32> %A, %B + ret <2 x i1> %C +} + +define i1 @mul_constant_ne_extra_use1(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_extra_use1( +; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 5 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i8 %x, 5 + call void @use(i8 %A) + %B = mul i8 %y, 5 + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_eq_extra_use2(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_eq_extra_use2( +; CHECK-NEXT: [[B:%.*]] = mul i8 [[Y:%.*]], 5 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[X:%.*]], [[Y]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i8 %x, 5 + %B = mul i8 %y, 5 + call void @use(i8 %B) + %C = icmp eq i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_ne_extra_use3(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_extra_use3( +; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 5 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul i8 [[Y:%.*]], 5 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i8 %x, 5 + call void @use(i8 %A) + %B = mul i8 %y, 5 + call void @use(i8 %B) + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_eq_nsw(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_eq_nsw( +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i32 %x, 6 + %B = mul nsw i32 %y, 6 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define <2 x i1> @mul_constant_ne_nsw_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @mul_constant_ne_nsw_splat( +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul nsw <2 x i32> %x, + %B = mul nsw <2 x i32> %y, + %C = icmp ne <2 x i32> %A, %B + ret <2 x i1> %C +} + +define i1 @mul_constant_ne_nsw_extra_use1(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_nsw_extra_use1( +; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 74 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i8 %x, 74 + call void @use(i8 %A) + %B = mul nsw i8 %y, 74 + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_eq_nsw_extra_use2(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_eq_nsw_extra_use2( +; CHECK-NEXT: [[B:%.*]] = mul nsw i8 [[Y:%.*]], 20 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[X:%.*]], [[Y]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i8 %x, 20 + %B = mul nsw i8 %y, 20 + call void @use(i8 %B) + %C = icmp eq i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_ne_nsw_extra_use3(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_nsw_extra_use3( +; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 24 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul nsw i8 [[Y:%.*]], 24 +;
CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i8 %x, 24 + call void @use(i8 %A) + %B = mul nsw i8 %y, 24 + call void @use(i8 %B) + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_nuw_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_nuw_eq( +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i32 %x, 22 + %B = mul nuw i32 %y, 22 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define <2 x i1> @mul_constant_ne_nuw_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @mul_constant_ne_nuw_splat( +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul nuw <2 x i32> %x, + %B = mul nuw <2 x i32> %y, + %C = icmp ne <2 x i32> %A, %B + ret <2 x i1> %C +} + +define i1 @mul_constant_ne_nuw_extra_use1(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_nuw_extra_use1( +; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 6 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i8 %x, 6 + call void @use(i8 %A) + %B = mul nuw i8 %y, 6 + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_eq_nuw_extra_use2(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_eq_nuw_extra_use2( +; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Y:%.*]], 36 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[X:%.*]], [[Y]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i8 %x, 36 + %B = mul nuw i8 %y, 36 + call void @use(i8 %B) + %C = icmp eq i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_ne_nuw_extra_use3(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_nuw_extra_use3( +; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 38 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Y:%.*]], 38 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i8 %x, 38 + call void @use(i8 %A) + %B = mul nuw i8 %y, 38 + call void @use(i8 %B) + %C = icmp ne i8 %A, %B + ret i1 %C +} + +; Negative test - wrong pred + +define i1 @mul_constant_ult(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_ult( +; CHECK-NEXT: [[A:%.*]] = mul i32 [[X:%.*]], 47 +; CHECK-NEXT: [[B:%.*]] = mul i32 [[Y:%.*]], 47 +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 47 + %B = mul i32 %y, 47 + %C = icmp ult i32 %A, %B + ret i1 %C +} + +; Negative test - wrong pred + +define i1 @mul_constant_nuw_sgt(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_nuw_sgt( +; CHECK-NEXT: [[A:%.*]] = mul nuw i32 [[X:%.*]], 46 +; CHECK-NEXT: [[B:%.*]] = mul nuw i32 [[Y:%.*]], 46 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i32 %x, 46 + %B = mul nuw i32 %y, 46 + %C = icmp sgt i32 %A, %B + ret i1 %C +} + +; Negative test - wrong constants + +define i1 @mul_mismatch_constant_nuw_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_mismatch_constant_nuw_eq( +; CHECK-NEXT: [[A:%.*]] = mul nuw i32 [[X:%.*]], 46 +; CHECK-NEXT: [[B:%.*]] = mul nuw i32 [[Y:%.*]], 44 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i32 %x, 46 + %B = mul nuw i32 %y, 44 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +; If the multiply constant has any trailing zero bits but could overflow, +; we get something completely different. 
+; We mask off the high bits of each input and then convert: +; (X&Z) == (Y&Z) -> (X^Y) & Z == 0 + +define i1 @mul_constant_partial_nuw_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_partial_nuw_eq( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 44 + %B = mul nuw i32 %y, 44 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define i1 @mul_constant_mismatch_wrap_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_mismatch_wrap_eq( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2147483647 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i32 %x, 54 + %B = mul nuw i32 %y, 54 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define i1 @eq_mul_constants_with_tz(i32 %x, i32 %y) { +; CHECK-LABEL: @eq_mul_constants_with_tz( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823 +; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 12 + %B = mul i32 %y, 12 + %C = icmp ne i32 %A, %B + ret i1 %C +} + +define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @eq_mul_constants_with_tz_splat( +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 1073741823, i32 1073741823> +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul <2 x i32> %x, <i32 12, i32 12> + %B = mul <2 x i32> %y, <i32 12, i32 12> + %C = icmp eq <2 x i32> %A, %B + ret <2 x i1> %C +} diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index a9bda13e15b905..683518121789cc 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -3397,58 +3397,6 @@ define i1 @eq_add_constants(i32 %x, i32 %y) { ret i1 %C } -define i1 @eq_mul_constants(i32 %x, i32 %y) { -; CHECK-LABEL: @eq_mul_constants( -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: ret i1 [[C]] -; - %A = mul i32 %x, 5 - %B = mul i32 %y, 5 - %C = icmp eq i32 %A, %B - ret i1 %C -} - -define <2 x i1> @eq_mul_constants_splat(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @eq_mul_constants_splat( -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: ret <2 x i1> [[C]] -; - %A = mul <2 x i32> %x, <i32 5, i32 5> - %B = mul <2 x i32> %y, <i32 5, i32 5> - %C = icmp ne <2 x i32> %A, %B - ret <2 x i1> %C -} - -; If the multiply constant has any trailing zero bits, we get something completely different.
-; We mask off the high bits of each input and then convert: -; (X&Z) == (Y&Z) -> (X^Y) & Z == 0 - -define i1 @eq_mul_constants_with_tz(i32 %x, i32 %y) { -; CHECK-LABEL: @eq_mul_constants_with_tz( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK-NEXT: ret i1 [[C]] -; - %A = mul i32 %x, 12 - %B = mul i32 %y, 12 - %C = icmp ne i32 %A, %B - ret i1 %C -} - -define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @eq_mul_constants_with_tz_splat( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 1073741823, i32 1073741823> -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: ret <2 x i1> [[C]] -; - %A = mul <2 x i32> %x, <i32 12, i32 12> - %B = mul <2 x i32> %y, <i32 12, i32 12> - %C = icmp eq <2 x i32> %A, %B - ret <2 x i1> %C -} - declare i32 @llvm.bswap.i32(i32) define i1 @bswap_ne(i32 %x, i32 %y) { diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll new file mode 100644 index 00000000000000..797f85d9444747 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine < %s | FileCheck %s + +declare i8 @llvm.umin.i8(i8, i8) +declare i8 @llvm.umax.i8(i8, i8) +declare i8 @llvm.smin.i8(i8, i8) +declare i8 @llvm.smax.i8(i8, i8) + +define i8 @umin_known_bits(i8 %x, i8 %y) { +; CHECK-LABEL: @umin_known_bits( +; CHECK-NEXT: ret i8 0 +; + %x2 = and i8 %x, 127 + %m = call i8 @llvm.umin.i8(i8 %x2, i8 %y) + %r = and i8 %m, -128 + ret i8 %r +} + +define i8 @umax_known_bits(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_known_bits( +; CHECK-NEXT: ret i8 -128 +; + %x2 = or i8 %x, -128 + %m = call i8 @llvm.umax.i8(i8 %x2, i8 %y) + %r = and i8 %m, -128 + ret i8 %r +} + +define i8 @smin_known_bits(i8 %x, i8 %y) { +; CHECK-LABEL: @smin_known_bits( +; CHECK-NEXT: ret i8 -128 +; + %x2 = or i8 %x, -128 + %m = call i8 @llvm.smin.i8(i8 %x2, i8 %y) + %r = and i8 %m, -128 + ret i8 %r +} + +define i8 @smax_known_bits(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_known_bits( +; CHECK-NEXT: ret i8 0 +; + %x2 = and i8 %x, 127 + %m = call i8 @llvm.smax.i8(i8 %x2, i8 %y) + %r = and i8 %m, -128 + ret i8 %r +} diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index cf9604223f6c15..f31eeb46d88231 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -14,6 +14,32 @@ define i64 @test_inbounds([0 x i32]* %base, i64 %idx) { ret i64 %d } +define i64 @test_partial_inbounds1([0 x i32]* %base, i64 %idx) { +; CHECK-LABEL: @test_partial_inbounds1( +; CHECK-NEXT: [[P2_IDX:%.*]] = shl i64 [[IDX:%.*]], 2 +; CHECK-NEXT: ret i64 [[P2_IDX]] +; + %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 0 + %p2 = getelementptr [0 x i32], [0 x i32]* %base, i64 0, i64 %idx + %i1 = ptrtoint i32* %p1 to i64 + %i2 = ptrtoint i32* %p2 to i64 + %d = sub i64 %i2, %i1 + ret i64 %d +} + +define i64 @test_partial_inbounds2([0 x i32]* %base, i64 %idx) { +; CHECK-LABEL: @test_partial_inbounds2( +; CHECK-NEXT: [[P2_IDX:%.*]] = shl nsw i64 [[IDX:%.*]], 2 +; CHECK-NEXT: ret i64 [[P2_IDX]] +; + %p1 = getelementptr [0 x i32], [0 x i32]* %base, i64 0, i64 0 + %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx + %i1 = ptrtoint i32* %p1
to i64 + %i2 = ptrtoint i32* %p2 to i64 + %d = sub i64 %i2, %i1 + ret i64 %d +} + define i64 @test_inbounds_nuw([0 x i32]* %base, i64 %idx) { ; CHECK-LABEL: @test_inbounds_nuw( ; CHECK-NEXT: [[P2_IDX:%.*]] = shl nuw nsw i64 [[IDX:%.*]], 2 @@ -69,13 +95,39 @@ define i64 @test_inbounds_nuw_swapped([0 x i32]* %base, i64 %idx) { ret i64 %d } +define i64 @test_inbounds1_nuw_swapped([0 x i32]* %base, i64 %idx) { +; CHECK-LABEL: @test_inbounds1_nuw_swapped( +; CHECK-NEXT: [[P2_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 +; CHECK-NEXT: ret i64 [[P2_IDX_NEG]] +; + %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 0 + %p2 = getelementptr [0 x i32], [0 x i32]* %base, i64 0, i64 %idx + %i1 = ptrtoint i32* %p2 to i64 + %i2 = ptrtoint i32* %p1 to i64 + %d = sub nuw i64 %i2, %i1 + ret i64 %d +} + +define i64 @test_inbounds2_nuw_swapped([0 x i32]* %base, i64 %idx) { +; CHECK-LABEL: @test_inbounds2_nuw_swapped( +; CHECK-NEXT: [[P2_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 +; CHECK-NEXT: ret i64 [[P2_IDX_NEG]] +; + %p1 = getelementptr [0 x i32], [0 x i32]* %base, i64 0, i64 0 + %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx + %i1 = ptrtoint i32* %p2 to i64 + %i2 = ptrtoint i32* %p1 to i64 + %d = sub nuw i64 %i2, %i1 + ret i64 %d +} + ; The sub and shl here could be nuw, but this is harder to handle. define i64 @test_inbounds_nuw_two_gep([0 x i32]* %base, i64 %idx, i64 %idx2) { ; CHECK-LABEL: @test_inbounds_nuw_two_gep( +; CHECK-NEXT: [[P2_IDX:%.*]] = shl nsw i64 [[IDX2:%.*]], 2 ; CHECK-NEXT: [[P1_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 -; CHECK-NEXT: [[P2_IDX_NEG_NEG:%.*]] = shl i64 [[IDX2:%.*]], 2 -; CHECK-NEXT: [[DOTNEG:%.*]] = add i64 [[P2_IDX_NEG_NEG]], [[P1_IDX_NEG]] -; CHECK-NEXT: ret i64 [[DOTNEG]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = add i64 [[P1_IDX_NEG]], [[P2_IDX]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx2 diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index dbe1631226d658..437d8f8c5c0231 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -505,9 +505,9 @@ define i64 @test24b(i8* %P, i64 %A){ define i64 @test25(i8* %P, i64 %A){ ; CHECK-LABEL: @test25( -; CHECK-NEXT: [[B_IDX_NEG_NEG:%.*]] = shl i64 [[A:%.*]], 1 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = add i64 [[B_IDX_NEG_NEG]], -84 -; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i64 [[A:%.*]], 1 +; CHECK-NEXT: [[GEPDIFF:%.*]] = add i64 [[B_IDX]], -84 +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A %C = ptrtoint i16* %B to i64 @@ -520,9 +520,9 @@ define i64 @test25(i8* %P, i64 %A){ define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { ; CHECK-LABEL: @test25_as1( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i16 -; CHECK-NEXT: [[B_IDX_NEG_NEG:%.*]] = shl i16 [[TMP1]], 1 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = add i16 [[B_IDX_NEG_NEG]], -84 -; CHECK-NEXT: ret i16 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i16 [[TMP1]], 1 +; CHECK-NEXT: [[GEPDIFF:%.*]] = add i16 [[B_IDX]], -84 +; CHECK-NEXT: ret i16 [[GEPDIFF]] ; %B = getelementptr inbounds [42 x i16], [42 x i16] addrspace(1)* @Arr_as1, i64 0, i64 %A %C = ptrtoint i16 addrspace(1)* %B to i16 @@ -825,8 +825,8 @@ define i32 @test28commuted(i32 %x, i32 %y, i32 %z) { define i64 @test29(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: 
@test29( -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] -; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %gep1 = getelementptr inbounds i8, i8* %foo, i64 %i %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j @@ -838,9 +838,9 @@ define i64 @test29(i8* %foo, i64 %i, i64 %j) { define i64 @test30(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test30( -; CHECK-NEXT: [[GEP1_IDX_NEG_NEG:%.*]] = shl i64 [[I:%.*]], 2 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = sub i64 [[GEP1_IDX_NEG_NEG]], [[J:%.*]] -; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2 +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %bit = bitcast i8* %foo to i32* %gep1 = getelementptr inbounds i32, i32* %bit, i64 %i @@ -853,9 +853,9 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) { define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { ; CHECK-LABEL: @test30_as1( -; CHECK-NEXT: [[GEP1_IDX_NEG_NEG:%.*]] = shl i16 [[I:%.*]], 2 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = sub i16 [[GEP1_IDX_NEG_NEG]], [[J:%.*]] -; CHECK-NEXT: ret i16 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i16 [[I:%.*]], 2 +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i16 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: ret i16 [[GEPDIFF]] ; %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i16 %i @@ -1234,10 +1234,10 @@ define i64 @test58([100 x [100 x i8]]* %foo, i64 %i, i64 %j) { ; "%sub = i64 %i, %j, ret i64 %sub" ; gep1 and gep2 have only one use ; CHECK-LABEL: @test58( -; CHECK-NEXT: [[GEP2_OFFS:%.*]] = add i64 [[J:%.*]], 4200 ; CHECK-NEXT: [[GEP1_OFFS:%.*]] = add i64 [[I:%.*]], 4200 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]] -; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[GEP2_OFFS:%.*]] = add i64 [[J:%.*]], 4200 +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %i %gep2 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %j diff --git a/llvm/test/Transforms/InstCombine/xor.ll b/llvm/test/Transforms/InstCombine/xor.ll index 312b0125f626f2..ba275a6066419d 100644 --- a/llvm/test/Transforms/InstCombine/xor.ll +++ b/llvm/test/Transforms/InstCombine/xor.ll @@ -1171,3 +1171,77 @@ define i8 @not_ashr_wrong_const(i8 %x) { %r = xor i8 %a, -2 ret i8 %r } + +; (~A & B) ^ A --> (A | B) +; The division ops are here to thwart complexity-based canonicalization: all ops are binops. + +define i32 @test52(i32 %p1, i32 %p2) { +; CHECK-LABEL: @test52( +; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]] +; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]] +; CHECK-NEXT: [[O:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[R:%.*]] = and i32 [[B]], [[O]] +; CHECK-NEXT: [[Z:%.*]] = xor i32 [[R]], [[A]] +; CHECK-NEXT: ret i32 [[Z]] +; + %a = udiv i32 42, %p1 + %b = udiv i32 42, %p2 + %o = xor i32 %a, -1 + %r = and i32 %o, %b + %z = xor i32 %r, %a + ret i32 %z +} + +; (~B & A) ^ B --> (A | B) +; The division ops are here to thwart complexity-based canonicalization: all ops are binops. 
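The fold named in these comments can be checked bit by bit: wherever A is 1, ~A & B is 0 and the xor with A yields 1, which is what A | B gives; wherever A is 0, the expression reduces to B ^ 0 = B, again matching A | B. Note that the CHECK lines in these tests still show the unfolded xor/and sequence, so they read as baseline coverage added ahead of the canonicalization. A minimal before/after sketch of the intended rewrite in the same style as the tests (the function name is illustrative, and udiv is used only to keep both operands equally complex, as in the tests):

; Input pattern: (~A & B) ^ A, built so neither xor operand is "simpler".
define i32 @xor_of_and_sketch(i32 %p1, i32 %p2) {
  %a = udiv i32 42, %p1   ; A
  %b = udiv i32 42, %p2   ; B
  %o = xor i32 %a, -1     ; ~A
  %r = and i32 %o, %b     ; ~A & B
  %z = xor i32 %r, %a     ; (~A & B) ^ A
  ret i32 %z
}

; Once the fold is implemented, the last three instructions should collapse to:
;   %z = or i32 %a, %b

The test that follows exercises the commuted variant of the same pattern.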
+ +define i32 @test53(i32 %p1, i32 %p2) { +; CHECK-LABEL: @test53( +; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]] +; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]] +; CHECK-NEXT: [[O:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], [[O]] +; CHECK-NEXT: [[Z:%.*]] = xor i32 [[R]], [[B]] +; CHECK-NEXT: ret i32 [[Z]] +; + %a = udiv i32 42, %p1 + %b = udiv i32 42, %p2 + %o = xor i32 %b, -1 + %r = and i32 %o, %a + %z = xor i32 %r, %b + ret i32 %z +} + +define i32 @test54(i32 %p1, i32 %p2) { +; CHECK-LABEL: @test54( +; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]] +; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]] +; CHECK-NEXT: [[O:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[R:%.*]] = and i32 [[B]], [[O]] +; CHECK-NEXT: [[Z:%.*]] = xor i32 [[R]], [[A]] +; CHECK-NEXT: ret i32 [[Z]] +; + %a = udiv i32 42, %p1 + %b = udiv i32 42, %p2 + %o = xor i32 %a, -1 + %r = and i32 %b, %o + %z = xor i32 %r, %a + ret i32 %z +} + +define i32 @test55(i32 %p1, i32 %p2) { +; CHECK-LABEL: @test55( +; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]] +; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]] +; CHECK-NEXT: [[O:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[R:%.*]] = and i32 [[B]], [[O]] +; CHECK-NEXT: [[Z:%.*]] = xor i32 [[A]], [[R]] +; CHECK-NEXT: ret i32 [[Z]] +; + %a = udiv i32 42, %p1 + %b = udiv i32 42, %p2 + %o = xor i32 %a, -1 + %r = and i32 %o, %b + %z = xor i32 %a, %r + ret i32 %z +} diff --git a/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll b/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll index 70b50da9f0415a..4598c5732e1213 100644 --- a/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll +++ b/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll @@ -47,11 +47,14 @@ define i32 @test_abs_abs_3(i32 %x) { } ; If the sign bit is known zero, the abs is not needed. +; These cases are only folded by InstCombine, to avoid computing known bits +; twice, for the non-negative and the negative case. 
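The reasoning behind these updated checks: llvm.abs(x, _) is the identity whenever the sign bit of x is known zero, because a provably non-negative value is its own absolute value. The tests below now keep the abs call in their CHECK lines since InstSimplify no longer performs the fold; InstCombine does it instead, computing known bits once and handling both outcomes (return the operand when it is non-negative, negate it when it is known negative). A minimal sketch of the non-negative case, relying only on the documented llvm.abs semantics (the function name is illustrative):

declare i32 @llvm.abs.i32(i32, i1)

; %z comes from a zext of an i31, so its top bit is 0 and it is provably
; non-negative; InstCombine can therefore replace the abs call with %z.
define i32 @abs_of_known_nonneg(i31 %x) {
  %z = zext i31 %x to i32
  %a = call i32 @llvm.abs.i32(i32 %z, i1 false)
  ret i32 %a
}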
define i32 @zext_abs(i31 %x) { ; CHECK-LABEL: @zext_abs( ; CHECK-NEXT: [[ZEXT:%.*]] = zext i31 [[X:%.*]] to i32 -; CHECK-NEXT: ret i32 [[ZEXT]] +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[ZEXT]], i1 false) +; CHECK-NEXT: ret i32 [[ABS]] ; %zext = zext i31 %x to i32 %abs = call i32 @llvm.abs.i32(i32 %zext, i1 false) @@ -61,7 +64,8 @@ define i32 @zext_abs(i31 %x) { define <3 x i82> @lshr_abs(<3 x i82> %x) { ; CHECK-LABEL: @lshr_abs( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i82> [[X:%.*]], -; CHECK-NEXT: ret <3 x i82> [[LSHR]] +; CHECK-NEXT: [[ABS:%.*]] = call <3 x i82> @llvm.abs.v3i82(<3 x i82> [[LSHR]], i1 true) +; CHECK-NEXT: ret <3 x i82> [[ABS]] ; %lshr = lshr <3 x i82> %x, %abs = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %lshr, i1 true) @@ -71,7 +75,8 @@ define <3 x i82> @lshr_abs(<3 x i82> %x) { define i32 @and_abs(i32 %x) { ; CHECK-LABEL: @and_abs( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2147483644 -; CHECK-NEXT: ret i32 [[AND]] +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[AND]], i1 true) +; CHECK-NEXT: ret i32 [[ABS]] ; %and = and i32 %x, 2147483644 %abs = call i32 @llvm.abs.i32(i32 %and, i1 true) @@ -81,7 +86,8 @@ define i32 @and_abs(i32 %x) { define <3 x i82> @select_abs(<3 x i1> %cond) { ; CHECK-LABEL: @select_abs( ; CHECK-NEXT: [[SEL:%.*]] = select <3 x i1> [[COND:%.*]], <3 x i82> zeroinitializer, <3 x i82> -; CHECK-NEXT: ret <3 x i82> [[SEL]] +; CHECK-NEXT: [[ABS:%.*]] = call <3 x i82> @llvm.abs.v3i82(<3 x i82> [[SEL]], i1 false) +; CHECK-NEXT: ret <3 x i82> [[ABS]] ; %sel = select <3 x i1> %cond, <3 x i82> zeroinitializer, <3 x i82> %abs = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %sel, i1 false) @@ -94,7 +100,8 @@ define i32 @assume_abs(i32 %x) { ; CHECK-LABEL: @assume_abs( ; CHECK-NEXT: [[ASSUME:%.*]] = icmp sge i32 [[X:%.*]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[ASSUME]]) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 true) +; CHECK-NEXT: ret i32 [[ABS]] ; %assume = icmp sge i32 %x, 0 call void @llvm.assume(i1 %assume) diff --git a/llvm/test/Transforms/LoopLoadElim/pr47457.ll b/llvm/test/Transforms/LoopLoadElim/pr47457.ll new file mode 100644 index 00000000000000..1b102944cd767a --- /dev/null +++ b/llvm/test/Transforms/LoopLoadElim/pr47457.ll @@ -0,0 +1,45 @@ +; RUN: opt -loop-load-elim -S %s | FileCheck %s +; RUN: opt -passes=loop-load-elim -S %s | FileCheck %s +; REQUIRES: asserts +; XFAIL: * + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" +target triple = "x86_64-unknown-linux-gnu" + +define void @test() { +; CHECK-LABEL: test + +bb: + br label %bb1 + +bb1: ; preds = %bb6, %bb1, %bb + %tmp = phi i32 [ undef, %bb ], [ 0, %bb1 ], [ %tmp3, %bb6 ] + br i1 undef, label %bb1, label %bb2 + +bb2: ; preds = %bb1 + %tmp3 = add i32 %tmp, 1 + %tmp4 = icmp ult i32 %tmp, undef + br i1 %tmp4, label %bb6, label %bb5 + +bb5: ; preds = %bb2 + ret void + +bb6: ; preds = %bb2 + br i1 undef, label %bb7, label %bb1 + +bb7: ; preds = %bb7, %bb6 + %tmp8 = phi i32 [ %tmp15, %bb7 ], [ %tmp3, %bb6 ] + %tmp9 = phi i32 [ %tmp8, %bb7 ], [ %tmp, %bb6 ] + %tmp10 = zext i32 %tmp9 to i64 + %tmp11 = getelementptr inbounds float, float addrspace(1)* null, i64 %tmp10 + %tmp12 = load float, float addrspace(1)* %tmp11, align 4 + %tmp13 = zext i32 %tmp8 to i64 + %tmp14 = getelementptr inbounds float, float addrspace(1)* null, i64 %tmp13 + store float 1.000000e+00, float addrspace(1)* %tmp14, align 4 + %tmp15 = add nuw nsw i32 %tmp8, 1 + %tmp16 = icmp sgt i32 
%tmp8, 78 + br i1 %tmp16, label %bb17, label %bb7 + +bb17: ; preds = %bb7 + unreachable +} diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr47329.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr47329.ll new file mode 100644 index 00000000000000..bd2d6b4b0b4cac --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr47329.ll @@ -0,0 +1,299 @@ +; RUN: opt < %s -loop-reduce +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +@d = internal unnamed_addr global i32** null, align 8 + +define dso_local i32 @main() local_unnamed_addr { +entry: + %.pre.pre = load i32**, i32*** @d, align 8 + br label %for.body9 + +for.body9: ; preds = %for.body9, %entry + %i = phi i32** [ %.pre.pre, %entry ], [ %incdec.ptr, %for.body9 ] + %incdec.ptr = getelementptr inbounds i32*, i32** %i, i64 -1 + br i1 undef, label %for.body9, label %for.inc + +for.inc: ; preds = %for.body9 + br label %for.body9.118 + +for.body9.1: ; preds = %for.inc.547, %for.body9.1 + %i1 = phi i32** [ %incdec.ptr.1, %for.body9.1 ], [ %incdec.ptr.542, %for.inc.547 ] + %incdec.ptr.1 = getelementptr inbounds i32*, i32** %i1, i64 -1 + br i1 undef, label %for.body9.1, label %for.inc.1 + +for.inc.1: ; preds = %for.body9.1 + br label %for.body9.1.1 + +for.body9.2: ; preds = %for.inc.1.5, %for.body9.2 + %i2 = phi i32** [ %incdec.ptr.2, %for.body9.2 ], [ %incdec.ptr.1.5, %for.inc.1.5 ] + %incdec.ptr.2 = getelementptr inbounds i32*, i32** %i2, i64 -1 + br i1 undef, label %for.body9.2, label %for.inc.2 + +for.inc.2: ; preds = %for.body9.2 + br label %for.body9.2.1 + +for.body9.3: ; preds = %for.inc.2.5, %for.body9.3 + %i3 = phi i32** [ %incdec.ptr.3, %for.body9.3 ], [ %incdec.ptr.2.5, %for.inc.2.5 ] + %incdec.ptr.3 = getelementptr inbounds i32*, i32** %i3, i64 -1 + br i1 undef, label %for.body9.3, label %for.inc.3 + +for.inc.3: ; preds = %for.body9.3 + br label %for.body9.3.1 + +for.body9.4: ; preds = %for.inc.3.5, %for.body9.4 + %i4 = phi i32** [ %incdec.ptr.4, %for.body9.4 ], [ %incdec.ptr.3.5, %for.inc.3.5 ] + %incdec.ptr.4 = getelementptr inbounds i32*, i32** %i4, i64 -1 + br i1 undef, label %for.body9.4, label %for.inc.4 + +for.inc.4: ; preds = %for.body9.4 + br label %for.body9.4.1 + +for.body9.5: ; preds = %for.inc.4.5, %for.body9.5 + %i5 = phi i32** [ %incdec.ptr.5, %for.body9.5 ], [ %incdec.ptr.4.5, %for.inc.4.5 ] + %incdec.ptr.5 = getelementptr inbounds i32*, i32** %i5, i64 -1 + br i1 undef, label %for.body9.5, label %for.inc.5 + +for.inc.5: ; preds = %for.body9.5 + br label %for.body9.5.1 + +for.body9.5.1: ; preds = %for.body9.5.1, %for.inc.5 + %i6 = phi i32** [ %incdec.ptr.5.1, %for.body9.5.1 ], [ %incdec.ptr.5, %for.inc.5 ] + %incdec.ptr.5.1 = getelementptr inbounds i32*, i32** %i6, i64 -1 + br i1 undef, label %for.body9.5.1, label %for.inc.5.1 + +for.inc.5.1: ; preds = %for.body9.5.1 + br label %for.body9.5.2 + +for.body9.5.2: ; preds = %for.body9.5.2, %for.inc.5.1 + %i7 = phi i32** [ %incdec.ptr.5.2, %for.body9.5.2 ], [ %incdec.ptr.5.1, %for.inc.5.1 ] + %incdec.ptr.5.2 = getelementptr inbounds i32*, i32** %i7, i64 -1 + br i1 undef, label %for.body9.5.2, label %for.inc.5.2 + +for.inc.5.2: ; preds = %for.body9.5.2 + br label %for.body9.5.3 + +for.body9.5.3: ; preds = %for.body9.5.3, %for.inc.5.2 + %i8 = phi i32** [ %incdec.ptr.5.3, %for.body9.5.3 ], [ %incdec.ptr.5.2, %for.inc.5.2 ] + %incdec.ptr.5.3 = getelementptr inbounds i32*, i32** %i8, i64 -1 + br i1 undef, label %for.body9.5.3, label %for.inc.5.3 + +for.inc.5.3: ; preds = 
%for.body9.5.3 + br label %for.body9.5.4 + +for.body9.5.4: ; preds = %for.body9.5.4, %for.inc.5.3 + %i9 = phi i32** [ %incdec.ptr.5.4, %for.body9.5.4 ], [ %incdec.ptr.5.3, %for.inc.5.3 ] + %incdec.ptr.5.4 = getelementptr inbounds i32*, i32** %i9, i64 -1 + br i1 undef, label %for.body9.5.4, label %for.inc.5.4 + +for.inc.5.4: ; preds = %for.body9.5.4 + br label %for.body9.5.5 + +for.body9.5.5: ; preds = %for.body9.5.5, %for.inc.5.4 + %i10 = phi i32** [ undef, %for.body9.5.5 ], [ %incdec.ptr.5.4, %for.inc.5.4 ] + %i11 = bitcast i32** %i10 to i64* + %i12 = load i64, i64* %i11, align 8 + br label %for.body9.5.5 + +for.body9.4.1: ; preds = %for.body9.4.1, %for.inc.4 + %i13 = phi i32** [ %incdec.ptr.4.1, %for.body9.4.1 ], [ %incdec.ptr.4, %for.inc.4 ] + %incdec.ptr.4.1 = getelementptr inbounds i32*, i32** %i13, i64 -1 + br i1 undef, label %for.body9.4.1, label %for.inc.4.1 + +for.inc.4.1: ; preds = %for.body9.4.1 + br label %for.body9.4.2 + +for.body9.4.2: ; preds = %for.body9.4.2, %for.inc.4.1 + %i14 = phi i32** [ %incdec.ptr.4.2, %for.body9.4.2 ], [ %incdec.ptr.4.1, %for.inc.4.1 ] + %incdec.ptr.4.2 = getelementptr inbounds i32*, i32** %i14, i64 -1 + br i1 undef, label %for.body9.4.2, label %for.inc.4.2 + +for.inc.4.2: ; preds = %for.body9.4.2 + br label %for.body9.4.3 + +for.body9.4.3: ; preds = %for.body9.4.3, %for.inc.4.2 + %i15 = phi i32** [ %incdec.ptr.4.3, %for.body9.4.3 ], [ %incdec.ptr.4.2, %for.inc.4.2 ] + %incdec.ptr.4.3 = getelementptr inbounds i32*, i32** %i15, i64 -1 + br i1 undef, label %for.body9.4.3, label %for.inc.4.3 + +for.inc.4.3: ; preds = %for.body9.4.3 + br label %for.body9.4.4 + +for.body9.4.4: ; preds = %for.body9.4.4, %for.inc.4.3 + %i16 = phi i32** [ %incdec.ptr.4.4, %for.body9.4.4 ], [ %incdec.ptr.4.3, %for.inc.4.3 ] + %incdec.ptr.4.4 = getelementptr inbounds i32*, i32** %i16, i64 -1 + br i1 undef, label %for.body9.4.4, label %for.inc.4.4 + +for.inc.4.4: ; preds = %for.body9.4.4 + br label %for.body9.4.5 + +for.body9.4.5: ; preds = %for.body9.4.5, %for.inc.4.4 + %i17 = phi i32** [ %incdec.ptr.4.5, %for.body9.4.5 ], [ %incdec.ptr.4.4, %for.inc.4.4 ] + %incdec.ptr.4.5 = getelementptr inbounds i32*, i32** %i17, i64 -1 + br i1 undef, label %for.body9.4.5, label %for.inc.4.5 + +for.inc.4.5: ; preds = %for.body9.4.5 + br label %for.body9.5 + +for.body9.3.1: ; preds = %for.body9.3.1, %for.inc.3 + %i18 = phi i32** [ %incdec.ptr.3.1, %for.body9.3.1 ], [ %incdec.ptr.3, %for.inc.3 ] + %incdec.ptr.3.1 = getelementptr inbounds i32*, i32** %i18, i64 -1 + br i1 undef, label %for.body9.3.1, label %for.inc.3.1 + +for.inc.3.1: ; preds = %for.body9.3.1 + br label %for.body9.3.2 + +for.body9.3.2: ; preds = %for.body9.3.2, %for.inc.3.1 + %i19 = phi i32** [ %incdec.ptr.3.2, %for.body9.3.2 ], [ %incdec.ptr.3.1, %for.inc.3.1 ] + %incdec.ptr.3.2 = getelementptr inbounds i32*, i32** %i19, i64 -1 + br i1 undef, label %for.body9.3.2, label %for.inc.3.2 + +for.inc.3.2: ; preds = %for.body9.3.2 + br label %for.body9.3.3 + +for.body9.3.3: ; preds = %for.body9.3.3, %for.inc.3.2 + %i20 = phi i32** [ %incdec.ptr.3.3, %for.body9.3.3 ], [ %incdec.ptr.3.2, %for.inc.3.2 ] + %incdec.ptr.3.3 = getelementptr inbounds i32*, i32** %i20, i64 -1 + br i1 undef, label %for.body9.3.3, label %for.inc.3.3 + +for.inc.3.3: ; preds = %for.body9.3.3 + br label %for.body9.3.4 + +for.body9.3.4: ; preds = %for.body9.3.4, %for.inc.3.3 + %i21 = phi i32** [ %incdec.ptr.3.4, %for.body9.3.4 ], [ %incdec.ptr.3.3, %for.inc.3.3 ] + %incdec.ptr.3.4 = getelementptr inbounds i32*, i32** %i21, i64 -1 + br i1 undef, label 
%for.body9.3.4, label %for.inc.3.4 + +for.inc.3.4: ; preds = %for.body9.3.4 + br label %for.body9.3.5 + +for.body9.3.5: ; preds = %for.body9.3.5, %for.inc.3.4 + %i22 = phi i32** [ %incdec.ptr.3.5, %for.body9.3.5 ], [ %incdec.ptr.3.4, %for.inc.3.4 ] + %incdec.ptr.3.5 = getelementptr inbounds i32*, i32** %i22, i64 -1 + br i1 undef, label %for.body9.3.5, label %for.inc.3.5 + +for.inc.3.5: ; preds = %for.body9.3.5 + br label %for.body9.4 + +for.body9.2.1: ; preds = %for.body9.2.1, %for.inc.2 + %i23 = phi i32** [ %incdec.ptr.2.1, %for.body9.2.1 ], [ %incdec.ptr.2, %for.inc.2 ] + %incdec.ptr.2.1 = getelementptr inbounds i32*, i32** %i23, i64 -1 + br i1 undef, label %for.body9.2.1, label %for.inc.2.1 + +for.inc.2.1: ; preds = %for.body9.2.1 + br label %for.body9.2.2 + +for.body9.2.2: ; preds = %for.body9.2.2, %for.inc.2.1 + %i24 = phi i32** [ %incdec.ptr.2.2, %for.body9.2.2 ], [ %incdec.ptr.2.1, %for.inc.2.1 ] + %incdec.ptr.2.2 = getelementptr inbounds i32*, i32** %i24, i64 -1 + br i1 undef, label %for.body9.2.2, label %for.inc.2.2 + +for.inc.2.2: ; preds = %for.body9.2.2 + br label %for.body9.2.3 + +for.body9.2.3: ; preds = %for.body9.2.3, %for.inc.2.2 + %i25 = phi i32** [ %incdec.ptr.2.3, %for.body9.2.3 ], [ %incdec.ptr.2.2, %for.inc.2.2 ] + %incdec.ptr.2.3 = getelementptr inbounds i32*, i32** %i25, i64 -1 + br i1 undef, label %for.body9.2.3, label %for.inc.2.3 + +for.inc.2.3: ; preds = %for.body9.2.3 + br label %for.body9.2.4 + +for.body9.2.4: ; preds = %for.body9.2.4, %for.inc.2.3 + %i26 = phi i32** [ %incdec.ptr.2.4, %for.body9.2.4 ], [ %incdec.ptr.2.3, %for.inc.2.3 ] + %incdec.ptr.2.4 = getelementptr inbounds i32*, i32** %i26, i64 -1 + br i1 undef, label %for.body9.2.4, label %for.inc.2.4 + +for.inc.2.4: ; preds = %for.body9.2.4 + br label %for.body9.2.5 + +for.body9.2.5: ; preds = %for.body9.2.5, %for.inc.2.4 + %i27 = phi i32** [ %incdec.ptr.2.5, %for.body9.2.5 ], [ %incdec.ptr.2.4, %for.inc.2.4 ] + %incdec.ptr.2.5 = getelementptr inbounds i32*, i32** %i27, i64 -1 + br i1 undef, label %for.body9.2.5, label %for.inc.2.5 + +for.inc.2.5: ; preds = %for.body9.2.5 + br label %for.body9.3 + +for.body9.1.1: ; preds = %for.body9.1.1, %for.inc.1 + %i28 = phi i32** [ %incdec.ptr.1.1, %for.body9.1.1 ], [ %incdec.ptr.1, %for.inc.1 ] + %incdec.ptr.1.1 = getelementptr inbounds i32*, i32** %i28, i64 -1 + br i1 undef, label %for.body9.1.1, label %for.inc.1.1 + +for.inc.1.1: ; preds = %for.body9.1.1 + br label %for.body9.1.2 + +for.body9.1.2: ; preds = %for.body9.1.2, %for.inc.1.1 + %i29 = phi i32** [ %incdec.ptr.1.2, %for.body9.1.2 ], [ %incdec.ptr.1.1, %for.inc.1.1 ] + %incdec.ptr.1.2 = getelementptr inbounds i32*, i32** %i29, i64 -1 + br i1 undef, label %for.body9.1.2, label %for.inc.1.2 + +for.inc.1.2: ; preds = %for.body9.1.2 + br label %for.body9.1.3 + +for.body9.1.3: ; preds = %for.body9.1.3, %for.inc.1.2 + %i30 = phi i32** [ %incdec.ptr.1.3, %for.body9.1.3 ], [ %incdec.ptr.1.2, %for.inc.1.2 ] + %incdec.ptr.1.3 = getelementptr inbounds i32*, i32** %i30, i64 -1 + br i1 undef, label %for.body9.1.3, label %for.inc.1.3 + +for.inc.1.3: ; preds = %for.body9.1.3 + br label %for.body9.1.4 + +for.body9.1.4: ; preds = %for.body9.1.4, %for.inc.1.3 + %i31 = phi i32** [ %incdec.ptr.1.4, %for.body9.1.4 ], [ %incdec.ptr.1.3, %for.inc.1.3 ] + %incdec.ptr.1.4 = getelementptr inbounds i32*, i32** %i31, i64 -1 + br i1 undef, label %for.body9.1.4, label %for.inc.1.4 + +for.inc.1.4: ; preds = %for.body9.1.4 + br label %for.body9.1.5 + +for.body9.1.5: ; preds = %for.body9.1.5, %for.inc.1.4 + %i32 = phi i32** [ 
%incdec.ptr.1.5, %for.body9.1.5 ], [ %incdec.ptr.1.4, %for.inc.1.4 ] + %incdec.ptr.1.5 = getelementptr inbounds i32*, i32** %i32, i64 -1 + br i1 undef, label %for.body9.1.5, label %for.inc.1.5 + +for.inc.1.5: ; preds = %for.body9.1.5 + br label %for.body9.2 + +for.body9.118: ; preds = %for.body9.118, %for.inc + %i33 = phi i32** [ %incdec.ptr, %for.inc ], [ %incdec.ptr.114, %for.body9.118 ] + %incdec.ptr.114 = getelementptr inbounds i32*, i32** %i33, i64 -1 + br i1 undef, label %for.body9.118, label %for.inc.119 + +for.inc.119: ; preds = %for.body9.118 + br label %for.body9.225 + +for.body9.225: ; preds = %for.body9.225, %for.inc.119 + %i34 = phi i32** [ %incdec.ptr.114, %for.inc.119 ], [ %incdec.ptr.221, %for.body9.225 ] + %incdec.ptr.221 = getelementptr inbounds i32*, i32** %i34, i64 -1 + %i35 = bitcast i32** %i34 to i64* + %i36 = load i64, i64* %i35, align 8 + br i1 undef, label %for.body9.225, label %for.inc.226 + +for.inc.226: ; preds = %for.body9.225 + br label %for.body9.332 + +for.body9.332: ; preds = %for.body9.332, %for.inc.226 + %i37 = phi i32** [ %incdec.ptr.221, %for.inc.226 ], [ %incdec.ptr.328, %for.body9.332 ] + %incdec.ptr.328 = getelementptr inbounds i32*, i32** %i37, i64 -1 + br i1 undef, label %for.body9.332, label %for.inc.333 + +for.inc.333: ; preds = %for.body9.332 + br label %for.body9.439 + +for.body9.439: ; preds = %for.body9.439, %for.inc.333 + %i38 = phi i32** [ %incdec.ptr.328, %for.inc.333 ], [ %incdec.ptr.435, %for.body9.439 ] + %incdec.ptr.435 = getelementptr inbounds i32*, i32** %i38, i64 -1 + br i1 undef, label %for.body9.439, label %for.inc.440 + +for.inc.440: ; preds = %for.body9.439 + br label %for.body9.546 + +for.body9.546: ; preds = %for.body9.546, %for.inc.440 + %i39 = phi i32** [ %incdec.ptr.435, %for.inc.440 ], [ %incdec.ptr.542, %for.body9.546 ] + %incdec.ptr.542 = getelementptr inbounds i32*, i32** %i39, i64 -1 + br i1 undef, label %for.body9.546, label %for.inc.547 + +for.inc.547: ; preds = %for.body9.546 + br label %for.body9.1 +} diff --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll index c2e1ae4f53a0f8..1a22d7f0b84983 100644 --- a/llvm/test/Transforms/PGOProfile/chr.ll +++ b/llvm/test/Transforms/PGOProfile/chr.ll @@ -2006,9 +2006,16 @@ define i64 @test_chr_22(i1 %i, i64* %j, i64 %v0) !prof !14 { ; CHECK-NEXT: bb0: ; CHECK-NEXT: [[REASS_ADD:%.*]] = shl i64 [[V0:%.*]], 1 ; CHECK-NEXT: [[V2:%.*]] = add i64 [[REASS_ADD]], 3 +; CHECK-NEXT: [[C1:%.*]] = icmp slt i64 [[V2]], 100 +; CHECK-NEXT: br i1 [[C1]], label [[BB0_SPLIT:%.*]], label [[BB0_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb0.split: ; CHECK-NEXT: [[V299:%.*]] = mul i64 [[V2]], 7860086430977039991 ; CHECK-NEXT: store i64 [[V299]], i64* [[J:%.*]], align 4 ; CHECK-NEXT: ret i64 99 +; CHECK: bb0.split.nonchr: +; CHECK-NEXT: [[V299_NONCHR:%.*]] = mul i64 [[V2]], 7860086430977039991 +; CHECK-NEXT: store i64 [[V299_NONCHR]], i64* [[J]], align 4 +; CHECK-NEXT: ret i64 99 ; bb0: %v1 = add i64 %v0, 3 diff --git a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll index 1d8cce6879e9d6..314af1c1414548 100644 --- a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll +++ b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll @@ -5,14 +5,11 @@ ; RUN: opt -O3 -rotation-max-header-size=1 -S < %s | FileCheck %s --check-prefixes=HOIST,THR1,FALLBACK2 ; RUN: opt -passes='default' -rotation-max-header-size=1 -S < %s | 
FileCheck %s --check-prefixes=HOIST,THR1,FALLBACK3 -; RUN: opt -O3 -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefixes=HOIST,THR2,FALLBACK4 -; RUN: opt -passes='default' -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefixes=HOIST,THR2,FALLBACK5 +; RUN: opt -O3 -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefixes=ROTATED_LATER,ROTATED_LATER_OLDPM,FALLBACK4 +; RUN: opt -passes='default' -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefixes=ROTATED_LATER,ROTATED_LATER_NEWPM,FALLBACK5 -; RUN: opt -O3 -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefixes=ROTATED_LATER,ROTATED_LATER_OLDPM,FALLBACK6 -; RUN: opt -passes='default' -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefixes=ROTATED_LATER,ROTATED_LATER_NEWPM,FALLBACK7 - -; RUN: opt -O3 -rotation-max-header-size=4 -S < %s | FileCheck %s --check-prefixes=ROTATE,ROTATE_OLDPM,FALLBACK8 -; RUN: opt -passes='default' -rotation-max-header-size=4 -S < %s | FileCheck %s --check-prefixes=ROTATE,ROTATE_NEWPM,FALLBACK9 +; RUN: opt -O3 -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefixes=ROTATE,ROTATE_OLDPM,FALLBACK6 +; RUN: opt -passes='default' -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefixes=ROTATE,ROTATE_NEWPM,FALLBACK7 ; This example is produced from a very basic C code: ; @@ -61,8 +58,8 @@ define void @_Z4loopi(i32 %width) { ; HOIST-NEXT: br label [[FOR_COND:%.*]] ; HOIST: for.cond: ; HOIST-NEXT: [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ] -; HOIST-NEXT: tail call void @f0() ; HOIST-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[TMP0]] +; HOIST-NEXT: tail call void @f0() ; HOIST-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] ; HOIST: for.cond.cleanup: ; HOIST-NEXT: tail call void @f2() @@ -80,17 +77,17 @@ define void @_Z4loopi(i32 %width) { ; ROTATED_LATER_OLDPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATED_LATER_OLDPM: for.cond.preheader: ; ROTATED_LATER_OLDPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATED_LATER_OLDPM-NEXT: tail call void @f0() ; ROTATED_LATER_OLDPM-NEXT: [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0 ; ROTATED_LATER_OLDPM-NEXT: br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] ; ROTATED_LATER_OLDPM: for.cond.cleanup: +; ROTATED_LATER_OLDPM-NEXT: tail call void @f0() ; ROTATED_LATER_OLDPM-NEXT: tail call void @f2() ; ROTATED_LATER_OLDPM-NEXT: br label [[RETURN]] ; ROTATED_LATER_OLDPM: for.body: ; ROTATED_LATER_OLDPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ] +; ROTATED_LATER_OLDPM-NEXT: tail call void @f0() ; ROTATED_LATER_OLDPM-NEXT: tail call void @f1() ; ROTATED_LATER_OLDPM-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 -; ROTATED_LATER_OLDPM-NEXT: tail call void @f0() ; ROTATED_LATER_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] ; ROTATED_LATER_OLDPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ROTATED_LATER_OLDPM: return: @@ -102,19 +99,19 @@ define void @_Z4loopi(i32 %width) { ; ROTATED_LATER_NEWPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATED_LATER_NEWPM: for.cond.preheader: ; ROTATED_LATER_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATED_LATER_NEWPM-NEXT: tail call void @f0() ; ROTATED_LATER_NEWPM-NEXT: [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0 ; ROTATED_LATER_NEWPM-NEXT: br i1 
[[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE:%.*]] ; ROTATED_LATER_NEWPM: for.cond.preheader.for.body_crit_edge: ; ROTATED_LATER_NEWPM-NEXT: [[INC_1:%.*]] = add nuw i32 0, 1 ; ROTATED_LATER_NEWPM-NEXT: br label [[FOR_BODY:%.*]] ; ROTATED_LATER_NEWPM: for.cond.cleanup: +; ROTATED_LATER_NEWPM-NEXT: tail call void @f0() ; ROTATED_LATER_NEWPM-NEXT: tail call void @f2() ; ROTATED_LATER_NEWPM-NEXT: br label [[RETURN]] ; ROTATED_LATER_NEWPM: for.body: ; ROTATED_LATER_NEWPM-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE]] ] -; ROTATED_LATER_NEWPM-NEXT: tail call void @f1() ; ROTATED_LATER_NEWPM-NEXT: tail call void @f0() +; ROTATED_LATER_NEWPM-NEXT: tail call void @f1() ; ROTATED_LATER_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]] ; ROTATED_LATER_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ; ROTATED_LATER_NEWPM: for.body.for.body_crit_edge: @@ -129,19 +126,19 @@ define void @_Z4loopi(i32 %width) { ; ROTATE_OLDPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATE_OLDPM: for.cond.preheader: ; ROTATE_OLDPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATE_OLDPM-NEXT: tail call void @f0() ; ROTATE_OLDPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; ROTATE_OLDPM: for.body.preheader: ; ROTATE_OLDPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 ; ROTATE_OLDPM-NEXT: br label [[FOR_BODY:%.*]] ; ROTATE_OLDPM: for.cond.cleanup: +; ROTATE_OLDPM-NEXT: tail call void @f0() ; ROTATE_OLDPM-NEXT: tail call void @f2() ; ROTATE_OLDPM-NEXT: br label [[RETURN]] ; ROTATE_OLDPM: for.body: ; ROTATE_OLDPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; ROTATE_OLDPM-NEXT: tail call void @f0() ; ROTATE_OLDPM-NEXT: tail call void @f1() ; ROTATE_OLDPM-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 -; ROTATE_OLDPM-NEXT: tail call void @f0() ; ROTATE_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] ; ROTATE_OLDPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ROTATE_OLDPM: return: @@ -153,19 +150,19 @@ define void @_Z4loopi(i32 %width) { ; ROTATE_NEWPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATE_NEWPM: for.cond.preheader: ; ROTATE_NEWPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATE_NEWPM-NEXT: tail call void @f0() ; ROTATE_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; ROTATE_NEWPM: for.body.preheader: ; ROTATE_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 ; ROTATE_NEWPM-NEXT: [[INC_1:%.*]] = add nuw nsw i32 0, 1 ; ROTATE_NEWPM-NEXT: br label [[FOR_BODY:%.*]] ; ROTATE_NEWPM: for.cond.cleanup: +; ROTATE_NEWPM-NEXT: tail call void @f0() ; ROTATE_NEWPM-NEXT: tail call void @f2() ; ROTATE_NEWPM-NEXT: br label [[RETURN]] ; ROTATE_NEWPM: for.body: ; ROTATE_NEWPM-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_BODY_PREHEADER]] ] -; ROTATE_NEWPM-NEXT: tail call void @f1() ; ROTATE_NEWPM-NEXT: tail call void @f0() +; ROTATE_NEWPM-NEXT: tail call void @f1() ; ROTATE_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]] ; ROTATE_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ; ROTATE_NEWPM: 
for.body.for.body_crit_edge: diff --git a/llvm/test/Transforms/SCCP/intrinsics.ll b/llvm/test/Transforms/SCCP/intrinsics.ll index d06b94162b5be4..e261a59d3d6bcb 100644 --- a/llvm/test/Transforms/SCCP/intrinsics.ll +++ b/llvm/test/Transforms/SCCP/intrinsics.ll @@ -12,10 +12,8 @@ define void @abs1(i8* %p) { ; CHECK-LABEL: @abs1( ; CHECK-NEXT: [[X:%.*]] = load i8, i8* [[P:%.*]], align 1, [[RNG0:!range !.*]] ; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X]], i1 false) -; CHECK-NEXT: [[CMP1:%.*]] = icmp sge i8 [[ABS]], 0 -; CHECK-NEXT: call void @use(i1 [[CMP1]]) -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i8 [[ABS]], 10 -; CHECK-NEXT: call void @use(i1 [[CMP2]]) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[CMP3:%.*]] = icmp sge i8 [[ABS]], 1 ; CHECK-NEXT: call void @use(i1 [[CMP3]]) ; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i8 [[ABS]], 9 @@ -40,8 +38,7 @@ define void @abs1(i8* %p) { define void @abs2(i8 %x) { ; CHECK-LABEL: @abs2( ; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 true) -; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[ABS]], 0 -; CHECK-NEXT: call void @use(i1 [[CMP]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: ret void ; %abs = call i8 @llvm.abs.i8(i8 %x, i1 true) @@ -68,10 +65,8 @@ define void @umax1(i8* %p1, i8* %p2) { ; CHECK-NEXT: [[X1:%.*]] = load i8, i8* [[P1:%.*]], align 1, [[RNG1:!range !.*]] ; CHECK-NEXT: [[X2:%.*]] = load i8, i8* [[P2:%.*]], align 1, [[RNG2:!range !.*]] ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X1]], i8 [[X2]]) -; CHECK-NEXT: [[CMP1:%.*]] = icmp uge i8 [[M]], 5 -; CHECK-NEXT: call void @use(i1 [[CMP1]]) -; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i8 [[M]], 15 -; CHECK-NEXT: call void @use(i1 [[CMP2]]) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[CMP3:%.*]] = icmp uge i8 [[M]], 6 ; CHECK-NEXT: call void @use(i1 [[CMP3]]) ; CHECK-NEXT: [[CMP4:%.*]] = icmp ult i8 [[M]], 14 @@ -95,8 +90,7 @@ define void @umax1(i8* %p1, i8* %p2) { define void @umax2(i8 %x) { ; CHECK-LABEL: @umax2( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 10) -; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[M]], 10 -; CHECK-NEXT: call void @use(i1 [[CMP]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: ret void ; %m = call i8 @llvm.umax.i8(i8 %x, i8 10) diff --git a/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll b/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll index b58017ba7ef0b9..37cbc4640e4153 100644 --- a/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll +++ b/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -simplifycfg -hoist-common-insts=1 -S < %s | FileCheck %s --check-prefixes=HOIST ; RUN: opt -simplifycfg -hoist-common-insts=0 -S < %s | FileCheck %s --check-prefixes=NOHOIST -; RUN: opt -simplifycfg -S < %s | FileCheck %s --check-prefixes=HOIST,DEFAULT +; RUN: opt -simplifycfg -S < %s | FileCheck %s --check-prefixes=NOHOIST,DEFAULT ; This example is produced from a very basic C code: ; diff --git a/llvm/test/tools/llvm-cov/gcov-4.7.c b/llvm/test/tools/llvm-cov/gcov-4.7.c index d92953a6b0b65f..211c635f51283d 100644 --- a/llvm/test/tools/llvm-cov/gcov-4.7.c +++ b/llvm/test/tools/llvm-cov/gcov-4.7.c @@ -1,27 +1,25 @@ /// Test that llvm-cov supports gcov [4.7,8) compatible format. 
#include <math.h> #include <stdio.h> -int main() { // GCOV: #####: [[@LINE]]:int main - double a[11], result; // GCOV-NEXT: -: [[@LINE]]: - for (int i = 0; i < 11; i++) // GCOV-NEXT: #####: [[@LINE]]: +int main() { // GCOV: 1: [[@LINE]]:int main + double a[11], result; // GCOV-NEXT: -: [[@LINE]]: + for (int i = 0; i < 11; i++) // GCOV-NEXT: 12: [[@LINE]]: scanf("%lf", &a[i]); // GCOV-NEXT: 11: [[@LINE]]: - for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 4: [[@LINE]]: + for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 12: [[@LINE]]: result = sqrt(fabs(a[i])) + 5 * pow(a[i], 3); // GCOV-NEXT: 11: [[@LINE]]: printf("\nf(%lf) = "); // GCOV-NEXT: 11: [[@LINE]]: - if (result > 400) printf("Overflow!"); // GCOV-NEXT: #####: [[@LINE]]: - else printf("%lf", result); // GCOV-NEXT: 4: [[@LINE]]: - } // GCOV-NEXT: -: [[@LINE]]: - return 0; // GCOV-NEXT: #####: [[@LINE]]: -} // GCOV-NEXT: -: [[@LINE]]: -/// FIXME several lines do not match gcov 7 + if (result > 400) printf("Overflow!"); // GCOV-NEXT: 11: [[@LINE]]: + else printf("%lf", result); // GCOV-NEXT: 4: [[@LINE]]: + } // GCOV-NEXT: -: [[@LINE]]: + return 0; // GCOV-NEXT: 1: [[@LINE]]: +} // GCOV-NEXT: -: [[@LINE]]: // RUN: rm -rf %t && mkdir %t && cd %t // RUN: cp %s %p/Inputs/gcov-4.7.gc* . -/// FIXME Lines executed:100.00% of 12 // RUN: llvm-cov gcov gcov-4.7.c | FileCheck %s // CHECK: File 'gcov-4.7.c' -// CHECK-NEXT: Lines executed:55.56% of 9 +// CHECK-NEXT: Lines executed:100.00% of 9 // CHECK-NEXT: Creating 'gcov-4.7.c.gcov' // RUN: FileCheck --input-file=%t/gcov-4.7.c.gcov --check-prefix=HEADER %s diff --git a/llvm/test/tools/llvm-cov/gcov-8.c b/llvm/test/tools/llvm-cov/gcov-8.c index eef3511e93a7c9..996e4cbe71b33d 100644 --- a/llvm/test/tools/llvm-cov/gcov-8.c +++ b/llvm/test/tools/llvm-cov/gcov-8.c @@ -1,29 +1,27 @@ /// Test that llvm-cov supports gcov 8 compatible format. #include <math.h> #include <stdio.h> -int main() { // GCOV: 1: [[@LINE]]:int main - double a[11], result; // GCOV-NEXT: -: [[@LINE]]: +int main() { // GCOV: 1: [[@LINE]]:int main + double a[11], result; // GCOV-NEXT: -: [[@LINE]]: for (int i = 0; i < 11; i++) // GCOV-NEXT: 12: [[@LINE]]: scanf("%lf", &a[i]); // GCOV-NEXT: 11: [[@LINE]]: - for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 7: [[@LINE]]: + for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 12: [[@LINE]]: result = sqrt(fabs(a[i])) + 5 * pow(a[i], 3); // GCOV-NEXT: 11: [[@LINE]]: printf("\nf(%lf) = "); // GCOV-NEXT: 11: [[@LINE]]: if (result > 400) printf("Overflow!"); // GCOV-NEXT: 11: [[@LINE]]: - else printf("%lf", result); // GCOV-NEXT: #####: [[@LINE]]: - } // GCOV-NEXT: -: [[@LINE]]: - return 0; // GCOV-NEXT: #####: [[@LINE]]: -} // GCOV-NEXT: -: [[@LINE]]: -/// FIXME several lines do not match gcov 8 + else printf("%lf", result); // GCOV-NEXT: 4: [[@LINE]]: + } // GCOV-NEXT: -: [[@LINE]]: + return 0; // GCOV-NEXT: 1: [[@LINE]]: +} // GCOV-NEXT: -: [[@LINE]]: // RUN: rm -rf %t && mkdir %t && cd %t // RUN: cp %s %p/Inputs/gcov-8.gc* . 
-/// FIXME Lines executed:100.00% of 12 // RUN: llvm-cov gcov gcov-8.c | FileCheck %s --check-prefixes=OUT,OUTFILE // OUT: File 'gcov-8.c' -// OUT-NEXT: Lines executed:77.78% of 9 +// OUT-NEXT: Lines executed:100.00% of 9 // OUT-B-NEXT: Branches executed:85.71% of 14 -// OUT-B-NEXT: Taken at least once:42.86% of 14 +// OUT-B-NEXT: Taken at least once:71.43% of 14 // OUT-B-NEXT: No calls // OUTFILE-NEXT: Creating 'gcov-8.c.gcov' // OUT-EMPTY: @@ -51,23 +49,23 @@ int main() { // GCOV: 1: [[@LINE]]:int // I-NEXT:lcount:4,1 // I-NEXT:lcount:6,12 // I-B-NEXT:branch:6,taken -// I-B-NEXT:branch:6,nottaken +// I-B-NEXT:branch:6,taken // I-NEXT:lcount:7,11 // I-B-NEXT:branch:7,taken // I-B-NEXT:branch:7,nottaken -// I-NEXT:lcount:8,7 +// I-NEXT:lcount:8,12 +// I-B-NEXT:branch:8,taken // I-B-NEXT:branch:8,taken -// I-B-NEXT:branch:8,nottaken // I-NEXT:lcount:9,11 // I-NEXT:lcount:10,11 // I-B-NEXT:branch:10,taken // I-B-NEXT:branch:10,nottaken // I-NEXT:lcount:11,11 // I-B-NEXT:branch:11,taken -// I-B-NEXT:branch:11,nottaken +// I-B-NEXT:branch:11,taken // I-B-NEXT:branch:11,taken // I-B-NEXT:branch:11,nottaken -// I-NEXT:lcount:12,0 +// I-NEXT:lcount:12,4 // I-B-NEXT:branch:12,notexec // I-B-NEXT:branch:12,notexec -// I-NEXT:lcount:14,0 +// I-NEXT:lcount:14,1 diff --git a/llvm/test/tools/llvm-cov/gcov-9.c b/llvm/test/tools/llvm-cov/gcov-9.c index 335e6c0663dbef..a2e9cf47497363 100644 --- a/llvm/test/tools/llvm-cov/gcov-9.c +++ b/llvm/test/tools/llvm-cov/gcov-9.c @@ -1,27 +1,25 @@ /// Test that llvm-cov supports gcov 9 compatible format. #include <math.h> #include <stdio.h> -int main() { // GCOV: 1: [[@LINE]]:int main - double a[11], result; // GCOV-NEXT: -: [[@LINE]]: +int main() { // GCOV: 1: [[@LINE]]:int main + double a[11], result; // GCOV-NEXT: -: [[@LINE]]: for (int i = 0; i < 11; i++) // GCOV-NEXT: 12: [[@LINE]]: scanf("%lf", &a[i]); // GCOV-NEXT: 11: [[@LINE]]: - for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 7: [[@LINE]]: + for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 12: [[@LINE]]: result = sqrt(fabs(a[i])) + 5 * pow(a[i], 3); // GCOV-NEXT: 11: [[@LINE]]: printf("\nf(%lf) = "); // GCOV-NEXT: 11: [[@LINE]]: if (result > 400) printf("Overflow!"); // GCOV-NEXT: 11: [[@LINE]]: - else printf("%lf", result); // GCOV-NEXT: #####: [[@LINE]]: - } // GCOV-NEXT: -: [[@LINE]]: - return 0; // GCOV-NEXT: #####: [[@LINE]]: -} // GCOV-NEXT: -: [[@LINE]]: -/// FIXME several lines do not match gcov 9 + else printf("%lf", result); // GCOV-NEXT: 4: [[@LINE]]: + } // GCOV-NEXT: -: [[@LINE]]: + return 0; // GCOV-NEXT: 1: [[@LINE]]: +} // GCOV-NEXT: -: [[@LINE]]: // RUN: rm -rf %t && mkdir %t && cd %t // RUN: cp %s %p/Inputs/gcov-9.gc* . 
-/// FIXME Lines executed:100.00% of 12 // RUN: llvm-cov gcov gcov-9.c | FileCheck %s // CHECK: File 'gcov-9.c' -// CHECK-NEXT: Lines executed:77.78% of 9 +// CHECK-NEXT: Lines executed:100.00% of 9 // CHECK-NEXT: Creating 'gcov-9.c.gcov' // RUN: FileCheck --input-file=%t/gcov-9.c.gcov --check-prefix=HEADER %s diff --git a/llvm/test/tools/llvm-dwarfdump/X86/lookup.s b/llvm/test/tools/llvm-dwarfdump/X86/lookup.s index 74f3314a4f4ec1..fed2271f70a065 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/lookup.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/lookup.s @@ -37,9 +37,9 @@ # LEX: DW_AT_low_pc (0x0000000000000004) # LEX: DW_AT_high_pc (0x0000000000000014) -# A: Line info: file 'foo.c', line 3, column 9, start line 1 -# B: Line info: file 'foo.c', line 4, column 6, start line 1 -# C: Line info: file 'foo.c', line 6, column 1, start line 1 +# A: Line info: file 'foo.c', line 3, column 9, start file 'foo.c', start line 1 +# B: Line info: file 'foo.c', line 4, column 6, start file 'foo.c', start line 1 +# C: Line info: file 'foo.c', line 6, column 1, start file 'foo.c', start line 1 .section __TEXT,__text,regular,pure_instructions .macosx_version_min 10, 13 diff --git a/llvm/test/tools/llvm-ml/named_bitwise_operators.test b/llvm/test/tools/llvm-ml/named_bitwise_operators.test new file mode 100644 index 00000000000000..f122dbe842d0f7 --- /dev/null +++ b/llvm/test/tools/llvm-ml/named_bitwise_operators.test @@ -0,0 +1,20 @@ +; RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.data + +t1 BYTE NOT 1 +; CHECK: t1: +; CHECK-NEXT: .byte -2 + +t2 BYTE 1 OR 2 +; CHECK: t2: +; CHECK-NEXT: .byte 3 + +t3 BYTE 6 AND 10 +; CHECK: t3: +; CHECK-NEXT: .byte 2 + +.code +xor eax, eax + +END diff --git a/llvm/test/tools/llvm-ml/struct.test b/llvm/test/tools/llvm-ml/struct.test index fa85ecd455dda5..38fc763fc7e1f4 100644 --- a/llvm/test/tools/llvm-ml/struct.test +++ b/llvm/test/tools/llvm-ml/struct.test @@ -34,11 +34,9 @@ t1 foobar <> ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 2 ; -; , with internal alignment padding +; , with no alignment padding (field size < alignment) ; CHECK-NEXT: .byte 6 -; CHECK-NEXT: .zero 1 ; CHECK-NEXT: .byte 7 -; CHECK-NEXT: .zero 1 ; ; BYTE "abcde", plus alignment padding ; CHECK-NEXT: .byte 97 @@ -65,11 +63,9 @@ t2 FOOBAR <"gh",,<10,11>,<12>,"ijk"> ; CHECK-NEXT: .byte 10 ; CHECK-NEXT: .byte 11 ; -; , with internal alignment padding +; , with no alignment padding (field size < alignment) ; CHECK-NEXT: .byte 12 -; CHECK-NEXT: .zero 1 ; CHECK-NEXT: .byte 7 -; CHECK-NEXT: .zero 1 ; ; BYTE "ijk", padded with " ", plus alignment padding ; CHECK-NEXT: .byte 105 @@ -87,16 +83,16 @@ mov eax, [t2].f.h mov eax, [t2.f.h] ; CHECK: t3: -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] t4: mov eax, j.FOOBAR.f.h mov eax, j.baz.b ; CHECK: t4: -; CHECK-NEXT: mov eax, dword ptr [rip + j+12] +; CHECK-NEXT: mov eax, dword ptr [rip + j+11] ; CHECK-NEXT: mov eax, dword ptr [rip + j+1] t5: @@ -105,9 +101,9 @@ mov eax, [ebx.FOOBAR].f.h mov eax, [ebx.FOOBAR.f.h] ; CHECK: t5: -; CHECK-NEXT: mov eax, dword ptr [ebx + 12] -; CHECK-NEXT: mov eax, dword ptr [ebx + 12] -; CHECK-NEXT: mov eax, dword ptr [ebx + 12] +; CHECK-NEXT: mov eax, dword ptr [ebx + 11] +; CHECK-NEXT: mov eax, dword ptr [ebx + 11] +; CHECK-NEXT: mov eax, dword ptr [ebx + 11] t6: mov eax, t2.FOOBAR.f.h @@ -116,10 
+112,10 @@ mov eax, [t2.FOOBAR].f.h mov eax, [t2.FOOBAR.f.h] ; CHECK: t6: -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] t7: mov eax, [ebx].FOOBAR.e.b @@ -185,7 +181,7 @@ mov eax, FOOBAR.f.h ; CHECK: t10: ; CHECK-NEXT: mov eax, 10 -; CHECK-NEXT: mov eax, 12 +; CHECK-NEXT: mov eax, 11 t11: mov eax, (FOOBAR PTR [ebx]).f diff --git a/llvm/test/tools/llvm-ml/variable.test b/llvm/test/tools/llvm-ml/variable.test new file mode 100644 index 00000000000000..4e89d67bd59dd7 --- /dev/null +++ b/llvm/test/tools/llvm-ml/variable.test @@ -0,0 +1,13 @@ +# RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.data +t1_value equ 1 or 2 + +t1 BYTE t1_value DUP (0) +; CHECK: t1: +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 0 +; CHECK-NOT: .byte 0 + +END diff --git a/llvm/test/tools/llvm-objcopy/ELF/objcopy-version.test b/llvm/test/tools/llvm-objcopy/ELF/objcopy-version.test deleted file mode 100644 index 7494ccd2866d34..00000000000000 --- a/llvm/test/tools/llvm-objcopy/ELF/objcopy-version.test +++ /dev/null @@ -1,4 +0,0 @@ -# RUN: llvm-objcopy --version | FileCheck %s -# RUN: llvm-objcopy -V | FileCheck %s - -# CHECK: {{ version }} diff --git a/llvm/test/tools/llvm-objcopy/ELF/strip-version.test b/llvm/test/tools/llvm-objcopy/ELF/strip-version.test deleted file mode 100644 index 4b2f137ce2aad2..00000000000000 --- a/llvm/test/tools/llvm-objcopy/ELF/strip-version.test +++ /dev/null @@ -1,5 +0,0 @@ -# RUN: llvm-strip --version | FileCheck %s -# RUN: llvm-strip -V | FileCheck %s - -# CHECK-DAG: {{ version }} -# CHECK-DAG: GNU strip diff --git a/llvm/test/tools/llvm-objcopy/MachO/install-name-tool-version.test b/llvm/test/tools/llvm-objcopy/MachO/install-name-tool-version.test deleted file mode 100644 index 295e5735610122..00000000000000 --- a/llvm/test/tools/llvm-objcopy/MachO/install-name-tool-version.test +++ /dev/null @@ -1,2 +0,0 @@ -# RUN: llvm-install-name-tool --version | FileCheck %s -# CHECK: {{ version }} diff --git a/llvm/test/tools/llvm-objcopy/tool-help-message.test b/llvm/test/tools/llvm-objcopy/tool-help-message.test index 1a0712b7a7ce5a..3f99d910ee97e6 100644 --- a/llvm/test/tools/llvm-objcopy/tool-help-message.test +++ b/llvm/test/tools/llvm-objcopy/tool-help-message.test @@ -18,6 +18,7 @@ # RUN: not llvm-install-name-tool -abcabc 2>&1 | FileCheck --check-prefix=UNKNOWN-ARG %s # RUN: not llvm-install-name-tool --abcabc 2>&1 | FileCheck --check-prefix=UNKNOWN-ARG %s # RUN: not llvm-install-name-tool -add_rpath @executable 2>&1 | FileCheck %s --check-prefix=NO-INPUT-FILES +# RUN: not llvm-install-name-tool -add_rpath @executable f1 f2 2>&1 | FileCheck %s --check-prefix=MULTIPLE-INPUT-FILES # OBJCOPY-USAGE: USAGE: llvm-objcopy [options] input [output] # OBJCOPY-USAGE: Pass @FILE as argument to read options from FILE. 
@@ -30,3 +31,4 @@ # UNKNOWN-ARG: unknown argument '{{-+}}abcabc' # NO-INPUT-FILES: no input file specified +# MULTIPLE-INPUT-FILES: expects a single input file diff --git a/llvm/test/tools/llvm-objcopy/tool-version.test b/llvm/test/tools/llvm-objcopy/tool-version.test new file mode 100644 index 00000000000000..5fe33eb8e7173b --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/tool-version.test @@ -0,0 +1,15 @@ +# RUN: llvm-objcopy --version | FileCheck --check-prefix=OBJCOPY %s +# RUN: llvm-objcopy -V | FileCheck --check-prefix=OBJCOPY %s + +# RUN: llvm-strip --version | FileCheck --check-prefix=STRIP %s +# RUN: llvm-strip -V | FileCheck --check-prefix=STRIP %s + +# RUN: llvm-install-name-tool --version | FileCheck %s + +# OBJCOPY-DAG: {{ version }} +# OBJCOPY-DAG: GNU objcopy + +# STRIP-DAG: {{ version }} +# STRIP-DAG: GNU strip + +# CHECK: {{ version }} diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s new file mode 100644 index 00000000000000..eee3fd4b7103e3 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s @@ -0,0 +1,37 @@ +;; Failure test. We create a malformed kernel descriptor (KD) by manually +;; setting the bytes, because one can't create a malformed KD using the +;; assembler directives. + +; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t.o + +; RUN: printf ".type my_kernel.kd, @object \nmy_kernel.kd:\n.size my_kernel.kd, 64\n" > %t1.sym_info +; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t.o \ +; RUN: | tail -n +9 > %t1.sym_content +; RUN: cat %t1.sym_info %t1.sym_content > %t1.s + +; RUN: llvm-mc %t1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t-re-assemble.o +; RUN: diff %t.o %t-re-assemble.o + +;; Test failure by setting one of the reserved bytes to non-zero value. + +.type my_kernel.kd, @object +.size my_kernel.kd, 64 +my_kernel.kd: + .long 0x00000000 ;; group_segment_fixed_size + .long 0x00000000 ;; private_segment_fixed_size + .quad 0x00FF000000000000 ;; reserved bytes. + .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. + + ;; 20 reserved bytes. + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .long 0x00000000 + + .long 0x00000000 ;; compute_PGM_RSRC3 + .long 0x00000000 ;; compute_PGM_RSRC1 + .long 0x00000000 ;; compute_PGM_RSRC2 + .short 0x0000 ;; additional fields. + + ;; 6 reserved bytes. + .long 0x0000000 + .short 0x0000 diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s new file mode 100644 index 00000000000000..0b798a298d398e --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s @@ -0,0 +1,49 @@ +;; Test disassembly for GRANULATED_WAVEFRONT_SGPR_COUNT in the kernel descriptor. 
+ +; RUN: split-file %s %t.dir + +; RUN: llvm-mc %t.dir/1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble +; RUN: diff %t1 %t1-re-assemble + +; RUN: llvm-mc %t.dir/2.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble +; RUN: diff %t2 %t2-re-assemble + +; RUN: llvm-mc %t.dir/3.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble +; RUN: diff %t3 %t3-re-assemble + + +;--- 1.s +;; Only set next_free_sgpr. +.amdhsa_kernel my_kernel_1 + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 42 + .amdhsa_reserve_flat_scratch 0 + .amdhsa_reserve_xnack_mask 0 + .amdhsa_reserve_vcc 0 +.end_amdhsa_kernel + +;--- 2.s +;; Only set other directives. +.amdhsa_kernel my_kernel_2 + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_reserve_flat_scratch 1 + .amdhsa_reserve_xnack_mask 1 + .amdhsa_reserve_vcc 1 +.end_amdhsa_kernel + +;--- 3.s +;; Set all affecting directives. +.amdhsa_kernel my_kernel_3 + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 35 + .amdhsa_reserve_flat_scratch 1 + .amdhsa_reserve_xnack_mask 1 + .amdhsa_reserve_vcc 1 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s new file mode 100644 index 00000000000000..a8883d2f74be70 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s @@ -0,0 +1,36 @@ +;; Test disassembly for GRANULATED_WORKITEM_VGPR_COUNT in the kernel descriptor. 
+ +; RUN: split-file %s %t.dir + +; RUN: llvm-mc %t.dir/1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble +; RUN: diff %t1 %t1-re-assemble + +; RUN: llvm-mc %t.dir/2.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble +; RUN: diff %t2 %t2-re-assemble + +; RUN: llvm-mc %t.dir/3.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble +; RUN: diff %t3 %t3-re-assemble + +;--- 1.s +.amdhsa_kernel my_kernel_1 + .amdhsa_next_free_vgpr 23 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +;--- 2.s +.amdhsa_kernel my_kernel_2 + .amdhsa_next_free_vgpr 14 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +;--- 3.s +.amdhsa_kernel my_kernel_3 + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s new file mode 100644 index 00000000000000..803507a130c03e --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s @@ -0,0 +1,58 @@ +;; Entirely zeroed kernel descriptor (for GFX10). + +; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj -o %t +; RUN: llvm-objdump -s -j .text %t | FileCheck --check-prefix=OBJDUMP %s + +;; TODO: +;; This file and kd-zeroed-raw.s should produce the same output for the kernel +;; descriptor - a block of 64 zeroed bytes. But it looks like the assembler sets +;; the FWD_PROGRESS bit in COMPUTE_PGM_RSRC1 to 1 even when the directive +;; mentions 0 (see line 36). + +;; Check the raw bytes right now. 
+ +; OBJDUMP: 0000 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0030 01000000 00000000 00000000 00000000 + +.amdhsa_kernel my_kernel + .amdhsa_group_segment_fixed_size 0 + .amdhsa_private_segment_fixed_size 0 + .amdhsa_next_free_vgpr 8 + .amdhsa_reserve_vcc 0 + .amdhsa_reserve_flat_scratch 0 + .amdhsa_reserve_xnack_mask 0 + .amdhsa_next_free_sgpr 8 + .amdhsa_float_round_mode_32 0 + .amdhsa_float_round_mode_16_64 0 + .amdhsa_float_denorm_mode_32 0 + .amdhsa_float_denorm_mode_16_64 0 + .amdhsa_dx10_clamp 0 + .amdhsa_ieee_mode 0 + .amdhsa_fp16_overflow 0 + .amdhsa_workgroup_processor_mode 0 + .amdhsa_memory_ordered 0 + .amdhsa_forward_progress 0 + .amdhsa_system_sgpr_private_segment_wavefront_offset 0 + .amdhsa_system_sgpr_workgroup_id_x 0 + .amdhsa_system_sgpr_workgroup_id_y 0 + .amdhsa_system_sgpr_workgroup_id_z 0 + .amdhsa_system_sgpr_workgroup_info 0 + .amdhsa_system_vgpr_workitem_id 0 + .amdhsa_exception_fp_ieee_invalid_op 0 + .amdhsa_exception_fp_denorm_src 0 + .amdhsa_exception_fp_ieee_div_zero 0 + .amdhsa_exception_fp_ieee_overflow 0 + .amdhsa_exception_fp_ieee_underflow 0 + .amdhsa_exception_fp_ieee_inexact 0 + .amdhsa_exception_int_div_zero 0 + .amdhsa_user_sgpr_private_segment_buffer 0 + .amdhsa_user_sgpr_dispatch_ptr 0 + .amdhsa_user_sgpr_queue_ptr 0 + .amdhsa_user_sgpr_kernarg_segment_ptr 0 + .amdhsa_user_sgpr_dispatch_id 0 + .amdhsa_user_sgpr_flat_scratch_init 0 + .amdhsa_user_sgpr_private_segment_size 0 + .amdhsa_wavefront_size32 0 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s new file mode 100644 index 00000000000000..de4fdf74d88e09 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s @@ -0,0 +1,53 @@ +;; Entirely zeroed kernel descriptor (for GFX9). + +; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ +; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: diff %t1 %t2 + +; RUN: llvm-objdump -s -j .text %t1 | FileCheck --check-prefix=OBJDUMP %s + +; OBJDUMP: 0000 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0030 00000000 00000000 00000000 00000000 + +;; This file and kd-zeroed-raw.s produce the same output for the kernel +;; descriptor - a block of 64 zeroed bytes. 
+ +.amdhsa_kernel my_kernel + .amdhsa_group_segment_fixed_size 0 + .amdhsa_private_segment_fixed_size 0 + .amdhsa_next_free_vgpr 0 + .amdhsa_reserve_vcc 0 + .amdhsa_reserve_flat_scratch 0 + .amdhsa_reserve_xnack_mask 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_float_round_mode_32 0 + .amdhsa_float_round_mode_16_64 0 + .amdhsa_float_denorm_mode_32 0 + .amdhsa_float_denorm_mode_16_64 0 + .amdhsa_dx10_clamp 0 + .amdhsa_ieee_mode 0 + .amdhsa_fp16_overflow 0 + .amdhsa_system_sgpr_private_segment_wavefront_offset 0 + .amdhsa_system_sgpr_workgroup_id_x 0 + .amdhsa_system_sgpr_workgroup_id_y 0 + .amdhsa_system_sgpr_workgroup_id_z 0 + .amdhsa_system_sgpr_workgroup_info 0 + .amdhsa_system_vgpr_workitem_id 0 + .amdhsa_exception_fp_ieee_invalid_op 0 + .amdhsa_exception_fp_denorm_src 0 + .amdhsa_exception_fp_ieee_div_zero 0 + .amdhsa_exception_fp_ieee_overflow 0 + .amdhsa_exception_fp_ieee_underflow 0 + .amdhsa_exception_fp_ieee_inexact 0 + .amdhsa_exception_int_div_zero 0 + .amdhsa_user_sgpr_private_segment_buffer 0 + .amdhsa_user_sgpr_dispatch_ptr 0 + .amdhsa_user_sgpr_queue_ptr 0 + .amdhsa_user_sgpr_kernarg_segment_ptr 0 + .amdhsa_user_sgpr_dispatch_id 0 + .amdhsa_user_sgpr_flat_scratch_init 0 + .amdhsa_user_sgpr_private_segment_size 0 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s new file mode 100644 index 00000000000000..85554209d5d8fb --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s @@ -0,0 +1,41 @@ +; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ +; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: llvm-objdump -s -j .text %t2 | FileCheck --check-prefix=OBJDUMP %s + +;; Not running lit-test over gfx10 (see kd-zeroed-gfx10.s for details). +;; kd-zeroed-raw.s and kd-zeroed-*.s should produce the same output for the +;; kernel descriptor - a block of 64 zeroed bytes. + +;; The disassembly will produce the contents of kd-zeroed-*.s which, on being +;; assembled, contain additional relocation info. A diff over the entire object +;; will fail in this case. So we check by looking at the bytes in .text. + +; OBJDUMP: 0000 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0030 00000000 00000000 00000000 00000000 + +;; The entire object is zeroed out. + +.type my_kernel.kd, @object +.size my_kernel.kd, 64 +my_kernel.kd: + .long 0x00000000 ;; group_segment_fixed_size + .long 0x00000000 ;; private_segment_fixed_size + .quad 0x0000000000000000 ;; reserved bytes. + .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. + + ;; 20 reserved bytes. + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .long 0x00000000 + + .long 0x00000000 ;; compute_PGM_RSRC3 + .long 0x00000000 ;; compute_PGM_RSRC1 + .long 0x00000000 ;; compute_PGM_RSRC2 + .short 0x0000 ;; additional fields. + + ;; 6 reserved bytes. 
+ .long 0x0000000 + .short 0x0000 diff --git a/llvm/test/tools/llvm-symbolizer/sym-verbose.test b/llvm/test/tools/llvm-symbolizer/sym-verbose.test index c12eb3b530e1bd..15292903790938 100644 --- a/llvm/test/tools/llvm-symbolizer/sym-verbose.test +++ b/llvm/test/tools/llvm-symbolizer/sym-verbose.test @@ -18,11 +18,13 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x400590 #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 5 #CHECK-NEXT: Column: 7 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 @@ -30,12 +32,14 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x4005a5 #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 5 #CHECK-NEXT: Column: 17 #CHECK-NEXT: Discriminator: 2 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 @@ -43,12 +47,14 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x4005ad #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 0 #CHECK-NEXT: Column: 30 #CHECK-NEXT: Discriminator: 4 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 @@ -56,11 +62,13 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x4005b9 #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 5 #CHECK-NEXT: Column: 7 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 @@ -69,12 +77,14 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x4005ce #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 5 #CHECK-NEXT: Column: 17 #CHECK-NEXT: Discriminator: 2 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 @@ -83,12 +93,14 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x4005d4 #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 5 #CHECK-NEXT: Column: 30 #CHECK-NEXT: Discriminator: 4 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start 
line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 diff --git a/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml b/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml index e058642877243b..76c1c5c1b36505 100644 --- a/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml +++ b/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml @@ -99,3 +99,27 @@ FileHeader: Type: ET_EXEC DWARF: debug_str: [] + +## d) Test that obj2yaml stops parsing the .debug_str section if it encounters a +## string without a null terminator. The output uses a raw content section instead of +## the DWARF tag to represent the broken .debug_str section. + +# RUN: yaml2obj --docnum=3 %s | obj2yaml | FileCheck %s --check-prefix=NO-TERMINATOR + +# NO-TERMINATOR-NOT: DWARF: +# NO-TERMINATOR: Sections: +# NO-TERMINATOR-NEXT: - Name: .debug_str +# NO-TERMINATOR-NEXT: Type: SHT_PROGBITS +# NO-TERMINATOR-NEXT: Flags: [ SHF_MERGE, SHF_STRINGS ] +# NO-TERMINATOR-NEXT: Content: '61626300616263' +# NO-TERMINATOR-NEXT: ... + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .debug_str + Type: SHT_PROGBITS + Content: "61626300616263" ## "abc\0abc" diff --git a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-ranges.yaml b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-ranges.yaml index 6a9cd7a6195e7f..f80dd6de53689c 100644 --- a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-ranges.yaml +++ b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-ranges.yaml @@ -407,3 +407,17 @@ DWARF: Entries: - LowOffset: 0x1234 HighOffset: 0x5678 + +## l) Test that the .debug_ranges section header is emitted if the "debug_ranges" +## entry is empty. + +# RUN: yaml2obj --docnum=12 %s -o %t12.o +# RUN: llvm-readobj -S %t12.o | FileCheck -DSIZE=0 -DADDRALIGN=1 %s --check-prefix=DWARF-HEADER + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +DWARF: + debug_ranges: [] diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index b63d08b90ff51a..46ed7414dbb31e 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1854,23 +1854,6 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, outs() << SectionName << ":\n"; } - if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) { - if (Symbols[SI].Type == ELF::STT_AMDGPU_HSA_KERNEL) { - // skip amd_kernel_code_t at the begining of kernel symbol (256 bytes) - Start += 256; - } - if (SI == SE - 1 || - Symbols[SI + 1].Type == ELF::STT_AMDGPU_HSA_KERNEL) { - // cut trailing zeroes at the end of kernel - // cut up to 256 bytes - const uint64_t EndAlign = 256; - const auto Limit = End - (std::min)(EndAlign, End - Start); - while (End > Limit && - *reinterpret_cast<const support::ulittle32_t *>(&Bytes[End - 4]) == 0) - End -= 4; - } - } - outs() << '\n'; if (!NoLeadingAddr) outs() << format(Is64Bits ? 
"%016" PRIx64 " " : "%08" PRIx64 " ", diff --git a/llvm/tools/obj2yaml/dwarf2yaml.cpp b/llvm/tools/obj2yaml/dwarf2yaml.cpp index 513fa0fdef01df..1dcf6d42d6ada7 100644 --- a/llvm/tools/obj2yaml/dwarf2yaml.cpp +++ b/llvm/tools/obj2yaml/dwarf2yaml.cpp @@ -46,14 +46,20 @@ void dumpDebugAbbrev(DWARFContext &DCtx, DWARFYAML::Data &Y) { } } -void dumpDebugStrings(DWARFContext &DCtx, DWARFYAML::Data &Y) { - StringRef RemainingTable = DCtx.getDWARFObj().getStrSection(); - Y.DebugStrings.emplace(); - while (RemainingTable.size() > 0) { - auto SymbolPair = RemainingTable.split('\0'); - RemainingTable = SymbolPair.second; - Y.DebugStrings->push_back(SymbolPair.first); +Error dumpDebugStrings(DWARFContext &DCtx, DWARFYAML::Data &Y) { + DataExtractor StrData = DCtx.getStringExtractor(); + uint64_t Offset = 0; + std::vector DebugStr; + Error Err = Error::success(); + while (StrData.isValidOffset(Offset)) { + const char *CStr = StrData.getCStr(&Offset, &Err); + if (Err) + return Err; + DebugStr.push_back(CStr); } + + Y.DebugStrings = DebugStr; + return Err; } Error dumpDebugARanges(DWARFContext &DCtx, DWARFYAML::Data &Y) { @@ -108,6 +114,7 @@ Error dumpDebugRanges(DWARFContext &DCtx, DWARFYAML::Data &Y) { DCtx.isLittleEndian(), AddrSize); uint64_t Offset = 0; DWARFDebugRangeList DwarfRanges; + std::vector DebugRanges; while (Data.isValidOffset(Offset)) { DWARFYAML::Ranges YamlRanges; @@ -117,8 +124,10 @@ Error dumpDebugRanges(DWARFContext &DCtx, DWARFYAML::Data &Y) { return E; for (const auto &RLE : DwarfRanges.getEntries()) YamlRanges.Entries.push_back({RLE.StartAddress, RLE.EndAddress}); - Y.DebugRanges.push_back(std::move(YamlRanges)); + DebugRanges.push_back(std::move(YamlRanges)); } + + Y.DebugRanges = DebugRanges; return ErrorSuccess(); } diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index 9f524479bb04c3..264bc4d1dbf361 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -415,7 +415,7 @@ Optional ELFDumper::dumpDWARFSections( if (RawSec->Name == ".debug_aranges") Err = dumpDebugARanges(*DWARFCtx.get(), DWARF); else if (RawSec->Name == ".debug_str") - dumpDebugStrings(*DWARFCtx.get(), DWARF); + Err = dumpDebugStrings(*DWARFCtx.get(), DWARF); // If the DWARF section cannot be successfully parsed, emit raw content // instead of an entry in the DWARF section of the YAML. 
diff --git a/llvm/tools/obj2yaml/macho2yaml.cpp b/llvm/tools/obj2yaml/macho2yaml.cpp index 3a93d5c6846b59..49347431b9a4f4 100644 --- a/llvm/tools/obj2yaml/macho2yaml.cpp +++ b/llvm/tools/obj2yaml/macho2yaml.cpp @@ -154,10 +154,8 @@ static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx, } if (SecName == "__debug_ranges") return dumpDebugRanges(DCtx, DWARF); - if (SecName == "__debug_str") { - dumpDebugStrings(DCtx, DWARF); - return Error::success(); - } + if (SecName == "__debug_str") + return dumpDebugStrings(DCtx, DWARF); return createStringError(errc::not_supported, "dumping " + SecName + " section is not supported"); } diff --git a/llvm/tools/obj2yaml/obj2yaml.h b/llvm/tools/obj2yaml/obj2yaml.h index 85a7ac9a4787b1..66a2d2753622cd 100644 --- a/llvm/tools/obj2yaml/obj2yaml.h +++ b/llvm/tools/obj2yaml/obj2yaml.h @@ -47,6 +47,7 @@ void dumpDebugPubSections(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y); void dumpDebugInfo(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y); void dumpDebugLines(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y); llvm::Error dumpDebugRanges(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y); -void dumpDebugStrings(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y); +llvm::Error dumpDebugStrings(llvm::DWARFContext &DCtx, + llvm::DWARFYAML::Data &Y); #endif diff --git a/llvm/unittests/Support/LockFileManagerTest.cpp b/llvm/unittests/Support/LockFileManagerTest.cpp index 587e442be19660..0b5a0d982a8fcb 100644 --- a/llvm/unittests/Support/LockFileManagerTest.cpp +++ b/llvm/unittests/Support/LockFileManagerTest.cpp @@ -81,7 +81,7 @@ TEST(LockFileManagerTest, RelativePath) { char PathBuf[1024]; const char *OrigPath = getcwd(PathBuf, 1024); - ASSERT_FALSE(chdir(LockFileManagerTestDir.path().data())); + ASSERT_FALSE(chdir(LockFileManagerTestDir.c_str())); TempDir inner("inner"); SmallString<64> LockedFile(inner.path()); diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn index 81c9ec0ede11fe..18aa728b0db900 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn @@ -42,6 +42,7 @@ group("all-checks") { # If you add a check, also add it to ClangTidyForceLinker.h. 
deps = [ "//clang-tools-extra/clang-tidy/abseil", + "//clang-tools-extra/clang-tidy/altera", "//clang-tools-extra/clang-tidy/android", "//clang-tools-extra/clang-tidy/boost", "//clang-tools-extra/clang-tidy/bugprone", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/altera/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/altera/BUILD.gn new file mode 100644 index 00000000000000..52f2e3d5f23d68 --- /dev/null +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/altera/BUILD.gn @@ -0,0 +1,18 @@ +static_library("altera") { + output_name = "clangTidyAlteraModule" + configs += [ "//llvm/utils/gn/build:clang_code" ] + deps = [ + "//clang-tools-extra/clang-tidy", + "//clang-tools-extra/clang-tidy/utils", + "//clang/lib/AST", + "//clang/lib/ASTMatchers", + "//clang/lib/Analysis", + "//clang/lib/Basic", + "//clang/lib/Lex", + "//llvm/lib/Support", + ] + sources = [ + "AlteraTidyModule.cpp", + "StructPackAlignCheck.cpp", + ] +} diff --git a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn index bb3d69d046bef1..4d645799dbf655 100644 --- a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn @@ -81,7 +81,6 @@ static_library("AST") { "ExternalASTMerger.cpp", "ExternalASTSource.cpp", "FormatString.cpp", - "IgnoreExpr.cpp", "InheritViz.cpp", "Interp/ByteCodeEmitter.cpp", "Interp/ByteCodeExprGen.cpp", diff --git a/mlir/docs/PassManagement.md b/mlir/docs/PassManagement.md index 92ca92218219c0..6e577db4501c1d 100644 --- a/mlir/docs/PassManagement.md +++ b/mlir/docs/PassManagement.md @@ -104,6 +104,15 @@ struct MyOperationPass : public OperationPass<MyOperationPass> { }; ``` +### Dependent Dialects + +Dialects must be loaded in the MLIRContext before entities from these dialects +(operations, types, attributes, ...) can be created, and they must be loaded +before the multi-threaded pass pipeline starts executing. To this end, a pass +that can create an entity from a dialect that isn't already loaded must express +this by overriding the `getDependentDialects()` method and declaring that list of +dialects explicitly (a minimal sketch of such an override is shown below). + ## Analysis Management An important concept, along with transformation passes, are analyses. These are @@ -684,6 +693,8 @@ It contains the following fields: * description - A longer, more detailed description of the pass. This is used when generating pass documentation. +* dependentDialects + - A list of strings that are the Dialect classes this pass can introduce. * constructor - A piece of C++ code used to create a default instance of the pass. * options diff --git a/mlir/docs/Tutorials/UnderstandingTheIRStructure.md b/mlir/docs/Tutorials/UnderstandingTheIRStructure.md new file mode 100644 index 00000000000000..8b4f7724741fa3 --- /dev/null +++ b/mlir/docs/Tutorials/UnderstandingTheIRStructure.md @@ -0,0 +1,287 @@ +# Understanding the IR Structure + +The MLIR Language Reference describes the +[High Level Structure](../LangRef/#high-level-structure); this document +illustrates this structure through examples and, at the same time, introduces +the C++ APIs involved in manipulating it. + +We will implement a [pass](../PassManagement/#operation-pass) that traverses any +MLIR input and prints the entities inside the IR. A pass (or in general almost any +piece of IR) is always rooted with an operation. Most of the time the top-level +operation is a `ModuleOp`; the MLIR `PassManager` is in fact limited to +operating on a top-level `ModuleOp`. 
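+As a brief aside, here is a minimal sketch of the `getDependentDialects()`
+override described in the "Dependent Dialects" section earlier (the pass and
+dialect names here are hypothetical, for illustration only):
+
+```c++
+struct MyLoweringPass
+    : public PassWrapper<MyLoweringPass, OperationPass<ModuleOp>> {
+  void getDependentDialects(DialectRegistry &registry) const override {
+    // Declare every dialect whose entities this pass may create, so that
+    // they are loaded before the multi-threaded pass pipeline starts.
+    registry.insert<linalg::LinalgDialect>();
+  }
+  void runOnOperation() override { /* create linalg ops here */ }
+};
+```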
As such a pass starts with an operation, +and so will our traversal: + +``` + void runOnOperation() override { + Operation *op = getOperation(); + resetIndent(); + printOperation(op); + } +``` + +## Traversing the IR Nesting + +The IR is recursively nested: an `Operation` can have one or multiple nested +`Region`s, each of which is actually a list of `Block`s, each of which itself +wraps a list of `Operation`s. Our traversal will follow this structure with +three methods: `printOperation()`, `printRegion()`, and `printBlock()`. + +The first method inspects the properties of an operation, before iterating on +the nested regions and printing them individually: + +```c++ + void printOperation(Operation *op) { + // Print the operation itself and some of its properties + printIndent() << "visiting op: '" << op->getName() << "' with " + << op->getNumOperands() << " operands and " + << op->getNumResults() << " results\n"; + // Print the operation attributes + if (!op->getAttrs().empty()) { + printIndent() << op->getAttrs().size() << " attributes:\n"; + for (NamedAttribute attr : op->getAttrs()) + printIndent() << " - '" << attr.first << "' : '" << attr.second + << "'\n"; + } + + // Recurse into each of the regions attached to the operation. + printIndent() << " " << op->getNumRegions() << " nested regions:\n"; + auto indent = pushIndent(); + for (Region &region : op->getRegions()) + printRegion(region); + } +``` + +A `Region` does not hold anything other than a list of `Block`s: + +```c++ + void printRegion(Region &region) { + // A region does not hold anything by itself other than a list of blocks. + printIndent() << "Region with " << region.getBlocks().size() + << " blocks:\n"; + auto indent = pushIndent(); + for (Block &block : region.getBlocks()) + printBlock(block); + } +``` + +Finally, a `Block` has a list of arguments, and holds a list of `Operation`s: + +```c++ + void printBlock(Block &block) { + // Print the block's intrinsic properties (basically: its argument list) + printIndent() + << "Block with " << block.getNumArguments() << " arguments, " + << block.getNumSuccessors() + << " successors, and " + // Note, this `.size()` is traversing a linked-list and is O(n). + << block.getOperations().size() << " operations\n"; + + // A block's main role is to hold a list of Operations: let's recurse into + // printing each operation. + auto indent = pushIndent(); + for (Operation &op : block.getOperations()) + printOperation(&op); + } +``` + +The code for the pass is available +[here in the repo](https://github.com/llvm/llvm-project/blob/master/mlir/test/lib/IR/TestPrintNesting.cpp) +and can be exercised with `mlir-opt -test-print-nesting`. 
+ +### Example + +The Pass introduced in the previous section can be applied to the following IR +with `mlir-opt -test-print-nesting -allow-unregistered-dialect +llvm-project/mlir/test/IR/print-ir-nesting.mlir`: + +```mlir +"module"() ( { + %0:4 = "dialect.op1"() {"attribute name" = 42 : i32} : () -> (i1, i16, i32, i64) + "dialect.op2"() ( { + "dialect.innerop1"(%0#0, %0#1) : (i1, i16) -> () + }, { + "dialect.innerop2"() : () -> () + "dialect.innerop3"(%0#0, %0#2, %0#3)[^bb1, ^bb2] : (i1, i32, i64) -> () + ^bb1(%1: i32): // pred: ^bb0 + "dialect.innerop4"() : () -> () + "dialect.innerop5"() : () -> () + ^bb2(%2: i64): // pred: ^bb0 + "dialect.innerop6"() : () -> () + "dialect.innerop7"() : () -> () + }) {"other attribute" = 42 : i64} : () -> () + "module_terminator"() : () -> () +}) : () -> () +``` + +And will yield the following output: + +``` +visiting op: 'module' with 0 operands and 0 results + 1 nested regions: + Region with 1 blocks: + Block with 0 arguments, 0 successors, and 3 operations + visiting op: 'dialect.op1' with 0 operands and 4 results + 1 attributes: + - 'attribute name' : '42 : i32' + 0 nested regions: + visiting op: 'dialect.op2' with 0 operands and 0 results + 2 nested regions: + Region with 1 blocks: + Block with 0 arguments, 0 successors, and 1 operations + visiting op: 'dialect.innerop1' with 2 operands and 0 results + 0 nested regions: + Region with 3 blocks: + Block with 0 arguments, 2 successors, and 2 operations + visiting op: 'dialect.innerop2' with 0 operands and 0 results + 0 nested regions: + visiting op: 'dialect.innerop3' with 3 operands and 0 results + 0 nested regions: + Block with 1 arguments, 0 successors, and 2 operations + visiting op: 'dialect.innerop4' with 0 operands and 0 results + 0 nested regions: + visiting op: 'dialect.innerop5' with 0 operands and 0 results + 0 nested regions: + Block with 1 arguments, 0 successors, and 2 operations + visiting op: 'dialect.innerop6' with 0 operands and 0 results + 0 nested regions: + visiting op: 'dialect.innerop7' with 0 operands and 0 results + 0 nested regions: + visiting op: 'module_terminator' with 0 operands and 0 results + 0 nested regions: +``` + +## Other IR Traversal Methods + +In many cases, unwrapping the recursive structure of the IR is cumbersome and +you may be interested in using other helpers. + +### Filtered iterator: `getOps<OpTy>()` + +For example, the `Block` class exposes a convenient templated method +`getOps<OpTy>()` that provides a filtered iterator. Here is an example: + +```c++ + auto varOps = entryBlock.getOps<spirv::GlobalVariableOp>(); + for (spirv::GlobalVariableOp gvOp : varOps) { + // process each GlobalVariable Operation in the block. + ... + } +``` + +Similarly, the `Region` class exposes the same `getOps` method that will iterate +on all the blocks in the region. + +### Walkers + +`getOps<OpTy>()` is useful for iterating over the Operations listed directly +inside a single block (or a single region); however, it is frequently useful +to traverse the IR in a nested fashion. To this end, MLIR exposes the `walk()` +helper on `Operation`, `Block`, and `Region`. This helper takes a single +argument: a callback that will be invoked for every operation recursively +nested under the provided entity. + +```c++ + // Recursively traverse all the regions and blocks nested inside the function + // and apply the callback on every single operation in post-order. + getFunction().walk([&](mlir::Operation *op) { + // process Operation `op`. 
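+    // Note: `walk` visits operations in post-order by default, which makes it
+    // generally safe to erase `op` itself from within this callback.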
+ }); +``` + +The provided callback can be specialized to filter on a particular type of +Operation, for example the following will apply the callback only on `LinalgOp` +operations nested inside the function: + +```c++ + getFunction.walk([](LinalgOp linalgOp) { + // process LinalgOp `linalgOp`. + }); +``` + +Finally, the callback can optionally stop the walk by returning a +`WalkResult::interrupt()` value. For example the following walk will find all +`AllocOp` nested inside the function and interrupt the traversal if one of them +does not satisfy a criteria: + +```c++ + WalkResult result = getFunction().walk([&](AllocOp allocOp) { + if (!isValid(allocOp)) + return WalkResult::interrupt(); + return WalkResult::advance(); + }); + if (result.wasInterrupted()) + // One alloc wasn't matching. + ... +``` + +## Traversing the def-use chains + +Another relationship in the IR is the one that links a `Value` with its users. +As defined in the +[language reference](https://mlir.llvm.org/docs/LangRef/#high-level-structure), +each Value is either a `BlockArgument` or the result of exactly one `Operation` +(an `Operation` can have multiple results, each of them is a separate `Value`). +The users of a `Value` are `Operation`s, through their arguments: each +`Operation` argument references a single `Value`. + +Here is a code sample that inspects the operands of an `Operation` and prints +some information about them: + +```c++ + // Print information about the producer of each of the operands. + for (Value operand : op->getOperands()) { + if (Operation *producer = operand.getDefiningOp()) { + llvm::outs() << " - Operand produced by operation '" + << producer->getName() << "'\n"; + } else { + // If there is no defining op, the Value is necessarily a Block + // argument. + auto blockArg = operand.cast(); + llvm::outs() << " - Operand produced by Block argument, number " + << blockArg.getArgNumber() << "\n"; + } + } +``` + +Similarly, the following code sample iterates through the result `Value`s +produced by an `Operation` and for each result will iterate the users of these +results and print informations about them: + +```c++ + // Print information about the user of each of the result. + llvm::outs() << "Has " << op->getNumResults() << " results:\n"; + for (auto indexedResult : llvm::enumerate(op->getResults())) { + Value result = indexedResult.value(); + llvm::outs() << " - Result " << indexedResult.index(); + if (result.use_empty()) { + llvm::outs() << " has no uses\n"; + continue; + } + if (result.hasOneUse()) { + llvm::outs() << " has a single use: "; + } else { + llvm::outs() << " has " + << std::distance(result.getUses().begin(), + result.getUses().end()) + << " uses:\n"; + } + for (Operation *userOp : result.getUsers()) { + llvm::outs() << " - " << userOp->getName() << "\n"; + } + } +``` + +The illustrating code for this pass is available +[here in the repo](https://github.com/llvm/llvm-project/blob/master/mlir/test/lib/IR/TestPrintDefUse.cpp) +and can be exercised with `mlir-opt -test-print-defuse`. 
+
+The chaining of `Value`s and their uses can be viewed as follows:
+
+![Def-Use chains](/includes/img/DefUseChains.svg)
+
+The uses of a `Value` (`OpOperand` or `BlockOperand`) are also chained in a
+doubly linked list, which is particularly useful when replacing all uses of a
+`Value` with a new one ("RAUW"):
+
+![Use-list](/includes/img/Use-list.svg)
diff --git a/mlir/docs/includes/img/DefUseChains.svg b/mlir/docs/includes/img/DefUseChains.svg
new file mode 100644
index 00000000000000..2d5b75246772ab
--- /dev/null
+++ b/mlir/docs/includes/img/DefUseChains.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/mlir/docs/includes/img/Use-list.svg b/mlir/docs/includes/img/Use-list.svg
new file mode 100644
index 00000000000000..4840619f067412
--- /dev/null
+++ b/mlir/docs/includes/img/Use-list.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
index db1c3bfead94f7..580fbf53ae4f26 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.h
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -61,7 +61,8 @@ std::unique_ptr<OperationPass<FuncOp>> createLoopTilingPass();
 /// and no callback is provided, anything passed from the command-line (if at
 /// all) or the default unroll factor is used (LoopUnroll::kDefaultUnrollFactor).
 std::unique_ptr<OperationPass<FuncOp>> createLoopUnrollPass(
-    int unrollFactor = -1, bool unrollFull = false,
+    int unrollFactor = -1, bool unrollUpToFactor = false,
+    bool unrollFull = false,
     const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr);
 
 /// Creates a loop unroll jam pass to unroll jam by the specified factor. A
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index 0e7f3e43661eff..7515dbaa33d863 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -71,6 +71,8 @@ def AffineLoopUnroll : FunctionPass<"affine-loop-unroll"> {
   let options = [
     Option<"unrollFactor", "unroll-factor", "unsigned", /*default=*/"4",
            "Use this unroll factor for all loops being unrolled">,
+    Option<"unrollUpToFactor", "unroll-up-to-factor", "bool", /*default=*/"false",
+           "Allow unrolling up to the factor specified">,
     Option<"unrollFull", "unroll-full", "bool", /*default=*/"false",
            "Fully unroll loops">,
     Option<"numRepetitions", "unroll-num-reps", "unsigned", /*default=*/"1",
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index f438b6587c8bce..ce3b5fd2fd2479 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -30,6 +30,10 @@ struct TiledLinalgOp {
   SmallVector<Operation *, 8> loops;
 };
 
+/// Populates patterns for vectorization of all ConvN-D ops.
+void populateConvVectorizationPatterns(MLIRContext *context,
+                                       OwningRewritePatternList &patterns);
+
 /// Performs standalone tiling of a single LinalgOp by `tileSizes`.
 /// and permute the loop nest according to `interchangeVector`
 /// The permutation is expressed as a list of integers that specify
@@ -531,6 +535,53 @@ struct AffineMinSCFCanonicalizationPattern
                                 PatternRewriter &rewriter) const override;
 };
 
+/// Converts a Convolution op into a vector contraction.
+///
+/// The conversion expects every dimension marked as false in the *mask* to
+/// have size 1. This ensures that the ConvOp can be lowered to a vector
+/// contraction over the dimensions marked as true in the *mask*.
+///
+/// A good example is ConvNHWCOp, which is a 2D Conv op with channels as the
+/// last dimension. For this op we contract the last 3 dimensions.
+/// The initial op definition looks like this:
+/// ```
+/// linalg.conv_2d_nhwc %arg0, %arg1, %arg2 :
+///   (memref<1x3x3x3xf32>, memref<1x3x3x3xf32>, memref<?x?x?x?xf32>)
+/// ```
+/// This op can be expressed as a dot product between %arg0 (input) and
+/// %arg1 (kernel), which is written into the first entry of %arg2 (output).
+/// This is the ConvOp this pass expects and converts into:
+/// ```
+/// #map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+/// #map1 = affine_map<(d0, d1, d2) -> ()>
+/// .....
+/// %0 = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %c0_f32
+///   : memref<1x3x3x3xf32>, vector<3x3x3xf32>
+/// %1 = vector.transfer_read %arg1[%c0, %c0, %c0, %c0], %c0_f32
+///   : memref<1x3x3x3xf32>, vector<3x3x3xf32>
+/// %2 = vector.contract {indexing_maps = [#map0, #map0, #map1],
+///   iterator_types = ["reduction", "reduction", "reduction"]} %0, %1,
+///   %c0_f32 : vector<3x3x3xf32>, vector<3x3x3xf32> into f32
+/// store %2, %arg2[%c0, %c0, %c0, %c0] : memref<?x?x?x?xf32>
+/// ```
+/// where the first 2 operations read the input and kernel memory buffers into
+/// vectors. Subsequently, they are contracted together and the result is
+/// written to the first entry of the output buffer.
+template <typename ConvOp, int N>
+struct ConvOpVectorization : public OpRewritePattern<ConvOp> {
+  using OpRewritePattern<ConvOp>::OpRewritePattern;
+  SmallVector<bool, 4> mask;
+
+  ConvOpVectorization(MLIRContext *context, SmallVector<bool, 4> msk)
+      : OpRewritePattern<ConvOp>(context) {
+    assert(msk.size() == N && "Mask size does not match rank");
+    this->mask = msk;
+  }
+
+  LogicalResult matchAndRewrite(ConvOp minOp,
+                                PatternRewriter &rewriter) const override;
+};
+
 //===----------------------------------------------------------------------===//
 // Support for staged pattern application.
//===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h b/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h index 36df24f60c7045..ffb3ba30b699a3 100644 --- a/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h +++ b/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h @@ -20,10 +20,10 @@ namespace edsc { class BoundsCapture { public: unsigned rank() const { return lbs.size(); } - Value lb(unsigned idx) { return lbs[idx]; } - Value ub(unsigned idx) { return ubs[idx]; } - int64_t step(unsigned idx) { return steps[idx]; } - std::tuple range(unsigned idx) { + Value lb(unsigned idx) const { return lbs[idx]; } + Value ub(unsigned idx) const { return ubs[idx]; } + int64_t step(unsigned idx) const { return steps[idx]; } + std::tuple range(unsigned idx) const { return std::make_tuple(lbs[idx], ubs[idx], steps[idx]); } void swapRanges(unsigned i, unsigned j) { @@ -34,9 +34,9 @@ class BoundsCapture { std::swap(steps[i], steps[j]); } - ArrayRef getLbs() { return lbs; } - ArrayRef getUbs() { return ubs; } - ArrayRef getSteps() { return steps; } + ArrayRef getLbs() const { return lbs; } + ArrayRef getUbs() const { return ubs; } + ArrayRef getSteps() const { return steps; } protected: SmallVector lbs; @@ -52,8 +52,6 @@ class BoundsCapture { class MemRefBoundsCapture : public BoundsCapture { public: explicit MemRefBoundsCapture(Value v); - MemRefBoundsCapture(const MemRefBoundsCapture &) = default; - MemRefBoundsCapture &operator=(const MemRefBoundsCapture &) = default; unsigned fastestVarying() const { return rank() - 1; } @@ -69,8 +67,6 @@ class VectorBoundsCapture : public BoundsCapture { public: explicit VectorBoundsCapture(Value v); explicit VectorBoundsCapture(VectorType t); - VectorBoundsCapture(const VectorBoundsCapture &) = default; - VectorBoundsCapture &operator=(const VectorBoundsCapture &) = default; private: Value base; diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index f326ae55786500..44bbb423b2d950 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -1504,6 +1504,13 @@ def DynamicTensorFromElementsOp : Std_Op<"dynamic_tensor_from_elements", let arguments = (ins Variadic:$dynamicExtents); let results = (outs AnyRankedTensor:$result); let regions = (region SizedRegion<1>:$body); + + let builders = [ + // Build op and populate its body per callback function. 
+ OpBuilder<"OpBuilder &b, OperationState &result, Type resultTy, " + "ValueRange dynamicExtents, " + "function_ref">, + ]; } //===----------------------------------------------------------------------===// @@ -1621,14 +1628,9 @@ def TensorFromElementsOp : Std_Op<"tensor_from_elements", let results = (outs AnyTensor:$result); let skipDefaultBuilders = 1; - let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, ValueRange elements", [{ - assert(!elements.empty() && "expected at least one element"); - result.addOperands(elements); - result.addTypes( - RankedTensorType::get({static_cast(elements.size())}, - *elements.getTypes().begin())); - }]>]; + let builders = [ + OpBuilder<"OpBuilder &b, OperationState &result, ValueRange elements"> + ]; let hasCanonicalizer = 1; } diff --git a/mlir/include/mlir/IR/AttributeSupport.h b/mlir/include/mlir/IR/AttributeSupport.h index 35084a20493f58..c0e3a0bb9b26e0 100644 --- a/mlir/include/mlir/IR/AttributeSupport.h +++ b/mlir/include/mlir/IR/AttributeSupport.h @@ -16,6 +16,7 @@ #include "mlir/IR/MLIRContext.h" #include "mlir/IR/StorageUniquerSupport.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/Twine.h" namespace mlir { class MLIRContext; @@ -142,6 +143,14 @@ class AttributeUniquer { static typename std::enable_if_t< !std::is_same::value, T> get(MLIRContext *ctx, Args &&...args) { +#ifndef NDEBUG + if (!ctx->getAttributeUniquer().isParametricStorageInitialized( + T::getTypeID())) + llvm::report_fatal_error(llvm::Twine("can't create Attribute '") + + llvm::getTypeName() + + "' because storage uniquer isn't initialized: " + "the dialect was likely not loaded."); +#endif return ctx->getAttributeUniquer().get( [ctx](AttributeStorage *storage) { initializeAttributeStorage(storage, ctx, T::getTypeID()); @@ -153,6 +162,14 @@ class AttributeUniquer { static typename std::enable_if_t< std::is_same::value, T> get(MLIRContext *ctx) { +#ifndef NDEBUG + if (!ctx->getAttributeUniquer().isSingletonStorageInitialized( + T::getTypeID())) + llvm::report_fatal_error(llvm::Twine("can't create Attribute '") + + llvm::getTypeName() + + "' because storage uniquer isn't initialized: " + "the dialect was likely not loaded."); +#endif return ctx->getAttributeUniquer().get(T::getTypeID()); } diff --git a/mlir/include/mlir/IR/BlockSupport.h b/mlir/include/mlir/IR/BlockSupport.h index f3dd6140420e48..fc16effbba70da 100644 --- a/mlir/include/mlir/IR/BlockSupport.h +++ b/mlir/include/mlir/IR/BlockSupport.h @@ -75,6 +75,47 @@ class SuccessorRange final friend RangeBaseT; }; +//===----------------------------------------------------------------------===// +// BlockRange +//===----------------------------------------------------------------------===// + +/// This class provides an abstraction over the different types of ranges over +/// Blocks. In many cases, this prevents the need to explicitly materialize a +/// SmallVector/std::vector. This class should be used in places that are not +/// suitable for a more derived type (e.g. ArrayRef) or a template range +/// parameter. 
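+///
+/// For illustration, a hypothetical function `void takeBlocks(BlockRange);`
+/// could be called with any of the following, without materializing a vector:
+///   takeBlocks({&blockA, &blockB});   // std::initializer_list<Block *>
+///   takeBlocks(op->getSuccessors());  // SuccessorRange
+///   takeBlocks(blockVector);          // SmallVector<Block *, 4>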
+class BlockRange final + : public llvm::detail::indexed_accessor_range_base< + BlockRange, llvm::PointerUnion, + Block *, Block *, Block *> { +public: + using RangeBaseT::RangeBaseT; + BlockRange(ArrayRef blocks = llvm::None); + BlockRange(SuccessorRange successors); + template , Arg>::value>> + BlockRange(Arg &&arg) + : BlockRange(ArrayRef(std::forward(arg))) {} + BlockRange(std::initializer_list blocks) + : BlockRange(ArrayRef(blocks)) {} + +private: + /// The owner of the range is either: + /// * A pointer to the first element of an array of block operands. + /// * A pointer to the first element of an array of Block *. + using OwnerT = llvm::PointerUnion; + + /// See `llvm::detail::indexed_accessor_range_base` for details. + static OwnerT offset_base(OwnerT object, ptrdiff_t index); + + /// See `llvm::detail::indexed_accessor_range_base` for details. + static Block *dereference_iterator(OwnerT object, ptrdiff_t index); + + /// Allow access to `offset_base` and `dereference_iterator`. + friend RangeBaseT; +}; + //===----------------------------------------------------------------------===// // Operation Iterators //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/IR/Operation.h b/mlir/include/mlir/IR/Operation.h index 5f5e9017ae5124..6de7677dbf0528 100644 --- a/mlir/include/mlir/IR/Operation.h +++ b/mlir/include/mlir/IR/Operation.h @@ -32,25 +32,25 @@ class Operation final public: /// Create a new Operation with the specific fields. static Operation *create(Location location, OperationName name, - ArrayRef resultTypes, ArrayRef operands, + TypeRange resultTypes, ValueRange operands, ArrayRef attributes, - ArrayRef successors, unsigned numRegions); + BlockRange successors, unsigned numRegions); /// Overload of create that takes an existing MutableDictionaryAttr to avoid /// unnecessarily uniquing a list of attributes. static Operation *create(Location location, OperationName name, - ArrayRef resultTypes, ArrayRef operands, + TypeRange resultTypes, ValueRange operands, MutableDictionaryAttr attributes, - ArrayRef successors, unsigned numRegions); + BlockRange successors, unsigned numRegions); /// Create a new Operation from the fields stored in `state`. static Operation *create(const OperationState &state); /// Create a new Operation with the specific fields. static Operation *create(Location location, OperationName name, - ArrayRef resultTypes, ArrayRef operands, + TypeRange resultTypes, ValueRange operands, MutableDictionaryAttr attributes, - ArrayRef successors = {}, + BlockRange successors = {}, RegionRange regions = {}); /// The name of an operation is the key identifier for it. 
@@ -633,7 +633,7 @@ class Operation final bool hasValidOrder() { return orderIndex != kInvalidOrderIdx; } private: - Operation(Location location, OperationName name, ArrayRef resultTypes, + Operation(Location location, OperationName name, TypeRange resultTypes, unsigned numSuccessors, unsigned numRegions, const MutableDictionaryAttr &attributes, bool hasOperandStorage); diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h index 7fce4b808d2e44..11e85f20af4458 100644 --- a/mlir/include/mlir/IR/OperationSupport.h +++ b/mlir/include/mlir/IR/OperationSupport.h @@ -29,6 +29,7 @@ namespace mlir { class Block; +class BlockRange; class Dialect; class Operation; struct OperationState; @@ -42,7 +43,6 @@ class Pattern; class Region; class ResultRange; class RewritePattern; -class SuccessorRange; class Type; class Value; class ValueRange; @@ -394,12 +394,8 @@ struct OperationState { attributes.append(newAttributes); } - /// Add an array of successors. - void addSuccessors(ArrayRef newSuccessors) { - successors.append(newSuccessors.begin(), newSuccessors.end()); - } void addSuccessors(Block *successor) { successors.push_back(successor); } - void addSuccessors(SuccessorRange newSuccessors); + void addSuccessors(BlockRange newSuccessors); /// Create a region that should be attached to the operation. These regions /// can be filled in immediately without waiting for Operation to be diff --git a/mlir/include/mlir/IR/TypeSupport.h b/mlir/include/mlir/IR/TypeSupport.h index ace5eaa733454d..c1de5895791541 100644 --- a/mlir/include/mlir/IR/TypeSupport.h +++ b/mlir/include/mlir/IR/TypeSupport.h @@ -15,6 +15,7 @@ #include "mlir/IR/MLIRContext.h" #include "mlir/IR/StorageUniquerSupport.h" +#include "llvm/ADT/Twine.h" namespace mlir { class Dialect; @@ -126,6 +127,13 @@ struct TypeUniquer { static typename std::enable_if_t< !std::is_same::value, T> get(MLIRContext *ctx, Args &&...args) { +#ifndef NDEBUG + if (!ctx->getTypeUniquer().isParametricStorageInitialized(T::getTypeID())) + llvm::report_fatal_error(llvm::Twine("can't create type '") + + llvm::getTypeName() + + "' because storage uniquer isn't initialized: " + "the dialect was likely not loaded."); +#endif return ctx->getTypeUniquer().get( [&](TypeStorage *storage) { storage->initialize(AbstractType::lookup(T::getTypeID(), ctx)); @@ -137,6 +145,13 @@ struct TypeUniquer { static typename std::enable_if_t< std::is_same::value, T> get(MLIRContext *ctx) { +#ifndef NDEBUG + if (!ctx->getTypeUniquer().isSingletonStorageInitialized(T::getTypeID())) + llvm::report_fatal_error(llvm::Twine("can't create type '") + + llvm::getTypeName() + + "' because storage uniquer isn't initialized: " + "the dialect was likely not loaded."); +#endif return ctx->getTypeUniquer().get(T::getTypeID()); } diff --git a/mlir/include/mlir/Support/StorageUniquer.h b/mlir/include/mlir/Support/StorageUniquer.h index eb04688be19026..d0a6170805bfdf 100644 --- a/mlir/include/mlir/Support/StorageUniquer.h +++ b/mlir/include/mlir/Support/StorageUniquer.h @@ -210,6 +210,16 @@ class StorageUniquer { return get(TypeID::get()); } + /// Test if there is a singleton storage uniquer initialized for the provided + /// TypeID. This is only useful for debugging/diagnostic purpose: the uniquer + /// is initialized when a dialect is loaded. + bool isSingletonStorageInitialized(TypeID id); + + /// Test if there is a parametric storage uniquer initialized for the provided + /// TypeID. 
This is only useful for debugging/diagnostic purpose: the uniquer + /// is initialized when a dialect is loaded. + bool isParametricStorageInitialized(TypeID id); + /// Changes the mutable component of 'storage' by forwarding the trailing /// arguments to the 'mutate' function of the derived class. template diff --git a/mlir/include/mlir/Transforms/BufferPlacement.h b/mlir/include/mlir/Transforms/BufferPlacement.h index b3db7794fd9712..6d88ac3599cf17 100644 --- a/mlir/include/mlir/Transforms/BufferPlacement.h +++ b/mlir/include/mlir/Transforms/BufferPlacement.h @@ -24,34 +24,6 @@ namespace mlir { -/// Prepares a buffer placement phase. It can place (user-defined) alloc -/// nodes. This simplifies the integration of the actual buffer-placement -/// pass. Sample usage: -/// BufferAssignmentPlacer baHelper(regionOp); -/// -> determine alloc positions -/// auto allocPosition = baHelper.computeAllocPosition(value); -/// -> place alloc -/// allocBuilder.setInsertionPoint(positions.getAllocPosition()); -/// -/// Note: this class is intended to be used during legalization. In order -/// to move alloc and dealloc nodes into the right places you can use the -/// createBufferPlacementPass() function. -class BufferAssignmentPlacer { -public: - /// Creates a new assignment builder. - explicit BufferAssignmentPlacer(Operation *op); - - /// Returns the operation this analysis was constructed from. - Operation *getOperation() const { return operation; } - - /// Computes the actual position to place allocs for the given result. - OpBuilder::InsertPoint computeAllocPosition(OpResult result); - -private: - /// The operation this analysis was constructed from. - Operation *operation; -}; - /// A helper type converter class for using inside Buffer Assignment operation /// conversion patterns. The default constructor keeps all the types intact /// except for the ranked-tensor types which is converted to memref types. @@ -157,31 +129,20 @@ class BufferAssignmentTypeConverter : public TypeConverter { SmallVector decomposeTypeConversions; }; -/// Helper conversion pattern that encapsulates a BufferAssignmentPlacer -/// instance. Sample usage: -/// class CustomConversionPattern : public -/// BufferAssignmentOpConversionPattern -/// { -/// ... matchAndRewrite(...) { -/// -> Access stored BufferAssignmentPlacer -/// bufferAssignment->computeAllocPosition(resultOp); -/// } -/// }; +/// Helper conversion pattern that encapsulates a BufferAssignmentTypeConverter +/// instance. 
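+/// Sample usage (a sketch; `MyOpConverter` and `MyOp` are hypothetical):
+///   class MyOpConverter
+///       : public BufferAssignmentOpConversionPattern<MyOp> {
+///     ... matchAndRewrite(...) {
+///       // Query the encapsulated type converter:
+///       converter->convertType(...);
+///     }
+///   };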
template class BufferAssignmentOpConversionPattern : public OpConversionPattern { public: explicit BufferAssignmentOpConversionPattern( - MLIRContext *context, BufferAssignmentPlacer *bufferAssignment = nullptr, - BufferAssignmentTypeConverter *converter = nullptr, + MLIRContext *context, BufferAssignmentTypeConverter *converter, PatternBenefit benefit = 1) - : OpConversionPattern(context, benefit), - bufferAssignment(bufferAssignment), converter(converter) { + : OpConversionPattern(context, benefit), converter(converter) { assert(converter && "The type converter has not been defined"); } protected: - BufferAssignmentPlacer *bufferAssignment; BufferAssignmentTypeConverter *converter; }; @@ -282,8 +243,7 @@ class BufferAssignmentCallOpConverter template static void populateWithBufferAssignmentOpConversionPatterns( - MLIRContext *context, BufferAssignmentPlacer *placer, - BufferAssignmentTypeConverter *converter, + MLIRContext *context, BufferAssignmentTypeConverter *converter, OwningRewritePatternList *patterns) { // clang-format off patterns->insert< @@ -291,7 +251,7 @@ static void populateWithBufferAssignmentOpConversionPatterns( BufferAssignmentFuncOpConverter, BufferAssignmentReturnOpConverter - >(context, placer, converter); + >(context, converter); // clang-format on } } // end namespace mlir diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir index 8d965779dfc6df..38cbabc329989a 100644 --- a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir +++ b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir @@ -4,6 +4,7 @@ // RUN: FileCheck %s #map0 = affine_map<(d0, d1) -> (d1, d0)> +#map1 = affine_map<(d0, d1) -> (d1)> func @print_memref_f32(memref<*xf32>) @@ -29,6 +30,7 @@ func @main() { %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index + %c3 = constant 3 : index %c6 = constant 6 : index %cst = constant -4.2e+01 : f32 %0 = call @alloc_2d_filled_f32(%c6, %c6) : (index, index) -> memref @@ -76,6 +78,28 @@ func @main() { // CHECK-SAME: ( 205, 305, 405, 505, 504 ), // CHECK-SAME: ( 105, 205, 305, 405, 505 ) ) + %3 = vector.transfer_read %0[%c2, %c3], %cst : memref, vector<5x5xf32> + vector.print %3 : vector<5x5xf32> + // New 5x5 block rooted @{2, 3} in memory. + // CHECK-NEXT: ( ( 403, 503, 502, -42, -42 ), + // CHECK-SAME: ( 404, 504, 503, -42, -42 ), + // CHECK-SAME: ( 405, 505, 504, -42, -42 ), + // CHECK-SAME: ( 305, 405, 505, -42, -42 ), + // CHECK-SAME: ( -42, -42, -42, -42, -42 ) ) + + %4 = vector.transfer_read %0[%c2, %c3], %cst {permutation_map = #map0} : memref, vector<5x5xf32> + vector.print %4 : vector<5x5xf32> + // Transposed 5x5 block rooted @{2, 3} in memory. 
+ // CHECK-NEXT: ( ( 403, 404, 405, 305, -42 ), + // CHECK-SAME: ( 503, 504, 505, 405, -42 ), + // CHECK-SAME: ( 502, 503, 504, 505, -42 ), + // CHECK-SAME: ( -42, -42, -42, -42, -42 ), + // CHECK-SAME: ( -42, -42, -42, -42, -42 ) ) + + %5 = vector.transfer_read %0[%c2, %c3], %cst {permutation_map = #map1} : memref, vector<5xf32> + vector.print %5 : vector<5xf32> + // CHECK-NEXT: ( 403, 503, 502, -42, -42 ) + dealloc %0 : memref return } diff --git a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp index 8c917e08f942cd..f3f11e89af02fb 100644 --- a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp +++ b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp @@ -422,6 +422,7 @@ LogicalResult ShapeOfOpConversion::matchAndRewrite( return failure(); // For ranked tensor arguments, lower to `tensor_from_elements`. + auto loc = op.getLoc(); ShapeOfOp::Adaptor transformed(operands); Value tensor = transformed.arg(); Type tensorTy = tensor.getType(); @@ -431,7 +432,6 @@ LogicalResult ShapeOfOpConversion::matchAndRewrite( SmallVector extentValues; RankedTensorType rankedTensorTy = tensorTy.cast(); int64_t rank = rankedTensorTy.getRank(); - auto loc = op.getLoc(); for (int64_t i = 0; i < rank; i++) { if (rankedTensorTy.isDynamicDim(i)) { Value extent = rewriter.create(loc, tensor, i); @@ -451,26 +451,17 @@ LogicalResult ShapeOfOpConversion::matchAndRewrite( return success(); } - // Allocate stack memory. - auto loc = op.getLoc(); + // Lower to `dynamic_tensor_from_elements` otherwise. + auto *ctx = rewriter.getContext(); Value rank = rewriter.create(loc, tensor); - Type indexTy = rewriter.getIndexType(); - Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); - Value mem = rewriter.create(loc, memTy, ValueRange{rank}); - - // Copy shape extents to stack-allocated memory. - Value zero = rewriter.create(loc, 0); - Value one = rewriter.create(loc, 1); - rewriter.create( - loc, zero, rank, one, llvm::None, - [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { - Value dim = rewriter.create(loc, tensor, iv); - rewriter.create(loc, dim, mem, ValueRange{iv}); - rewriter.create(loc); + rewriter.replaceOpWithNewOp( + op, getExtentTensorType(ctx), ValueRange{rank}, + [&](OpBuilder &b, Location loc, ValueRange args) { + Value dim = args.front(); + Value extent = b.create(loc, tensor, dim); + b.create(loc, extent); }); - // Load extents to tensor value. 
- rewriter.replaceOpWithNewOp(op.getOperation(), mem); return success(); } diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index a43bec855ff0ad..d51a96dca3849d 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -1096,7 +1096,7 @@ static bool isContiguous(MemRefType memRefType, SmallVectorImpl &strides) { int64_t offset; auto successStrides = getStridesAndOffset(memRefType, strides, offset); - bool isContiguous = (strides.back() == 1); + bool isContiguous = strides.empty() || strides.back() == 1; if (isContiguous) { auto sizes = memRefType.getShape(); for (int index = 0, e = strides.size() - 2; index < e; ++index) { diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 8f7d43829846b8..c0d283d7af451b 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -108,17 +108,10 @@ class NDTransferOpHelper { private: /// Creates the loop nest on the "major" dimensions and calls the /// `loopBodyBuilder` lambda in the context of the loop nest. - template - void emitLoops(Lambda loopBodyBuilder); - - /// Operate within the body of `emitLoops` to: - /// 1. Compute the indexings `majorIvs + majorOffsets` and save them in - /// `majorIvsPlusOffsets`. - /// 2. Return a boolean that determines whether the first `majorIvs.rank()` - /// dimensions `majorIvs + majorOffsets` are all within `memrefBounds`. - Value emitInBoundsCondition(ValueRange majorIvs, ValueRange majorOffsets, - MemRefBoundsCapture &memrefBounds, - SmallVectorImpl &majorIvsPlusOffsets); + void + emitLoops(llvm::function_ref + loopBodyBuilder); /// Common state to lower vector transfer ops. PatternRewriter &rewriter; @@ -140,8 +133,10 @@ class NDTransferOpHelper { }; template -template -void NDTransferOpHelper::emitLoops(Lambda loopBodyBuilder) { +void NDTransferOpHelper::emitLoops( + llvm::function_ref + loopBodyBuilder) { /// Loop nest operates on the major dimensions MemRefBoundsCapture memrefBoundsCapture(xferOp.memref()); @@ -196,11 +191,16 @@ static Value onTheFlyFoldSLT(Value v, Value ub) { return slt(v, ub); } -template -Value NDTransferOpHelper::emitInBoundsCondition( - ValueRange majorIvs, ValueRange majorOffsets, - MemRefBoundsCapture &memrefBounds, - SmallVectorImpl &majorIvsPlusOffsets) { +/// 1. Compute the indexings `majorIvs + majorOffsets` and save them in +/// `majorIvsPlusOffsets`. +/// 2. Return a value of i1 that determines whether the first `majorIvs.rank()` +/// dimensions `majorIvs + majorOffsets` are all within `memrefBounds`. +static Value +emitInBoundsCondition(PatternRewriter &rewriter, + VectorTransferOpInterface xferOp, unsigned leadingRank, + ValueRange majorIvs, ValueRange majorOffsets, + const MemRefBoundsCapture &memrefBounds, + SmallVectorImpl &majorIvsPlusOffsets) { Value inBoundsCondition; majorIvsPlusOffsets.reserve(majorIvs.size()); unsigned idx = 0; @@ -246,7 +246,7 @@ LogicalResult NDTransferOpHelper::doReplace() { emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets, ValueRange majorOffsets, ValueRange minorOffsets, - MemRefBoundsCapture &memrefBounds) { + const MemRefBoundsCapture &memrefBounds) { /// Lambda to load 1-D vector in the current loop ivs + offset context. 
auto load1DVector = [&](ValueRange majorIvsPlusOffsets) -> Value { SmallVector indexing; @@ -271,7 +271,8 @@ LogicalResult NDTransferOpHelper::doReplace() { // context. SmallVector majorIvsPlusOffsets; Value inBoundsCondition = emitInBoundsCondition( - majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets); + rewriter, cast(xferOp.getOperation()), + leadingRank, majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets); if (inBoundsCondition) { // 2. If the condition is not null, we need an IfOp, which may yield @@ -344,7 +345,7 @@ LogicalResult NDTransferOpHelper::doReplace() { emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets, ValueRange majorOffsets, ValueRange minorOffsets, - MemRefBoundsCapture &memrefBounds) { + const MemRefBoundsCapture &memrefBounds) { // Lower to 1-D vector_transfer_write and let recursion handle it. auto emitTransferWrite = [&](ValueRange majorIvsPlusOffsets) { SmallVector indexing; @@ -374,7 +375,8 @@ LogicalResult NDTransferOpHelper::doReplace() { // context. SmallVector majorIvsPlusOffsets; Value inBoundsCondition = emitInBoundsCondition( - majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets); + rewriter, cast(xferOp.getOperation()), + leadingRank, majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets); if (inBoundsCondition) { // 2.a. If the condition is not null, we need an IfOp, to write @@ -424,120 +426,90 @@ static int computeCoalescedIndex(TransferOpTy transfer) { return coalescedIdx; } -/// Emits remote memory accesses that are clipped to the boundaries of the -/// MemRef. template -static SmallVector -clip(TransferOpTy transfer, MemRefBoundsCapture &bounds, ArrayRef ivs) { - using namespace mlir::edsc; - - Value zero(std_constant_index(0)), one(std_constant_index(1)); - SmallVector memRefAccess(transfer.indices()); - SmallVector clippedScalarAccessExprs(memRefAccess.size()); - // Indices accessing to remote memory are clipped and their expressions are - // returned in clippedScalarAccessExprs. - for (unsigned memRefDim = 0; memRefDim < clippedScalarAccessExprs.size(); +VectorTransferRewriter::VectorTransferRewriter( + VectorTransferToSCFOptions options, MLIRContext *context) + : RewritePattern(TransferOpTy::getOperationName(), 1, context), + options(options) {} + +/// Used for staging the transfer in a local buffer. +template +MemRefType VectorTransferRewriter::tmpMemRefType( + TransferOpTy transfer) const { + auto vectorType = transfer.getVectorType(); + return MemRefType::get(vectorType.getShape().drop_back(), + VectorType::get(vectorType.getShape().take_back(), + vectorType.getElementType()), + {}, 0); +} + +static void emitWithBoundsChecks( + PatternRewriter &rewriter, VectorTransferOpInterface transfer, + ValueRange ivs, const MemRefBoundsCapture &memRefBoundsCapture, + function_ref)> inBoundsFun, + function_ref)> outOfBoundsFun = nullptr) { + // Permute the incoming indices according to the permutation map. + SmallVector indices = + linalg::applyMapToValues(rewriter, transfer.getLoc(), + transfer.permutation_map(), transfer.indices()); + + // Generate a bounds check if necessary. + SmallVector majorIvsPlusOffsets; + Value inBoundsCondition = + emitInBoundsCondition(rewriter, transfer, 0, ivs, indices, + memRefBoundsCapture, majorIvsPlusOffsets); + + // Apply the permutation map to the ivs. The permutation map may not use all + // the inputs. 
+ SmallVector scalarAccessExprs(transfer.indices().size()); + for (unsigned memRefDim = 0; memRefDim < transfer.indices().size(); ++memRefDim) { // Linear search on a small number of entries. int loopIndex = -1; auto exprs = transfer.permutation_map().getResults(); for (auto en : llvm::enumerate(exprs)) { auto expr = en.value(); - auto dim = expr.template dyn_cast(); + auto dim = expr.dyn_cast(); // Sanity check. - assert( - (dim || expr.template cast().getValue() == 0) && - "Expected dim or 0 in permutationMap"); + assert((dim || expr.cast().getValue() == 0) && + "Expected dim or 0 in permutationMap"); if (dim && memRefDim == dim.getPosition()) { loopIndex = en.index(); break; } } - // We cannot distinguish atm between unrolled dimensions that implement - // the "always full" tile abstraction and need clipping from the other - // ones. So we conservatively clip everything. using namespace edsc::op; - auto N = bounds.ub(memRefDim); - auto i = memRefAccess[memRefDim]; - if (loopIndex < 0) { - auto N_minus_1 = N - one; - auto select_1 = std_select(slt(i, N), i, N_minus_1); - clippedScalarAccessExprs[memRefDim] = - std_select(slt(i, zero), zero, select_1); - } else { - auto ii = ivs[loopIndex]; - auto i_plus_ii = i + ii; - auto N_minus_1 = N - one; - auto select_1 = std_select(slt(i_plus_ii, N), i_plus_ii, N_minus_1); - clippedScalarAccessExprs[memRefDim] = - std_select(slt(i_plus_ii, zero), zero, select_1); - } + auto i = transfer.indices()[memRefDim]; + scalarAccessExprs[memRefDim] = loopIndex < 0 ? i : i + ivs[loopIndex]; } - return clippedScalarAccessExprs; + if (inBoundsCondition) + conditionBuilder( + /* scf.if */ inBoundsCondition, // { + [&] { inBoundsFun(scalarAccessExprs); }, + // } else { + outOfBoundsFun ? [&] { outOfBoundsFun(scalarAccessExprs); } + : function_ref() + // } + ); + else + inBoundsFun(scalarAccessExprs); } namespace mlir { -template -VectorTransferRewriter::VectorTransferRewriter( - VectorTransferToSCFOptions options, MLIRContext *context) - : RewritePattern(TransferOpTy::getOperationName(), 1, context), - options(options) {} - -/// Used for staging the transfer in a local buffer. -template -MemRefType VectorTransferRewriter::tmpMemRefType( - TransferOpTy transfer) const { - auto vectorType = transfer.getVectorType(); - return MemRefType::get(vectorType.getShape().drop_back(), - VectorType::get(vectorType.getShape().take_back(), - vectorType.getElementType()), - {}, 0); -} - /// Lowers TransferReadOp into a combination of: /// 1. local memory allocation; /// 2. perfect loop nest over: /// a. scalar load from local buffers (viewed as a scalar memref); -/// a. scalar store to original memref (with clipping). +/// a. scalar store to original memref (with padding). /// 3. vector_load from local buffer (viewed as a memref<1 x vector>); /// 4. local memory deallocation. /// /// Lowers the data transfer part of a TransferReadOp while ensuring no /// out-of-bounds accesses are possible. Out-of-bounds behavior is handled by -/// clipping. This means that a given value in memory can be read multiple -/// times and concurrently. -/// -/// Important notes about clipping and "full-tiles only" abstraction: -/// ================================================================= -/// When using clipping for dealing with boundary conditions, the same edge -/// value will appear multiple times (a.k.a edge padding). This is fine if the -/// subsequent vector operations are all data-parallel but **is generally -/// incorrect** in the presence of reductions or extract operations. 
-/// -/// More generally, clipping is a scalar abstraction that is expected to work -/// fine as a baseline for CPUs and GPUs but not for vector_load and DMAs. -/// To deal with real vector_load and DMAs, a "padded allocation + view" -/// abstraction with the ability to read out-of-memref-bounds (but still within -/// the allocated region) is necessary. -/// -/// Whether using scalar loops or vector_load/DMAs to perform the transfer, -/// junk values will be materialized in the vectors and generally need to be -/// filtered out and replaced by the "neutral element". This neutral element is -/// op-dependent so, in the future, we expect to create a vector filter and -/// apply it to a splatted constant vector with the proper neutral element at -/// each ssa-use. This filtering is not necessary for pure data-parallel -/// operations. -/// -/// In the case of vector_store/DMAs, Read-Modify-Write will be required, which -/// also have concurrency implications. Note that by using clipped scalar stores -/// in the presence of data-parallel only operations, we generate code that -/// writes the same value multiple time on the edge locations. -/// -/// TODO: implement alternatives to clipping. -/// TODO: support non-data-parallel operations. +/// padding. /// Performs the rewrite. template <> @@ -584,24 +556,31 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( steps.push_back(std_constant_index(step)); // 2. Emit alloc-copy-load-dealloc. + MLIRContext *ctx = op->getContext(); Value tmp = setAllocAtFunctionEntry(tmpMemRefType(transfer), transfer); StdIndexedValue local(tmp); - Value vec = vector_type_cast(tmp); loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { - auto ivs = llvm::to_vector<8>(loopIvs); + auto ivsStorage = llvm::to_vector<8>(loopIvs); // Swap the ivs which will reorder memory accesses. if (coalescedIdx >= 0) - std::swap(ivs.back(), ivs[coalescedIdx]); - // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist). - SmallVector indices = clip(transfer, memRefBoundsCapture, ivs); - ArrayRef indicesRef(indices), ivsRef(ivs); - Value pos = - std_index_cast(IntegerType::get(32, op->getContext()), ivsRef.back()); - Value vector = vector_insert_element(remote(indicesRef), - local(ivsRef.drop_back()), pos); - local(ivsRef.drop_back()) = vector; + std::swap(ivsStorage.back(), ivsStorage[coalescedIdx]); + + ArrayRef ivs(ivsStorage); + Value pos = std_index_cast(IntegerType::get(32, ctx), ivs.back()); + Value inVector = local(ivs.drop_back()); + auto loadValue = [&](ArrayRef indices) { + Value vector = vector_insert_element(remote(indices), inVector, pos); + local(ivs.drop_back()) = vector; + }; + auto loadPadding = [&](ArrayRef) { + Value vector = vector_insert_element(transfer.padding(), inVector, pos); + local(ivs.drop_back()) = vector; + }; + emitWithBoundsChecks( + rewriter, cast(transfer.getOperation()), ivs, + memRefBoundsCapture, loadValue, loadPadding); }); - Value vectorValue = std_load(vec); + Value vectorValue = std_load(vector_type_cast(tmp)); // 3. Propagate. rewriter.replaceOp(op, vectorValue); @@ -613,19 +592,11 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( /// 2. vector_store to local buffer (viewed as a memref<1 x vector>); /// 3. perfect loop nest over: /// a. scalar load from local buffers (viewed as a scalar memref); -/// a. scalar store to original memref (with clipping). +/// a. scalar store to original memref (if in bounds). /// 4. local memory deallocation. 
/// /// More specifically, lowers the data transfer part while ensuring no -/// out-of-bounds accesses are possible. Out-of-bounds behavior is handled by -/// clipping. This means that a given value in memory can be written to multiple -/// times and concurrently. -/// -/// See `Important notes about clipping and full-tiles only abstraction` in the -/// description of `readClipped` above. -/// -/// TODO: implement alternatives to clipping. -/// TODO: support non-data-parallel operations. +/// out-of-bounds accesses are possible. template <> LogicalResult VectorTransferRewriter::matchAndRewrite( Operation *op, PatternRewriter &rewriter) const { @@ -675,17 +646,21 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( Value vec = vector_type_cast(tmp); std_store(vectorValue, vec); loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { - auto ivs = llvm::to_vector<8>(loopIvs); - // Swap the ivs which will reorder memory accesses. + auto ivsStorage = llvm::to_vector<8>(loopIvs); + // Swap the ivsStorage which will reorder memory accesses. if (coalescedIdx >= 0) - std::swap(ivs.back(), ivs[coalescedIdx]); - // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist). - SmallVector indices = clip(transfer, memRefBoundsCapture, ivs); - ArrayRef indicesRef(indices), ivsRef(ivs); + std::swap(ivsStorage.back(), ivsStorage[coalescedIdx]); + + ArrayRef ivs(ivsStorage); Value pos = - std_index_cast(IntegerType::get(32, op->getContext()), ivsRef.back()); - Value scalar = vector_extract_element(local(ivsRef.drop_back()), pos); - remote(indices) = scalar; + std_index_cast(IntegerType::get(32, op->getContext()), ivs.back()); + auto storeValue = [&](ArrayRef indices) { + Value scalar = vector_extract_element(local(ivs.drop_back()), pos); + remote(indices) = scalar; + }; + emitWithBoundsChecks( + rewriter, cast(transfer.getOperation()), ivs, + memRefBoundsCapture, storeValue); }); // 3. Erase. diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp index edb21384080f49..3dc236f3c06865 100644 --- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp @@ -9,7 +9,6 @@ // This file implements loop unrolling. // //===----------------------------------------------------------------------===// - #include "PassDetail.h" #include "mlir/Analysis/LoopAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" @@ -45,11 +44,13 @@ struct LoopUnroll : public AffineLoopUnrollBase { : AffineLoopUnrollBase(other), getUnrollFactor(other.getUnrollFactor) {} explicit LoopUnroll( - Optional unrollFactor = None, bool unrollFull = false, + Optional unrollFactor = None, bool unrollUpToFactor = false, + bool unrollFull = false, const std::function &getUnrollFactor = nullptr) : getUnrollFactor(getUnrollFactor) { if (unrollFactor) this->unrollFactor = *unrollFactor; + this->unrollUpToFactor = unrollUpToFactor; this->unrollFull = unrollFull; } @@ -126,13 +127,16 @@ LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) { if (unrollFull) return loopUnrollFull(forOp); // Otherwise, unroll by the given unroll factor. + if (unrollUpToFactor) { + return loopUnrollUpToFactor(forOp, unrollFactor); + } return loopUnrollByFactor(forOp, unrollFactor); } std::unique_ptr> mlir::createLoopUnrollPass( - int unrollFactor, bool unrollFull, + int unrollFactor, bool unrollUpToFactor, bool unrollFull, const std::function &getUnrollFactor) { return std::make_unique( - unrollFactor == -1 ? 
None : Optional(unrollFactor), unrollFull, - getUnrollFactor); + unrollFactor == -1 ? None : Optional(unrollFactor), + unrollUpToFactor, unrollFull, getUnrollFactor); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp index 89a01f9ca6292f..6af0067c8928c3 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp @@ -51,11 +51,6 @@ class GenericOpConverter return rewriter.notifyMatchFailure( op, "dynamic shapes not currently supported"); auto memrefType = MemRefType::get(type.getShape(), type.getElementType()); - - // Compute alloc position and insert a custom allocation node. - OpBuilder::InsertionGuard guard(rewriter); - rewriter.restoreInsertionPoint( - bufferAssignment->computeAllocPosition(result)); auto alloc = rewriter.create(loc, memrefType); newArgs.push_back(alloc); newResults.push_back(alloc); @@ -99,13 +94,12 @@ class GenericOpConverter /// Populate the given list with patterns to convert Linalg operations on /// tensors to buffers. static void populateConvertLinalgOnTensorsToBuffersPattern( - MLIRContext *context, BufferAssignmentPlacer *placer, - BufferAssignmentTypeConverter *converter, + MLIRContext *context, BufferAssignmentTypeConverter *converter, OwningRewritePatternList *patterns) { populateWithBufferAssignmentOpConversionPatterns< - mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, placer, - converter, patterns); - patterns->insert(context, placer, converter); + mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, converter, + patterns); + patterns->insert(context, converter); } /// Converts Linalg operations that work on tensor-type operands or results to @@ -119,6 +113,8 @@ struct ConvertLinalgOnTensorsToBuffers // Mark all Standard operations legal. target.addLegalDialect(); + target.addLegalOp(); + target.addLegalOp(); // Mark all Linalg operations illegal as long as they work on tensors. auto isLegalOperation = [&](Operation *op) { @@ -144,16 +140,11 @@ struct ConvertLinalgOnTensorsToBuffers converter.setResultConversionKind( BufferAssignmentTypeConverter::AppendToArgumentsList); - // Walk over all the functions to apply buffer assignment. 
- getOperation().walk([&](FuncOp function) -> WalkResult { - OwningRewritePatternList patterns; - BufferAssignmentPlacer placer(function); - populateConvertLinalgOnTensorsToBuffersPattern(&context, &placer, - &converter, &patterns); - - // Applying full conversion - return applyFullConversion(function, target, patterns); - }); + OwningRewritePatternList patterns; + populateConvertLinalgOnTensorsToBuffersPattern(&context, &converter, + &patterns); + if (failed(applyFullConversion(this->getOperation(), target, patterns))) + this->signalPassFailure(); } }; } // end anonymous namespace diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index ada89f1c82b5c8..51781af9cb3049 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -367,3 +367,98 @@ LogicalResult LinalgCopyVTWForwardingPattern::matchAndRewrite( return success(); } + +template +LogicalResult ConvOpVectorization::matchAndRewrite( + ConvOp op, PatternRewriter &rewriter) const { + const unsigned dimSize = 3; + Location loc = op.getLoc(); + MLIRContext *context = op.getContext(); + edsc::ScopedContext scope(rewriter, loc); + + ShapedType inShapeType = op.getInputShapedType(0); + ShapedType kShapeType = op.getInputShapedType(1); + + ArrayRef inShape = inShapeType.getShape(); + ArrayRef kShape = kShapeType.getShape(); + + if (!inShapeType.hasStaticShape() || !kShapeType.hasStaticShape()) + return failure(); + + SmallVector mapping; + // Fail to apply when the size of not vectorized dimension is not 1 or + // when the size of vectorized dimension is not dimSize. + for (unsigned i = 0; i < N; i++) { + if (!mask[i] && (inShape[i] != 1 || kShape[i] != 1)) + return failure(); + if (mask[i] && (inShape[i] != dimSize || kShape[i] != dimSize)) + return failure(); + + if (mask[i]) + mapping.push_back(getAffineDimExpr(i, context)); + } + + Value input = op.getInput(0); + Value kernel = op.getInput(1); + Value output = op.getOutputBuffer(0); + + unsigned rank = inShapeType.getRank(); + unsigned numDims = mapping.size(); + Type elemType = inShapeType.getElementType(); + + auto map = AffineMap::get(rank, 0, mapping, context); + SmallVector zeros(rank, std_constant_index(0)); + auto vecType = + VectorType::get(SmallVector(numDims, dimSize), elemType); + + auto inputVec = vector_transfer_read(vecType, input, zeros, map); + auto kernelVec = vector_transfer_read(vecType, kernel, zeros, map); + + auto acc = std_constant(elemType, rewriter.getZeroAttr(elemType)); + + std::array indexingMaps{ + AffineMap::getMultiDimIdentityMap(numDims, context), + AffineMap::getMultiDimIdentityMap(numDims, context), + AffineMap::get(numDims, 0, {}, context)}; + + std::vector iteratorTypes(numDims, "reduction"); + + auto result = rewriter.create( + loc, inputVec, kernelVec, acc, + rewriter.getAffineMapArrayAttr(indexingMaps), + rewriter.getStrArrayAttr(iteratorTypes)); + + rewriter.create(loc, result, output, ValueRange(zeros)); + rewriter.eraseOp(op); + return success(); +} + +void mlir::linalg::populateConvVectorizationPatterns( + MLIRContext *context, OwningRewritePatternList &patterns) { + patterns.insert>( + context, SmallVector{true}); + + patterns.insert>( + context, SmallVector{false, true, true}); + + patterns.insert>( + context, SmallVector{false, true, true}); + + patterns.insert>( + context, SmallVector{true, true}); + + patterns.insert>( + context, SmallVector{false, true, true, true}); + + patterns.insert>( + context, 
SmallVector{false, true, true, true}); + + patterns.insert>( + context, SmallVector{true, true, true}); + + patterns.insert>( + context, SmallVector{false, true, true, true, true}); + + patterns.insert>( + context, SmallVector{false, true, true, true, true}); +} diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 65f8b83d9a7187..a0ad05852e230d 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -1694,6 +1694,22 @@ static LogicalResult verify(DynamicTensorFromElementsOp op) { return success(); } +void DynamicTensorFromElementsOp::build( + OpBuilder &b, OperationState &result, Type resultTy, + ValueRange dynamicExtents, + function_ref bodyBuilder) { + build(b, result, resultTy, dynamicExtents); + + // Build and populate body. + OpBuilder::InsertionGuard guard(b); + Region *bodyRegion = result.regions.front().get(); + auto rank = resultTy.cast().getRank(); + SmallVector argumentTypes(rank, b.getIndexType()); + Block *bodyBlock = + b.createBlock(bodyRegion, bodyRegion->end(), argumentTypes); + bodyBuilder(b, result.location, bodyBlock->getArguments()); +} + //===----------------------------------------------------------------------===// // ExtractElementOp //===----------------------------------------------------------------------===// @@ -1744,9 +1760,9 @@ static ParseResult parseTensorFromElementsOp(OpAsmParser &parser, OperationState &result) { SmallVector elementsOperands; Type resultType; - if (parser.parseLParen() || parser.parseOperandList(elementsOperands) || - parser.parseRParen() || parser.parseOptionalAttrDict(result.attributes) || - parser.parseColon() || parser.parseType(resultType)) + if (parser.parseOperandList(elementsOperands) || + parser.parseOptionalAttrDict(result.attributes) || + parser.parseColonType(resultType)) return failure(); if (parser.resolveOperands(elementsOperands, @@ -1759,9 +1775,9 @@ static ParseResult parseTensorFromElementsOp(OpAsmParser &parser, } static void print(OpAsmPrinter &p, TensorFromElementsOp op) { - p << "tensor_from_elements(" << op.elements() << ')'; + p << "tensor_from_elements " << op.elements(); p.printOptionalAttrDict(op.getAttrs()); - p << " : " << op.result().getType(); + p << " : " << op.getType(); } static LogicalResult verify(TensorFromElementsOp op) { @@ -1778,6 +1794,14 @@ static LogicalResult verify(TensorFromElementsOp op) { return success(); } +void TensorFromElementsOp::build(OpBuilder &builder, OperationState &result, + ValueRange elements) { + assert(!elements.empty() && "expected at least one element"); + result.addOperands(elements); + result.addTypes(RankedTensorType::get({static_cast(elements.size())}, + *elements.getTypes().begin())); +} + namespace { // Canonicalizes the pattern of the form diff --git a/mlir/lib/IR/Block.cpp b/mlir/lib/IR/Block.cpp index 71f368c49776e9..e039b41ae4b779 100644 --- a/mlir/lib/IR/Block.cpp +++ b/mlir/lib/IR/Block.cpp @@ -282,7 +282,7 @@ unsigned PredecessorIterator::getSuccessorIndex() const { } //===----------------------------------------------------------------------===// -// Successors +// SuccessorRange //===----------------------------------------------------------------------===// SuccessorRange::SuccessorRange(Block *block) : SuccessorRange(nullptr, 0) { @@ -295,3 +295,29 @@ SuccessorRange::SuccessorRange(Operation *term) : SuccessorRange(nullptr, 0) { if ((count = term->getNumSuccessors())) base = term->getBlockOperands().data(); } + 
+//===----------------------------------------------------------------------===// +// BlockRange +//===----------------------------------------------------------------------===// + +BlockRange::BlockRange(ArrayRef blocks) : BlockRange(nullptr, 0) { + if ((count = blocks.size())) + base = blocks.data(); +} + +BlockRange::BlockRange(SuccessorRange successors) + : BlockRange(successors.begin().getBase(), successors.size()) {} + +/// See `llvm::detail::indexed_accessor_range_base` for details. +BlockRange::OwnerT BlockRange::offset_base(OwnerT object, ptrdiff_t index) { + if (auto *operand = object.dyn_cast()) + return {operand + index}; + return {object.dyn_cast() + index}; +} + +/// See `llvm::detail::indexed_accessor_range_base` for details. +Block *BlockRange::dereference_iterator(OwnerT object, ptrdiff_t index) { + if (const auto *operand = object.dyn_cast()) + return operand[index].get(); + return object.dyn_cast()[index]; +} diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp index b8f9e6c9fdfc4e..f531a6097c257b 100644 --- a/mlir/lib/IR/Operation.cpp +++ b/mlir/lib/IR/Operation.cpp @@ -71,29 +71,24 @@ OperationName OperationName::getFromOpaquePointer(void *pointer) { /// Create a new Operation with the specific fields. Operation *Operation::create(Location location, OperationName name, - ArrayRef resultTypes, - ArrayRef operands, + TypeRange resultTypes, ValueRange operands, ArrayRef attributes, - ArrayRef successors, - unsigned numRegions) { + BlockRange successors, unsigned numRegions) { return create(location, name, resultTypes, operands, MutableDictionaryAttr(attributes), successors, numRegions); } /// Create a new Operation from operation state. Operation *Operation::create(const OperationState &state) { - return Operation::create(state.location, state.name, state.types, - state.operands, state.attributes, state.successors, - state.regions); + return create(state.location, state.name, state.types, state.operands, + state.attributes, state.successors, state.regions); } /// Create a new Operation with the specific fields. Operation *Operation::create(Location location, OperationName name, - ArrayRef resultTypes, - ArrayRef operands, + TypeRange resultTypes, ValueRange operands, MutableDictionaryAttr attributes, - ArrayRef successors, - RegionRange regions) { + BlockRange successors, RegionRange regions) { unsigned numRegions = regions.size(); Operation *op = create(location, name, resultTypes, operands, attributes, successors, numRegions); @@ -106,11 +101,9 @@ Operation *Operation::create(Location location, OperationName name, /// Overload of create that takes an existing MutableDictionaryAttr to avoid /// unnecessarily uniquing a list of attributes. Operation *Operation::create(Location location, OperationName name, - ArrayRef resultTypes, - ArrayRef operands, + TypeRange resultTypes, ValueRange operands, MutableDictionaryAttr attributes, - ArrayRef successors, - unsigned numRegions) { + BlockRange successors, unsigned numRegions) { // We only need to allocate additional memory for a subset of results. 
  unsigned numTrailingResults = OpResult::getNumTrailing(resultTypes.size());
  unsigned numInlineResults = OpResult::getNumInline(resultTypes.size());
@@ -167,7 +160,7 @@ Operation *Operation::create(Location location, OperationName name,
 }
 
 Operation::Operation(Location location, OperationName name,
-                     ArrayRef<Type> resultTypes, unsigned numSuccessors,
+                     TypeRange resultTypes, unsigned numSuccessors,
                      unsigned numRegions,
                      const MutableDictionaryAttr &attributes,
                      bool hasOperandStorage)
@@ -611,8 +604,8 @@ Operation *Operation::cloneWithoutRegions(BlockAndValueMapping &mapper) {
     successors.push_back(mapper.lookupOrDefault(successor));
 
   // Create the new operation.
-  auto *newOp = Operation::create(getLoc(), getName(), getResultTypes(),
-                                  operands, attrs, successors, getNumRegions());
+  auto *newOp = create(getLoc(), getName(), getResultTypes(), operands, attrs,
+                       successors, getNumRegions());
 
   // Remember the mapping of any results.
   for (unsigned i = 0, e = getNumResults(); i != e; ++i)
diff --git a/mlir/lib/IR/OperationSupport.cpp b/mlir/lib/IR/OperationSupport.cpp
index ab84f4e8cf178a..69aea3bfcf1986 100644
--- a/mlir/lib/IR/OperationSupport.cpp
+++ b/mlir/lib/IR/OperationSupport.cpp
@@ -186,7 +186,7 @@ void OperationState::addOperands(ValueRange newOperands) {
   operands.append(newOperands.begin(), newOperands.end());
 }
 
-void OperationState::addSuccessors(SuccessorRange newSuccessors) {
+void OperationState::addSuccessors(BlockRange newSuccessors) {
   successors.append(newSuccessors.begin(), newSuccessors.end());
 }
 
diff --git a/mlir/lib/IR/PatternMatch.cpp b/mlir/lib/IR/PatternMatch.cpp
index a26bc63ed89d0a..d1da8d1d8f263d 100644
--- a/mlir/lib/IR/PatternMatch.cpp
+++ b/mlir/lib/IR/PatternMatch.cpp
@@ -10,9 +10,12 @@
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Operation.h"
 #include "mlir/IR/Value.h"
+#include "llvm/Support/Debug.h"
 
 using namespace mlir;
 
+#define DEBUG_TYPE "pattern-match"
+
 PatternBenefit::PatternBenefit(unsigned benefit) : representation(benefit) {
   assert(representation == benefit && benefit != ImpossibleToMatchSentinel &&
          "This pattern match benefit is too large to represent");
@@ -207,8 +210,14 @@ void PatternApplicator::applyCostModel(CostModel model) {
   anyOpPatterns.clear();
   for (const auto &pat : owningPatternList) {
     // If the pattern is always impossible to match, just ignore it.
-    if (pat->getBenefit().isImpossibleToMatch())
+    if (pat->getBenefit().isImpossibleToMatch()) {
+      LLVM_DEBUG({
+        llvm::dbgs()
+            << "Ignoring pattern '" << pat->getRootKind()
+            << "' because it is impossible to match (by pattern benefit)\n";
+      });
       continue;
+    }
     if (Optional<OperationName> opName = pat->getRootKind())
       patterns[*opName].push_back(pat.get());
     else
@@ -223,8 +232,14 @@ void PatternApplicator::applyCostModel(CostModel model) {
   auto processPatternList = [&](SmallVectorImpl<RewritePattern *> &list) {
     // Special case for one pattern in the list, which is the most common case.
     if (list.size() == 1) {
-      if (model(*list.front()).isImpossibleToMatch())
+      if (model(*list.front()).isImpossibleToMatch()) {
+        LLVM_DEBUG({
+          llvm::dbgs() << "Ignoring pattern '" << list.front()->getRootKind()
+                       << "' because it is impossible to match or cannot lead "
+                          "to legal IR (by cost model)\n";
+        });
         list.clear();
+      }
       return;
     }
 
@@ -236,8 +251,14 @@ void PatternApplicator::applyCostModel(CostModel model) {
     // Sort patterns with highest benefit first, and remove those that are
    // impossible to match.
    std::stable_sort(list.begin(), list.end(), cmp);
-    while (!list.empty() && benefits[list.back()].isImpossibleToMatch())
+    while (!list.empty() && benefits[list.back()].isImpossibleToMatch()) {
+      LLVM_DEBUG({
+        llvm::dbgs() << "Ignoring pattern '" << list.back()->getRootKind()
+                     << "' because it is impossible to match or cannot lead to "
+                        "legal IR (by cost model)\n";
+      });
       list.pop_back();
+    }
   };
   for (auto &it : patterns)
     processPatternList(it.second);
diff --git a/mlir/lib/Support/StorageUniquer.cpp b/mlir/lib/Support/StorageUniquer.cpp
index 73578b5c91acf2..a3e296e99e7389 100644
--- a/mlir/lib/Support/StorageUniquer.cpp
+++ b/mlir/lib/Support/StorageUniquer.cpp
@@ -89,6 +89,9 @@ struct StorageUniquerImpl {
   // Parametric Storage
   //===--------------------------------------------------------------------===//
 
+  /// Check if an instance of a parametric storage class exists.
+  bool hasParametricStorage(TypeID id) { return parametricUniquers.count(id); }
+
   /// Get or create an instance of a parametric type.
   BaseStorage *
   getOrCreate(TypeID id, unsigned hashValue,
@@ -176,6 +179,9 @@ struct StorageUniquerImpl {
     return singletonInstance;
   }
 
+  /// Check if an instance of a singleton storage class exists.
+  bool hasSingleton(TypeID id) { return singletonInstances.count(id); }
+
   //===--------------------------------------------------------------------===//
   // Instance Storage
   //===--------------------------------------------------------------------===//
@@ -227,6 +233,16 @@ auto StorageUniquer::getSingletonImpl(TypeID id) -> BaseStorage * {
   return impl->getSingleton(id);
 }
 
+/// Test if the singleton storage is initialized.
+bool StorageUniquer::isSingletonStorageInitialized(TypeID id) {
+  return impl->hasSingleton(id);
+}
+
+/// Test if the parametric storage is initialized.
+bool StorageUniquer::isParametricStorageInitialized(TypeID id) {
+  return impl->hasParametricStorage(id);
+}
+
 /// Implementation for registering an instance of a derived type with default
 /// storage.
 void StorageUniquer::registerSingletonImpl(
diff --git a/mlir/lib/Transforms/BufferPlacement.cpp b/mlir/lib/Transforms/BufferPlacement.cpp
index 1ab3e7e2e48dcb..9f2c254f91e513 100644
--- a/mlir/lib/Transforms/BufferPlacement.cpp
+++ b/mlir/lib/Transforms/BufferPlacement.cpp
@@ -48,11 +48,10 @@
 // will be freed in the end.
 //
 // TODO:
-// The current implementation does not support loops and the resulting code will
-// be invalid with respect to program semantics. The only thing that is
-// currently missing is a high-level loop analysis that allows us to move allocs
-// and deallocs outside of the loop blocks. Furthermore, it doesn't also accept
-// functions which return buffers already.
+// The current implementation does not support explicit-control-flow loops and
+// the resulting code will be invalid with respect to program semantics.
+// However, structured control-flow loops are fully supported. Furthermore, it
+// does not yet accept functions that return buffers.
 //
 //===----------------------------------------------------------------------===//
 
@@ -77,6 +76,22 @@ static void walkReturnOperations(Region *region, const FuncT &func) {
   }
 }
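The new `LLVM_DEBUG` messages above make the cost model's filtering observable (for instance via `-debug-only=pattern-match`). As a plain-C++ illustration of the underlying technique — sort by decreasing benefit, then pop impossible-to-match entries off the back — here is a self-contained sketch; the names and the sentinel constant are illustrative, not MLIR API:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Patterns carry a 16-bit benefit; one sentinel value means "never applies".
constexpr uint16_t kImpossibleToMatch = UINT16_MAX;

struct Pattern {
  std::string rootKind;
  uint16_t benefit;
};

int main() {
  std::vector<Pattern> list = {{"std.addi", 1},
                               {"std.muli", kImpossibleToMatch},
                               {"std.subi", 3}};

  // Sort patterns with the highest benefit first; stable_sort preserves
  // insertion order among patterns of equal benefit.
  std::stable_sort(list.begin(), list.end(),
                   [](const Pattern &l, const Pattern &r) {
                     return l.benefit > r.benefit;
                   });

  // Impossible-to-match patterns sort to the back and are removed, with a
  // diagnostic mirroring the LLVM_DEBUG output added in this patch.
  while (!list.empty() && list.back().benefit == kImpossibleToMatch) {
    std::cerr << "Ignoring pattern '" << list.back().rootKind
              << "' because it is impossible to match\n";
    list.pop_back();
  }

  for (const Pattern &p : list)
    std::cout << p.rootKind << " (benefit " << p.benefit << ")\n";
}
```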
+/// Wrapper for the actual `RegionBranchOpInterface.getSuccessorRegions`
+/// function that initializes the required `operandAttributes` array.
+static void getSuccessorRegions(RegionBranchOpInterface regionInterface,
+                                llvm::Optional<unsigned> index,
+                                SmallVectorImpl<RegionSuccessor> &successors) {
+  // Create a list of null attributes for each operand to comply with the
+  // `getSuccessorRegions` interface definition that requires a single
+  // attribute per operand.
+  SmallVector<Attribute, 2> operandAttributes(
+      regionInterface.getOperation()->getNumOperands());
+
+  // Get all successor regions using the temporarily allocated
+  // `operandAttributes`.
+  regionInterface.getSuccessorRegions(index, operandAttributes, successors);
+}
+
 namespace {
 
 //===----------------------------------------------------------------------===//
 // BufferPlacementAliasAnalysis
 //===----------------------------------------------------------------------===//
@@ -166,16 +181,10 @@ class BufferPlacementAliasAnalysis {
     // Query the RegionBranchOpInterface to find potential successor regions.
     op->walk([&](RegionBranchOpInterface regionInterface) {
-      // Create an empty attribute for each operand to comply with the
-      // `getSuccessorRegions` interface definition that requires a single
-      // attribute per operand.
-      SmallVector<Attribute, 2> operandAttributes(
-          regionInterface.getOperation()->getNumOperands());
-
       // Extract all entry regions and wire all initial entry successor inputs.
       SmallVector<RegionSuccessor, 2> entrySuccessors;
-      regionInterface.getSuccessorRegions(/*index=*/llvm::None,
-                                          operandAttributes, entrySuccessors);
+      getSuccessorRegions(regionInterface, /*index=*/llvm::None,
+                          entrySuccessors);
       for (RegionSuccessor &entrySuccessor : entrySuccessors) {
         // Wire the entry region's successor arguments with the initial
         // successor inputs.
@@ -191,8 +200,8 @@ class BufferPlacementAliasAnalysis {
       // Iterate over all successor region entries that are reachable from the
       // current region.
       SmallVector<RegionSuccessor, 2> successorRegions;
-      regionInterface.getSuccessorRegions(
-          region.getRegionNumber(), operandAttributes, successorRegions);
+      getSuccessorRegions(regionInterface, region.getRegionNumber(),
+                          successorRegions);
       for (RegionSuccessor &successorRegion : successorRegions) {
         // Iterate over all immediate terminator operations and wire the
         // successor inputs with the operands of each terminator.
@@ -209,6 +218,83 @@ class BufferPlacementAliasAnalysis {
   ValueMapT aliases;
 };
 
+//===----------------------------------------------------------------------===//
+// Backedges
+//===----------------------------------------------------------------------===//
+
+/// A straight-forward program analysis which detects loop backedges induced by
+/// explicit control flow.
+class Backedges {
+public:
+  using BlockSetT = SmallPtrSet<Block *, 16>;
+  using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;
+
+public:
+  /// Constructs a new backedges analysis using the op provided.
+  Backedges(Operation *op) { recurse(op, op->getBlock()); }
+
+  /// Returns the number of backedges formed by explicit control flow.
+  size_t size() const { return edgeSet.size(); }
+
+  /// Returns the start iterator to loop over all backedges.
+  BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }
+
+  /// Returns the end iterator to loop over all backedges.
+  BackedgeSetT::const_iterator end() const { return edgeSet.end(); }
+
+private:
+  /// Enters the current block and inserts a backedge into the `edgeSet` if we
+  /// have already visited the current block. The inserted edge links the given
+  /// `predecessor` with the `current` block.
+  bool enter(Block &current, Block *predecessor) {
+    bool inserted = visited.insert(&current).second;
+    if (!inserted)
+      edgeSet.insert(std::make_pair(predecessor, &current));
+    return inserted;
+  }
+
+  /// Leaves the current block.
+  void exit(Block &current) { visited.erase(&current); }
+
+  /// Recurses into the given operation while taking all attached regions into
+  /// account.
+  void recurse(Operation *op, Block *predecessor) {
+    Block *current = op->getBlock();
+    // If the current op implements the `BranchOpInterface`, there can be
+    // cycles in the scope of all successor blocks.
+    if (isa<BranchOpInterface>(op)) {
+      for (Block *succ : current->getSuccessors())
+        recurse(*succ, current);
+    }
+    // Recurse into all distinct regions and check for explicit control-flow
+    // loops.
+    for (Region &region : op->getRegions())
+      recurse(region.front(), current);
+  }
+
+  /// Recurses into explicit control-flow structures that are given by
+  /// the successor relation defined on the block level.
+  void recurse(Block &block, Block *predecessor) {
+    // Try to enter the current block. If this is not possible, we are
+    // currently processing this block and can safely return here.
+    if (!enter(block, predecessor))
+      return;
+
+    // Recurse into all operations and successor blocks.
+    for (auto &op : block.getOperations())
+      recurse(&op, predecessor);
+
+    // Leave the current block.
+    exit(block);
+  }
+
+  /// Stores all blocks that are currently visited and on the processing stack.
+  BlockSetT visited;
+
+  /// Stores all backedges in the format (source, target).
+  BackedgeSetT edgeSet;
+};
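The `Backedges` analysis above is a depth-first search that keeps the set of blocks currently on the processing stack; reaching a block that is still on the stack marks the traversed edge as a backedge. A self-contained model of the same idea on a plain adjacency-list graph (illustrative only; MLIR blocks, regions, and `BranchOpInterface` are collapsed into integer nodes):

```cpp
#include <cstdio>
#include <set>
#include <utility>
#include <vector>

using Graph = std::vector<std::vector<int>>;

// Mirror of the enter/exit scheme: `onStack` holds the nodes of the current
// DFS path; re-entering one of them records (predecessor, current) as a
// backedge, and nodes are removed again once all successors are processed.
static void recurse(const Graph &g, int current, std::set<int> &onStack,
                    std::set<std::pair<int, int>> &backedges,
                    int predecessor) {
  if (!onStack.insert(current).second) {
    backedges.insert({predecessor, current});
    return;
  }
  for (int succ : g[current])
    recurse(g, succ, onStack, backedges, current);
  onStack.erase(current);
}

int main() {
  // 0 -> 1 -> 2 -> 1 forms a loop; 2 -> 3 leaves it.
  Graph g = {{1}, {2}, {1, 3}, {}};
  std::set<int> onStack;
  std::set<std::pair<int, int>> backedges;
  recurse(g, 0, onStack, backedges, /*predecessor=*/-1);
  for (const auto &e : backedges)
    std::printf("backedge: %d -> %d\n", e.first, e.second);
  // Prints "backedge: 2 -> 1".
}
```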
 
 //===----------------------------------------------------------------------===//
 // BufferPlacement
 //===----------------------------------------------------------------------===//
@@ -357,9 +443,14 @@ class BufferPlacement {
       for (Value value : it->second) {
         if (valuesToFree.count(value) > 0)
           continue;
-        // Check whether we have to free this particular block argument.
-        if (!dominators.dominates(definingBlock, value.getParentBlock())) {
-          toProcess.emplace_back(value, value.getParentBlock());
+        Block *parentBlock = value.getParentBlock();
+        // Check whether we have to free this particular block argument or
+        // generic value. We have to free the current alias if it is either
+        // defined in a non-dominated block or it is defined in the same block
+        // but the current value is not dominated by the source value.
+        if (!dominators.dominates(definingBlock, parentBlock) ||
+            (definingBlock == parentBlock && value.isa<BlockArgument>())) {
+          toProcess.emplace_back(value, parentBlock);
           valuesToFree.insert(value);
         } else if (visitedValues.insert(std::make_tuple(value, definingBlock))
                        .second)
@@ -431,22 +522,42 @@ class BufferPlacement {
     // argument belongs to the first block in a region and the parent operation
     // implements the RegionBranchOpInterface.
     Region *argRegion = block->getParent();
+    Operation *parentOp = argRegion->getParentOp();
     RegionBranchOpInterface regionInterface;
     if (!argRegion || &argRegion->front() != block ||
-        !(regionInterface =
-              dyn_cast<RegionBranchOpInterface>(argRegion->getParentOp())))
+        !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
       return;
 
     introduceCopiesForRegionSuccessors(
-        regionInterface, argRegion->getParentOp()->getRegions(),
+        regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
         [&](RegionSuccessor &successorRegion) {
           // Find a predecessor of our argRegion.
           return successorRegion.getSuccessor() == argRegion;
-        },
-        [&](RegionSuccessor &successorRegion) {
-          // The operand index will be the argument number.
-          return blockArg.getArgNumber();
         });
+
+    // Check whether the block argument belongs to an entry region of the
+    // parent operation. In this case, we have to introduce an additional copy
+    // for the buffer that is passed to the argument.
+    SmallVector<RegionSuccessor, 2> successorRegions;
+    getSuccessorRegions(regionInterface, llvm::None, successorRegions);
+    auto *it =
+        llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) {
+          return successorRegion.getSuccessor() == argRegion;
+        });
+    if (it == successorRegions.end())
+      return;
+
+    // Determine the actual operand to introduce a copy for and rewire the
+    // operand to point to the copy instead.
+    Value operand =
+        regionInterface.getSuccessorEntryOperands(argRegion->getRegionNumber())
+            [llvm::find(it->getSuccessorInputs(), blockArg).getIndex()];
+    Value copy = introduceBufferCopy(operand, parentOp);
+
+    auto op = llvm::find(parentOp->getOperands(), operand);
+    assert(op != parentOp->getOperands().end() &&
+           "parentOp does not contain operand");
+    parentOp->setOperand(op.getIndex(), copy);
   }
 
   /// Introduces temporary allocs in front of all associated nested-region
@@ -455,42 +566,34 @@
     // Get the actual result index in the scope of the parent terminator.
     Operation *operation = value.getDefiningOp();
     auto regionInterface = cast<RegionBranchOpInterface>(operation);
-    introduceCopiesForRegionSuccessors(
-        regionInterface, operation->getRegions(),
-        [&](RegionSuccessor &successorRegion) {
-          // Determine whether this region has a successor entry that leaves
-          // this region by returning to its parent operation.
-          return !successorRegion.getSuccessor();
-        },
-        [&](RegionSuccessor &successorRegion) {
-          // Find the associated success input index.
-          return llvm::find(successorRegion.getSuccessorInputs(), value)
-              .getIndex();
-        });
+    // Filter successors that return to the parent operation.
+    auto regionPredicate = [&](RegionSuccessor &successorRegion) {
+      // If the RegionSuccessor has no associated successor, it will return to
+      // its parent operation.
+      return !successorRegion.getSuccessor();
+    };
+    // Introduce a copy for all region "results" that are returned to the
+    // parent operation. This is required since the parent's result value has
+    // been considered critical. Therefore, the algorithm assumes that a copy
+    // of a previously allocated buffer is returned by the operation (like in
+    // the case of a block argument).
+    introduceCopiesForRegionSuccessors(regionInterface,
+                                       operation->getRegions(), value,
+                                       regionPredicate);
   }
 
   /// Introduces buffer copies for all terminators in the given regions. The
   /// regionPredicate is applied to every successor region in order to restrict
-  /// the copies to specific regions. Thereby, the operandProvider is invoked
-  /// for each matching region successor and determines the operand index that
-  /// requires a buffer copy.
-  template <typename TPredicate, typename TOperandProvider>
-  void
-  introduceCopiesForRegionSuccessors(RegionBranchOpInterface regionInterface,
-                                     MutableArrayRef<Region> regions,
-                                     const TPredicate &regionPredicate,
-                                     const TOperandProvider &operandProvider) {
-    // Create an empty attribute for each operand to comply with the
-    // `getSuccessorRegions` interface definition that requires a single
-    // attribute per operand.
-    SmallVector<Attribute, 2> operandAttributes(
-        regionInterface.getOperation()->getNumOperands());
+  /// the copies to specific regions.
+  template <typename TPredicate>
+  void introduceCopiesForRegionSuccessors(
+      RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
+      Value argValue, const TPredicate &regionPredicate) {
     for (Region &region : regions) {
       // Query the regionInterface to get all successor regions of the current
      // one.
      SmallVector<RegionSuccessor, 2> successorRegions;
-      regionInterface.getSuccessorRegions(region.getRegionNumber(),
-                                          operandAttributes, successorRegions);
+      getSuccessorRegions(regionInterface, region.getRegionNumber(),
+                          successorRegions);
 
       // Try to find a matching region successor.
       RegionSuccessor *regionSuccessor =
           llvm::find_if(successorRegions, regionPredicate);
@@ -498,7 +601,9 @@ class BufferPlacement {
         continue;
       // Get the operand index in the context of the current successor input
       // bindings.
-      auto operandIndex = operandProvider(*regionSuccessor);
+      size_t operandIndex =
+          llvm::find(regionSuccessor->getSuccessorInputs(), argValue)
+              .getIndex();
 
       // Iterate over all immediate terminator operations to introduce
       // new buffer allocations. Thereby, the appropriate terminator operand
@@ -518,6 +623,16 @@
   /// its content into the newly allocated buffer. The terminator operation is
   /// used to insert the alloc and copy operations at the right places.
   Value introduceBufferCopy(Value sourceValue, Operation *terminator) {
+    // Avoid multiple copies of the same source value. This can happen in the
+    // presence of loops when a branch acts as a backedge while also having
+    // another successor that returns to its parent operation. Note that
+    // copying copied buffers can introduce memory leaks since the invariant of
+    // BufferPlacement assumes that a buffer will only be copied once into a
+    // temporary buffer. Hence, the construction of copy chains introduces
+    // additional allocations that are not tracked automatically by the
+    // algorithm.
+    if (copiedValues.contains(sourceValue))
+      return sourceValue;
     // Create a new alloc at the current location of the terminator.
     auto memRefType = sourceValue.getType().cast<MemRefType>();
     OpBuilder builder(terminator);
@@ -541,6 +656,8 @@
     // allocation to the new one.
     builder.create<linalg::CopyOp>(terminator->getLoc(), sourceValue, alloc);
 
+    // Remember the copy of the original source value.
+    copiedValues.insert(alloc);
     return alloc;
   }
 
@@ -652,6 +769,9 @@ class BufferPlacement {
   /// Maps allocation nodes to their associated blocks.
   AllocEntryList allocs;
 
+  // Stores already copied allocations to avoid additional copies of copies.
+  ValueSetT copiedValues;
+
   /// The underlying liveness analysis to compute fine grained information
   /// about alloc and dealloc positions.
   Liveness liveness;
@@ -673,6 +793,14 @@ struct BufferPlacementPass : BufferPlacementBase<BufferPlacementPass> {
   void runOnFunction() override {
+    // Ensure that only supported loops are present.
+    Backedges backedges(getFunction());
+    if (backedges.size()) {
+      getFunction().emitError(
+          "Only structured control-flow loops are supported.");
+      return;
+    }
+
     // Place all required alloc, copy and dealloc nodes.
     BufferPlacement placement(getFunction());
     placement.place();
@@ -681,20 +809,6 @@
 
 } // end anonymous namespace
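The `copiedValues` set above makes `introduceBufferCopy` idempotent: a temporary that was itself produced as a copy is returned unchanged rather than copied again, which would create the untracked copy chains the comment warns about. A minimal stand-alone model of this guard (plain strings stand in for `mlir::Value`; illustrative only):

```cpp
#include <iostream>
#include <set>
#include <string>

// Model of the copy-once guard: the cache remembers every temporary it has
// handed out, and a value that is itself a tracked copy is returned as-is
// instead of spawning a copy-of-a-copy.
class CopyCache {
public:
  std::string introduceCopy(const std::string &source) {
    // A previously created copy must not be copied again.
    if (copied.count(source))
      return source;
    std::string temp = source + "_copy";
    copied.insert(temp); // Remember the new temporary, not the source.
    return temp;
  }

private:
  std::set<std::string> copied;
};

int main() {
  CopyCache cache;
  std::string a = cache.introduceCopy("buffer"); // "buffer_copy"
  std::string b = cache.introduceCopy(a);        // "buffer_copy" again
  std::cout << a << "\n" << b << "\n";
}
```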
-//===----------------------------------------------------------------------===//
-// BufferAssignmentPlacer
-//===----------------------------------------------------------------------===//
-
-/// Creates a new assignment placer.
-BufferAssignmentPlacer::BufferAssignmentPlacer(Operation *op) : operation(op) {}
-
-/// Computes the actual position to place allocs for the given value.
-OpBuilder::InsertPoint
-BufferAssignmentPlacer::computeAllocPosition(OpResult result) {
-  Operation *owner = result.getOwner();
-  return OpBuilder::InsertPoint(owner->getBlock(), Block::iterator(owner));
-}
-
 //===----------------------------------------------------------------------===//
 // BufferAssignmentTypeConverter
 //===----------------------------------------------------------------------===//
@@ -891,9 +1005,6 @@ LogicalResult BufferAssignmentCallOpConverter::matchAndRewrite(
       resultMapping.addMapping(newResultTypes.size() - 1);
     } else {
       // kind = BufferAssignmentTypeConverter::AppendToArgumentsList
-      OpBuilder::InsertionGuard guard(rewriter);
-      rewriter.restoreInsertionPoint(
-          bufferAssignment->computeAllocPosition(result.value()));
       MemRefType memref = converted.dyn_cast<MemRefType>();
       if (!memref)
         return callOp.emitError("Cannot allocate for a non-Memref type");
diff --git a/mlir/lib/Transforms/CopyRemoval.cpp b/mlir/lib/Transforms/CopyRemoval.cpp
index ccfd02630ac288..c5a8da63295681 100644
--- a/mlir/lib/Transforms/CopyRemoval.cpp
+++ b/mlir/lib/Transforms/CopyRemoval.cpp
@@ -30,16 +30,35 @@ class CopyRemovalPass : public PassWrapper<CopyRemovalPass, OperationPass<>> {
       reuseCopySourceAsTarget(copyOp);
       reuseCopyTargetAsSource(copyOp);
     });
+    for (std::pair<Value, Value> &pair : replaceList)
+      pair.first.replaceAllUsesWith(pair.second);
     for (Operation *op : eraseList)
       op->erase();
   }
 
 private:
   /// List of operations that need to be removed.
-  DenseSet<Operation *> eraseList;
+  llvm::SmallPtrSet<Operation *, 4> eraseList;
+
+  /// List of values that need to be replaced with their counterparts.
+  llvm::SmallDenseSet<std::pair<Value, Value>, 4> replaceList;
+
+  /// Returns the allocation operation for `value` in `block` if it exists.
+  /// nullptr otherwise.
+  Operation *getAllocationOpInBlock(Value value, Block *block) {
+    assert(block && "Block cannot be null");
+    Operation *op = value.getDefiningOp();
+    if (op && op->getBlock() == block) {
+      auto effects = dyn_cast<MemoryEffectOpInterface>(op);
+      if (effects && effects.hasEffect<MemoryEffects::Allocate>())
+        return op;
+    }
+    return nullptr;
+  }
 
   /// Returns the deallocation operation for `value` in `block` if it exists.
-  Operation *getDeallocationInBlock(Value value, Block *block) {
+  /// nullptr otherwise.
+ Operation *getDeallocationOpInBlock(Value value, Block *block) { assert(block && "Block cannot be null"); auto valueUsers = value.getUsers(); auto it = llvm::find_if(valueUsers, [&](Operation *op) { @@ -119,9 +138,10 @@ class CopyRemovalPass : public PassWrapper> { Value to = copyOp.getTarget(); Operation *copy = copyOp.getOperation(); + Block *copyBlock = copy->getBlock(); Operation *fromDefiningOp = from.getDefiningOp(); - Operation *fromFreeingOp = getDeallocationInBlock(from, copy->getBlock()); - Operation *toDefiningOp = to.getDefiningOp(); + Operation *fromFreeingOp = getDeallocationOpInBlock(from, copyBlock); + Operation *toDefiningOp = getAllocationOpInBlock(to, copyBlock); if (!fromDefiningOp || !fromFreeingOp || !toDefiningOp || !areOpsInTheSameBlock({fromFreeingOp, toDefiningOp, copy}) || hasUsersBetween(to, toDefiningOp, copy) || @@ -129,7 +149,7 @@ class CopyRemovalPass : public PassWrapper> { hasMemoryEffectOpBetween(copy, fromFreeingOp)) return; - to.replaceAllUsesWith(from); + replaceList.insert({to, from}); eraseList.insert(copy); eraseList.insert(toDefiningOp); eraseList.insert(fromFreeingOp); @@ -169,8 +189,9 @@ class CopyRemovalPass : public PassWrapper> { Value to = copyOp.getTarget(); Operation *copy = copyOp.getOperation(); - Operation *fromDefiningOp = from.getDefiningOp(); - Operation *fromFreeingOp = getDeallocationInBlock(from, copy->getBlock()); + Block *copyBlock = copy->getBlock(); + Operation *fromDefiningOp = getAllocationOpInBlock(from, copyBlock); + Operation *fromFreeingOp = getDeallocationOpInBlock(from, copyBlock); if (!fromDefiningOp || !fromFreeingOp || !areOpsInTheSameBlock({fromFreeingOp, fromDefiningOp, copy}) || hasUsersBetween(to, fromDefiningOp, copy) || @@ -178,7 +199,7 @@ class CopyRemovalPass : public PassWrapper> { hasMemoryEffectOpBetween(copy, fromFreeingOp)) return; - from.replaceAllUsesWith(to); + replaceList.insert({from, to}); eraseList.insert(copy); eraseList.insert(fromDefiningOp); eraseList.insert(fromFreeingOp); diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index db6a071367d6c4..7ae45171ddbd3e 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -469,7 +469,6 @@ LogicalResult mlir::loopUnrollFull(AffineForOp forOp) { LogicalResult mlir::loopUnrollUpToFactor(AffineForOp forOp, uint64_t unrollFactor) { Optional mayBeConstantTripCount = getConstantTripCount(forOp); - if (mayBeConstantTripCount.hasValue() && mayBeConstantTripCount.getValue() < unrollFactor) return loopUnrollByFactor(forOp, mayBeConstantTripCount.getValue()); diff --git a/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir b/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir new file mode 100644 index 00000000000000..487718301d0058 --- /dev/null +++ b/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir @@ -0,0 +1,167 @@ +// RUN: mlir-opt %s -test-conv-vectorization --cse | FileCheck %s + +// CHECK-DAG: #[[$map0:.*]] = affine_map<(d0) -> (d0)> +// CHECK-DAG: #[[$map1:.*]] = affine_map<(d0) -> ()> +// CHECK-DAG: #[[$map2:.*]] = affine_map<(d0, d1, d2) -> (d1, d2)> +// CHECK-DAG: #[[$map3:.*]] = affine_map<(d0, d1) -> (d0, d1)> +// CHECK-DAG: #[[$map4:.*]] = affine_map<(d0, d1) -> ()> +// CHECK-DAG: #[[$map5:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, d2, d3)> +// CHECK-DAG: #[[$map6:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +// CHECK-DAG: #[[$map7:.*]] = affine_map<(d0, d1, d2) -> ()> +// CHECK-DAG: #[[$map8:.*]] = affine_map<(d0, d1, d2, d3, d4) -> 
(d1, d2, d3, d4)> +// CHECK-DAG: #[[$map9:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-DAG: #[[$map10:.*]] = affine_map<(d0, d1, d2, d3) -> ()> + +func @conv_1d(%arg0: memref<3xf32>, %arg1: memref<3xf32>, %arg2: memref) { + linalg.conv_1d %arg0, %arg1, %arg2 : (memref<3xf32>, memref<3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_1d +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]]], %[[cst]] : memref<3xf32>, vector<3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map0]], #[[$map0]], #[[$map1]]], iterator_types = ["reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3xf32>, vector<3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]]] : memref +// CHECK: return + +func @conv_1d_ncw(%arg0: memref<1x3x3xf32>, %arg1: memref<1x3x3xf32>, %arg2: memref) { + linalg.conv_1d_ncw %arg0, %arg1, %arg2 : (memref<1x3x3xf32>, memref<1x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_1d_ncw +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3xf32>, vector<3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map3]], #[[$map3]], #[[$map4]]], iterator_types = ["reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3xf32>, vector<3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + + +func @conv_1d_nwc(%arg0: memref<1x3x3xf32>, %arg1: memref<1x3x3xf32>, %arg2: memref) { + linalg.conv_1d_nwc %arg0, %arg1, %arg2 : (memref<1x3x3xf32>, memref<1x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_1d_nwc +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3xf32>, vector<3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map3]], #[[$map3]], #[[$map4]]], iterator_types = ["reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3xf32>, vector<3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_2d(%arg0: memref<3x3xf32>, %arg1: memref<3x3xf32>, %arg2: memref) { + linalg.conv_2d %arg0, %arg1, %arg2 : (memref<3x3xf32>, memref<3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_2d +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]]], %[[cst]] : memref<3x3xf32>, vector<3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map3]], #[[$map3]], #[[$map4]]], iterator_types = ["reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3xf32>, vector<3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_2d_nchw(%arg0: memref<1x3x3x3xf32>, %arg1: memref<1x3x3x3xf32>, %arg2: memref) { + linalg.conv_2d_nchw %arg0, %arg1, %arg2 : (memref<1x3x3x3xf32>, memref<1x3x3x3xf32>, memref) + 
return +} + +// CHECK-LABEL: @conv_2d_nchw +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3xf32>, vector<3x3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map6]], #[[$map6]], #[[$map7]]], iterator_types = ["reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3xf32>, vector<3x3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_2d_nhwc(%arg0: memref<1x3x3x3xf32>, %arg1: memref<1x3x3x3xf32>, %arg2: memref) { + linalg.conv_2d_nhwc %arg0, %arg1, %arg2 : (memref<1x3x3x3xf32>, memref<1x3x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_2d_nhwc +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3xf32>, vector<3x3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map6]], #[[$map6]], #[[$map7]]], iterator_types = ["reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3xf32>, vector<3x3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_3d(%arg0: memref<3x3x3xf32>, %arg1: memref<3x3x3xf32>, %arg2: memref) { + linalg.conv_3d %arg0, %arg1, %arg2 : (memref<3x3x3xf32>, memref<3x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_3d +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<3x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<3x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<3x3x3xf32>, vector<3x3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map6]], #[[$map6]], #[[$map7]]], iterator_types = ["reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3xf32>, vector<3x3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_3d_ncdhw(%arg0: memref<1x3x3x3x3xf32>, %arg1: memref<1x3x3x3x3xf32>, %arg2: memref) { + linalg.conv_3d_ncdhw %arg0, %arg1, %arg2 : (memref<1x3x3x3x3xf32>, memref<1x3x3x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_3d_ncdhw +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3x3xf32>, vector<3x3x3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map9]], #[[$map9]], #[[$map10]]], iterator_types = ["reduction", "reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3x3xf32>, vector<3x3x3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_3d_ndhwc(%arg0: memref<1x3x3x3x3xf32>, %arg1: memref<1x3x3x3x3xf32>, %arg2: memref) { + linalg.conv_3d_ndhwc %arg0, %arg1, %arg2 : (memref<1x3x3x3x3xf32>, 
memref<1x3x3x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_3d_ndhwc +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3x3xf32>, vector<3x3x3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map9]], #[[$map9]], #[[$map10]]], iterator_types = ["reduction", "reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3x3xf32>, vector<3x3x3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir index bf8e74e5143ed0..4168634f1240dd 100644 --- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir +++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir @@ -94,7 +94,7 @@ func @const_shape() -> tensor { // CHECK: %[[C1:.*]] = constant 1 : index // CHECK: %[[C2:.*]] = constant 2 : index // CHECK: %[[C3:.*]] = constant 3 : index - // CHECK: %[[TENSOR3:.*]] = tensor_from_elements(%[[C1]], %[[C2]], %[[C3]]) + // CHECK: %[[TENSOR3:.*]] = tensor_from_elements %[[C1]], %[[C2]], %[[C3]] // CHECK: %[[RESULT:.*]] = tensor_cast %[[TENSOR3]] : tensor<3xindex> to tensor // CHECK: return %[[RESULT]] : tensor %shape = shape.const_shape [1, 2, 3] : tensor @@ -191,14 +191,11 @@ func @shape_of(%arg : tensor<*xf32>) { // CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) func @shape_of_unranked(%arg : tensor<*xf32>) { // CHECK: %[[RANK:.*]] = rank %[[ARG]] : tensor<*xf32> - // CHECK: %[[SHAPE_MEM:.*]] = alloca(%[[RANK]]) : memref - // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] { - // CHECK: %[[DIM:.]] = dim %[[ARG]], %[[I]] : tensor<*xf32> - // CHECK: store %[[DIM]], %[[SHAPE_MEM]][%[[I]]] : memref - // CHECK: } - // CHECK: %[[SHAPE:.*]] = tensor_load %[[SHAPE_MEM]] : memref + // CHECK: %[[SHAPE:.*]] = dynamic_tensor_from_elements %[[RANK]] { + // CHECK: ^bb0(%[[I:.*]]: index): + // CHECK: %[[EXTENT:.*]] = dim %[[ARG]], %[[I]] : tensor<*xf32> + // CHECK: yield %[[EXTENT]] : index + // CHECK: } : tensor %shape = shape.shape_of %arg : tensor<*xf32> -> tensor return } @@ -223,7 +220,7 @@ func @shape_of_stat(%arg : tensor<1x2x3xf32>) { // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index // CHECK-DAG: %[[C3:.*]] = constant 3 : index - // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C2]], %[[C3]]) : tensor<3xindex> + // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements %[[C1]], %[[C2]], %[[C3]] : tensor<3xindex> %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> tensor return } @@ -238,7 +235,7 @@ func @shape_of_dyn(%arg : tensor<1x5x?xf32>) { // CHECK-DAG: %[[C5:.*]] = constant 5 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index // CHECK-DAG: %[[DYN_DIM:.*]] = dim %[[ARG]], %[[C2]] : tensor<1x5x?xf32> - // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C5]], %[[DYN_DIM]]) : tensor<3xindex> + // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements %[[C1]], %[[C5]], %[[DYN_DIM]] : tensor<3xindex> %shape = shape.shape_of %arg : tensor<1x5x?xf32> -> tensor return } diff --git 
a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir index 240925baf3d8cd..ef1b2e995053c2 100644 --- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir @@ -15,11 +15,13 @@ func @materialize_read_1d() { %ip3 = affine.apply affine_map<(d0) -> (d0 + 3)> (%i1) %f4 = vector.transfer_read %A[%i0, %ip3], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32> // Both accesses in the load must be clipped otherwise %i1 + 2 and %i1 + 3 will go out of bounds. - // CHECK: {{.*}} = select - // CHECK: %[[FILTERED1:.*]] = select - // CHECK: {{.*}} = select - // CHECK: %[[FILTERED2:.*]] = select - // CHECK: %{{.*}} = load {{.*}}[%[[FILTERED1]], %[[FILTERED2]]] : memref<7x42xf32> + // CHECK: scf.if + // CHECK-NEXT: load + // CHECK-NEXT: vector.insertelement + // CHECK-NEXT: store + // CHECK-NEXT: else + // CHECK-NEXT: vector.insertelement + // CHECK-NEXT: store } } return @@ -53,7 +55,6 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d // ----- // CHECK: #[[$ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)> -// CHECK: #[[$SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)> // CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { func @materialize_read(%M: index, %N: index, %O: index, %P: index) { @@ -72,37 +73,18 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] { - // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I0]], %[[I4]]) - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index - // CHECK-NEXT: {{.*}} = select - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[L0:.*]] = select - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index - // CHECK-NEXT: {{.*}} = select - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[L1:.*]] = select - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index - // CHECK-NEXT: {{.*}} = select - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[L2:.*]] = select - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I3]], %[[I6]]) - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index - // CHECK-NEXT: {{.*}} = select - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[L3:.*]] = select - // CHECK-NEXT: %[[VIDX:.*]] = index_cast %[[I4]] - // - // CHECK-DAG: %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[L1]], %[[L2]], %[[L3]]] : memref - // CHECK-DAG: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> - // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> - // CHECK-NEXT: store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK: %[[VIDX:.*]] = index_cast %[[I4]] + // CHECK: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK: %[[L0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I4]]) + // CHECK: %[[L3:.*]] = affine.apply #[[$ADD]](%[[I3]], 
%[[I6]]) + // CHECK-NEXT: scf.if + // CHECK-NEXT: %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[I1]], %[[I2]], %[[L3]]] : memref + // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> + // CHECK-NEXT: store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: } else { + // CHECK-NEXT: %[[CVEC:.*]] = vector.insertelement + // CHECK-NEXT: store %[[CVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } @@ -132,7 +114,6 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // ----- // CHECK: #[[$ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)> -// CHECK: #[[$SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)> // CHECK-LABEL:func @materialize_write(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { func @materialize_write(%M: index, %N: index, %O: index, %P: index) { @@ -153,37 +134,15 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] { - // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I0]], %[[I4]]) - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[S0:.*]] = select {{.*}}, %[[C0]], {{.*}} : index - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I1]], %[[I5]]) - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[S1:.*]] = select {{.*}}, %[[C0]], {{.*}} : index - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", %[[I2]], %{{.*}} : index - // CHECK-NEXT: {{.*}} = select {{.*}}, %[[I2]], {{.*}} : index - // CHECK-NEXT: {{.*}} = cmpi "slt", %[[I2]], %[[C0]] : index - // CHECK-NEXT: %[[S2:.*]] = select {{.*}}, %[[C0]], {{.*}} : index - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I3]], %[[I6]]) - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[S3:.*]] = select {{.*}}, %[[C0]], {{.*}} : index - // CHECK-NEXT: %[[VIDX:.*]] = index_cast %[[I4]] - // - // CHECK-NEXT: %[[VEC:.*]] = load {{.*}}[%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> - // CHECK-NEXT: %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> - // CHECK-NEXT: store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[S2]], %[[S3]]] : memref + // CHECK: %[[VIDX:.*]] = index_cast %[[I4]] + // CHECK: %[[S0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I4]]) + // CHECK: %[[S1:.*]] = affine.apply #[[$ADD]](%[[I1]], %[[I5]]) + // CHECK: %[[S3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I6]]) + // CHECK-NEXT: scf.if + // CHECK-NEXT: %[[VEC:.*]] = load {{.*}}[%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> + // CHECK: store 
%[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[I2]], %[[S3]]] : memref
+  // CHECK-NEXT:      }
   // CHECK-NEXT:    }
   // CHECK-NEXT:   }
   // CHECK-NEXT:  }
diff --git a/mlir/test/Dialect/SCF/loop-unroll.mlir b/mlir/test/Dialect/SCF/loop-unroll.mlir
index 775188bf0ed991..134daa303ed86f 100644
--- a/mlir/test/Dialect/SCF/loop-unroll.mlir
+++ b/mlir/test/Dialect/SCF/loop-unroll.mlir
@@ -2,6 +2,7 @@
 // RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=3' | FileCheck %s --check-prefix UNROLL-BY-3
 // RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=0' | FileCheck %s --check-prefix UNROLL-OUTER-BY-2
 // RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=1' | FileCheck %s --check-prefix UNROLL-INNER-BY-2
+// RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=6 unroll-up-to-factor=true' | FileCheck %s --check-prefix UNROLL-UP-TO
 
 func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index,
                           %arg3: memref<?xf32>) {
@@ -248,3 +249,24 @@ func @static_loop_unroll_by_3_promote_epilogue(%arg0 : memref<?xf32>) {
 // UNROLL-BY-3-NEXT:  }
 // UNROLL-BY-3-NEXT:  store %{{.*}}, %[[MEM]][%[[C9]]] : memref<?xf32>
 // UNROLL-BY-3-NEXT:  return
+
+
+// Test unroll-up-to functionality.
+func @static_loop_unroll_up_to_factor(%arg0 : memref<?xf32>) {
+  %0 = constant 7.0 : f32
+  %lb = constant 0 : index
+  %ub = constant 2 : index
+  affine.for %i0 = %lb to %ub {
+    store %0, %arg0[%i0] : memref<?xf32>
+  }
+  return
+}
+// UNROLL-UP-TO-LABEL: func @static_loop_unroll_up_to_factor
+// UNROLL-UP-TO-SAME:  %[[MEM:.*0]]: memref<?xf32>
+// UNROLL-UP-TO-DAG:   %[[C0:.*]] = constant 0 : index
+// UNROLL-UP-TO-DAG:   %[[C2:.*]] = constant 2 : index
+// UNROLL-UP-TO-NEXT:  %[[V0:.*]] = affine.apply {{.*}}
+// UNROLL-UP-TO-NEXT:  store %{{.*}}, %[[MEM]][%[[V0]]] : memref<?xf32>
+// UNROLL-UP-TO-NEXT:  %[[V1:.*]] = affine.apply {{.*}}
+// UNROLL-UP-TO-NEXT:  store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
+// UNROLL-UP-TO-NEXT:  return
diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir
index 69e974bc41734d..e4472b444f0344 100644
--- a/mlir/test/IR/core-ops.mlir
+++ b/mlir/test/IR/core-ops.mlir
@@ -661,17 +661,17 @@ func @extract_element(%arg0: tensor<*xi32>, %arg1 : tensor<4x4xf32>) -> i32 {
 
 // CHECK-LABEL: func @tensor_from_elements() {
 func @tensor_from_elements() {
   %c0 = "std.constant"() {value = 0: index} : () -> index
-  // CHECK: %0 = tensor_from_elements(%c0) : tensor<1xindex>
-  %0 = tensor_from_elements(%c0) : tensor<1xindex>
+  // CHECK: %0 = tensor_from_elements %c0 : tensor<1xindex>
+  %0 = tensor_from_elements %c0 : tensor<1xindex>
 
   %c1 = "std.constant"() {value = 1: index} : () -> index
-  // CHECK: %1 = tensor_from_elements(%c0, %c1) : tensor<2xindex>
-  %1 = tensor_from_elements(%c0, %c1) : tensor<2xindex>
+  // CHECK: %1 = tensor_from_elements %c0, %c1 : tensor<2xindex>
+  %1 = tensor_from_elements %c0, %c1 : tensor<2xindex>
 
   %c0_f32 = "std.constant"() {value = 0.0: f32} : () -> f32
   // CHECK: [[C0_F32:%.*]] = constant
-  // CHECK: %2 = tensor_from_elements([[C0_F32]]) : tensor<1xf32>
-  %2 = tensor_from_elements(%c0_f32) : tensor<1xf32>
+  // CHECK: %2 = tensor_from_elements [[C0_F32]] : tensor<1xf32>
+  %2 = tensor_from_elements %c0_f32 : tensor<1xf32>
 
   return
 }
diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir
index 55739119aa26d6..71b007ef6e39f3 100644
--- a/mlir/test/IR/invalid-ops.mlir
+++ b/mlir/test/IR/invalid-ops.mlir
@@ -597,7 +597,7 @@ func @extract_element_tensor_too_few_indices(%t : tensor<2x3xf32>, %i : index) {
 
 func @tensor_from_elements_wrong_result_type() {
   // expected-error@+2 {{expected result type to be a ranked
tensor}} %c0 = constant 0 : i32 - %0 = tensor_from_elements(%c0) : tensor<*xi32> + %0 = tensor_from_elements %c0 : tensor<*xi32> return } @@ -606,7 +606,7 @@ func @tensor_from_elements_wrong_result_type() { func @tensor_from_elements_wrong_elements_count() { // expected-error@+2 {{expected result type to be a 1D tensor with 1 element}} %c0 = constant 0 : index - %0 = tensor_from_elements(%c0) : tensor<2xindex> + %0 = tensor_from_elements %c0 : tensor<2xindex> return } diff --git a/mlir/test/IR/print-ir-defuse.mlir b/mlir/test/IR/print-ir-defuse.mlir new file mode 100644 index 00000000000000..78c5804119250a --- /dev/null +++ b/mlir/test/IR/print-ir-defuse.mlir @@ -0,0 +1,31 @@ +// RUN: mlir-opt -test-print-defuse -allow-unregistered-dialect %s | FileCheck %s + +// CHECK: Visiting op 'dialect.op1' with 0 operands: +// CHECK: Has 4 results: +// CHECK: - Result 0 has a single use: - dialect.op2 +// CHECK: - Result 1 has no uses +// CHECK: - Result 2 has 2 uses: +// CHECK: - dialect.innerop1 +// CHECK: - dialect.op2 +// CHECK: - Result 3 has no uses +// CHECK: Visiting op 'dialect.op2' with 2 operands: +// CHECK: - Operand produced by operation 'dialect.op1' +// CHECK: - Operand produced by operation 'dialect.op1' +// CHECK: Has 0 results: +// CHECK: Visiting op 'dialect.innerop1' with 2 operands: +// CHECK: - Operand produced by Block argument, number 0 +// CHECK: - Operand produced by operation 'dialect.op1' +// CHECK: Has 0 results: +// CHECK: Visiting op 'dialect.op3' with 0 operands: +// CHECK: Has 0 results: +// CHECK: Visiting op 'module_terminator' with 0 operands: +// CHECK: Has 0 results: +// CHECK: Visiting op 'module' with 0 operands: +// CHECK: Has 0 results: + +%results:4 = "dialect.op1"() : () -> (i1, i16, i32, i64) +"dialect.op2"(%results#0, %results#2) : (i1, i32) -> () +"dialect.op3"() ({ + ^bb0(%arg0 : i1): + "dialect.innerop1"(%arg0, %results#2) : (i1, i32) -> () +}) : () -> () diff --git a/mlir/test/IR/print-ir-nesting.mlir b/mlir/test/IR/print-ir-nesting.mlir new file mode 100644 index 00000000000000..4682753947550c --- /dev/null +++ b/mlir/test/IR/print-ir-nesting.mlir @@ -0,0 +1,57 @@ +// RUN: mlir-opt -test-print-nesting -allow-unregistered-dialect %s | FileCheck %s + +// CHECK: visiting op: 'module' with 0 operands and 0 results +// CHECK: 1 nested regions: +// CHECK: Region with 1 blocks: +// CHECK: Block with 0 arguments, 0 successors, and 3 operations +module { + + +// CHECK: visiting op: 'dialect.op1' with 0 operands and 4 results +// CHECK: 1 attributes: +// CHECK: - 'attribute name' : '42 : i32' +// CHECK: 0 nested regions: + %results:4 = "dialect.op1"() { "attribute name" = 42 : i32 } : () -> (i1, i16, i32, i64) + + +// CHECK: visiting op: 'dialect.op2' with 0 operands and 0 results +// CHECK: 2 nested regions: + "dialect.op2"() ({ + +// CHECK: Region with 1 blocks: +// CHECK: Block with 0 arguments, 0 successors, and 1 operations +// CHECK: visiting op: 'dialect.innerop1' with 2 operands and 0 results +// CHECK: 0 nested regions: + "dialect.innerop1"(%results#0, %results#1) : (i1, i16) -> () + +// CHECK: Region with 3 blocks: + },{ + +// CHECK: Block with 0 arguments, 2 successors, and 2 operations +// CHECK: visiting op: 'dialect.innerop2' with 0 operands and 0 results +// CHECK: 0 nested regions: + "dialect.innerop2"() : () -> () +// CHECK: visiting op: 'dialect.innerop3' with 3 operands and 0 results +// CHECK: 0 nested regions: + "dialect.innerop3"(%results#0, %results#2, %results#3)[^bb1, ^bb2] : (i1, i32, i64) -> () +// CHECK: Block with 1 arguments, 0 
successors, and 2 operations
+  ^bb1(%arg1 : i32):
+// CHECK:    visiting op: 'dialect.innerop4' with 0 operands and 0 results
+// CHECK:    0 nested regions:
+    "dialect.innerop4"() : () -> ()
+// CHECK:    visiting op: 'dialect.innerop5' with 0 operands and 0 results
+// CHECK:    0 nested regions:
+    "dialect.innerop5"() : () -> ()
+// CHECK:  Block with 1 arguments, 0 successors, and 2 operations
+  ^bb2(%arg2 : i64):
+// CHECK:    visiting op: 'dialect.innerop6' with 0 operands and 0 results
+// CHECK:    0 nested regions:
+    "dialect.innerop6"() : () -> ()
+// CHECK:    visiting op: 'dialect.innerop7' with 0 operands and 0 results
+// CHECK:    0 nested regions:
+    "dialect.innerop7"() : () -> ()
+  }) : () -> ()
+
+// CHECK: visiting op: 'module_terminator' with 0 operands and 0 results
+
+} // module
diff --git a/mlir/test/Transforms/buffer-placement.mlir b/mlir/test/Transforms/buffer-placement.mlir
index e1ed2c4309c3db..dc9ff44bf4838e 100644
--- a/mlir/test/Transforms/buffer-placement.mlir
+++ b/mlir/test/Transforms/buffer-placement.mlir
@@ -1125,3 +1125,295 @@ func @nestedRegionControlFlowAlloca(
 // CHECK:      %[[ALLOCA:.*]] = alloca(%arg0, %arg1)
 // CHECK-NEXT: scf.yield %[[ALLOC0]]
 // CHECK:      return %[[ALLOC1]]
+
+// -----
+
+// Test Case: structured control-flow loop using a nested alloc.
+// The alloc positions of %3 will not be changed, but the iteration argument
+// %iterBuf has to be freed before yielding %3 to avoid memory leaks.
+
+// CHECK-LABEL: func @loop_alloc
+func @loop_alloc(
+  %lb: index,
+  %ub: index,
+  %step: index,
+  %buf: memref<2xf32>,
+  %res: memref<2xf32>) {
+  %0 = alloc() : memref<2xf32>
+  %1 = scf.for %i = %lb to %ub step %step
+    iter_args(%iterBuf = %buf) -> memref<2xf32> {
+    %2 = cmpi "eq", %i, %ub : index
+    %3 = alloc() : memref<2xf32>
+    scf.yield %3 : memref<2xf32>
+  }
+  "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// CHECK: %[[ALLOC0:.*]] = alloc()
+// CHECK-NEXT: dealloc %[[ALLOC0]]
+// CHECK-NEXT: %[[ALLOC1:.*]] = alloc()
+// CHECK: linalg.copy(%arg3, %[[ALLOC1]])
+// CHECK: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] = %[[ALLOC1]]
+// CHECK: cmpi
+// CHECK: dealloc %[[IALLOC]]
+// CHECK: %[[ALLOC3:.*]] = alloc()
+// CHECK: %[[ALLOC4:.*]] = alloc()
+// CHECK: linalg.copy(%[[ALLOC3]], %[[ALLOC4]])
+// CHECK: dealloc %[[ALLOC3]]
+// CHECK: scf.yield %[[ALLOC4]]
+// CHECK: }
+// CHECK: linalg.copy(%[[ALLOC2]], %arg4)
+// CHECK-NEXT: dealloc %[[ALLOC2]]
+
+// -----
+
+// Test Case: structured control-flow loop with a nested if operation.
+// The loop yields buffers that have been defined outside of the loop and the
+// backedges only use the iteration arguments (or one of their aliases).
+// Therefore, we do not have to (and are not allowed to) free any buffers
+// that are passed via the backedges.
+
+// CHECK-LABEL: func @loop_nested_if_no_alloc
+func @loop_nested_if_no_alloc(
+  %lb: index,
+  %ub: index,
+  %step: index,
+  %buf: memref<2xf32>,
+  %res: memref<2xf32>) {
+  %0 = alloc() : memref<2xf32>
+  %1 = scf.for %i = %lb to %ub step %step
+    iter_args(%iterBuf = %buf) -> memref<2xf32> {
+    %2 = cmpi "eq", %i, %ub : index
+    %3 = scf.if %2 -> (memref<2xf32>) {
+      scf.yield %0 : memref<2xf32>
+    } else {
+      scf.yield %iterBuf : memref<2xf32>
+    }
+    scf.yield %3 : memref<2xf32>
+  }
+  "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// CHECK: %[[ALLOC0:.*]] = alloc()
+// CHECK-NEXT: %[[ALLOC1:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] =
+// CHECK: %[[ALLOC2:.*]] = scf.if
+// CHECK: scf.yield %[[ALLOC0]]
+// CHECK: scf.yield %[[IALLOC]]
+// CHECK: scf.yield %[[ALLOC2]]
+// CHECK: linalg.copy(%[[ALLOC1]], %arg4)
+// CHECK: dealloc %[[ALLOC0]]
+
+// -----
+
+// Test Case: structured control-flow loop with a nested if operation using
+// a deeply nested buffer allocation.
+// Since the innermost allocation happens in a divergent branch, we have to
+// introduce additional copies for the nested if operation. Since the loop's
+// yield operation "returns" %3, it will return a newly allocated buffer.
+// Therefore, we have to free the iteration argument %iterBuf before
+// "returning" %3.
+
+// CHECK-LABEL: func @loop_nested_if_alloc
+func @loop_nested_if_alloc(
+  %lb: index,
+  %ub: index,
+  %step: index,
+  %buf: memref<2xf32>) -> memref<2xf32> {
+  %0 = alloc() : memref<2xf32>
+  %1 = scf.for %i = %lb to %ub step %step
+    iter_args(%iterBuf = %buf) -> memref<2xf32> {
+    %2 = cmpi "eq", %i, %ub : index
+    %3 = scf.if %2 -> (memref<2xf32>) {
+      %4 = alloc() : memref<2xf32>
+      scf.yield %4 : memref<2xf32>
+    } else {
+      scf.yield %0 : memref<2xf32>
+    }
+    scf.yield %3 : memref<2xf32>
+  }
+  return %1 : memref<2xf32>
+}
+
+// CHECK: %[[ALLOC0:.*]] = alloc()
+// CHECK: %[[ALLOC1:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]])
+// CHECK-NEXT: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] = %[[ALLOC1]]
+// CHECK: dealloc %[[IALLOC]]
+// CHECK: %[[ALLOC3:.*]] = scf.if
+
+// CHECK: %[[ALLOC4:.*]] = alloc()
+// CHECK-NEXT: %[[ALLOC5:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC4]], %[[ALLOC5]])
+// CHECK-NEXT: dealloc %[[ALLOC4]]
+// CHECK-NEXT: scf.yield %[[ALLOC5]]
+
+// CHECK: %[[ALLOC6:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC0]], %[[ALLOC6]])
+// CHECK-NEXT: scf.yield %[[ALLOC6]]
+
+// CHECK: %[[ALLOC7:.*]] = alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC3:.*]], %[[ALLOC7]])
+// CHECK-NEXT: dealloc %[[ALLOC3]]
+// CHECK-NEXT: scf.yield %[[ALLOC7]]
+
+// CHECK: dealloc %[[ALLOC0]]
+// CHECK-NEXT: return %[[ALLOC2]]
+
+// -----
+
+// Test Case: several nested structured control-flow loops with a deeply nested
+// buffer allocation inside an if operation.
+// Same behavior as in loop_nested_if_alloc: we have to insert deallocations
+// before each yield in all loops recursively.
+ +// CHECK-LABEL: func @loop_nested_alloc +func @loop_nested_alloc( + %lb: index, + %ub: index, + %step: index, + %buf: memref<2xf32>, + %res: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + %1 = scf.for %i = %lb to %ub step %step + iter_args(%iterBuf = %buf) -> memref<2xf32> { + %2 = scf.for %i2 = %lb to %ub step %step + iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> { + %3 = scf.for %i3 = %lb to %ub step %step + iter_args(%iterBuf3 = %iterBuf2) -> memref<2xf32> { + %4 = alloc() : memref<2xf32> + %5 = cmpi "eq", %i, %ub : index + %6 = scf.if %5 -> (memref<2xf32>) { + %7 = alloc() : memref<2xf32> + scf.yield %7 : memref<2xf32> + } else { + scf.yield %iterBuf3 : memref<2xf32> + } + scf.yield %6 : memref<2xf32> + } + scf.yield %3 : memref<2xf32> + } + scf.yield %2 : memref<2xf32> + } + "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: dealloc %[[ALLOC0]] +// CHECK-NEXT: %[[ALLOC1:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]]) +// CHECK-NEXT: %[[VAL_7:.*]] = scf.for {{.*}} iter_args(%[[IALLOC0:.*]] = %[[ALLOC1]]) +// CHECK: %[[ALLOC2:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[IALLOC0]], %[[ALLOC2]]) +// CHECK-NEXT: dealloc %[[IALLOC0]] +// CHECK-NEXT: %[[ALLOC3:.*]] = scf.for {{.*}} iter_args(%[[IALLOC1:.*]] = %[[ALLOC2]]) +// CHECK: %[[ALLOC5:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[IALLOC1]], %[[ALLOC5]]) +// CHECK-NEXT: dealloc %[[IALLOC1]] + +// CHECK: %[[ALLOC6:.*]] = scf.for {{.*}} iter_args(%[[IALLOC2:.*]] = %[[ALLOC5]]) +// CHECK: %[[ALLOC8:.*]] = alloc() +// CHECK-NEXT: dealloc %[[ALLOC8]] +// CHECK: %[[ALLOC9:.*]] = scf.if + +// CHECK: %[[ALLOC11:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC12:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC11]], %[[ALLOC12]]) +// CHECK-NEXT: dealloc %[[ALLOC11]] +// CHECK-NEXT: scf.yield %[[ALLOC12]] + +// CHECK: %[[ALLOC13:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[IALLOC2]], %[[ALLOC13]]) +// CHECK-NEXT: scf.yield %[[ALLOC13]] + +// CHECK: dealloc %[[IALLOC2]] +// CHECK-NEXT: %[[ALLOC10:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC9]], %[[ALLOC10]]) +// CHECK-NEXT: dealloc %[[ALLOC9]] +// CHECK-NEXT: scf.yield %[[ALLOC10]] + +// CHECK: %[[ALLOC7:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC6]], %[[ALLOC7]]) +// CHECK-NEXT: dealloc %[[ALLOC6]] +// CHECK-NEXT: scf.yield %[[ALLOC7]] + +// CHECK: %[[ALLOC4:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC3]], %[[ALLOC4]]) +// CHECK-NEXT: dealloc %[[ALLOC3]] +// CHECK-NEXT: scf.yield %[[ALLOC4]] + +// CHECK: linalg.copy(%[[VAL_7]], %arg4) +// CHECK-NEXT: dealloc %[[VAL_7]] + +// ----- + +// Test Case: explicit control-flow loop with a dynamically allocated buffer. +// The BufferPlacement transformation should fail on this explicit +// control-flow loop since they are not supported. 
+
+// CHECK-LABEL: func @loop_dynalloc
+func @loop_dynalloc(
+  %arg0 : i32,
+  %arg1 : i32,
+  %arg2: memref<?xf32>,
+  %arg3: memref<?xf32>) {
+  %const0 = constant 0 : i32
+  br ^loopHeader(%const0, %arg2 : i32, memref<?xf32>)
+
+^loopHeader(%i : i32, %buff : memref<?xf32>):
+  %lessThan = cmpi "slt", %i, %arg1 : i32
+  cond_br %lessThan,
+    ^loopBody(%i, %buff : i32, memref<?xf32>),
+    ^exit(%buff : memref<?xf32>)
+
+^loopBody(%val : i32, %buff2: memref<?xf32>):
+  %const1 = constant 1 : i32
+  %inc = addi %val, %const1 : i32
+  %size = std.index_cast %inc : i32 to index
+  %alloc1 = alloc(%size) : memref<?xf32>
+  br ^loopHeader(%inc, %alloc1 : i32, memref<?xf32>)
+
+^exit(%buff3 : memref<?xf32>):
+  "linalg.copy"(%buff3, %arg3) : (memref<?xf32>, memref<?xf32>) -> ()
+  return
+}
+
+// expected-error@+1 {{Structured control-flow loops are supported only}}
+
+// -----
+
+// Test Case: explicit control-flow "do" loop with a statically allocated
+// buffer. The BufferPlacement transformation should fail on this explicit
+// control-flow loop since such loops are not supported.
+
+// CHECK-LABEL: func @do_loop_alloc
+func @do_loop_alloc(
+  %arg0 : i32,
+  %arg1 : i32,
+  %arg2: memref<2xf32>,
+  %arg3: memref<2xf32>) {
+  %const0 = constant 0 : i32
+  br ^loopBody(%const0, %arg2 : i32, memref<2xf32>)
+
+^loopBody(%val : i32, %buff2: memref<2xf32>):
+  %const1 = constant 1 : i32
+  %inc = addi %val, %const1 : i32
+  %alloc1 = alloc() : memref<2xf32>
+  br ^loopHeader(%inc, %alloc1 : i32, memref<2xf32>)
+
+^loopHeader(%i : i32, %buff : memref<2xf32>):
+  %lessThan = cmpi "slt", %i, %arg1 : i32
+  cond_br %lessThan,
+    ^loopBody(%i, %buff : i32, memref<2xf32>),
+    ^exit(%buff : memref<2xf32>)
+
+^exit(%buff3 : memref<2xf32>):
+  "linalg.copy"(%buff3, %arg3) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// expected-error@+1 {{Structured control-flow loops are supported only}}
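Editorial note: both rejection tests above share one property: their branches form a cycle in the block graph (^loopHeader <-> ^loopBody), i.e. an unstructured loop. The sketch below shows one way such a back edge can be detected; it is illustrative only, and `hasCfgLoop` is a hypothetical helper, not the transformation's actual check.

#include <functional>

#include "mlir/IR/Block.h"
#include "mlir/IR/Region.h"
#include "llvm/ADT/SmallPtrSet.h"

using namespace mlir;

/// Hypothetical helper: returns true if the region's CFG contains a cycle,
/// i.e. an unstructured loop like the branch cycles in the tests above.
static bool hasCfgLoop(Region &region) {
  llvm::SmallPtrSet<Block *, 8> onStack, discovered;
  std::function<bool(Block *)> dfs = [&](Block *block) {
    if (onStack.count(block))
      return true; // Back edge: we re-entered a block on the DFS stack.
    if (!discovered.insert(block).second)
      return false; // Already fully explored via another path.
    onStack.insert(block);
    for (Block *succ : block->getSuccessors())
      if (dfs(succ))
        return true;
    onStack.erase(block);
    return false;
  };
  return !region.empty() && dfs(&region.front());
}

A pass that only handles structured scf loops could emit the diagnostic the expected-error lines anticipate whenever this predicate fires.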
diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir
index 7333446c6e5d93..76fe82588be3e8 100644
--- a/mlir/test/Transforms/canonicalize.mlir
+++ b/mlir/test/Transforms/canonicalize.mlir
@@ -981,7 +981,7 @@ func @memref_cast_folding_subview_static(%V: memref<16x16xf32>, %a: index, %b: i
 func @extract_element_from_tensor_from_elements(%element : index) -> index {
   // CHECK-SAME: ([[ARG:%.*]]: index)
   %c0 = constant 0 : index
-  %tensor = tensor_from_elements(%element) : tensor<1xindex>
+  %tensor = tensor_from_elements %element : tensor<1xindex>
   %extracted_element = extract_element %tensor[%c0] : tensor<1xindex>
   // CHECK: [[ARG]] : index
   return %extracted_element : index
diff --git a/mlir/test/Transforms/copy-removal.mlir b/mlir/test/Transforms/copy-removal.mlir
index f750dabb18a048..a0d1193b77d58d 100644
--- a/mlir/test/Transforms/copy-removal.mlir
+++ b/mlir/test/Transforms/copy-removal.mlir
@@ -283,3 +283,67 @@ func @test_ReuseCopyTargetAsSource(%arg0: memref<2xf32>){
   dealloc %temp : memref<2xf32>
   return
 }
+
+// -----
+
+// The only redundant copy is linalg.copy(%4, %5).
+
+// CHECK-LABEL: func @loop_alloc
+func @loop_alloc(%arg0: index, %arg1: index, %arg2: index, %arg3: memref<2xf32>, %arg4: memref<2xf32>) {
+  // CHECK: %{{.*}} = alloc()
+  %0 = alloc() : memref<2xf32>
+  dealloc %0 : memref<2xf32>
+  // CHECK: %{{.*}} = alloc()
+  %1 = alloc() : memref<2xf32>
+  // CHECK: linalg.copy
+  linalg.copy(%arg3, %1) : memref<2xf32>, memref<2xf32>
+  %2 = scf.for %arg5 = %arg0 to %arg1 step %arg2 iter_args(%arg6 = %1) -> (memref<2xf32>) {
+    %3 = cmpi "eq", %arg5, %arg1 : index
+    // CHECK: dealloc
+    dealloc %arg6 : memref<2xf32>
+    // CHECK: %[[PERCENT4:.*]] = alloc()
+    %4 = alloc() : memref<2xf32>
+    // CHECK-NOT: alloc
+    // CHECK-NOT: linalg.copy
+    // CHECK-NOT: dealloc
+    %5 = alloc() : memref<2xf32>
+    linalg.copy(%4, %5) : memref<2xf32>, memref<2xf32>
+    dealloc %4 : memref<2xf32>
+    // CHECK: %[[PERCENT6:.*]] = alloc()
+    %6 = alloc() : memref<2xf32>
+    // CHECK: linalg.copy(%[[PERCENT4]], %[[PERCENT6]])
+    linalg.copy(%5, %6) : memref<2xf32>, memref<2xf32>
+    scf.yield %6 : memref<2xf32>
+  }
+  // CHECK: linalg.copy
+  linalg.copy(%2, %arg4) : memref<2xf32>, memref<2xf32>
+  dealloc %2 : memref<2xf32>
+  return
+}
+
+// -----
+
+// The linalg.copy operation can be removed in addition to its alloc and
+// dealloc operations. All uses of %0 are then replaced with %arg2.
+
+// CHECK-LABEL: func @check_with_affine_dialect
+func @check_with_affine_dialect(%arg0: memref<4xf32>, %arg1: memref<4xf32>, %arg2: memref<4xf32>) {
+  // CHECK-SAME: (%[[ARG0:.*]]: memref<4xf32>, %[[ARG1:.*]]: memref<4xf32>, %[[RES:.*]]: memref<4xf32>)
+  // CHECK-NOT: alloc
+  %0 = alloc() : memref<4xf32>
+  affine.for %arg3 = 0 to 4 {
+    %5 = affine.load %arg0[%arg3] : memref<4xf32>
+    %6 = affine.load %arg1[%arg3] : memref<4xf32>
+    %7 = cmpf "ogt", %5, %6 : f32
+    // CHECK: %[[SELECT_RES:.*]] = select
+    %8 = select %7, %5, %6 : f32
+    // CHECK-NEXT: affine.store %[[SELECT_RES]], %[[RES]]
+    affine.store %8, %0[%arg3] : memref<4xf32>
+  }
+  // CHECK-NOT: linalg.copy
+  // CHECK-NOT: dealloc
+  "linalg.copy"(%0, %arg2) : (memref<4xf32>, memref<4xf32>) -> ()
+  dealloc %0 : memref<4xf32>
+  // CHECK: return
+  return
+}
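Editorial note: in loop_alloc above, linalg.copy(%4, %5) qualifies for removal because %5 is a fresh allocation whose only producer is the copy, and %4 dies immediately afterwards, so the pair can be forwarded. A rough sketch of that criterion follows; it is illustrative only, `isForwardableCopy` is a hypothetical helper, and it deliberately ignores the aliasing and ordering checks a real pass needs.

#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "llvm/Support/Casting.h"

using namespace mlir;

/// Hypothetical helper: a copy whose target is a fresh local alloc and whose
/// source has no users besides the copy and a dealloc can be folded away by
/// using the source (%4 above) wherever the target (%5 above) was used.
static bool isForwardableCopy(linalg::CopyOp copy) {
  Value source = copy.input(), target = copy.output();
  // The target must be a local allocation, not e.g. a function argument.
  if (!llvm::isa_and_nonnull<AllocOp>(target.getDefiningOp()))
    return false;
  // The source must die right after the copy: no users besides a dealloc.
  for (Operation *user : source.getUsers())
    if (user != copy.getOperation() && !isa<DeallocOp>(user))
      return false;
  return true;
}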
diff --git a/mlir/test/lib/IR/CMakeLists.txt b/mlir/test/lib/IR/CMakeLists.txt
index f77b26e5ca1848..cf4ecada0f3cb5 100644
--- a/mlir/test/lib/IR/CMakeLists.txt
+++ b/mlir/test/lib/IR/CMakeLists.txt
@@ -3,6 +3,8 @@ add_mlir_library(MLIRTestIR
   TestFunc.cpp
   TestInterfaces.cpp
   TestMatchers.cpp
+  TestPrintDefUse.cpp
+  TestPrintNesting.cpp
   TestSideEffects.cpp
   TestSymbolUses.cpp
   TestTypes.cpp
diff --git a/mlir/test/lib/IR/TestPrintDefUse.cpp b/mlir/test/lib/IR/TestPrintDefUse.cpp
new file mode 100644
index 00000000000000..3153a148477a9c
--- /dev/null
+++ b/mlir/test/lib/IR/TestPrintDefUse.cpp
@@ -0,0 +1,71 @@
+//===- TestPrintDefUse.cpp - Passes to illustrate the IR def-use chains ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/IR/Function.h"
+#include "mlir/Pass/Pass.h"
+
+using namespace mlir;
+
+namespace {
+/// This pass illustrates the IR def-use chains through printing.
+struct TestPrintDefUsePass
+    : public PassWrapper<TestPrintDefUsePass, OperationPass<>> {
+  void runOnOperation() override {
+    // Recursively traverse the IR nested under the current operation and print
+    // every operation along with its operands and users.
+    getOperation()->walk([](Operation *op) {
+      llvm::outs() << "Visiting op '" << op->getName() << "' with "
+                   << op->getNumOperands() << " operands:\n";
+
+      // Print information about the producer of each of the operands.
+      for (Value operand : op->getOperands()) {
+        if (Operation *producer = operand.getDefiningOp()) {
+          llvm::outs() << "  - Operand produced by operation '"
+                       << producer->getName() << "'\n";
+        } else {
+          // If there is no defining op, the Value is necessarily a Block
+          // argument.
+          auto blockArg = operand.cast<BlockArgument>();
+          llvm::outs() << "  - Operand produced by Block argument, number "
+                       << blockArg.getArgNumber() << "\n";
+        }
+      }
+
+      // Print information about the users of each of the results.
+      llvm::outs() << "Has " << op->getNumResults() << " results:\n";
+      for (auto indexedResult : llvm::enumerate(op->getResults())) {
+        Value result = indexedResult.value();
+        llvm::outs() << "  - Result " << indexedResult.index();
+        if (result.use_empty()) {
+          llvm::outs() << " has no uses\n";
+          continue;
+        }
+        if (result.hasOneUse()) {
+          llvm::outs() << " has a single use: ";
+        } else {
+          llvm::outs() << " has "
+                       << std::distance(result.getUses().begin(),
+                                        result.getUses().end())
+                       << " uses:\n";
+        }
+        for (Operation *userOp : result.getUsers()) {
+          llvm::outs() << "    - " << userOp->getName() << "\n";
+        }
+      }
+    });
+  }
+};
+} // end anonymous namespace
+
+namespace mlir {
+void registerTestPrintDefUsePass() {
+  PassRegistration<TestPrintDefUsePass>("test-print-defuse",
+                                        "Test various printing.");
+}
+} // namespace mlir
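Editorial note: the pass above exercises both sides of a def-use chain: `getDefiningOp`/block arguments on the producer side, and `getUses`/`getUsers` on the consumer side. As a compact standalone reference, the same APIs applied to a single Value; this is a sketch, and `describeValue` is a hypothetical function, not part of the pass.

#include "mlir/IR/Operation.h"
#include "mlir/IR/Value.h"
#include "llvm/Support/raw_ostream.h"

using namespace mlir;

/// Hypothetical helper summarizing one Value's def-use chain.
static void describeValue(Value value) {
  // Producer side: a Value is either an operation result or a block argument.
  if (Operation *producer = value.getDefiningOp())
    llvm::outs() << "defined by '" << producer->getName() << "'\n";
  else
    llvm::outs() << "block argument #"
                 << value.cast<BlockArgument>().getArgNumber() << "\n";

  // Consumer side: every use is an OpOperand that knows its owning operation
  // and its position in that operation's operand list.
  for (OpOperand &use : value.getUses())
    llvm::outs() << "used by '" << use.getOwner()->getName()
                 << "' as operand #" << use.getOperandNumber() << "\n";
}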
diff --git a/mlir/test/lib/IR/TestPrintNesting.cpp b/mlir/test/lib/IR/TestPrintNesting.cpp
new file mode 100644
index 00000000000000..825d241740fda0
--- /dev/null
+++ b/mlir/test/lib/IR/TestPrintNesting.cpp
@@ -0,0 +1,96 @@
+//===- TestPrintNesting.cpp - Passes to illustrate the IR nesting ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/IR/Function.h"
+#include "mlir/Pass/Pass.h"
+
+using namespace mlir;
+
+namespace {
+/// This pass illustrates the IR nesting through printing.
+struct TestPrintNestingPass
+    : public PassWrapper<TestPrintNestingPass, OperationPass<>> {
+  // Entry point for the pass.
+  void runOnOperation() override {
+    Operation *op = getOperation();
+    resetIndent();
+    printOperation(op);
+  }
+
+  /// The three methods below are mutually recursive and follow the nesting of
+  /// the IR: operation->region->block->operation->...
+
+  void printOperation(Operation *op) {
+    // Print the operation itself and some of its properties.
+    printIndent() << "visiting op: '" << op->getName() << "' with "
+                  << op->getNumOperands() << " operands and "
+                  << op->getNumResults() << " results\n";
+    // Print the operation attributes.
+    if (!op->getAttrs().empty()) {
+      printIndent() << op->getAttrs().size() << " attributes:\n";
+      for (NamedAttribute attr : op->getAttrs())
+        printIndent() << " - '" << attr.first << "' : '" << attr.second
+                      << "'\n";
+    }
+
+    // Recurse into each of the regions attached to the operation.
+    printIndent() << " " << op->getNumRegions() << " nested regions:\n";
+    auto indent = pushIndent();
+    for (Region &region : op->getRegions())
+      printRegion(region);
+  }
+
+  void printRegion(Region &region) {
+    // A region does not hold anything by itself other than a list of blocks.
+    printIndent() << "Region with " << region.getBlocks().size()
+                  << " blocks:\n";
+    auto indent = pushIndent();
+    for (Block &block : region.getBlocks())
+      printBlock(block);
+  }
+
+  void printBlock(Block &block) {
+    // Print the block's intrinsic properties (basically: its argument list).
+    printIndent()
+        << "Block with " << block.getNumArguments() << " arguments, "
+        << block.getNumSuccessors()
+        << " successors, and "
+        // Note, this `.size()` is traversing a linked-list and is O(n).
+        << block.getOperations().size() << " operations\n";
+
+    // A block's main role is to hold a list of Operations: let's recurse.
+    auto indent = pushIndent();
+    for (Operation &op : block.getOperations())
+      printOperation(&op);
+  }
+
+  /// Manages the indentation as we traverse the IR nesting.
+  int indent;
+  struct IdentRAII {
+    int &indent;
+    IdentRAII(int &indent) : indent(indent) {}
+    ~IdentRAII() { --indent; }
+  };
+  void resetIndent() { indent = 0; }
+  IdentRAII pushIndent() { return IdentRAII(++indent); }
+
+  llvm::raw_ostream &printIndent() {
+    for (int i = 0; i < indent; ++i)
+      llvm::outs() << "  ";
+    return llvm::outs();
+  }
+};
+} // end anonymous namespace
+
+namespace mlir {
+void registerTestPrintNestingPass() {
+  PassRegistration<TestPrintNestingPass>("test-print-nesting",
+                                         "Test various printing.");
+}
+} // namespace mlir
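Editorial note: printOperation/printRegion/printBlock make the operation->region->block recursion explicit, and the IdentRAII object returned by pushIndent is the usual RAII trick: the increment lives exactly as long as the scope printing one nesting level, so unbalanced indentation is impossible. The same structural information is also reachable bottom-up from any operation by following parent links; a tiny companion sketch (hypothetical helper, not part of the pass):

#include "mlir/IR/Operation.h"

using namespace mlir;

/// Hypothetical helper: nesting depth of `op`, i.e. how many operations
/// enclose it via their regions; the pass above derives the same depth
/// top-down while recursing.
static unsigned nestingDepth(Operation *op) {
  unsigned depth = 0;
  while ((op = op->getParentOp())) // One enclosing op per region/block level.
    ++depth;
  return depth;
}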
diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt
index de894467d63d43..3ac1e7c5523508 100644
--- a/mlir/test/lib/Transforms/CMakeLists.txt
+++ b/mlir/test/lib/Transforms/CMakeLists.txt
@@ -5,6 +5,7 @@ add_mlir_library(MLIRTestTransforms
   TestExpandTanh.cpp
   TestCallGraph.cpp
   TestConstantFold.cpp
+  TestConvVectorization.cpp
   TestConvertCallOp.cpp
   TestConvertGPUKernelToCubin.cpp
   TestConvertGPUKernelToHsaco.cpp
diff --git a/mlir/test/lib/Transforms/TestBufferPlacement.cpp b/mlir/test/lib/Transforms/TestBufferPlacement.cpp
index 14b72b9fc92a02..c338f0f37c4eae 100644
--- a/mlir/test/lib/Transforms/TestBufferPlacement.cpp
+++ b/mlir/test/lib/Transforms/TestBufferPlacement.cpp
@@ -65,11 +65,6 @@ struct TestBufferPlacementPreparationPass
               op, "dynamic shapes not currently supported");
         auto memrefType =
             MemRefType::get(type.getShape(), type.getElementType());
-
-        // Compute alloc position and insert a custom allocation node.
-        OpBuilder::InsertionGuard guard(rewriter);
-        rewriter.restoreInsertionPoint(
-            bufferAssignment->computeAllocPosition(result));
         auto alloc = rewriter.create<AllocOp>(loc, memrefType);
         newArgs.push_back(alloc);
         newResults.push_back(alloc);
@@ -110,13 +105,12 @@ struct TestBufferPlacementPreparationPass
   };
 
   void populateTensorLinalgToBufferLinalgConversionPattern(
-      MLIRContext *context, BufferAssignmentPlacer *placer,
-      BufferAssignmentTypeConverter *converter,
+      MLIRContext *context, BufferAssignmentTypeConverter *converter,
       OwningRewritePatternList *patterns) {
     populateWithBufferAssignmentOpConversionPatterns<
-        mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, placer,
-                                                        converter, patterns);
-    patterns->insert<GenericOpConverter>(context, placer, converter);
+        mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, converter,
+                                                        patterns);
+    patterns->insert<GenericOpConverter>(context, converter);
   }
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -133,6 +127,8 @@ struct TestBufferPlacementPreparationPass
     target.addLegalDialect<scf::SCFDialect>();
     target.addLegalOp();
    target.addLegalOp();
+    target.addLegalOp();
+    target.addLegalOp();
 
     // Mark all Linalg operations illegal as long as they work on tensors.
     auto isLegalOperation = [&](Operation *op) {
@@ -191,16 +187,11 @@ struct TestBufferPlacementPreparationPass
       return success();
     });
 
-    // Walk over all the functions to apply buffer assignment.
-    this->getOperation().walk([&](FuncOp function) -> WalkResult {
-      OwningRewritePatternList patterns;
-      BufferAssignmentPlacer placer(function);
-      populateTensorLinalgToBufferLinalgConversionPattern(
-          &context, &placer, &converter, &patterns);
-
-      // Applying full conversion
-      return applyFullConversion(function, target, patterns);
-    });
+    OwningRewritePatternList patterns;
+    populateTensorLinalgToBufferLinalgConversionPattern(&context, &converter,
+                                                        &patterns);
+    if (failed(applyFullConversion(this->getOperation(), target, patterns)))
+      this->signalPassFailure();
   };
 };
 } // end anonymous namespace
diff --git a/mlir/test/lib/Transforms/TestConvVectorization.cpp b/mlir/test/lib/Transforms/TestConvVectorization.cpp
new file mode 100644
index 00000000000000..37e509cbbbe1ba
--- /dev/null
+++ b/mlir/test/lib/Transforms/TestConvVectorization.cpp
@@ -0,0 +1,51 @@
+//===- TestConvVectorization.cpp - Linalg to Vector dialect conversion ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+using namespace mlir;
+
+namespace {
+/// A pass converting MLIR Linalg ops into Vector ops.
+class TestConvVectorization
+    : public PassWrapper<TestConvVectorization, OperationPass<ModuleOp>> {
+  void runOnOperation() override;
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<VectorDialect>();
+    registry.insert<linalg::LinalgDialect>();
+    registry.insert<scf::SCFDialect>();
+  }
+};
+} // namespace
+
+void TestConvVectorization::runOnOperation() {
+  MLIRContext *context = &getContext();
+  ModuleOp module = getOperation();
+
+  ConversionTarget target(*context);
+  target.addLegalDialect<VectorDialect>();
+  target.addLegalOp<ModuleOp, FuncOp, ModuleTerminatorOp>();
+  target.addLegalOp<linalg::FillOp, linalg::YieldOp>();
+
+  OwningRewritePatternList patterns;
+  linalg::populateConvVectorizationPatterns(context, patterns);
+
+  if (failed(applyPartialConversion(module, target, patterns)))
+    return signalPassFailure();
+}
+
+namespace mlir {
+void registerTestConvVectorization() {
+  PassRegistration<TestConvVectorization> testTransformPatternsPass(
+      "test-conv-vectorization", "Test vectorization of convolutions");
+}
+} // namespace mlir
diff --git a/mlir/test/lib/Transforms/TestLoopUnrolling.cpp b/mlir/test/lib/Transforms/TestLoopUnrolling.cpp
index 712fddb97028ed..396f08b2cba323 100644
--- a/mlir/test/lib/Transforms/TestLoopUnrolling.cpp
+++ b/mlir/test/lib/Transforms/TestLoopUnrolling.cpp
@@ -55,6 +55,9 @@ class TestLoopUnrollingPass
   Option<uint64_t> unrollFactor{*this, "unroll-factor",
                                 llvm::cl::desc("Loop unroll factor."),
                                 llvm::cl::init(1)};
+  Option<bool> unrollUpToFactor{*this, "unroll-up-to-factor",
+                                llvm::cl::desc("Loop unroll up to factor."),
+                                llvm::cl::init(false)};
   Option<unsigned> loopDepth{*this, "loop-depth", llvm::cl::desc("Loop depth."),
                              llvm::cl::init(0)};
 };
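Editorial note: the TestLoopUnrolling hunk above only declares the new unroll-up-to-factor option; the code that consumes it lies outside the excerpt. A sketch of the usual shape follows, assuming a `loopUnrollUpToFactor` helper analogous to `loopUnrollByFactor`; both that helper's availability for scf::ForOp and the surrounding option plumbing are assumptions here, so treat this as pseudocode under those assumptions.

#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Transforms/LoopUtils.h"

using namespace mlir;

/// Sketch: unroll `loop` according to the pass options. `upToFactor` mirrors
/// the new flag; loopUnrollUpToFactor (assumed helper) is expected to cap the
/// factor at the loop's constant trip count instead of failing.
static LogicalResult unrollLoop(scf::ForOp loop, bool upToFactor,
                                uint64_t factor) {
  if (upToFactor)
    return loopUnrollUpToFactor(loop, factor); // assumed helper
  return loopUnrollByFactor(loop, factor);
}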
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index ad76abed647e7b..437b5f4b6f1a6f 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -45,6 +45,7 @@ void registerTestAllReduceLoweringPass();
 void registerTestBufferPlacementPreparationPass();
 void registerTestCallGraphPass();
 void registerTestConstantFold();
+void registerTestConvVectorization();
 void registerTestConvertGPUKernelToCubinPass();
 void registerTestConvertGPUKernelToHsacoPass();
 void registerTestDominancePass();
@@ -66,6 +67,8 @@ void registerTestMemRefDependenceCheck();
 void registerTestMemRefStrideCalculation();
 void registerTestOpaqueLoc();
 void registerTestPreparationPassWithAllowedMemrefResults();
+void registerTestPrintDefUsePass();
+void registerTestPrintNestingPass();
 void registerTestRecursiveTypesPass();
 void registerTestReducer();
 void registerTestSpirvEntryPointABIPass();
@@ -91,6 +94,7 @@ void registerTestPasses() {
   registerTestAffineLoopUnswitchingPass();
   registerTestLoopPermutationPass();
   registerTestCallGraphPass();
+  registerTestConvVectorization();
   registerTestConstantFold();
 #if MLIR_CUDA_CONVERSIONS_ENABLED
   registerTestConvertGPUKernelToCubinPass();
@@ -115,6 +119,8 @@ void registerTestPasses() {
   registerTestMemRefStrideCalculation();
   registerTestOpaqueLoc();
   registerTestPreparationPassWithAllowedMemrefResults();
+  registerTestPrintDefUsePass();
+  registerTestPrintNestingPass();
   registerTestRecursiveTypesPass();
   registerTestReducer();
   registerTestGpuParallelLoopMappingPass();