diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h index 826677cd63b22b..af832b4c7c84cf 100644 --- a/bolt/include/bolt/Rewrite/RewriteInstance.h +++ b/bolt/include/bolt/Rewrite/RewriteInstance.h @@ -368,13 +368,6 @@ class RewriteInstance { /// rewritten binary. void patchBuildID(); - /// Return file offset corresponding to a given virtual address. - uint64_t getFileOffsetFor(uint64_t Address) { - assert(Address >= NewTextSegmentAddress && - "address in not in the new text segment"); - return Address - NewTextSegmentAddress + NewTextSegmentOffset; - } - /// Return file offset corresponding to a virtual \p Address. /// Return 0 if the address has no mapping in the file, including being /// part of .bss section. @@ -398,9 +391,6 @@ class RewriteInstance { /// Return true if the section holds debug information. static bool isDebugSection(StringRef SectionName); - /// Return true if the section holds linux kernel symbol information. - static bool isKSymtabSection(StringRef SectionName); - /// Adds Debug section to overwrite. 
static void addToDebugSectionsToOverwrite(const char *Section) { DebugSectionsToOverwrite.emplace_back(Section); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index fd2477231142e3..4e0096cf988aed 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -5767,10 +5767,3 @@ bool RewriteInstance::isDebugSection(StringRef SectionName) { return false; } - -bool RewriteInstance::isKSymtabSection(StringRef SectionName) { - if (SectionName.starts_with("__ksymtab")) - return true; - - return false; -} diff --git a/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp b/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp index 2fa7cd0baf98f6..c507043c367a86 100644 --- a/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp @@ -85,10 +85,10 @@ void UnnecessaryValueParamCheck::check(const MatchFinder::MatchResult &Result) { TraversalKindScope RAII(*Result.Context, TK_AsIs); - FunctionParmMutationAnalyzer &Analyzer = - MutationAnalyzers.try_emplace(Function, *Function, *Result.Context) - .first->second; - if (Analyzer.isMutated(Param)) + FunctionParmMutationAnalyzer *Analyzer = + FunctionParmMutationAnalyzer::getFunctionParmMutationAnalyzer( + *Function, *Result.Context, MutationAnalyzerCache); + if (Analyzer->isMutated(Param)) return; const bool IsConstQualified = @@ -169,7 +169,7 @@ void UnnecessaryValueParamCheck::storeOptions( } void UnnecessaryValueParamCheck::onEndOfTranslationUnit() { - MutationAnalyzers.clear(); + MutationAnalyzerCache.clear(); } void UnnecessaryValueParamCheck::handleMoveFix(const ParmVarDecl &Var, diff --git a/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.h b/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.h index 1872e3bc9bf29c..7250bffd20b2f9 100644 --- 
a/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.h +++ b/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.h @@ -37,8 +37,7 @@ class UnnecessaryValueParamCheck : public ClangTidyCheck { void handleMoveFix(const ParmVarDecl &Var, const DeclRefExpr &CopyArgument, const ASTContext &Context); - llvm::DenseMap - MutationAnalyzers; + ExprMutationAnalyzer::Memoized MutationAnalyzerCache; utils::IncludeInserter Inserter; const std::vector AllowedTypes; }; diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 4dfbd8ca49ab9b..7095c564444fe6 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -221,6 +221,10 @@ Changes in existing checks ` check by replacing the local option `HeaderFileExtensions` by the global option of the same name. +- Improved :doc:`misc-const-correctness + ` check by avoiding infinite recursion + for recursive forwarding reference. + - Improved :doc:`misc-definitions-in-headers ` check by replacing the local option `HeaderFileExtensions` by the global option of the same name. diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-templates.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-templates.cpp index 9da468128743e9..248374a71dd40b 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-templates.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-templates.cpp @@ -58,3 +58,18 @@ void concatenate3(Args... 
args) (..., (stream << args)); } } // namespace gh70323 + +namespace gh60895 { + +template void f1(T &&a); +template void f2(T &&a); +template void f1(T &&a) { f2(a); } +template void f2(T &&a) { f1(a); } +void f() { + int x = 0; + // CHECK-MESSAGES:[[@LINE-1]]:3: warning: variable 'x' of type 'int' can be declared 'const' + // CHECK-FIXES: int const x = 0; + f1(x); +} + +} // namespace gh60895 diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 05c8f765b55695..3bead159c8f946 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -3466,6 +3466,54 @@ Query for this feature with ``__has_builtin(__builtin_trap)``. ``__builtin_arm_trap`` is lowered to the ``llvm.aarch64.break`` builtin, and then to ``brk #payload``. +``__builtin_allow_runtime_check`` +--------------------------------- + +``__builtin_allow_runtime_check`` return true if the check at the current +program location should be executed. It is expected to be used to implement +``assert`` like checks which can be safely removed by optimizer. + +**Syntax**: + +.. code-block:: c++ + + bool __builtin_allow_runtime_check(const char* kind) + +**Example of use**: + +.. code-block:: c++ + + if (__builtin_allow_runtime_check("mycheck") && !ExpensiveCheck()) { + abort(); + } + +**Description** + +``__builtin_allow_runtime_check`` is lowered to ` ``llvm.allow.runtime.check`` +`_ +builtin. + +The ``__builtin_allow_runtime_check()`` is expected to be used with control +flow conditions such as in ``if`` to guard expensive runtime checks. The +specific rules for selecting permitted checks can differ and are controlled by +the compiler options. + +Flags to control checks: +* ``-mllvm -lower-allow-check-percentile-cutoff-hot=N`` where N is PGO hotness +cutoff in range ``[0, 999999]`` to disallow checks in hot code. +* ``-mllvm -lower-allow-check-random-rate=P`` where P is number in range +``[0.0, 1.0]`` representation probability of keeping a check. 
+* If both flags are specified, ``-lower-allow-check-random-rate`` takes +precedence. +* If none is specified, ``__builtin_allow_runtime_check`` is lowered as +``true``, allowing all checks. + +Parameter ``kind`` is a string literal representing a user selected kind for +guarded check. It's unused now. It will enable kind-specific lowering in future. +E.g. a higher hotness cutoff can be used for more expensive kind of check. + +Query for this feature with ``__has_builtin(__builtin_allow_runtime_check)``. + ``__builtin_nondeterministic_value`` ------------------------------------ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 76701dc723b6c3..6099f8ab02f443 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -68,7 +68,7 @@ AST Dumping Potentially Breaking Changes Clang Frontend Potentially Breaking Changes ------------------------------------------- -- Removed support for constructing on-stack ``TemplateArgumentList``s; interfaces should instead +- Removed support for constructing on-stack ``TemplateArgumentList``\ s; interfaces should instead use ``ArrayRef`` to pass template arguments. Transitioning internal uses to ``ArrayRef`` reduces AST memory usage by 0.4% when compiling clang, and is expected to show similar improvements on other workloads. @@ -104,8 +104,7 @@ C++20 Feature Support - Clang now implements [module.import]p7 fully. Clang now will import module units transitively for the module units coming from the same module of the - current module units. - Fixes `#84002 `_. + current module units. Fixes #GH84002 - Initial support for class template argument deduction (CTAD) for type alias templates (`P1814R0 `_). @@ -135,8 +134,7 @@ C++2c Feature Support Resolutions to C++ Defect Reports ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - Substitute template parameter pack, when it is not explicitly specified - in the template parameters, but is deduced from a previous argument. - (`#78449: `_). 
+ in the template parameters, but is deduced from a previous argument. (#GH78449) - Type qualifications are now ignored when evaluating layout compatibility of two types. @@ -176,8 +174,7 @@ C23 Feature Support - Clang now generates predefined macros of the form ``__TYPE_FMTB__`` and ``__TYPE_FMTb__`` (e.g., ``__UINT_FAST64_FMTB__``) in C23 mode for use with - macros typically exposed from ````, such as ``PRIb8``. - (`#81896: `_). + macros typically exposed from ````, such as ``PRIb8``. (#GH81896) - Clang now supports `N3018 The constexpr specifier for object definitions` `_. @@ -215,7 +212,10 @@ New Compiler Flags - ``-Wmissing-designated-field-initializers``, grouped under ``-Wmissing-field-initializers``. This diagnostic can be disabled to make ``-Wmissing-field-initializers`` behave - like it did before Clang 18.x. Fixes (`#56628 `_) + like it did before Clang 18.x. Fixes #GH56628 + +- ``-fexperimental-modules-reduced-bmi`` enables the Reduced BMI for C++20 named modules. + See the document of standard C++ modules for details. Deprecated Compiler Flags ------------------------- @@ -254,8 +254,7 @@ Removed Compiler Flags - The ``-freroll-loops`` flag has been removed. It had no effect since Clang 13. - ``-m[no-]unaligned-access`` is removed for RISC-V and LoongArch. - ``-m[no-]strict-align``, also supported by GCC, should be used instead. - (`#85350 `_.) + ``-m[no-]strict-align``, also supported by GCC, should be used instead. (#GH85350) Attribute Changes in Clang -------------------------- @@ -325,8 +324,7 @@ Improvements to Clang's diagnostics Fixes #GH82512. - Clang now provides improved warnings for the ``cleanup`` attribute to detect misuse scenarios, - such as attempting to call ``free`` on an unallocated object. Fixes - `#79443 `_. + such as attempting to call ``free`` on an unallocated object. Fixes #GH79443. 
- Clang no longer warns when the ``bitand`` operator is used with boolean operands, distinguishing it from potential typographical errors or unintended @@ -372,11 +370,10 @@ Improvements to Clang's time-trace Bug Fixes in This Version ------------------------- - Clang's ``-Wundefined-func-template`` no longer warns on pure virtual - functions. - (`#74016 `_) + functions. (#GH74016) - Fixed missing warnings when comparing mismatched enumeration constants - in C (`#29217 `). + in C (#GH29217) - Clang now accepts elaborated-type-specifiers that explicitly specialize a member class template for an implicit instantiation of a class template. @@ -415,7 +412,7 @@ Bug Fixes in This Version type only rather than to the complex type (e.g. ``_Complex float / int`` is now evaluated as ``_Complex float / float`` rather than ``_Complex float / _Complex float``), as mandated by the C standard. This significantly improves codegen of `*` and `/` especially. - Fixes (`#31205 `_). + Fixes #GH31205. - Fixes an assertion failure on invalid code when trying to define member functions in lambdas. @@ -423,6 +420,8 @@ Bug Fixes in This Version - Fixed a regression in CTAD that a friend declaration that befriends itself may cause incorrect constraint substitution. (#GH86769). +- Fixed an assertion failure on invalid InitListExpr in C89 mode (#GH88008). + Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -464,8 +463,7 @@ Bug Fixes to C++ Support - Fix a crash when trying to call a varargs function that also has an explicit object parameter. (#GH80971) - Fixed a bug where abbreviated function templates would append their invented template parameters to an empty template parameter lists. -- Fix parsing of abominable function types inside type traits. - Fixes (`#77585 `_) +- Fix parsing of abominable function types inside type traits. Fixes #GH77585 - Clang now classifies aggregate initialization in C++17 and newer as constant or non-constant more accurately. 
Previously, only a subset of the initializer elements were considered, misclassifying some initializers as constant. Partially fixes @@ -506,9 +504,7 @@ Bug Fixes to C++ Support - Fix a bug where overload resolution falsely reported an ambiguity when it was comparing a member-function against a non member function or a member-function with an explicit object parameter against a member function with no explicit object parameter - when one of the function had more specialized templates. - Fixes (`#82509 `_) - and (`#74494 `_) + when one of the function had more specialized templates. Fixes #GH82509 and #GH74494 - Clang now supports direct lambda calls inside of a type alias template declarations. This addresses (#GH70601), (#GH76674), (#GH79555), (#GH81145) and (#GH82104). - Allow access to a public template alias declaration that refers to friend's @@ -530,8 +526,7 @@ Bug Fixes to C++ Support - Fixed a bug that prevented member function templates of class templates declared with a deduced return type from being explicitly specialized for a given implicit instantiation of the class template. -- Fix crash when inheriting from a cv-qualified type. Fixes: - (`#35603 `_) +- Fix crash when inheriting from a cv-qualified type. Fixes #GH35603 - Fix a crash when the using enum declaration uses an anonymous enumeration. Fixes (#GH86790). - Handled an edge case in ``getFullyPackExpandedSize`` so that we now avoid a false-positive diagnostic. (#GH84220) - Clang now correctly tracks type dependence of by-value captures in lambdas with an explicit @@ -539,6 +534,7 @@ Bug Fixes to C++ Support Fixes (#GH70604), (#GH79754), (#GH84163), (#GH84425), (#GH86054), (#GH86398), and (#GH86399). - Fix a crash when deducing ``auto`` from an invalid dereference (#GH88329). - Fix a crash in requires expression with templated base class member function. Fixes (#GH84020). 
+- Placement new initializes typedef array with correct size (#GH41441) Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -552,8 +548,7 @@ Miscellaneous Clang Crashes Fixed ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - Do not attempt to dump the layout of dependent types or invalid declarations when ``-fdump-record-layouts-complete`` is passed. - Fixes (`#83684 `_). + when ``-fdump-record-layouts-complete`` is passed. Fixes #GH83684. OpenACC Specific Changes ------------------------ @@ -603,8 +598,7 @@ Windows Support would only be included if AVX was enabled at compile time. This was done to work around include times from MSVC STL including ``intrin.h`` under clang-cl. Clang-cl now provides ``intrin0.h`` for MSVC STL and therefore all intrinsic - features without requiring enablement at compile time. - Fixes: (`#53520 `_) + features without requiring enablement at compile time. Fixes #GH53520 - Improved compile times with MSVC STL. MSVC provides ``intrin0.h`` which is a header that only includes intrinsics that are used by MSVC STL to avoid the @@ -685,6 +679,8 @@ Static Analyzer but not under any case blocks if ``unroll-loops=true`` analyzer config is set. (#GH68819) - Support C++23 static operator calls. (#GH84972) +- Fixed a crash in ``security.cert.env.InvalidPtr`` checker when accidentally + matched user-defined ``strerror`` and similar library functions. (#GH88181) New features ^^^^^^^^^^^^ diff --git a/clang/docs/StandardCPlusPlusModules.rst b/clang/docs/StandardCPlusPlusModules.rst index c5478bba45f389..8d5529d5d37db5 100644 --- a/clang/docs/StandardCPlusPlusModules.rst +++ b/clang/docs/StandardCPlusPlusModules.rst @@ -520,6 +520,112 @@ is attached to the global module fragments. For example: Now the linkage name of ``NS::foo()`` will be ``_ZN2NS3fooEv``. +Reduced BMI +----------- + +To support the 2 phase compilation model, Clang chose to put everything needed to +produce an object into the BMI. 
But every consumer of the BMI, except itself, doesn't +need such information. This makes the BMI larger and so may introduce unnecessary +dependencies into the BMI. To mitigate the problem, we decided to reduce the information +contained in the BMI. + +To be clear, we call the default BMI the Full BMI and the newly introduced BMI the Reduced +BMI. + +Users can use ``-fexperimental-modules-reduced-bmi`` flag to enable the Reduced BMI. + +For one phase compilation model (CMake implements this model), with +``-fexperimental-modules-reduced-bmi``, the generated BMI will be Reduced BMI automatically. +(The output path of the BMI is specified by ``-fmodule-output=`` as usual one phase +compilation model). + +It is still possible to support Reduced BMI in two phase compilation model. With +``-fexperimental-modules-reduced-bmi``, ``--precompile`` and ``-fmodule-output=`` specified, +the generated BMI specified by ``-o`` will be full BMI and the BMI specified by +``-fmodule-output=`` will be Reduced BMI. The dependency graph may be: + +.. code-block:: none + + module-unit.cppm --> module-unit.full.pcm -> module-unit.o + | + -> module-unit.reduced.pcm -> consumer1.cpp + -> consumer2.cpp + -> ... + -> consumer_n.cpp + +We don't emit diagnostics if ``-fexperimental-modules-reduced-bmi`` is used with a non-module +unit. This design helps the end users of one phase compilation model to perform experiments +early without asking for the help of build systems. The users of build systems that support +two phase compilation model still need help from build systems. + +Within Reduced BMI, we won't write unreachable entities from GMF, definitions of non-inline +functions and non-inline variables. This may not be a transparent change. +`[module.global.frag]ex2 `_ may be a good +example: + +.. 
code-block:: c++ + + // foo.h + namespace N { + struct X {}; + int d(); + int e(); + inline int f(X, int = d()) { return e(); } + int g(X); + int h(X); + } + + // M.cppm + module; + #include "foo.h" + export module M; + template int use_f() { + N::X x; // N::X, N, and :: are decl-reachable from use_f + return f(x, 123); // N::f is decl-reachable from use_f, + // N::e is indirectly decl-reachable from use_f + // because it is decl-reachable from N::f, and + // N::d is decl-reachable from use_f + // because it is decl-reachable from N::f + // even though it is not used in this call + } + template int use_g() { + N::X x; // N::X, N, and :: are decl-reachable from use_g + return g((T(), x)); // N::g is not decl-reachable from use_g + } + template int use_h() { + N::X x; // N::X, N, and :: are decl-reachable from use_h + return h((T(), x)); // N::h is not decl-reachable from use_h, but + // N::h is decl-reachable from use_h + } + int k = use_h(); + // use_h is decl-reachable from k, so + // N::h is decl-reachable from k + + // M-impl.cpp + module M; + int a = use_f(); // OK + int b = use_g(); // error: no viable function for call to g; + // g is not decl-reachable from purview of + // module M's interface, so is discarded + int c = use_h(); // OK + +In the above example, the function definition of ``N::g`` is elided from the Reduced +BMI of ``M.cppm``. Then the use of ``use_g`` in ``M-impl.cpp`` fails +to instantiate. For such issues, users can add references to ``N::g`` in the module purview +of ``M.cppm`` to make sure it is reachable, e.g., ``using N::g;``. + +We think the Reduced BMI is the correct direction. But given it is a drastic change, +we'd like to make it experimental first to avoid breaking existing users. The roadmap +of Reduced BMI may be: + +1. ``-fexperimental-modules-reduced-bmi`` is opt in for 1~2 releases. The period depends +on testing feedbacks. +2. We would announce Reduced BMI is not experimental and introduce ``-fmodules-reduced-bmi``. 
+and suggest that users enable this mode. This may take 1~2 releases too. +3. Finally we will enable this by default. When that time comes, the term BMI will refer to +today's Reduced BMI and the Full BMI will only be meaningful to build systems that +want to support two phase compilations. + Performance Tips ---------------- diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt index 3089438c23d94e..2252d0ccde96d2 100644 --- a/clang/docs/tools/clang-formatted-files.txt +++ b/clang/docs/tools/clang-formatted-files.txt @@ -123,6 +123,7 @@ clang/include/clang/Analysis/Analyses/CalledOnceCheck.h clang/include/clang/Analysis/Analyses/CFGReachabilityAnalysis.h clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h clang/include/clang/Analysis/FlowSensitive/AdornedCFG.h +clang/include/clang/Analysis/FlowSensitive/ASTOps.h clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -307,6 +308,7 @@ clang/lib/Analysis/CalledOnceCheck.cpp clang/lib/Analysis/CloneDetection.cpp clang/lib/Analysis/CodeInjector.cpp clang/lib/Analysis/FlowSensitive/AdornedCFG.cpp +clang/lib/Analysis/FlowSensitive/ASTOps.cpp clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp clang/lib/Analysis/FlowSensitive/DebugSupport.cpp diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h index 401b8e904a1b7a..07587849eb1219 100644 --- a/clang/include/clang/AST/OpenACCClause.h +++ b/clang/include/clang/AST/OpenACCClause.h @@ -145,6 +145,17 @@ class OpenACCIfClause : public OpenACCClauseWithCondition { SourceLocation EndLoc); }; +/// A 'self' clause, which has an optional condition expression. 
+class OpenACCSelfClause : public OpenACCClauseWithCondition { + OpenACCSelfClause(SourceLocation BeginLoc, SourceLocation LParenLoc, + Expr *ConditionExpr, SourceLocation EndLoc); + +public: + static OpenACCSelfClause *Create(const ASTContext &C, SourceLocation BeginLoc, + SourceLocation LParenLoc, + Expr *ConditionExpr, SourceLocation EndLoc); +}; + template class OpenACCClauseVisitor { Impl &getDerived() { return static_cast(*this); } @@ -159,53 +170,13 @@ template class OpenACCClauseVisitor { return; switch (C->getClauseKind()) { - case OpenACCClauseKind::Default: - VisitDefaultClause(*cast(C)); - return; - case OpenACCClauseKind::If: - VisitIfClause(*cast(C)); - return; - case OpenACCClauseKind::Finalize: - case OpenACCClauseKind::IfPresent: - case OpenACCClauseKind::Seq: - case OpenACCClauseKind::Independent: - case OpenACCClauseKind::Auto: - case OpenACCClauseKind::Worker: - case OpenACCClauseKind::Vector: - case OpenACCClauseKind::NoHost: - case OpenACCClauseKind::Self: - case OpenACCClauseKind::Copy: - case OpenACCClauseKind::UseDevice: - case OpenACCClauseKind::Attach: - case OpenACCClauseKind::Delete: - case OpenACCClauseKind::Detach: - case OpenACCClauseKind::Device: - case OpenACCClauseKind::DevicePtr: - case OpenACCClauseKind::DeviceResident: - case OpenACCClauseKind::FirstPrivate: - case OpenACCClauseKind::Host: - case OpenACCClauseKind::Link: - case OpenACCClauseKind::NoCreate: - case OpenACCClauseKind::Present: - case OpenACCClauseKind::Private: - case OpenACCClauseKind::CopyOut: - case OpenACCClauseKind::CopyIn: - case OpenACCClauseKind::Create: - case OpenACCClauseKind::Reduction: - case OpenACCClauseKind::Collapse: - case OpenACCClauseKind::Bind: - case OpenACCClauseKind::VectorLength: - case OpenACCClauseKind::NumGangs: - case OpenACCClauseKind::NumWorkers: - case OpenACCClauseKind::DeviceNum: - case OpenACCClauseKind::DefaultAsync: - case OpenACCClauseKind::DeviceType: - case OpenACCClauseKind::DType: - case OpenACCClauseKind::Async: - case 
OpenACCClauseKind::Tile: - case OpenACCClauseKind::Gang: - case OpenACCClauseKind::Wait: - case OpenACCClauseKind::Invalid: +#define VISIT_CLAUSE(CLAUSE_NAME) \ + case OpenACCClauseKind::CLAUSE_NAME: \ + Visit##CLAUSE_NAME##Clause(*cast(C)); \ + return; +#include "clang/Basic/OpenACCClauses.def" + + default: llvm_unreachable("Clause visitor not yet implemented"); } llvm_unreachable("Invalid Clause kind"); diff --git a/clang/include/clang/AST/StmtOpenACC.h b/clang/include/clang/AST/StmtOpenACC.h index 419cb6cada0bc7..66f8f844e0b29e 100644 --- a/clang/include/clang/AST/StmtOpenACC.h +++ b/clang/include/clang/AST/StmtOpenACC.h @@ -142,9 +142,7 @@ class OpenACCComputeConstruct final Stmt *StructuredBlock) : OpenACCAssociatedStmtConstruct(OpenACCComputeConstructClass, K, Start, End, StructuredBlock) { - assert((K == OpenACCDirectiveKind::Parallel || - K == OpenACCDirectiveKind::Serial || - K == OpenACCDirectiveKind::Kernels) && + assert(isOpenACCComputeDirectiveKind(K) && "Only parallel, serial, and kernels constructs should be " "represented by this type"); diff --git a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h index 1ceef944fbc34e..117173ba9a0958 100644 --- a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h +++ b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h @@ -8,11 +8,9 @@ #ifndef LLVM_CLANG_ANALYSIS_ANALYSES_EXPRMUTATIONANALYZER_H #define LLVM_CLANG_ANALYSIS_ANALYSES_EXPRMUTATIONANALYZER_H -#include - -#include "clang/AST/AST.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "llvm/ADT/DenseMap.h" +#include namespace clang { @@ -21,14 +19,74 @@ class FunctionParmMutationAnalyzer; /// Analyzes whether any mutative operations are applied to an expression within /// a given statement. 
class ExprMutationAnalyzer { + friend class FunctionParmMutationAnalyzer; + public: + struct Memoized { + using ResultMap = llvm::DenseMap; + using FunctionParaAnalyzerMap = + llvm::SmallDenseMap>; + + ResultMap Results; + ResultMap PointeeResults; + FunctionParaAnalyzerMap FuncParmAnalyzer; + + void clear() { + Results.clear(); + PointeeResults.clear(); + FuncParmAnalyzer.clear(); + } + }; + struct Analyzer { + Analyzer(const Stmt &Stm, ASTContext &Context, Memoized &Memorized) + : Stm(Stm), Context(Context), Memorized(Memorized) {} + + const Stmt *findMutation(const Expr *Exp); + const Stmt *findMutation(const Decl *Dec); + + const Stmt *findPointeeMutation(const Expr *Exp); + const Stmt *findPointeeMutation(const Decl *Dec); + static bool isUnevaluated(const Stmt *Smt, const Stmt &Stm, + ASTContext &Context); + + private: + using MutationFinder = const Stmt *(Analyzer::*)(const Expr *); + + const Stmt *findMutationMemoized(const Expr *Exp, + llvm::ArrayRef Finders, + Memoized::ResultMap &MemoizedResults); + const Stmt *tryEachDeclRef(const Decl *Dec, MutationFinder Finder); + + bool isUnevaluated(const Expr *Exp); + + const Stmt *findExprMutation(ArrayRef Matches); + const Stmt *findDeclMutation(ArrayRef Matches); + const Stmt * + findExprPointeeMutation(ArrayRef Matches); + const Stmt * + findDeclPointeeMutation(ArrayRef Matches); + + const Stmt *findDirectMutation(const Expr *Exp); + const Stmt *findMemberMutation(const Expr *Exp); + const Stmt *findArrayElementMutation(const Expr *Exp); + const Stmt *findCastMutation(const Expr *Exp); + const Stmt *findRangeLoopMutation(const Expr *Exp); + const Stmt *findReferenceMutation(const Expr *Exp); + const Stmt *findFunctionArgMutation(const Expr *Exp); + + const Stmt &Stm; + ASTContext &Context; + Memoized &Memorized; + }; + ExprMutationAnalyzer(const Stmt &Stm, ASTContext &Context) - : Stm(Stm), Context(Context) {} + : Memorized(), A(Stm, Context, Memorized) {} bool isMutated(const Expr *Exp) { return 
findMutation(Exp) != nullptr; } bool isMutated(const Decl *Dec) { return findMutation(Dec) != nullptr; } - const Stmt *findMutation(const Expr *Exp); - const Stmt *findMutation(const Decl *Dec); + const Stmt *findMutation(const Expr *Exp) { return A.findMutation(Exp); } + const Stmt *findMutation(const Decl *Dec) { return A.findMutation(Dec); } bool isPointeeMutated(const Expr *Exp) { return findPointeeMutation(Exp) != nullptr; @@ -36,51 +94,40 @@ class ExprMutationAnalyzer { bool isPointeeMutated(const Decl *Dec) { return findPointeeMutation(Dec) != nullptr; } - const Stmt *findPointeeMutation(const Expr *Exp); - const Stmt *findPointeeMutation(const Decl *Dec); + const Stmt *findPointeeMutation(const Expr *Exp) { + return A.findPointeeMutation(Exp); + } + const Stmt *findPointeeMutation(const Decl *Dec) { + return A.findPointeeMutation(Dec); + } + static bool isUnevaluated(const Stmt *Smt, const Stmt &Stm, - ASTContext &Context); + ASTContext &Context) { + return Analyzer::isUnevaluated(Smt, Stm, Context); + } private: - using MutationFinder = const Stmt *(ExprMutationAnalyzer::*)(const Expr *); - using ResultMap = llvm::DenseMap; - - const Stmt *findMutationMemoized(const Expr *Exp, - llvm::ArrayRef Finders, - ResultMap &MemoizedResults); - const Stmt *tryEachDeclRef(const Decl *Dec, MutationFinder Finder); - - bool isUnevaluated(const Expr *Exp); - - const Stmt *findExprMutation(ArrayRef Matches); - const Stmt *findDeclMutation(ArrayRef Matches); - const Stmt * - findExprPointeeMutation(ArrayRef Matches); - const Stmt * - findDeclPointeeMutation(ArrayRef Matches); - - const Stmt *findDirectMutation(const Expr *Exp); - const Stmt *findMemberMutation(const Expr *Exp); - const Stmt *findArrayElementMutation(const Expr *Exp); - const Stmt *findCastMutation(const Expr *Exp); - const Stmt *findRangeLoopMutation(const Expr *Exp); - const Stmt *findReferenceMutation(const Expr *Exp); - const Stmt *findFunctionArgMutation(const Expr *Exp); - - const Stmt &Stm; - 
ASTContext &Context; - llvm::DenseMap> - FuncParmAnalyzer; - ResultMap Results; - ResultMap PointeeResults; + Memoized Memorized; + Analyzer A; }; // A convenient wrapper around ExprMutationAnalyzer for analyzing function // params. class FunctionParmMutationAnalyzer { public: - FunctionParmMutationAnalyzer(const FunctionDecl &Func, ASTContext &Context); + static FunctionParmMutationAnalyzer * + getFunctionParmMutationAnalyzer(const FunctionDecl &Func, ASTContext &Context, + ExprMutationAnalyzer::Memoized &Memorized) { + auto it = Memorized.FuncParmAnalyzer.find(&Func); + if (it == Memorized.FuncParmAnalyzer.end()) + it = + Memorized.FuncParmAnalyzer + .try_emplace(&Func, std::unique_ptr( + new FunctionParmMutationAnalyzer( + Func, Context, Memorized))) + .first; + return it->getSecond().get(); + } bool isMutated(const ParmVarDecl *Parm) { return findMutation(Parm) != nullptr; @@ -88,8 +135,11 @@ class FunctionParmMutationAnalyzer { const Stmt *findMutation(const ParmVarDecl *Parm); private: - ExprMutationAnalyzer BodyAnalyzer; + ExprMutationAnalyzer::Analyzer BodyAnalyzer; llvm::DenseMap Results; + + FunctionParmMutationAnalyzer(const FunctionDecl &Func, ASTContext &Context, + ExprMutationAnalyzer::Memoized &Memorized); }; } // namespace clang diff --git a/clang/include/clang/Analysis/FlowSensitive/ASTOps.h b/clang/include/clang/Analysis/FlowSensitive/ASTOps.h new file mode 100644 index 00000000000000..27ad32c1694f77 --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/ASTOps.h @@ -0,0 +1,98 @@ +//===-- ASTOps.h -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Operations on AST nodes that are used in flow-sensitive analysis. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_ASTOPS_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_ASTOPS_H + +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/FlowSensitive/StorageLocation.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" + +namespace clang { +namespace dataflow { + +/// Skip past nodes that the CFG does not emit. These nodes are invisible to +/// flow-sensitive analysis, and should be ignored as they will effectively not +/// exist. +/// +/// * `ParenExpr` - The CFG takes the operator precedence into account, but +/// otherwise omits the node afterwards. +/// +/// * `ExprWithCleanups` - The CFG will generate the appropriate calls to +/// destructors and then omit the node. +/// +const Expr &ignoreCFGOmittedNodes(const Expr &E); +const Stmt &ignoreCFGOmittedNodes(const Stmt &S); + +/// A set of `FieldDecl *`. Use `SmallSetVector` to guarantee deterministic +/// iteration order. +using FieldSet = llvm::SmallSetVector; + +/// Returns the set of all fields in the type. +FieldSet getObjectFields(QualType Type); + +/// Returns whether `Fields` and `FieldLocs` contain the same fields. +bool containsSameFields(const FieldSet &Fields, + const RecordStorageLocation::FieldToLoc &FieldLocs); + +/// Helper class for initialization of a record with an `InitListExpr`. +/// `InitListExpr::inits()` contains the initializers for both the base classes +/// and the fields of the record; this helper class separates these out into two +/// different lists. In addition, it deals with special cases associated with +/// unions. +class RecordInitListHelper { +public: + // `InitList` must have record type. + RecordInitListHelper(const InitListExpr *InitList); + + // Base classes with their associated initializer expressions. 
+ ArrayRef> base_inits() const { + return BaseInits; + } + + // Fields with their associated initializer expressions. + ArrayRef> field_inits() const { + return FieldInits; + } + +private: + SmallVector> BaseInits; + SmallVector> FieldInits; + + // We potentially synthesize an `ImplicitValueInitExpr` for unions. It's a + // member variable because we store a pointer to it in `FieldInits`. + std::optional ImplicitValueInitForUnion; +}; + +/// A collection of several types of declarations, all referenced from the same +/// function. +struct ReferencedDecls { + /// Non-static member variables. + FieldSet Fields; + /// All variables with static storage duration, notably including static + /// member variables and static variables declared within a function. + llvm::DenseSet Globals; + /// Free functions and member functions which are referenced (but not + /// necessarily called). + llvm::DenseSet Functions; +}; + +/// Returns declarations that are declared in or referenced from `FD`. +ReferencedDecls getReferencedDecls(const FunctionDecl &FD); + +} // namespace dataflow +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_ASTOPS_H diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h index 909a91059438ca..aa2c366cb164a9 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h @@ -18,6 +18,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" #include "clang/AST/TypeOrdering.h" +#include "clang/Analysis/FlowSensitive/ASTOps.h" #include "clang/Analysis/FlowSensitive/AdornedCFG.h" #include "clang/Analysis/FlowSensitive/Arena.h" #include "clang/Analysis/FlowSensitive/Solver.h" @@ -30,38 +31,11 @@ #include #include #include -#include -#include -#include namespace clang { namespace dataflow { class Logger; -/// Skip past nodes that the CFG does not emit. 
These nodes are invisible to -/// flow-sensitive analysis, and should be ignored as they will effectively not -/// exist. -/// -/// * `ParenExpr` - The CFG takes the operator precedence into account, but -/// otherwise omits the node afterwards. -/// -/// * `ExprWithCleanups` - The CFG will generate the appropriate calls to -/// destructors and then omit the node. -/// -const Expr &ignoreCFGOmittedNodes(const Expr &E); -const Stmt &ignoreCFGOmittedNodes(const Stmt &S); - -/// A set of `FieldDecl *`. Use `SmallSetVector` to guarantee deterministic -/// iteration order. -using FieldSet = llvm::SmallSetVector; - -/// Returns the set of all fields in the type. -FieldSet getObjectFields(QualType Type); - -/// Returns whether `Fields` and `FieldLocs` contain the same fields. -bool containsSameFields(const FieldSet &Fields, - const RecordStorageLocation::FieldToLoc &FieldLocs); - struct ContextSensitiveOptions { /// The maximum depth to analyze. A value of zero is equivalent to disabling /// context-sensitive analysis entirely. diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index 706664d7db1c25..4277792219c0af 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -775,42 +775,6 @@ RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE, RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME, const Environment &Env); -/// Returns the fields of a `RecordDecl` that are initialized by an -/// `InitListExpr`, in the order in which they appear in -/// `InitListExpr::inits()`. -/// `Init->getType()` must be a record type. -std::vector -getFieldsForInitListExpr(const InitListExpr *InitList); - -/// Helper class for initialization of a record with an `InitListExpr`. 
-/// `InitListExpr::inits()` contains the initializers for both the base classes -/// and the fields of the record; this helper class separates these out into two -/// different lists. In addition, it deals with special cases associated with -/// unions. -class RecordInitListHelper { -public: - // `InitList` must have record type. - RecordInitListHelper(const InitListExpr *InitList); - - // Base classes with their associated initializer expressions. - ArrayRef> base_inits() const { - return BaseInits; - } - - // Fields with their associated initializer expressions. - ArrayRef> field_inits() const { - return FieldInits; - } - -private: - SmallVector> BaseInits; - SmallVector> FieldInits; - - // We potentially synthesize an `ImplicitValueInitExpr` for unions. It's a - // member variable because we store a pointer to it in `FieldInits`. - std::optional ImplicitValueInitForUnion; -}; - /// Associates a new `RecordValue` with `Loc` and returns the new value. RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env); diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index d6ceb450bd106b..de721a87b3341d 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1164,6 +1164,12 @@ def Unreachable : Builtin { let Prototype = "void()"; } +def AllowRuntimeCheck : Builtin { + let Spellings = ["__builtin_allow_runtime_check"]; + let Attributes = [NoThrow, Pure, Const]; + let Prototype = "bool(char const*)"; +} + def ShuffleVector : Builtin { let Spellings = ["__builtin_shufflevector"]; let Attributes = [NoThrow, Const, CustomTypeChecking]; diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index 38f30543a0f662..ba0e4465a0f5a0 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -50,17 +50,15 @@ const char *CudaVersionToString(CudaVersion V); // Input is "Major.Minor" CudaVersion CudaStringToVersion(const 
llvm::Twine &S); -// We have a name conflict with sys/mac.h on AIX -#ifdef SM_32 -#undef SM_32 -#endif enum class CudaArch { UNUSED, UNKNOWN, + // TODO: Deprecate and remove GPU architectures older than sm_52. SM_20, SM_21, SM_30, - SM_32, + // This has a name conflict with sys/mac.h on AIX, rename it as a workaround. + SM_32_, SM_35, SM_37, SM_50, diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 5ec0218aedfe86..44f802c0c28e84 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12274,4 +12274,8 @@ def note_acc_branch_into_compute_construct : Note<"invalid branch into OpenACC Compute Construct">; def note_acc_branch_out_of_compute_construct : Note<"invalid branch out of OpenACC Compute Construct">; +def warn_acc_if_self_conflict + : Warning<"OpenACC construct 'self' has no effect when an 'if' clause " + "evaluates to true">, + InGroup>; } // end of sema component. 
diff --git a/clang/include/clang/Basic/OpenACCClauses.def b/clang/include/clang/Basic/OpenACCClauses.def index 7fd2720e02ce22..378495d2c0909a 100644 --- a/clang/include/clang/Basic/OpenACCClauses.def +++ b/clang/include/clang/Basic/OpenACCClauses.def @@ -17,5 +17,6 @@ VISIT_CLAUSE(Default) VISIT_CLAUSE(If) +VISIT_CLAUSE(Self) #undef VISIT_CLAUSE diff --git a/clang/include/clang/Basic/OpenACCKinds.h b/clang/include/clang/Basic/OpenACCKinds.h index 3414df99991701..e3f74178433285 100644 --- a/clang/include/clang/Basic/OpenACCKinds.h +++ b/clang/include/clang/Basic/OpenACCKinds.h @@ -146,6 +146,12 @@ inline llvm::raw_ostream &operator<<(llvm::raw_ostream &Out, return printOpenACCDirectiveKind(Out, K); } +inline bool isOpenACCComputeDirectiveKind(OpenACCDirectiveKind K) { + return K == OpenACCDirectiveKind::Parallel || + K == OpenACCDirectiveKind::Serial || + K == OpenACCDirectiveKind::Kernels; +} + enum class OpenACCAtomicKind { Read, Write, diff --git a/clang/include/clang/Basic/arm_fp16.td b/clang/include/clang/Basic/arm_fp16.td index cb2a09303e8e12..d36b4617bef5d2 100644 --- a/clang/include/clang/Basic/arm_fp16.td +++ b/clang/include/clang/Basic/arm_fp16.td @@ -14,7 +14,7 @@ include "arm_neon_incl.td" // ARMv8.2-A FP16 intrinsics. 
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "fullfp16" in { // Negate def VNEGSH : SInst<"vneg", "11", "Sh">; diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index 7edac5afafaa99..6d655c39360d3b 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -605,11 +605,11 @@ def VQDMULL_LANE : SOpInst<"vqdmull_lane", "(>Q)..I", "si", OP_QDMULL_LN>; def VQDMULH_N : SOpInst<"vqdmulh_n", "..1", "siQsQi", OP_QDMULH_N>; def VQRDMULH_N : SOpInst<"vqrdmulh_n", "..1", "siQsQi", OP_QRDMULH_N>; -let ArchGuard = "!defined(__aarch64__)" in { +let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)" in { def VQDMULH_LANE : SOpInst<"vqdmulh_lane", "..qI", "siQsQi", OP_QDMULH_LN>; def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "..qI", "siQsQi", OP_QRDMULH_LN>; } -let ArchGuard = "defined(__aarch64__)" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in { def A64_VQDMULH_LANE : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi">; def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi">; } @@ -686,7 +686,7 @@ multiclass REINTERPRET_CROSS_TYPES { // E.3.31 Vector reinterpret cast operations def VREINTERPRET : REINTERPRET_CROSS_SELF<"csilUcUsUiUlhfPcPsQcQsQiQlQUcQUsQUiQUlQhQfQPcQPs"> { - let ArchGuard = "!defined(__aarch64__)"; + let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)"; let BigEndianSafe = 1; } @@ -714,7 +714,7 @@ def VADDP : WInst<"vadd", "...", "PcPsPlQPcQPsQPl">; //////////////////////////////////////////////////////////////////////////////// // AArch64 Intrinsics -let ArchGuard = "defined(__aarch64__)" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in { //////////////////////////////////////////////////////////////////////////////// // Load/Store @@ -1091,14 +1091,14 @@ let isLaneQ = 1 in { def VQDMULH_LANEQ : 
SInst<"vqdmulh_laneq", "..QI", "siQsQi">; def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi">; } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a" in { def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN> { let isLaneQ = 1; } def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "...QI", "siQsQi", OP_QRDMLSH_LN> { let isLaneQ = 1; } -} // ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a" +} // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a" // Note: d type implemented by SCALAR_VMULX_LANE def VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "fQfQd", OP_MULX_LN>; @@ -1143,7 +1143,7 @@ def SHA256H2 : SInst<"vsha256h2", "....", "QUi">; def SHA256SU1 : SInst<"vsha256su1", "....", "QUi">; } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "sha3" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sha3" in { def BCAX : SInst<"vbcax", "....", "QUcQUsQUiQUlQcQsQiQl">; def EOR3 : SInst<"veor3", "....", "QUcQUsQUiQUlQcQsQiQl">; def RAX1 : SInst<"vrax1", "...", "QUl">; @@ -1153,14 +1153,14 @@ def XAR : SInst<"vxar", "...I", "QUl">; } } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "sha3" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sha3" in { def SHA512SU0 : SInst<"vsha512su0", "...", "QUl">; def SHA512su1 : SInst<"vsha512su1", "....", "QUl">; def SHA512H : SInst<"vsha512h", "....", "QUl">; def SHA512H2 : SInst<"vsha512h2", "....", "QUl">; } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "sm4" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sm4" in { def SM3SS1 : SInst<"vsm3ss1", "....", "QUi">; def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi">; def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi">; @@ -1170,7 +1170,7 @@ def SM3PARTW1 : SInst<"vsm3partw1", "....", "QUi">; def 
SM3PARTW2 : SInst<"vsm3partw2", "....", "QUi">; } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "sm4" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sm4" in { def SM4E : SInst<"vsm4e", "...", "QUi">; def SM4EKEY : SInst<"vsm4ekey", "...", "QUi">; } @@ -1193,7 +1193,7 @@ def FCVTAS_S32 : SInst<"vcvta_s32", "S.", "fQf">; def FCVTAU_S32 : SInst<"vcvta_u32", "U.", "fQf">; } -let ArchGuard = "defined(__aarch64__)" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in { def FCVTNS_S64 : SInst<"vcvtn_s64", "S.", "dQd">; def FCVTNU_S64 : SInst<"vcvtn_u64", "U.", "dQd">; def FCVTPS_S64 : SInst<"vcvtp_s64", "S.", "dQd">; @@ -1217,7 +1217,7 @@ def FRINTZ_S32 : SInst<"vrnd", "..", "fQf">; def FRINTI_S32 : SInst<"vrndi", "..", "fQf">; } -let ArchGuard = "defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { +let ArchGuard = "(defined(__aarch64__) || defined(__arm64ec__)) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { def FRINTN_S64 : SInst<"vrndn", "..", "dQd">; def FRINTA_S64 : SInst<"vrnda", "..", "dQd">; def FRINTP_S64 : SInst<"vrndp", "..", "dQd">; @@ -1227,7 +1227,7 @@ def FRINTZ_S64 : SInst<"vrnd", "..", "dQd">; def FRINTI_S64 : SInst<"vrndi", "..", "dQd">; } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.5a" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.5a" in { def FRINT32X_S32 : SInst<"vrnd32x", "..", "fQf">; def FRINT32Z_S32 : SInst<"vrnd32z", "..", "fQf">; def FRINT64X_S32 : SInst<"vrnd64x", "..", "fQf">; @@ -1247,7 +1247,7 @@ def FMAXNM_S32 : SInst<"vmaxnm", "...", "fQf">; def FMINNM_S32 : SInst<"vminnm", "...", "fQf">; } -let ArchGuard = "defined(__aarch64__) && defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in { +let ArchGuard = "(defined(__aarch64__) || defined(__arm64ec__)) && defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in { def FMAXNM_S64 : SInst<"vmaxnm", "...", "dQd">; def FMINNM_S64 : SInst<"vminnm", "...", "dQd">; } @@ -1289,7 
+1289,7 @@ def VQTBX4_A64 : WInst<"vqtbx4", "..(4Q)U", "UccPcQUcQcQPc">; // itself during generation so, unlike all other intrinsics, this one should // include *all* types, not just additional ones. def VVREINTERPRET : REINTERPRET_CROSS_SELF<"csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk"> { - let ArchGuard = "defined(__aarch64__)"; + let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)"; let BigEndianSafe = 1; } @@ -1401,7 +1401,7 @@ def SCALAR_SQDMULH : SInst<"vqdmulh", "111", "SsSi">; // Scalar Integer Saturating Rounding Doubling Multiply Half High def SCALAR_SQRDMULH : SInst<"vqrdmulh", "111", "SsSi">; -let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a" in { //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half def SCALAR_SQRDMLAH : SInst<"vqrdmlah", "1111", "SsSi">; @@ -1409,7 +1409,7 @@ def SCALAR_SQRDMLAH : SInst<"vqrdmlah", "1111", "SsSi">; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Rounding Doubling Multiply Subtract Returning High Half def SCALAR_SQRDMLSH : SInst<"vqrdmlsh", "1111", "SsSi">; -} // ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a" +} // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a" //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Multiply Extended @@ -1651,7 +1651,7 @@ def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcS let isLaneQ = 1; } -} // ArchGuard = "defined(__aarch64__)" +} // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" // ARMv8.2-A FP16 vector intrinsics for A32/A64. 
let TargetGuard = "fullfp16" in { @@ -1775,7 +1775,7 @@ def VEXTH : WInst<"vext", "...I", "hQh">; def VREV64H : WOpInst<"vrev64", "..", "hQh", OP_REV64>; // ARMv8.2-A FP16 vector intrinsics for A64 only. -let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "fullfp16" in { // Vector rounding def FRINTIH : SInst<"vrndi", "..", "hQh">; @@ -1856,7 +1856,7 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in { def FMINNMVH : SInst<"vminnmv", "1.", "hQh">; } -let ArchGuard = "defined(__aarch64__)" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in { // Permutation def VTRN1H : SOpInst<"vtrn1", "...", "hQh", OP_TRN1>; def VZIP1H : SOpInst<"vzip1", "...", "hQh", OP_ZIP1>; @@ -1876,7 +1876,7 @@ let TargetGuard = "dotprod" in { def DOT : SInst<"vdot", "..(<<)(<<)", "iQiUiQUi">; def DOT_LANE : SOpInst<"vdot_lane", "..(<<)(<; } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "dotprod" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "dotprod" in { // Variants indexing into a 128-bit vector are A64 only. def UDOT_LANEQ : SOpInst<"vdot_laneq", "..(<<)(< { let isLaneQ = 1; @@ -1884,7 +1884,7 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "dotprod" in { } // v8.2-A FP16 fused multiply-add long instructions. 
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp16fml" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "fp16fml" in { def VFMLAL_LOW : SInst<"vfmlal_low", ">>..", "hQh">; def VFMLSL_LOW : SInst<"vfmlsl_low", ">>..", "hQh">; def VFMLAL_HIGH : SInst<"vfmlal_high", ">>..", "hQh">; @@ -1918,7 +1918,7 @@ let TargetGuard = "i8mm" in { def VUSDOT_LANE : SOpInst<"vusdot_lane", "..(<; def VSUDOT_LANE : SOpInst<"vsudot_lane", "..(<<)(<; - let ArchGuard = "defined(__aarch64__)" in { + let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in { let isLaneQ = 1 in { def VUSDOT_LANEQ : SOpInst<"vusdot_laneq", "..(<; def VSUDOT_LANEQ : SOpInst<"vsudot_laneq", "..(<<)(<; @@ -1986,7 +1986,7 @@ let TargetGuard = "v8.3a" in { defm VCMLA_F32 : VCMLA_ROTS<"f", "uint64x1_t", "uint64x2_t">; } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.3a" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.3a" in { def VCADDQ_ROT90_FP64 : SInst<"vcaddq_rot90", "QQQ", "d">; def VCADDQ_ROT270_FP64 : SInst<"vcaddq_rot270", "QQQ", "d">; @@ -2058,14 +2058,14 @@ let TargetGuard = "bf16" in { def SCALAR_CVT_F32_BF16 : SOpInst<"vcvtah_f32", "(1F>)(1!)", "b", OP_CVT_F32_BF16>; } -let ArchGuard = "!defined(__aarch64__)", TargetGuard = "bf16" in { +let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)", TargetGuard = "bf16" in { def VCVT_BF16_F32_A32_INTERNAL : WInst<"__a32_vcvt_bf16", "BQ", "f">; def VCVT_BF16_F32_A32 : SOpInst<"vcvt_bf16", "BQ", "f", OP_VCVT_BF16_F32_A32>; def VCVT_LOW_BF16_F32_A32 : SOpInst<"vcvt_low_bf16", "BQ", "Qf", OP_VCVT_BF16_F32_LO_A32>; def VCVT_HIGH_BF16_F32_A32 : SOpInst<"vcvt_high_bf16", "BBQ", "Qf", OP_VCVT_BF16_F32_HI_A32>; } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "bf16" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "bf16" in { def VCVT_LOW_BF16_F32_A64_INTERNAL : WInst<"__a64_vcvtq_low_bf16", "BQ", "Hf">; def 
VCVT_LOW_BF16_F32_A64 : SOpInst<"vcvt_low_bf16", "BQ", "Qf", OP_VCVT_BF16_F32_LO_A64>; def VCVT_HIGH_BF16_F32_A64 : SInst<"vcvt_high_bf16", "BBQ", "Qf">; @@ -2077,14 +2077,14 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "bf16" in { def COPYQ_LANEQ_BF16 : IOpInst<"vcopy_laneq", "..I.I", "Qb", OP_COPY_LN>; } -let ArchGuard = "!defined(__aarch64__)", TargetGuard = "bf16" in { +let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)", TargetGuard = "bf16" in { let BigEndianSafe = 1 in { defm VREINTERPRET_BF : REINTERPRET_CROSS_TYPES< "csilUcUsUiUlhfPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQPcQPsQPl", "bQb">; } } -let ArchGuard = "defined(__aarch64__)", TargetGuard = "bf16" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "bf16" in { let BigEndianSafe = 1 in { defm VVREINTERPRET_BF : REINTERPRET_CROSS_TYPES< "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", "bQb">; @@ -2092,7 +2092,7 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "bf16" in { } // v8.9a/v9.4a LRCPC3 intrinsics -let ArchGuard = "defined(__aarch64__)", TargetGuard = "rcpc3" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "rcpc3" in { def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl">; def VSTL1_LANE : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl">; } diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 5950dd74cfe83c..23b268126de4e0 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -18,6 +18,7 @@ #include "clang/Lex/CodeCompletionHandler.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/Sema.h" +#include "clang/Sema/SemaOpenMP.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Frontend/OpenMP/OMPContext.h" #include "llvm/Support/SaveAndRestore.h" @@ -2537,7 +2538,7 @@ class Parser : public CodeCompletionHandler { /// Returns true for declaration, false for expression. 
bool isForInitDeclaration() { if (getLangOpts().OpenMP) - Actions.startOpenMPLoop(); + Actions.OpenMP().startOpenMPLoop(); if (getLangOpts().CPlusPlus) return Tok.is(tok::kw_using) || isCXXSimpleDeclaration(/*AllowForRangeDecl=*/true); @@ -3396,7 +3397,7 @@ class Parser : public CodeCompletionHandler { SourceLocation Loc); /// Parse clauses for '#pragma omp [begin] declare target'. - void ParseOMPDeclareTargetClauses(Sema::DeclareTargetContextInfo &DTCI); + void ParseOMPDeclareTargetClauses(SemaOpenMP::DeclareTargetContextInfo &DTCI); /// Parse '#pragma omp end declare target'. void ParseOMPEndDeclareTargetDirective(OpenMPDirectiveKind BeginDKind, @@ -3486,7 +3487,7 @@ class Parser : public CodeCompletionHandler { /// Parses indirect clause /// \param ParseOnly true to skip the clause's semantic actions and return // false; - bool ParseOpenMPIndirectClause(Sema::DeclareTargetContextInfo &DTCI, + bool ParseOpenMPIndirectClause(SemaOpenMP::DeclareTargetContextInfo &DTCI, bool ParseOnly); /// Parses clause with a single expression and an additional argument /// of a kind \a Kind. @@ -3556,12 +3557,12 @@ class Parser : public CodeCompletionHandler { /// Parses a reserved locator like 'omp_all_memory'. bool ParseOpenMPReservedLocator(OpenMPClauseKind Kind, - Sema::OpenMPVarListDataTy &Data, + SemaOpenMP::OpenMPVarListDataTy &Data, const LangOptions &LangOpts); /// Parses clauses with list. bool ParseOpenMPVarList(OpenMPDirectiveKind DKind, OpenMPClauseKind Kind, SmallVectorImpl &Vars, - Sema::OpenMPVarListDataTy &Data); + SemaOpenMP::OpenMPVarListDataTy &Data); bool ParseUnqualifiedId(CXXScopeSpec &SS, ParsedType ObjectType, bool ObjectHadErrors, bool EnteringContext, bool AllowDestructorName, bool AllowConstructorName, @@ -3569,11 +3570,11 @@ class Parser : public CodeCompletionHandler { SourceLocation *TemplateKWLoc, UnqualifiedId &Result); /// Parses the mapper modifier in map, to, and from clauses. 
- bool parseMapperModifier(Sema::OpenMPVarListDataTy &Data); + bool parseMapperModifier(SemaOpenMP::OpenMPVarListDataTy &Data); /// Parses map-type-modifiers in map clause. /// map([ [map-type-modifier[,] [map-type-modifier[,] ...] map-type : ] list) /// where, map-type-modifier ::= always | close | mapper(mapper-identifier) - bool parseMapTypeModifiers(Sema::OpenMPVarListDataTy &Data); + bool parseMapTypeModifiers(SemaOpenMP::OpenMPVarListDataTy &Data); //===--------------------------------------------------------------------===// // OpenACC Parsing. diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index d93ac7863b721d..77150a318ee47d 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -26,14 +26,12 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprConcepts.h" #include "clang/AST/ExprObjC.h" -#include "clang/AST/ExprOpenMP.h" #include "clang/AST/ExternalASTSource.h" #include "clang/AST/LocInfoType.h" #include "clang/AST/MangleNumberingContext.h" #include "clang/AST/NSAPI.h" #include "clang/AST/PrettyPrinter.h" #include "clang/AST/StmtCXX.h" -#include "clang/AST/StmtOpenMP.h" #include "clang/AST/TypeLoc.h" #include "clang/AST/TypeOrdering.h" #include "clang/Basic/BitmaskEnum.h" @@ -43,7 +41,6 @@ #include "clang/Basic/ExpressionTraits.h" #include "clang/Basic/Module.h" #include "clang/Basic/OpenCLOptions.h" -#include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/PragmaKinds.h" #include "clang/Basic/Specifiers.h" #include "clang/Basic/TemplateKinds.h" @@ -58,6 +55,7 @@ #include "clang/Sema/Scope.h" #include "clang/Sema/SemaBase.h" #include "clang/Sema/SemaConcept.h" +#include "clang/Sema/SemaDiagnostic.h" #include "clang/Sema/TypoCorrection.h" #include "clang/Sema/Weak.h" #include "llvm/ADT/ArrayRef.h" @@ -68,7 +66,6 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/TinyPtrVector.h" -#include "llvm/Frontend/OpenMP/OMPConstants.h" #include #include #include 
@@ -167,12 +164,6 @@ class ObjCMessageExpr; class ObjCMethodDecl; class ObjCPropertyDecl; class ObjCProtocolDecl; -class OMPThreadPrivateDecl; -class OMPRequiresDecl; -class OMPDeclareReductionDecl; -class OMPDeclareSimdDecl; -class OMPClause; -struct OMPVarListLocTy; struct OverloadCandidate; enum class OverloadCandidateParamOrder : char; enum OverloadCandidateRewriteKind : unsigned; @@ -187,6 +178,7 @@ class QualType; class SemaCUDA; class SemaHLSL; class SemaOpenACC; +class SemaOpenMP; class SemaSYCL; class StandardConversionSequence; class Stmt; @@ -480,7 +472,6 @@ class Sema final : public SemaBase { // 35. Code Completion (SemaCodeComplete.cpp) // 36. FixIt Helpers (SemaFixItUtils.cpp) // 37. Name Lookup for RISC-V Vector Intrinsic (SemaRISCVVectorLookup.cpp) - // 38. OpenMP Directives and Clauses (SemaOpenMP.cpp) /// \name Semantic Analysis /// Implementations are in Sema.cpp @@ -997,6 +988,11 @@ class Sema final : public SemaBase { return *OpenACCPtr; } + SemaOpenMP &OpenMP() { + assert(OpenMPPtr && "SemaOpenMP is dead"); + return *OpenMPPtr; + } + SemaSYCL &SYCL() { assert(SYCLPtr); return *SYCLPtr; @@ -1035,6 +1031,7 @@ class Sema final : public SemaBase { std::unique_ptr CUDAPtr; std::unique_ptr HLSLPtr; std::unique_ptr OpenACCPtr; + std::unique_ptr OpenMPPtr; std::unique_ptr SYCLPtr; ///@} @@ -3431,7 +3428,8 @@ class Sema final : public SemaBase { bool ConstexprSupported, bool CLinkageMayDiffer); /// type checking declaration initializers (C99 6.7.8) - bool CheckForConstantInitializer(Expr *e, QualType t); + bool CheckForConstantInitializer( + Expr *Init, unsigned DiagID = diag::err_init_element_not_constant); QualType deduceVarTypeFromInitializer(VarDecl *VDecl, DeclarationName Name, QualType Type, TypeSourceInfo *TSI, @@ -3443,14 +3441,6 @@ class Sema final : public SemaBase { sema::LambdaScopeInfo *RebuildLambdaScopeInfo(CXXMethodDecl *CallOperator); - /// The declarator \p D defines a function in the scope \p S which is nested - /// in an `omp 
begin/end declare variant` scope. In this method we create a - /// declaration for \p D and rename \p D according to the OpenMP context - /// selector of the surrounding scope. Return all base functions in \p Bases. - void ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( - Scope *S, Declarator &D, MultiTemplateParamsArg TemplateParameterLists, - SmallVectorImpl &Bases); - // Heuristically tells if the function is `get_return_object` member of a // coroutine promise_type by matching the function name. static bool CanBeGetReturnObject(const FunctionDecl *FD); @@ -5533,32 +5523,6 @@ class Sema final : public SemaBase { Expr *ColumnIdx, SourceLocation RBLoc); - ExprResult ActOnOMPArraySectionExpr(Expr *Base, SourceLocation LBLoc, - Expr *LowerBound, - SourceLocation ColonLocFirst, - SourceLocation ColonLocSecond, - Expr *Length, Expr *Stride, - SourceLocation RBLoc); - ExprResult ActOnOMPArrayShapingExpr(Expr *Base, SourceLocation LParenLoc, - SourceLocation RParenLoc, - ArrayRef Dims, - ArrayRef Brackets); - - /// Data structure for iterator expression. 
- struct OMPIteratorData { - IdentifierInfo *DeclIdent = nullptr; - SourceLocation DeclIdentLoc; - ParsedType Type; - OMPIteratorExpr::IteratorRange Range; - SourceLocation AssignLoc; - SourceLocation ColonLoc; - SourceLocation SecColonLoc; - }; - - ExprResult ActOnOMPIteratorExpr(Scope *S, SourceLocation IteratorKwLoc, - SourceLocation LLoc, SourceLocation RLoc, - ArrayRef Data); - bool ConvertArgumentsForCall(CallExpr *Call, Expr *Fn, FunctionDecl *FDecl, const FunctionProtoType *Proto, ArrayRef Args, SourceLocation RParenLoc, @@ -12863,1373 +12827,6 @@ class Sema final : public SemaBase { std::unique_ptr RVIntrinsicManager; ///@} - - // - // - // ------------------------------------------------------------------------- - // - // - - /// \name OpenMP Directives and Clauses - /// Implementations are in SemaOpenMP.cpp - ///@{ - -public: - /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current - /// context is "used as device code". - /// - /// - If CurContext is a `declare target` function or it is known that the - /// function is emitted for the device, emits the diagnostics immediately. - /// - If CurContext is a non-`declare target` function and we are compiling - /// for the device, creates a diagnostic which is emitted if and when we - /// realize that the function will be codegen'ed. - /// - /// Example usage: - /// - /// // Variable-length arrays are not allowed in NVPTX device code. - /// if (diagIfOpenMPDeviceCode(Loc, diag::err_vla_unsupported)) - /// return ExprError(); - /// // Otherwise, continue parsing as normal. - SemaDiagnosticBuilder diagIfOpenMPDeviceCode(SourceLocation Loc, - unsigned DiagID, - const FunctionDecl *FD); - - /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current - /// context is "used as host code". - /// - /// - If CurContext is a `declare target` function or it is known that the - /// function is emitted for the host, emits the diagnostics immediately. 
- /// - If CurContext is a non-host function, just ignore it. - /// - /// Example usage: - /// - /// // Variable-length arrays are not allowed in NVPTX device code. - /// if (diagIfOpenMPHostode(Loc, diag::err_vla_unsupported)) - /// return ExprError(); - /// // Otherwise, continue parsing as normal. - SemaDiagnosticBuilder diagIfOpenMPHostCode(SourceLocation Loc, - unsigned DiagID, - const FunctionDecl *FD); - - /// Register \p D as specialization of all base functions in \p Bases in the - /// current `omp begin/end declare variant` scope. - void ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope( - Decl *D, SmallVectorImpl &Bases); - - /// Act on \p D, a function definition inside of an `omp [begin/end] assumes`. - void ActOnFinishedFunctionDefinitionInOpenMPAssumeScope(Decl *D); - - /// Can we exit an OpenMP declare variant scope at the moment. - bool isInOpenMPDeclareVariantScope() const { - return !OMPDeclareVariantScopes.empty(); - } - - ExprResult - VerifyPositiveIntegerConstantInClause(Expr *Op, OpenMPClauseKind CKind, - bool StrictlyPositive = true, - bool SuppressExprDiags = false); - - /// Given the potential call expression \p Call, determine if there is a - /// specialization via the OpenMP declare variant mechanism available. If - /// there is, return the specialized call expression, otherwise return the - /// original \p Call. - ExprResult ActOnOpenMPCall(ExprResult Call, Scope *Scope, - SourceLocation LParenLoc, MultiExprArg ArgExprs, - SourceLocation RParenLoc, Expr *ExecConfig); - - /// Handle a `omp begin declare variant`. - void ActOnOpenMPBeginDeclareVariant(SourceLocation Loc, OMPTraitInfo &TI); - - /// Handle a `omp end declare variant`. - void ActOnOpenMPEndDeclareVariant(); - - /// Function tries to capture lambda's captured variables in the OpenMP region - /// before the original lambda is captured. 
- void tryCaptureOpenMPLambdas(ValueDecl *V); - - /// Return true if the provided declaration \a VD should be captured by - /// reference. - /// \param Level Relative level of nested OpenMP construct for that the check - /// is performed. - /// \param OpenMPCaptureLevel Capture level within an OpenMP construct. - bool isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, - unsigned OpenMPCaptureLevel) const; - - /// Check if the specified variable is used in one of the private - /// clauses (private, firstprivate, lastprivate, reduction etc.) in OpenMP - /// constructs. - VarDecl *isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo = false, - unsigned StopAt = 0); - - /// The member expression(this->fd) needs to be rebuilt in the template - /// instantiation to generate private copy for OpenMP when default - /// clause is used. The function will return true if default - /// cluse is used. - bool isOpenMPRebuildMemberExpr(ValueDecl *D); - - ExprResult getOpenMPCapturedExpr(VarDecl *Capture, ExprValueKind VK, - ExprObjectKind OK, SourceLocation Loc); - - /// If the current region is a loop-based region, mark the start of the loop - /// construct. - void startOpenMPLoop(); - - /// If the current region is a range loop-based region, mark the start of the - /// loop construct. - void startOpenMPCXXRangeFor(); - - /// Check if the specified variable is used in 'private' clause. - /// \param Level Relative level of nested OpenMP construct for that the check - /// is performed. - OpenMPClauseKind isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, - unsigned CapLevel) const; - - /// Sets OpenMP capture kind (OMPC_private, OMPC_firstprivate, OMPC_map etc.) - /// for \p FD based on DSA for the provided corresponding captured declaration - /// \p D. - void setOpenMPCaptureKind(FieldDecl *FD, const ValueDecl *D, unsigned Level); - - /// Check if the specified variable is captured by 'target' directive. 
- /// \param Level Relative level of nested OpenMP construct for that the check - /// is performed. - bool isOpenMPTargetCapturedDecl(const ValueDecl *D, unsigned Level, - unsigned CaptureLevel) const; - - /// Check if the specified global variable must be captured by outer capture - /// regions. - /// \param Level Relative level of nested OpenMP construct for that - /// the check is performed. - bool isOpenMPGlobalCapturedDecl(ValueDecl *D, unsigned Level, - unsigned CaptureLevel) const; - - ExprResult PerformOpenMPImplicitIntegerConversion(SourceLocation OpLoc, - Expr *Op); - /// Called on start of new data sharing attribute block. - void StartOpenMPDSABlock(OpenMPDirectiveKind K, - const DeclarationNameInfo &DirName, Scope *CurScope, - SourceLocation Loc); - /// Start analysis of clauses. - void StartOpenMPClause(OpenMPClauseKind K); - /// End analysis of clauses. - void EndOpenMPClause(); - /// Called on end of data sharing attribute block. - void EndOpenMPDSABlock(Stmt *CurDirective); - - /// Check if the current region is an OpenMP loop region and if it is, - /// mark loop control variable, used in \p Init for loop initialization, as - /// private by default. - /// \param Init First part of the for loop. - void ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init); - - /// Called on well-formed '\#pragma omp metadirective' after parsing - /// of the associated statement. - StmtResult ActOnOpenMPMetaDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - - // OpenMP directives and clauses. - /// Called on correct id-expression from the '#pragma omp - /// threadprivate'. - ExprResult ActOnOpenMPIdExpression(Scope *CurScope, CXXScopeSpec &ScopeSpec, - const DeclarationNameInfo &Id, - OpenMPDirectiveKind Kind); - /// Called on well-formed '#pragma omp threadprivate'. 
- DeclGroupPtrTy ActOnOpenMPThreadprivateDirective(SourceLocation Loc, - ArrayRef VarList); - /// Builds a new OpenMPThreadPrivateDecl and checks its correctness. - OMPThreadPrivateDecl *CheckOMPThreadPrivateDecl(SourceLocation Loc, - ArrayRef VarList); - /// Called on well-formed '#pragma omp allocate'. - DeclGroupPtrTy ActOnOpenMPAllocateDirective(SourceLocation Loc, - ArrayRef VarList, - ArrayRef Clauses, - DeclContext *Owner = nullptr); - - /// Called on well-formed '#pragma omp [begin] assume[s]'. - void ActOnOpenMPAssumesDirective(SourceLocation Loc, - OpenMPDirectiveKind DKind, - ArrayRef Assumptions, - bool SkippedClauses); - - /// Check if there is an active global `omp begin assumes` directive. - bool isInOpenMPAssumeScope() const { return !OMPAssumeScoped.empty(); } - - /// Check if there is an active global `omp assumes` directive. - bool hasGlobalOpenMPAssumes() const { return !OMPAssumeGlobal.empty(); } - - /// Called on well-formed '#pragma omp end assumes'. - void ActOnOpenMPEndAssumesDirective(); - - /// Called on well-formed '#pragma omp requires'. - DeclGroupPtrTy ActOnOpenMPRequiresDirective(SourceLocation Loc, - ArrayRef ClauseList); - /// Check restrictions on Requires directive - OMPRequiresDecl *CheckOMPRequiresDecl(SourceLocation Loc, - ArrayRef Clauses); - /// Check if the specified type is allowed to be used in 'omp declare - /// reduction' construct. - QualType ActOnOpenMPDeclareReductionType(SourceLocation TyLoc, - TypeResult ParsedType); - /// Called on start of '#pragma omp declare reduction'. - DeclGroupPtrTy ActOnOpenMPDeclareReductionDirectiveStart( - Scope *S, DeclContext *DC, DeclarationName Name, - ArrayRef> ReductionTypes, - AccessSpecifier AS, Decl *PrevDeclInScope = nullptr); - /// Initialize declare reduction construct initializer. - void ActOnOpenMPDeclareReductionCombinerStart(Scope *S, Decl *D); - /// Finish current declare reduction construct initializer. 
- void ActOnOpenMPDeclareReductionCombinerEnd(Decl *D, Expr *Combiner); - /// Initialize declare reduction construct initializer. - /// \return omp_priv variable. - VarDecl *ActOnOpenMPDeclareReductionInitializerStart(Scope *S, Decl *D); - /// Finish current declare reduction construct initializer. - void ActOnOpenMPDeclareReductionInitializerEnd(Decl *D, Expr *Initializer, - VarDecl *OmpPrivParm); - /// Called at the end of '#pragma omp declare reduction'. - DeclGroupPtrTy ActOnOpenMPDeclareReductionDirectiveEnd( - Scope *S, DeclGroupPtrTy DeclReductions, bool IsValid); - - /// Check variable declaration in 'omp declare mapper' construct. - TypeResult ActOnOpenMPDeclareMapperVarDecl(Scope *S, Declarator &D); - /// Check if the specified type is allowed to be used in 'omp declare - /// mapper' construct. - QualType ActOnOpenMPDeclareMapperType(SourceLocation TyLoc, - TypeResult ParsedType); - /// Called on start of '#pragma omp declare mapper'. - DeclGroupPtrTy ActOnOpenMPDeclareMapperDirective( - Scope *S, DeclContext *DC, DeclarationName Name, QualType MapperType, - SourceLocation StartLoc, DeclarationName VN, AccessSpecifier AS, - Expr *MapperVarRef, ArrayRef Clauses, - Decl *PrevDeclInScope = nullptr); - /// Build the mapper variable of '#pragma omp declare mapper'. - ExprResult ActOnOpenMPDeclareMapperDirectiveVarDecl(Scope *S, - QualType MapperType, - SourceLocation StartLoc, - DeclarationName VN); - void ActOnOpenMPIteratorVarDecl(VarDecl *VD); - bool isOpenMPDeclareMapperVarDeclAllowed(const VarDecl *VD) const; - const ValueDecl *getOpenMPDeclareMapperVarName() const; - - struct DeclareTargetContextInfo { - struct MapInfo { - OMPDeclareTargetDeclAttr::MapTypeTy MT; - SourceLocation Loc; - }; - /// Explicitly listed variables and functions in a 'to' or 'link' clause. - llvm::DenseMap ExplicitlyMapped; - - /// The 'device_type' as parsed from the clause. 
- OMPDeclareTargetDeclAttr::DevTypeTy DT = OMPDeclareTargetDeclAttr::DT_Any; - - /// The directive kind, `begin declare target` or `declare target`. - OpenMPDirectiveKind Kind; - - /// The directive with indirect clause. - std::optional Indirect; - - /// The directive location. - SourceLocation Loc; - - DeclareTargetContextInfo(OpenMPDirectiveKind Kind, SourceLocation Loc) - : Kind(Kind), Loc(Loc) {} - }; - - /// Called on the start of target region i.e. '#pragma omp declare target'. - bool ActOnStartOpenMPDeclareTargetContext(DeclareTargetContextInfo &DTCI); - - /// Called at the end of target region i.e. '#pragma omp end declare target'. - const DeclareTargetContextInfo ActOnOpenMPEndDeclareTargetDirective(); - - /// Called once a target context is completed, that can be when a - /// '#pragma omp end declare target' was encountered or when a - /// '#pragma omp declare target' without declaration-definition-seq was - /// encountered. - void ActOnFinishedOpenMPDeclareTargetContext(DeclareTargetContextInfo &DTCI); - - /// Report unterminated 'omp declare target' or 'omp begin declare target' at - /// the end of a compilation unit. - void DiagnoseUnterminatedOpenMPDeclareTarget(); - - /// Searches for the provided declaration name for OpenMP declare target - /// directive. - NamedDecl *lookupOpenMPDeclareTargetName(Scope *CurScope, - CXXScopeSpec &ScopeSpec, - const DeclarationNameInfo &Id); - - /// Called on correct id-expression from the '#pragma omp declare target'. - void ActOnOpenMPDeclareTargetName(NamedDecl *ND, SourceLocation Loc, - OMPDeclareTargetDeclAttr::MapTypeTy MT, - DeclareTargetContextInfo &DTCI); - - /// Check declaration inside target region. - void - checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D, - SourceLocation IdLoc = SourceLocation()); - - /// Adds OMPDeclareTargetDeclAttr to referenced variables in declare target - /// directive. 
- void ActOnOpenMPDeclareTargetInitializer(Decl *D); - - /// Finishes analysis of the deferred functions calls that may be declared as - /// host/nohost during device/host compilation. - void finalizeOpenMPDelayedAnalysis(const FunctionDecl *Caller, - const FunctionDecl *Callee, - SourceLocation Loc); - - /// Return true if currently in OpenMP task with untied clause context. - bool isInOpenMPTaskUntiedContext() const; - - /// Return true inside OpenMP declare target region. - bool isInOpenMPDeclareTargetContext() const { - return !DeclareTargetNesting.empty(); - } - /// Return true inside OpenMP target region. - bool isInOpenMPTargetExecutionDirective() const; - - /// Return the number of captured regions created for an OpenMP directive. - static int getOpenMPCaptureLevels(OpenMPDirectiveKind Kind); - - /// Initialization of captured region for OpenMP region. - void ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope); - - /// Called for syntactical loops (ForStmt or CXXForRangeStmt) associated to - /// an OpenMP loop directive. - StmtResult ActOnOpenMPCanonicalLoop(Stmt *AStmt); - - /// Process a canonical OpenMP loop nest that can either be a canonical - /// literal loop (ForStmt or CXXForRangeStmt), or the generated loop of an - /// OpenMP loop transformation construct. - StmtResult ActOnOpenMPLoopnest(Stmt *AStmt); - - /// End of OpenMP region. - /// - /// \param S Statement associated with the current OpenMP region. - /// \param Clauses List of clauses for the current OpenMP region. - /// - /// \returns Statement for finished OpenMP region. 
- StmtResult ActOnOpenMPRegionEnd(StmtResult S, ArrayRef Clauses); - StmtResult ActOnOpenMPExecutableDirective( - OpenMPDirectiveKind Kind, const DeclarationNameInfo &DirName, - OpenMPDirectiveKind CancelRegion, ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, - OpenMPDirectiveKind PrevMappedDirective = llvm::omp::OMPD_unknown); - /// Called on well-formed '\#pragma omp parallel' after parsing - /// of the associated statement. - StmtResult ActOnOpenMPParallelDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - using VarsWithInheritedDSAType = - llvm::SmallDenseMap; - /// Called on well-formed '\#pragma omp simd' after parsing - /// of the associated statement. - StmtResult - ActOnOpenMPSimdDirective(ArrayRef Clauses, Stmt *AStmt, - SourceLocation StartLoc, SourceLocation EndLoc, - VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '#pragma omp tile' after parsing of its clauses and - /// the associated statement. - StmtResult ActOnOpenMPTileDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '#pragma omp unroll' after parsing of its clauses - /// and the associated statement. - StmtResult ActOnOpenMPUnrollDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp for' after parsing - /// of the associated statement. - StmtResult - ActOnOpenMPForDirective(ArrayRef Clauses, Stmt *AStmt, - SourceLocation StartLoc, SourceLocation EndLoc, - VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp for simd' after parsing - /// of the associated statement. 
- StmtResult - ActOnOpenMPForSimdDirective(ArrayRef Clauses, Stmt *AStmt, - SourceLocation StartLoc, SourceLocation EndLoc, - VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp sections' after parsing - /// of the associated statement. - StmtResult ActOnOpenMPSectionsDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp section' after parsing of the - /// associated statement. - StmtResult ActOnOpenMPSectionDirective(Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp scope' after parsing of the - /// associated statement. - StmtResult ActOnOpenMPScopeDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp single' after parsing of the - /// associated statement. - StmtResult ActOnOpenMPSingleDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp master' after parsing of the - /// associated statement. - StmtResult ActOnOpenMPMasterDirective(Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp critical' after parsing of the - /// associated statement. - StmtResult ActOnOpenMPCriticalDirective(const DeclarationNameInfo &DirName, - ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp parallel for' after parsing - /// of the associated statement. - StmtResult ActOnOpenMPParallelForDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp parallel for simd' after - /// parsing of the associated statement. 
- StmtResult ActOnOpenMPParallelForSimdDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp parallel master' after - /// parsing of the associated statement. - StmtResult ActOnOpenMPParallelMasterDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp parallel masked' after - /// parsing of the associated statement. - StmtResult ActOnOpenMPParallelMaskedDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp parallel sections' after - /// parsing of the associated statement. - StmtResult ActOnOpenMPParallelSectionsDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp task' after parsing of the - /// associated statement. - StmtResult ActOnOpenMPTaskDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp taskyield'. - StmtResult ActOnOpenMPTaskyieldDirective(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp error'. - /// Error direcitive is allowed in both declared and excutable contexts. - /// Adding InExContext to identify which context is called from. - StmtResult ActOnOpenMPErrorDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc, - bool InExContext = true); - /// Called on well-formed '\#pragma omp barrier'. - StmtResult ActOnOpenMPBarrierDirective(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp taskwait'. - StmtResult ActOnOpenMPTaskwaitDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp taskgroup'. 
- StmtResult ActOnOpenMPTaskgroupDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp flush'. - StmtResult ActOnOpenMPFlushDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp depobj'. - StmtResult ActOnOpenMPDepobjDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp scan'. - StmtResult ActOnOpenMPScanDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp ordered' after parsing of the - /// associated statement. - StmtResult ActOnOpenMPOrderedDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp atomic' after parsing of the - /// associated statement. - StmtResult ActOnOpenMPAtomicDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp target' after parsing of the - /// associated statement. - StmtResult ActOnOpenMPTargetDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp target data' after parsing of - /// the associated statement. - StmtResult ActOnOpenMPTargetDataDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp target enter data' after - /// parsing of the associated statement. - StmtResult ActOnOpenMPTargetEnterDataDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc, - Stmt *AStmt); - /// Called on well-formed '\#pragma omp target exit data' after - /// parsing of the associated statement. 
- StmtResult ActOnOpenMPTargetExitDataDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc, - Stmt *AStmt); - /// Called on well-formed '\#pragma omp target parallel' after - /// parsing of the associated statement. - StmtResult ActOnOpenMPTargetParallelDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp target parallel for' after - /// parsing of the associated statement. - StmtResult ActOnOpenMPTargetParallelForDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp teams' after parsing of the - /// associated statement. - StmtResult ActOnOpenMPTeamsDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp teams loop' after parsing of the - /// associated statement. - StmtResult ActOnOpenMPTeamsGenericLoopDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp target teams loop' after parsing of - /// the associated statement. - StmtResult ActOnOpenMPTargetTeamsGenericLoopDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp parallel loop' after parsing of the - /// associated statement. - StmtResult ActOnOpenMPParallelGenericLoopDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp target parallel loop' after parsing - /// of the associated statement. 
- StmtResult ActOnOpenMPTargetParallelGenericLoopDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp cancellation point'. - StmtResult - ActOnOpenMPCancellationPointDirective(SourceLocation StartLoc, - SourceLocation EndLoc, - OpenMPDirectiveKind CancelRegion); - /// Called on well-formed '\#pragma omp cancel'. - StmtResult ActOnOpenMPCancelDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc, - OpenMPDirectiveKind CancelRegion); - /// Called on well-formed '\#pragma omp taskloop' after parsing of the - /// associated statement. - StmtResult - ActOnOpenMPTaskLoopDirective(ArrayRef Clauses, Stmt *AStmt, - SourceLocation StartLoc, SourceLocation EndLoc, - VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp taskloop simd' after parsing of - /// the associated statement. - StmtResult ActOnOpenMPTaskLoopSimdDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp master taskloop' after parsing of the - /// associated statement. - StmtResult ActOnOpenMPMasterTaskLoopDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp master taskloop simd' after parsing of - /// the associated statement. - StmtResult ActOnOpenMPMasterTaskLoopSimdDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp parallel master taskloop' after - /// parsing of the associated statement. 
- StmtResult ActOnOpenMPParallelMasterTaskLoopDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp parallel master taskloop simd' after - /// parsing of the associated statement. - StmtResult ActOnOpenMPParallelMasterTaskLoopSimdDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp masked taskloop' after parsing of the - /// associated statement. - StmtResult ActOnOpenMPMaskedTaskLoopDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp masked taskloop simd' after parsing of - /// the associated statement. - StmtResult ActOnOpenMPMaskedTaskLoopSimdDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp parallel masked taskloop' after - /// parsing of the associated statement. - StmtResult ActOnOpenMPParallelMaskedTaskLoopDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp parallel masked taskloop simd' after - /// parsing of the associated statement. - StmtResult ActOnOpenMPParallelMaskedTaskLoopSimdDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp distribute' after parsing - /// of the associated statement. 
- StmtResult - ActOnOpenMPDistributeDirective(ArrayRef Clauses, Stmt *AStmt, - SourceLocation StartLoc, SourceLocation EndLoc, - VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp target update'. - StmtResult ActOnOpenMPTargetUpdateDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc, - Stmt *AStmt); - /// Called on well-formed '\#pragma omp distribute parallel for' after - /// parsing of the associated statement. - StmtResult ActOnOpenMPDistributeParallelForDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp distribute parallel for simd' - /// after parsing of the associated statement. - StmtResult ActOnOpenMPDistributeParallelForSimdDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp distribute simd' after - /// parsing of the associated statement. - StmtResult ActOnOpenMPDistributeSimdDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp target parallel for simd' after - /// parsing of the associated statement. - StmtResult ActOnOpenMPTargetParallelForSimdDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp target simd' after parsing of - /// the associated statement. - StmtResult - ActOnOpenMPTargetSimdDirective(ArrayRef Clauses, Stmt *AStmt, - SourceLocation StartLoc, SourceLocation EndLoc, - VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp teams distribute' after parsing of - /// the associated statement. 
- StmtResult ActOnOpenMPTeamsDistributeDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp teams distribute simd' after parsing - /// of the associated statement. - StmtResult ActOnOpenMPTeamsDistributeSimdDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp teams distribute parallel for simd' - /// after parsing of the associated statement. - StmtResult ActOnOpenMPTeamsDistributeParallelForSimdDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp teams distribute parallel for' - /// after parsing of the associated statement. - StmtResult ActOnOpenMPTeamsDistributeParallelForDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp target teams' after parsing of the - /// associated statement. - StmtResult ActOnOpenMPTargetTeamsDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp target teams distribute' after parsing - /// of the associated statement. - StmtResult ActOnOpenMPTargetTeamsDistributeDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp target teams distribute parallel for' - /// after parsing of the associated statement. 
- StmtResult ActOnOpenMPTargetTeamsDistributeParallelForDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp target teams distribute parallel for - /// simd' after parsing of the associated statement. - StmtResult ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp target teams distribute simd' after - /// parsing of the associated statement. - StmtResult ActOnOpenMPTargetTeamsDistributeSimdDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - /// Called on well-formed '\#pragma omp interop'. - StmtResult ActOnOpenMPInteropDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp dispatch' after parsing of the - // /associated statement. - StmtResult ActOnOpenMPDispatchDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed '\#pragma omp masked' after parsing of the - // /associated statement. - StmtResult ActOnOpenMPMaskedDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc); - - /// Called on well-formed '\#pragma omp loop' after parsing of the - /// associated statement. - StmtResult ActOnOpenMPGenericLoopDirective( - ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); - - /// Checks correctness of linear modifiers. - bool CheckOpenMPLinearModifier(OpenMPLinearClauseKind LinKind, - SourceLocation LinLoc); - /// Checks that the specified declaration matches requirements for the linear - /// decls. 
- bool CheckOpenMPLinearDecl(const ValueDecl *D, SourceLocation ELoc, - OpenMPLinearClauseKind LinKind, QualType Type, - bool IsDeclareSimd = false); - - /// Called on well-formed '\#pragma omp declare simd' after parsing of - /// the associated method/function. - DeclGroupPtrTy ActOnOpenMPDeclareSimdDirective( - DeclGroupPtrTy DG, OMPDeclareSimdDeclAttr::BranchStateTy BS, - Expr *Simdlen, ArrayRef Uniforms, ArrayRef Aligneds, - ArrayRef Alignments, ArrayRef Linears, - ArrayRef LinModifiers, ArrayRef Steps, SourceRange SR); - - /// Checks '\#pragma omp declare variant' variant function and original - /// functions after parsing of the associated method/function. - /// \param DG Function declaration to which declare variant directive is - /// applied to. - /// \param VariantRef Expression that references the variant function, which - /// must be used instead of the original one, specified in \p DG. - /// \param TI The trait info object representing the match clause. - /// \param NumAppendArgs The number of omp_interop_t arguments to account for - /// in checking. - /// \returns std::nullopt, if the function/variant function are not compatible - /// with the pragma, pair of original function/variant ref expression - /// otherwise. - std::optional> - checkOpenMPDeclareVariantFunction(DeclGroupPtrTy DG, Expr *VariantRef, - OMPTraitInfo &TI, unsigned NumAppendArgs, - SourceRange SR); - - /// Called on well-formed '\#pragma omp declare variant' after parsing of - /// the associated method/function. - /// \param FD Function declaration to which declare variant directive is - /// applied to. - /// \param VariantRef Expression that references the variant function, which - /// must be used instead of the original one, specified in \p DG. - /// \param TI The context traits associated with the function variant. - /// \param AdjustArgsNothing The list of 'nothing' arguments. - /// \param AdjustArgsNeedDevicePtr The list of 'need_device_ptr' arguments. 
- /// \param AppendArgs The list of 'append_args' arguments. - /// \param AdjustArgsLoc The Location of an 'adjust_args' clause. - /// \param AppendArgsLoc The Location of an 'append_args' clause. - /// \param SR The SourceRange of the 'declare variant' directive. - void ActOnOpenMPDeclareVariantDirective( - FunctionDecl *FD, Expr *VariantRef, OMPTraitInfo &TI, - ArrayRef AdjustArgsNothing, - ArrayRef AdjustArgsNeedDevicePtr, - ArrayRef AppendArgs, SourceLocation AdjustArgsLoc, - SourceLocation AppendArgsLoc, SourceRange SR); - - OMPClause *ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'allocator' clause. - OMPClause *ActOnOpenMPAllocatorClause(Expr *Allocator, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'if' clause. - OMPClause *ActOnOpenMPIfClause(OpenMPDirectiveKind NameModifier, - Expr *Condition, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation NameModifierLoc, - SourceLocation ColonLoc, - SourceLocation EndLoc); - /// Called on well-formed 'final' clause. - OMPClause *ActOnOpenMPFinalClause(Expr *Condition, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'num_threads' clause. - OMPClause *ActOnOpenMPNumThreadsClause(Expr *NumThreads, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'align' clause. - OMPClause *ActOnOpenMPAlignClause(Expr *Alignment, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'safelen' clause. - OMPClause *ActOnOpenMPSafelenClause(Expr *Length, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'simdlen' clause. 
- OMPClause *ActOnOpenMPSimdlenClause(Expr *Length, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-form 'sizes' clause. - OMPClause *ActOnOpenMPSizesClause(ArrayRef SizeExprs, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-form 'full' clauses. - OMPClause *ActOnOpenMPFullClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-form 'partial' clauses. - OMPClause *ActOnOpenMPPartialClause(Expr *FactorExpr, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'collapse' clause. - OMPClause *ActOnOpenMPCollapseClause(Expr *NumForLoops, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'ordered' clause. - OMPClause * - ActOnOpenMPOrderedClause(SourceLocation StartLoc, SourceLocation EndLoc, - SourceLocation LParenLoc = SourceLocation(), - Expr *NumForLoops = nullptr); - /// Called on well-formed 'grainsize' clause. - OMPClause *ActOnOpenMPGrainsizeClause(OpenMPGrainsizeClauseModifier Modifier, - Expr *Size, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation ModifierLoc, - SourceLocation EndLoc); - /// Called on well-formed 'num_tasks' clause. - OMPClause *ActOnOpenMPNumTasksClause(OpenMPNumTasksClauseModifier Modifier, - Expr *NumTasks, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation ModifierLoc, - SourceLocation EndLoc); - /// Called on well-formed 'hint' clause. - OMPClause *ActOnOpenMPHintClause(Expr *Hint, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'detach' clause. 
- OMPClause *ActOnOpenMPDetachClause(Expr *Evt, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - - OMPClause *ActOnOpenMPSimpleClause(OpenMPClauseKind Kind, unsigned Argument, - SourceLocation ArgumentLoc, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'when' clause. - OMPClause *ActOnOpenMPWhenClause(OMPTraitInfo &TI, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'default' clause. - OMPClause *ActOnOpenMPDefaultClause(llvm::omp::DefaultKind Kind, - SourceLocation KindLoc, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'proc_bind' clause. - OMPClause *ActOnOpenMPProcBindClause(llvm::omp::ProcBindKind Kind, - SourceLocation KindLoc, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'order' clause. - OMPClause *ActOnOpenMPOrderClause(OpenMPOrderClauseModifier Modifier, - OpenMPOrderClauseKind Kind, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation MLoc, SourceLocation KindLoc, - SourceLocation EndLoc); - /// Called on well-formed 'update' clause. - OMPClause *ActOnOpenMPUpdateClause(OpenMPDependClauseKind Kind, - SourceLocation KindLoc, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - - OMPClause *ActOnOpenMPSingleExprWithArgClause( - OpenMPClauseKind Kind, ArrayRef Arguments, Expr *Expr, - SourceLocation StartLoc, SourceLocation LParenLoc, - ArrayRef ArgumentsLoc, SourceLocation DelimLoc, - SourceLocation EndLoc); - /// Called on well-formed 'schedule' clause. 
- OMPClause *ActOnOpenMPScheduleClause( - OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, - OpenMPScheduleClauseKind Kind, Expr *ChunkSize, SourceLocation StartLoc, - SourceLocation LParenLoc, SourceLocation M1Loc, SourceLocation M2Loc, - SourceLocation KindLoc, SourceLocation CommaLoc, SourceLocation EndLoc); - - OMPClause *ActOnOpenMPClause(OpenMPClauseKind Kind, SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'nowait' clause. - OMPClause *ActOnOpenMPNowaitClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'untied' clause. - OMPClause *ActOnOpenMPUntiedClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'mergeable' clause. - OMPClause *ActOnOpenMPMergeableClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'read' clause. - OMPClause *ActOnOpenMPReadClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'write' clause. - OMPClause *ActOnOpenMPWriteClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'update' clause. - OMPClause *ActOnOpenMPUpdateClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'capture' clause. - OMPClause *ActOnOpenMPCaptureClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'compare' clause. - OMPClause *ActOnOpenMPCompareClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'fail' clause. - OMPClause *ActOnOpenMPFailClause(SourceLocation StartLoc, - SourceLocation EndLoc); - OMPClause *ActOnOpenMPFailClause(OpenMPClauseKind Kind, - SourceLocation KindLoc, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - - /// Called on well-formed 'seq_cst' clause. - OMPClause *ActOnOpenMPSeqCstClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'acq_rel' clause. 
- OMPClause *ActOnOpenMPAcqRelClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'acquire' clause. - OMPClause *ActOnOpenMPAcquireClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'release' clause. - OMPClause *ActOnOpenMPReleaseClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'relaxed' clause. - OMPClause *ActOnOpenMPRelaxedClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'weak' clause. - OMPClause *ActOnOpenMPWeakClause(SourceLocation StartLoc, - SourceLocation EndLoc); - - /// Called on well-formed 'init' clause. - OMPClause * - ActOnOpenMPInitClause(Expr *InteropVar, OMPInteropInfo &InteropInfo, - SourceLocation StartLoc, SourceLocation LParenLoc, - SourceLocation VarLoc, SourceLocation EndLoc); - - /// Called on well-formed 'use' clause. - OMPClause *ActOnOpenMPUseClause(Expr *InteropVar, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation VarLoc, SourceLocation EndLoc); - - /// Called on well-formed 'destroy' clause. - OMPClause *ActOnOpenMPDestroyClause(Expr *InteropVar, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation VarLoc, - SourceLocation EndLoc); - /// Called on well-formed 'novariants' clause. - OMPClause *ActOnOpenMPNovariantsClause(Expr *Condition, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'nocontext' clause. - OMPClause *ActOnOpenMPNocontextClause(Expr *Condition, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'filter' clause. - OMPClause *ActOnOpenMPFilterClause(Expr *ThreadID, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'threads' clause. - OMPClause *ActOnOpenMPThreadsClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'simd' clause. 
- OMPClause *ActOnOpenMPSIMDClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'nogroup' clause. - OMPClause *ActOnOpenMPNogroupClause(SourceLocation StartLoc, - SourceLocation EndLoc); - /// Called on well-formed 'unified_address' clause. - OMPClause *ActOnOpenMPUnifiedAddressClause(SourceLocation StartLoc, - SourceLocation EndLoc); - - /// Called on well-formed 'unified_address' clause. - OMPClause *ActOnOpenMPUnifiedSharedMemoryClause(SourceLocation StartLoc, - SourceLocation EndLoc); - - /// Called on well-formed 'reverse_offload' clause. - OMPClause *ActOnOpenMPReverseOffloadClause(SourceLocation StartLoc, - SourceLocation EndLoc); - - /// Called on well-formed 'dynamic_allocators' clause. - OMPClause *ActOnOpenMPDynamicAllocatorsClause(SourceLocation StartLoc, - SourceLocation EndLoc); - - /// Called on well-formed 'atomic_default_mem_order' clause. - OMPClause *ActOnOpenMPAtomicDefaultMemOrderClause( - OpenMPAtomicDefaultMemOrderClauseKind Kind, SourceLocation KindLoc, - SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); - - /// Called on well-formed 'at' clause. - OMPClause *ActOnOpenMPAtClause(OpenMPAtClauseKind Kind, - SourceLocation KindLoc, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - - /// Called on well-formed 'severity' clause. - OMPClause *ActOnOpenMPSeverityClause(OpenMPSeverityClauseKind Kind, - SourceLocation KindLoc, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - - /// Called on well-formed 'message' clause. - /// passing string for message. - OMPClause *ActOnOpenMPMessageClause(Expr *MS, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - - /// Data used for processing a list of variables in OpenMP clauses. 
- struct OpenMPVarListDataTy final { - Expr *DepModOrTailExpr = nullptr; - Expr *IteratorExpr = nullptr; - SourceLocation ColonLoc; - SourceLocation RLoc; - CXXScopeSpec ReductionOrMapperIdScopeSpec; - DeclarationNameInfo ReductionOrMapperId; - int ExtraModifier = -1; ///< Additional modifier for linear, map, depend or - ///< lastprivate clause. - SmallVector - MapTypeModifiers; - SmallVector - MapTypeModifiersLoc; - SmallVector - MotionModifiers; - SmallVector MotionModifiersLoc; - bool IsMapTypeImplicit = false; - SourceLocation ExtraModifierLoc; - SourceLocation OmpAllMemoryLoc; - SourceLocation - StepModifierLoc; /// 'step' modifier location for linear clause - }; - - OMPClause *ActOnOpenMPVarListClause(OpenMPClauseKind Kind, - ArrayRef Vars, - const OMPVarListLocTy &Locs, - OpenMPVarListDataTy &Data); - /// Called on well-formed 'inclusive' clause. - OMPClause *ActOnOpenMPInclusiveClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'exclusive' clause. - OMPClause *ActOnOpenMPExclusiveClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'allocate' clause. - OMPClause * - ActOnOpenMPAllocateClause(Expr *Allocator, ArrayRef VarList, - SourceLocation StartLoc, SourceLocation ColonLoc, - SourceLocation LParenLoc, SourceLocation EndLoc); - /// Called on well-formed 'private' clause. - OMPClause *ActOnOpenMPPrivateClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'firstprivate' clause. - OMPClause *ActOnOpenMPFirstprivateClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'lastprivate' clause. 
- OMPClause *ActOnOpenMPLastprivateClause( - ArrayRef VarList, OpenMPLastprivateModifier LPKind, - SourceLocation LPKindLoc, SourceLocation ColonLoc, - SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); - /// Called on well-formed 'shared' clause. - OMPClause *ActOnOpenMPSharedClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'reduction' clause. - OMPClause *ActOnOpenMPReductionClause( - ArrayRef VarList, OpenMPReductionClauseModifier Modifier, - SourceLocation StartLoc, SourceLocation LParenLoc, - SourceLocation ModifierLoc, SourceLocation ColonLoc, - SourceLocation EndLoc, CXXScopeSpec &ReductionIdScopeSpec, - const DeclarationNameInfo &ReductionId, - ArrayRef UnresolvedReductions = std::nullopt); - /// Called on well-formed 'task_reduction' clause. - OMPClause *ActOnOpenMPTaskReductionClause( - ArrayRef VarList, SourceLocation StartLoc, - SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc, - CXXScopeSpec &ReductionIdScopeSpec, - const DeclarationNameInfo &ReductionId, - ArrayRef UnresolvedReductions = std::nullopt); - /// Called on well-formed 'in_reduction' clause. - OMPClause *ActOnOpenMPInReductionClause( - ArrayRef VarList, SourceLocation StartLoc, - SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc, - CXXScopeSpec &ReductionIdScopeSpec, - const DeclarationNameInfo &ReductionId, - ArrayRef UnresolvedReductions = std::nullopt); - /// Called on well-formed 'linear' clause. - OMPClause *ActOnOpenMPLinearClause( - ArrayRef VarList, Expr *Step, SourceLocation StartLoc, - SourceLocation LParenLoc, OpenMPLinearClauseKind LinKind, - SourceLocation LinLoc, SourceLocation ColonLoc, - SourceLocation StepModifierLoc, SourceLocation EndLoc); - /// Called on well-formed 'aligned' clause. 
- OMPClause *ActOnOpenMPAlignedClause(ArrayRef VarList, Expr *Alignment, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation ColonLoc, - SourceLocation EndLoc); - /// Called on well-formed 'copyin' clause. - OMPClause *ActOnOpenMPCopyinClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'copyprivate' clause. - OMPClause *ActOnOpenMPCopyprivateClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'flush' pseudo clause. - OMPClause *ActOnOpenMPFlushClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'depobj' pseudo clause. - OMPClause *ActOnOpenMPDepobjClause(Expr *Depobj, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'depend' clause. - OMPClause *ActOnOpenMPDependClause(const OMPDependClause::DependDataTy &Data, - Expr *DepModifier, - ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'device' clause. - OMPClause *ActOnOpenMPDeviceClause(OpenMPDeviceClauseModifier Modifier, - Expr *Device, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation ModifierLoc, - SourceLocation EndLoc); - /// Called on well-formed 'map' clause. - OMPClause *ActOnOpenMPMapClause( - Expr *IteratorModifier, ArrayRef MapTypeModifiers, - ArrayRef MapTypeModifiersLoc, - CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo &MapperId, - OpenMPMapClauseKind MapType, bool IsMapTypeImplicit, - SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef VarList, - const OMPVarListLocTy &Locs, bool NoDiagnose = false, - ArrayRef UnresolvedMappers = std::nullopt); - /// Called on well-formed 'num_teams' clause. 
- OMPClause *ActOnOpenMPNumTeamsClause(Expr *NumTeams, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'thread_limit' clause. - OMPClause *ActOnOpenMPThreadLimitClause(Expr *ThreadLimit, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'priority' clause. - OMPClause *ActOnOpenMPPriorityClause(Expr *Priority, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - /// Called on well-formed 'dist_schedule' clause. - OMPClause *ActOnOpenMPDistScheduleClause( - OpenMPDistScheduleClauseKind Kind, Expr *ChunkSize, - SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation KindLoc, - SourceLocation CommaLoc, SourceLocation EndLoc); - /// Called on well-formed 'defaultmap' clause. - OMPClause *ActOnOpenMPDefaultmapClause( - OpenMPDefaultmapClauseModifier M, OpenMPDefaultmapClauseKind Kind, - SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation MLoc, - SourceLocation KindLoc, SourceLocation EndLoc); - /// Called on well-formed 'to' clause. - OMPClause * - ActOnOpenMPToClause(ArrayRef MotionModifiers, - ArrayRef MotionModifiersLoc, - CXXScopeSpec &MapperIdScopeSpec, - DeclarationNameInfo &MapperId, SourceLocation ColonLoc, - ArrayRef VarList, const OMPVarListLocTy &Locs, - ArrayRef UnresolvedMappers = std::nullopt); - /// Called on well-formed 'from' clause. - OMPClause * - ActOnOpenMPFromClause(ArrayRef MotionModifiers, - ArrayRef MotionModifiersLoc, - CXXScopeSpec &MapperIdScopeSpec, - DeclarationNameInfo &MapperId, SourceLocation ColonLoc, - ArrayRef VarList, const OMPVarListLocTy &Locs, - ArrayRef UnresolvedMappers = std::nullopt); - /// Called on well-formed 'use_device_ptr' clause. - OMPClause *ActOnOpenMPUseDevicePtrClause(ArrayRef VarList, - const OMPVarListLocTy &Locs); - /// Called on well-formed 'use_device_addr' clause. 
- OMPClause *ActOnOpenMPUseDeviceAddrClause(ArrayRef VarList, - const OMPVarListLocTy &Locs); - /// Called on well-formed 'is_device_ptr' clause. - OMPClause *ActOnOpenMPIsDevicePtrClause(ArrayRef VarList, - const OMPVarListLocTy &Locs); - /// Called on well-formed 'has_device_addr' clause. - OMPClause *ActOnOpenMPHasDeviceAddrClause(ArrayRef VarList, - const OMPVarListLocTy &Locs); - /// Called on well-formed 'nontemporal' clause. - OMPClause *ActOnOpenMPNontemporalClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - - /// Data for list of allocators. - struct UsesAllocatorsData { - /// Allocator. - Expr *Allocator = nullptr; - /// Allocator traits. - Expr *AllocatorTraits = nullptr; - /// Locations of '(' and ')' symbols. - SourceLocation LParenLoc, RParenLoc; - }; - /// Called on well-formed 'uses_allocators' clause. - OMPClause *ActOnOpenMPUsesAllocatorClause(SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc, - ArrayRef Data); - /// Called on well-formed 'affinity' clause. - OMPClause *ActOnOpenMPAffinityClause(SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation ColonLoc, - SourceLocation EndLoc, Expr *Modifier, - ArrayRef Locators); - /// Called on a well-formed 'bind' clause. - OMPClause *ActOnOpenMPBindClause(OpenMPBindClauseKind Kind, - SourceLocation KindLoc, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - - /// Called on a well-formed 'ompx_dyn_cgroup_mem' clause. - OMPClause *ActOnOpenMPXDynCGroupMemClause(Expr *Size, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - - /// Called on well-formed 'doacross' clause. - OMPClause * - ActOnOpenMPDoacrossClause(OpenMPDoacrossClauseModifier DepType, - SourceLocation DepLoc, SourceLocation ColonLoc, - ArrayRef VarList, SourceLocation StartLoc, - SourceLocation LParenLoc, SourceLocation EndLoc); - - /// Called on a well-formed 'ompx_attribute' clause. 
- OMPClause *ActOnOpenMPXAttributeClause(ArrayRef Attrs, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc); - - /// Called on a well-formed 'ompx_bare' clause. - OMPClause *ActOnOpenMPXBareClause(SourceLocation StartLoc, - SourceLocation EndLoc); - -private: - void *VarDataSharingAttributesStack; - - /// Number of nested '#pragma omp declare target' directives. - SmallVector DeclareTargetNesting; - - /// Initialization of data-sharing attributes stack. - void InitDataSharingAttributesStack(); - void DestroyDataSharingAttributesStack(); - - /// Returns OpenMP nesting level for current directive. - unsigned getOpenMPNestingLevel() const; - - /// Adjusts the function scopes index for the target-based regions. - void adjustOpenMPTargetScopeIndex(unsigned &FunctionScopesIndex, - unsigned Level) const; - - /// Returns the number of scopes associated with the construct on the given - /// OpenMP level. - int getNumberOfConstructScopes(unsigned Level) const; - - /// Push new OpenMP function region for non-capturing function. - void pushOpenMPFunctionRegion(); - - /// Pop OpenMP function region for non-capturing function. - void popOpenMPFunctionRegion(const sema::FunctionScopeInfo *OldFSI); - - /// Analyzes and checks a loop nest for use by a loop transformation. - /// - /// \param Kind The loop transformation directive kind. - /// \param NumLoops How many nested loops the directive is expecting. - /// \param AStmt Associated statement of the transformation directive. - /// \param LoopHelpers [out] The loop analysis result. - /// \param Body [out] The body code nested in \p NumLoops loop. - /// \param OriginalInits [out] Collection of statements and declarations that - /// must have been executed/declared before entering the - /// loop. - /// - /// \return Whether there was any error. 
- bool checkTransformableLoopNest( - OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, - SmallVectorImpl &LoopHelpers, - Stmt *&Body, - SmallVectorImpl, 0>> - &OriginalInits); - - /// Helper to keep information about the current `omp begin/end declare - /// variant` nesting. - struct OMPDeclareVariantScope { - /// The associated OpenMP context selector. - OMPTraitInfo *TI; - - /// The associated OpenMP context selector mangling. - std::string NameSuffix; - - OMPDeclareVariantScope(OMPTraitInfo &TI); - }; - - /// Return the OMPTraitInfo for the surrounding scope, if any. - OMPTraitInfo *getOMPTraitInfoForSurroundingScope() { - return OMPDeclareVariantScopes.empty() ? nullptr - : OMPDeclareVariantScopes.back().TI; - } - - /// The current `omp begin/end declare variant` scopes. - SmallVector OMPDeclareVariantScopes; - - /// The current `omp begin/end assumes` scopes. - SmallVector OMPAssumeScoped; - - /// All `omp assumes` we encountered so far. - SmallVector OMPAssumeGlobal; - - /// OMPD_loop is mapped to OMPD_for, OMPD_distribute or OMPD_simd depending - /// on the parameter of the bind clause. In the methods for the - /// mapped directives, check the parameters of the lastprivate clause. - bool checkLastPrivateForMappedDirectives(ArrayRef Clauses); - /// Depending on the bind clause of OMPD_loop map the directive to new - /// directives. - /// 1) loop bind(parallel) --> OMPD_for - /// 2) loop bind(teams) --> OMPD_distribute - /// 3) loop bind(thread) --> OMPD_simd - /// This is being handled in Sema instead of Codegen because of the need for - /// rigorous semantic checking in the new mapped directives. 
- bool mapLoopConstruct(llvm::SmallVector &ClausesWithoutBind, - ArrayRef Clauses, - OpenMPBindClauseKind &BindKind, - OpenMPDirectiveKind &Kind, - OpenMPDirectiveKind &PrevMappedDirective, - SourceLocation StartLoc, SourceLocation EndLoc, - const DeclarationNameInfo &DirName, - OpenMPDirectiveKind CancelRegion); - - ///@} }; DeductionFailureInfo diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h index c1fe0f5b9c0f6b..329dc3945fa2a6 100644 --- a/clang/include/clang/Sema/SemaOpenACC.h +++ b/clang/include/clang/Sema/SemaOpenACC.h @@ -44,7 +44,8 @@ class SemaOpenACC : public SemaBase { Expr *ConditionExpr; }; - std::variant Details; + std::variant Details = + std::monostate{}; public: OpenACCParsedClause(OpenACCDirectiveKind DirKind, @@ -72,8 +73,17 @@ class SemaOpenACC : public SemaBase { } Expr *getConditionExpr() { - assert(ClauseKind == OpenACCClauseKind::If && + assert((ClauseKind == OpenACCClauseKind::If || + (ClauseKind == OpenACCClauseKind::Self && + DirKind != OpenACCDirectiveKind::Update)) && "Parsed clause kind does not have a condition expr"); + + // 'self' has an optional ConditionExpr, so be tolerant of that. This will + // assert in variant otherwise. + if (ClauseKind == OpenACCClauseKind::Self && + std::holds_alternative(Details)) + return nullptr; + return std::get(Details).ConditionExpr; } @@ -87,7 +97,9 @@ class SemaOpenACC : public SemaBase { } void setConditionDetails(Expr *ConditionExpr) { - assert(ClauseKind == OpenACCClauseKind::If && + assert((ClauseKind == OpenACCClauseKind::If || + (ClauseKind == OpenACCClauseKind::Self && + DirKind != OpenACCDirectiveKind::Update)) && "Parsed clause kind does not have a condition expr"); // In C++ we can count on this being a 'bool', but in C this gets left as // some sort of scalar that codegen will have to take care of converting. 
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h new file mode 100644 index 00000000000000..9927459bbc5941 --- /dev/null +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -0,0 +1,1447 @@ +//===----- SemaOpenMP.h -- Semantic Analysis for OpenMP constructs -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares semantic analysis for OpenMP constructs and +/// clauses. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SEMA_SEMAOPENMP_H +#define LLVM_CLANG_SEMA_SEMAOPENMP_H + +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" +#include "clang/AST/DeclOpenMP.h" +#include "clang/AST/DeclarationName.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprOpenMP.h" +#include "clang/AST/OpenMPClause.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtOpenMP.h" +#include "clang/AST/Type.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/OpenMPKinds.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/Specifiers.h" +#include "clang/Sema/DeclSpec.h" +#include "clang/Sema/Ownership.h" +#include "clang/Sema/Scope.h" +#include "clang/Sema/ScopeInfo.h" +#include "clang/Sema/SemaBase.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerUnion.h" +#include +#include +#include + +namespace clang { + +class SemaOpenMP : public SemaBase { +public: + SemaOpenMP(Sema &S); + + friend class Parser; + friend class Sema; + + using DeclGroupPtrTy = OpaquePtr; + using CapturedParamNameType = std::pair; + + /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current + /// context is 
"used as device code". + /// + /// - If CurContext is a `declare target` function or it is known that the + /// function is emitted for the device, emits the diagnostics immediately. + /// - If CurContext is a non-`declare target` function and we are compiling + /// for the device, creates a diagnostic which is emitted if and when we + /// realize that the function will be codegen'ed. + /// + /// Example usage: + /// + /// // Variable-length arrays are not allowed in NVPTX device code. + /// if (diagIfOpenMPDeviceCode(Loc, diag::err_vla_unsupported)) + /// return ExprError(); + /// // Otherwise, continue parsing as normal. + SemaDiagnosticBuilder diagIfOpenMPDeviceCode(SourceLocation Loc, + unsigned DiagID, + const FunctionDecl *FD); + + /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current + /// context is "used as host code". + /// + /// - If CurContext is a `declare target` function or it is known that the + /// function is emitted for the host, emits the diagnostics immediately. + /// - If CurContext is a non-host function, just ignore it. + /// + /// Example usage: + /// + /// // Variable-length arrays are not allowed in NVPTX device code. + /// if (diagIfOpenMPHostCode(Loc, diag::err_vla_unsupported)) + /// return ExprError(); + /// // Otherwise, continue parsing as normal. + SemaDiagnosticBuilder diagIfOpenMPHostCode(SourceLocation Loc, + unsigned DiagID, + const FunctionDecl *FD); + + /// The declarator \p D defines a function in the scope \p S which is nested + /// in an `omp begin/end declare variant` scope. In this method we create a + /// declaration for \p D and rename \p D according to the OpenMP context + /// selector of the surrounding scope. Return all base functions in \p Bases. 
+ void ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( + Scope *S, Declarator &D, MultiTemplateParamsArg TemplateParameterLists, + SmallVectorImpl &Bases); + + /// Register \p D as specialization of all base functions in \p Bases in the + /// current `omp begin/end declare variant` scope. + void ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope( + Decl *D, SmallVectorImpl &Bases); + + /// Act on \p D, a function definition inside of an `omp [begin/end] assumes`. + void ActOnFinishedFunctionDefinitionInOpenMPAssumeScope(Decl *D); + + /// Can we exit an OpenMP declare variant scope at the moment. + bool isInOpenMPDeclareVariantScope() const { + return !OMPDeclareVariantScopes.empty(); + } + + ExprResult + VerifyPositiveIntegerConstantInClause(Expr *Op, OpenMPClauseKind CKind, + bool StrictlyPositive = true, + bool SuppressExprDiags = false); + + /// Given the potential call expression \p Call, determine if there is a + /// specialization via the OpenMP declare variant mechanism available. If + /// there is, return the specialized call expression, otherwise return the + /// original \p Call. + ExprResult ActOnOpenMPCall(ExprResult Call, Scope *Scope, + SourceLocation LParenLoc, MultiExprArg ArgExprs, + SourceLocation RParenLoc, Expr *ExecConfig); + + /// Handle a `omp begin declare variant`. + void ActOnOpenMPBeginDeclareVariant(SourceLocation Loc, OMPTraitInfo &TI); + + /// Handle a `omp end declare variant`. + void ActOnOpenMPEndDeclareVariant(); + + /// Function tries to capture lambda's captured variables in the OpenMP region + /// before the original lambda is captured. + void tryCaptureOpenMPLambdas(ValueDecl *V); + + /// Return true if the provided declaration \a VD should be captured by + /// reference. + /// \param Level Relative level of nested OpenMP construct for that the check + /// is performed. + /// \param OpenMPCaptureLevel Capture level within an OpenMP construct. 
+ bool isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, + unsigned OpenMPCaptureLevel) const; + + /// Check if the specified variable is used in one of the private + /// clauses (private, firstprivate, lastprivate, reduction etc.) in OpenMP + /// constructs. + VarDecl *isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo = false, + unsigned StopAt = 0); + + /// The member expression(this->fd) needs to be rebuilt in the template + /// instantiation to generate private copy for OpenMP when default + /// clause is used. The function will return true if default + /// clause is used. + bool isOpenMPRebuildMemberExpr(ValueDecl *D); + + ExprResult getOpenMPCapturedExpr(VarDecl *Capture, ExprValueKind VK, + ExprObjectKind OK, SourceLocation Loc); + + /// If the current region is a loop-based region, mark the start of the loop + /// construct. + void startOpenMPLoop(); + + /// If the current region is a range loop-based region, mark the start of the + /// loop construct. + void startOpenMPCXXRangeFor(); + + /// Check if the specified variable is used in 'private' clause. + /// \param Level Relative level of nested OpenMP construct for that the check + /// is performed. + OpenMPClauseKind isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, + unsigned CapLevel) const; + + /// Sets OpenMP capture kind (OMPC_private, OMPC_firstprivate, OMPC_map etc.) + /// for \p FD based on DSA for the provided corresponding captured declaration + /// \p D. + void setOpenMPCaptureKind(FieldDecl *FD, const ValueDecl *D, unsigned Level); + + /// Check if the specified variable is captured by 'target' directive. + /// \param Level Relative level of nested OpenMP construct for that the check + /// is performed. + bool isOpenMPTargetCapturedDecl(const ValueDecl *D, unsigned Level, + unsigned CaptureLevel) const; + + /// Check if the specified global variable must be captured by outer capture + /// regions. 
+ /// \param Level Relative level of nested OpenMP construct for that + /// the check is performed. + bool isOpenMPGlobalCapturedDecl(ValueDecl *D, unsigned Level, + unsigned CaptureLevel) const; + + ExprResult PerformOpenMPImplicitIntegerConversion(SourceLocation OpLoc, + Expr *Op); + /// Called on start of new data sharing attribute block. + void StartOpenMPDSABlock(OpenMPDirectiveKind K, + const DeclarationNameInfo &DirName, Scope *CurScope, + SourceLocation Loc); + /// Start analysis of clauses. + void StartOpenMPClause(OpenMPClauseKind K); + /// End analysis of clauses. + void EndOpenMPClause(); + /// Called on end of data sharing attribute block. + void EndOpenMPDSABlock(Stmt *CurDirective); + + /// Check if the current region is an OpenMP loop region and if it is, + /// mark loop control variable, used in \p Init for loop initialization, as + /// private by default. + /// \param Init First part of the for loop. + void ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init); + + /// Called on well-formed '\#pragma omp metadirective' after parsing + /// of the associated statement. + StmtResult ActOnOpenMPMetaDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + + // OpenMP directives and clauses. + /// Called on correct id-expression from the '#pragma omp + /// threadprivate'. + ExprResult ActOnOpenMPIdExpression(Scope *CurScope, CXXScopeSpec &ScopeSpec, + const DeclarationNameInfo &Id, + OpenMPDirectiveKind Kind); + /// Called on well-formed '#pragma omp threadprivate'. + DeclGroupPtrTy ActOnOpenMPThreadprivateDirective(SourceLocation Loc, + ArrayRef VarList); + /// Builds a new OpenMPThreadPrivateDecl and checks its correctness. + OMPThreadPrivateDecl *CheckOMPThreadPrivateDecl(SourceLocation Loc, + ArrayRef VarList); + /// Called on well-formed '#pragma omp allocate'. 
+ DeclGroupPtrTy ActOnOpenMPAllocateDirective(SourceLocation Loc, + ArrayRef VarList, + ArrayRef Clauses, + DeclContext *Owner = nullptr); + + /// Called on well-formed '#pragma omp [begin] assume[s]'. + void ActOnOpenMPAssumesDirective(SourceLocation Loc, + OpenMPDirectiveKind DKind, + ArrayRef Assumptions, + bool SkippedClauses); + + /// Check if there is an active global `omp begin assumes` directive. + bool isInOpenMPAssumeScope() const { return !OMPAssumeScoped.empty(); } + + /// Check if there is an active global `omp assumes` directive. + bool hasGlobalOpenMPAssumes() const { return !OMPAssumeGlobal.empty(); } + + /// Called on well-formed '#pragma omp end assumes'. + void ActOnOpenMPEndAssumesDirective(); + + /// Called on well-formed '#pragma omp requires'. + DeclGroupPtrTy ActOnOpenMPRequiresDirective(SourceLocation Loc, + ArrayRef ClauseList); + /// Check restrictions on Requires directive + OMPRequiresDecl *CheckOMPRequiresDecl(SourceLocation Loc, + ArrayRef Clauses); + /// Check if the specified type is allowed to be used in 'omp declare + /// reduction' construct. + QualType ActOnOpenMPDeclareReductionType(SourceLocation TyLoc, + TypeResult ParsedType); + /// Called on start of '#pragma omp declare reduction'. + DeclGroupPtrTy ActOnOpenMPDeclareReductionDirectiveStart( + Scope *S, DeclContext *DC, DeclarationName Name, + ArrayRef> ReductionTypes, + AccessSpecifier AS, Decl *PrevDeclInScope = nullptr); + /// Initialize declare reduction construct initializer. + void ActOnOpenMPDeclareReductionCombinerStart(Scope *S, Decl *D); + /// Finish current declare reduction construct initializer. + void ActOnOpenMPDeclareReductionCombinerEnd(Decl *D, Expr *Combiner); + /// Initialize declare reduction construct initializer. + /// \return omp_priv variable. + VarDecl *ActOnOpenMPDeclareReductionInitializerStart(Scope *S, Decl *D); + /// Finish current declare reduction construct initializer. 
+ void ActOnOpenMPDeclareReductionInitializerEnd(Decl *D, Expr *Initializer, + VarDecl *OmpPrivParm); + /// Called at the end of '#pragma omp declare reduction'. + DeclGroupPtrTy ActOnOpenMPDeclareReductionDirectiveEnd( + Scope *S, DeclGroupPtrTy DeclReductions, bool IsValid); + + /// Check variable declaration in 'omp declare mapper' construct. + TypeResult ActOnOpenMPDeclareMapperVarDecl(Scope *S, Declarator &D); + /// Check if the specified type is allowed to be used in 'omp declare + /// mapper' construct. + QualType ActOnOpenMPDeclareMapperType(SourceLocation TyLoc, + TypeResult ParsedType); + /// Called on start of '#pragma omp declare mapper'. + DeclGroupPtrTy ActOnOpenMPDeclareMapperDirective( + Scope *S, DeclContext *DC, DeclarationName Name, QualType MapperType, + SourceLocation StartLoc, DeclarationName VN, AccessSpecifier AS, + Expr *MapperVarRef, ArrayRef Clauses, + Decl *PrevDeclInScope = nullptr); + /// Build the mapper variable of '#pragma omp declare mapper'. + ExprResult ActOnOpenMPDeclareMapperDirectiveVarDecl(Scope *S, + QualType MapperType, + SourceLocation StartLoc, + DeclarationName VN); + void ActOnOpenMPIteratorVarDecl(VarDecl *VD); + bool isOpenMPDeclareMapperVarDeclAllowed(const VarDecl *VD) const; + const ValueDecl *getOpenMPDeclareMapperVarName() const; + + struct DeclareTargetContextInfo { + struct MapInfo { + OMPDeclareTargetDeclAttr::MapTypeTy MT; + SourceLocation Loc; + }; + /// Explicitly listed variables and functions in a 'to' or 'link' clause. + llvm::DenseMap ExplicitlyMapped; + + /// The 'device_type' as parsed from the clause. + OMPDeclareTargetDeclAttr::DevTypeTy DT = OMPDeclareTargetDeclAttr::DT_Any; + + /// The directive kind, `begin declare target` or `declare target`. + OpenMPDirectiveKind Kind; + + /// The directive with indirect clause. + std::optional Indirect; + + /// The directive location. 
+ SourceLocation Loc; + + DeclareTargetContextInfo(OpenMPDirectiveKind Kind, SourceLocation Loc) + : Kind(Kind), Loc(Loc) {} + }; + + /// Called on the start of target region i.e. '#pragma omp declare target'. + bool ActOnStartOpenMPDeclareTargetContext(DeclareTargetContextInfo &DTCI); + + /// Called at the end of target region i.e. '#pragma omp end declare target'. + const DeclareTargetContextInfo ActOnOpenMPEndDeclareTargetDirective(); + + /// Called once a target context is completed, that can be when a + /// '#pragma omp end declare target' was encountered or when a + /// '#pragma omp declare target' without declaration-definition-seq was + /// encountered. + void ActOnFinishedOpenMPDeclareTargetContext(DeclareTargetContextInfo &DTCI); + + /// Report unterminated 'omp declare target' or 'omp begin declare target' at + /// the end of a compilation unit. + void DiagnoseUnterminatedOpenMPDeclareTarget(); + + /// Searches for the provided declaration name for OpenMP declare target + /// directive. + NamedDecl *lookupOpenMPDeclareTargetName(Scope *CurScope, + CXXScopeSpec &ScopeSpec, + const DeclarationNameInfo &Id); + + /// Called on correct id-expression from the '#pragma omp declare target'. + void ActOnOpenMPDeclareTargetName(NamedDecl *ND, SourceLocation Loc, + OMPDeclareTargetDeclAttr::MapTypeTy MT, + DeclareTargetContextInfo &DTCI); + + /// Check declaration inside target region. + void + checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D, + SourceLocation IdLoc = SourceLocation()); + + /// Adds OMPDeclareTargetDeclAttr to referenced variables in declare target + /// directive. + void ActOnOpenMPDeclareTargetInitializer(Decl *D); + + /// Finishes analysis of the deferred functions calls that may be declared as + /// host/nohost during device/host compilation. + void finalizeOpenMPDelayedAnalysis(const FunctionDecl *Caller, + const FunctionDecl *Callee, + SourceLocation Loc); + + /// Return true if currently in OpenMP task with untied clause context. 
+ bool isInOpenMPTaskUntiedContext() const; + + /// Return true inside OpenMP declare target region. + bool isInOpenMPDeclareTargetContext() const { + return !DeclareTargetNesting.empty(); + } + /// Return true inside OpenMP target region. + bool isInOpenMPTargetExecutionDirective() const; + + /// Return the number of captured regions created for an OpenMP directive. + static int getOpenMPCaptureLevels(OpenMPDirectiveKind Kind); + + /// Initialization of captured region for OpenMP region. + void ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope); + + /// Called for syntactical loops (ForStmt or CXXForRangeStmt) associated to + /// an OpenMP loop directive. + StmtResult ActOnOpenMPCanonicalLoop(Stmt *AStmt); + + /// Process a canonical OpenMP loop nest that can either be a canonical + /// literal loop (ForStmt or CXXForRangeStmt), or the generated loop of an + /// OpenMP loop transformation construct. + StmtResult ActOnOpenMPLoopnest(Stmt *AStmt); + + /// End of OpenMP region. + /// + /// \param S Statement associated with the current OpenMP region. + /// \param Clauses List of clauses for the current OpenMP region. + /// + /// \returns Statement for finished OpenMP region. + StmtResult ActOnOpenMPRegionEnd(StmtResult S, ArrayRef Clauses); + StmtResult ActOnOpenMPExecutableDirective( + OpenMPDirectiveKind Kind, const DeclarationNameInfo &DirName, + OpenMPDirectiveKind CancelRegion, ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, + OpenMPDirectiveKind PrevMappedDirective = llvm::omp::OMPD_unknown); + /// Called on well-formed '\#pragma omp parallel' after parsing + /// of the associated statement. + StmtResult ActOnOpenMPParallelDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + using VarsWithInheritedDSAType = + llvm::SmallDenseMap; + /// Called on well-formed '\#pragma omp simd' after parsing + /// of the associated statement. 
+ StmtResult + ActOnOpenMPSimdDirective(ArrayRef Clauses, Stmt *AStmt, + SourceLocation StartLoc, SourceLocation EndLoc, + VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '#pragma omp tile' after parsing of its clauses and + /// the associated statement. + StmtResult ActOnOpenMPTileDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '#pragma omp unroll' after parsing of its clauses + /// and the associated statement. + StmtResult ActOnOpenMPUnrollDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp for' after parsing + /// of the associated statement. + StmtResult + ActOnOpenMPForDirective(ArrayRef Clauses, Stmt *AStmt, + SourceLocation StartLoc, SourceLocation EndLoc, + VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp for simd' after parsing + /// of the associated statement. + StmtResult + ActOnOpenMPForSimdDirective(ArrayRef Clauses, Stmt *AStmt, + SourceLocation StartLoc, SourceLocation EndLoc, + VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp sections' after parsing + /// of the associated statement. + StmtResult ActOnOpenMPSectionsDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp section' after parsing of the + /// associated statement. + StmtResult ActOnOpenMPSectionDirective(Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp scope' after parsing of the + /// associated statement. + StmtResult ActOnOpenMPScopeDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp single' after parsing of the + /// associated statement. 
+ StmtResult ActOnOpenMPSingleDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp master' after parsing of the + /// associated statement. + StmtResult ActOnOpenMPMasterDirective(Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp critical' after parsing of the + /// associated statement. + StmtResult ActOnOpenMPCriticalDirective(const DeclarationNameInfo &DirName, + ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp parallel for' after parsing + /// of the associated statement. + StmtResult ActOnOpenMPParallelForDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp parallel for simd' after + /// parsing of the associated statement. + StmtResult ActOnOpenMPParallelForSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp parallel master' after + /// parsing of the associated statement. + StmtResult ActOnOpenMPParallelMasterDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp parallel masked' after + /// parsing of the associated statement. + StmtResult ActOnOpenMPParallelMaskedDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp parallel sections' after + /// parsing of the associated statement. + StmtResult ActOnOpenMPParallelSectionsDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp task' after parsing of the + /// associated statement. 
+ StmtResult ActOnOpenMPTaskDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp taskyield'. + StmtResult ActOnOpenMPTaskyieldDirective(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp error'. + /// Error directive is allowed in both declared and executable contexts. + /// Adding InExContext to identify which context is called from. + StmtResult ActOnOpenMPErrorDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc, + bool InExContext = true); + /// Called on well-formed '\#pragma omp barrier'. + StmtResult ActOnOpenMPBarrierDirective(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp taskwait'. + StmtResult ActOnOpenMPTaskwaitDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp taskgroup'. + StmtResult ActOnOpenMPTaskgroupDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp flush'. + StmtResult ActOnOpenMPFlushDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp depobj'. + StmtResult ActOnOpenMPDepobjDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp scan'. + StmtResult ActOnOpenMPScanDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp ordered' after parsing of the + /// associated statement. + StmtResult ActOnOpenMPOrderedDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp atomic' after parsing of the + /// associated statement.
+ StmtResult ActOnOpenMPAtomicDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp target' after parsing of the + /// associated statement. + StmtResult ActOnOpenMPTargetDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp target data' after parsing of + /// the associated statement. + StmtResult ActOnOpenMPTargetDataDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp target enter data' after + /// parsing of the associated statement. + StmtResult ActOnOpenMPTargetEnterDataDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc, + Stmt *AStmt); + /// Called on well-formed '\#pragma omp target exit data' after + /// parsing of the associated statement. + StmtResult ActOnOpenMPTargetExitDataDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc, + Stmt *AStmt); + /// Called on well-formed '\#pragma omp target parallel' after + /// parsing of the associated statement. + StmtResult ActOnOpenMPTargetParallelDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp target parallel for' after + /// parsing of the associated statement. + StmtResult ActOnOpenMPTargetParallelForDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp teams' after parsing of the + /// associated statement. + StmtResult ActOnOpenMPTeamsDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp teams loop' after parsing of the + /// associated statement. 
+ StmtResult ActOnOpenMPTeamsGenericLoopDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp target teams loop' after parsing of + /// the associated statement. + StmtResult ActOnOpenMPTargetTeamsGenericLoopDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp parallel loop' after parsing of the + /// associated statement. + StmtResult ActOnOpenMPParallelGenericLoopDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp target parallel loop' after parsing + /// of the associated statement. + StmtResult ActOnOpenMPTargetParallelGenericLoopDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp cancellation point'. + StmtResult + ActOnOpenMPCancellationPointDirective(SourceLocation StartLoc, + SourceLocation EndLoc, + OpenMPDirectiveKind CancelRegion); + /// Called on well-formed '\#pragma omp cancel'. + StmtResult ActOnOpenMPCancelDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc, + OpenMPDirectiveKind CancelRegion); + /// Called on well-formed '\#pragma omp taskloop' after parsing of the + /// associated statement. + StmtResult + ActOnOpenMPTaskLoopDirective(ArrayRef Clauses, Stmt *AStmt, + SourceLocation StartLoc, SourceLocation EndLoc, + VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp taskloop simd' after parsing of + /// the associated statement. 
+ StmtResult ActOnOpenMPTaskLoopSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp master taskloop' after parsing of the + /// associated statement. + StmtResult ActOnOpenMPMasterTaskLoopDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp master taskloop simd' after parsing of + /// the associated statement. + StmtResult ActOnOpenMPMasterTaskLoopSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp parallel master taskloop' after + /// parsing of the associated statement. + StmtResult ActOnOpenMPParallelMasterTaskLoopDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp parallel master taskloop simd' after + /// parsing of the associated statement. + StmtResult ActOnOpenMPParallelMasterTaskLoopSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp masked taskloop' after parsing of the + /// associated statement. + StmtResult ActOnOpenMPMaskedTaskLoopDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp masked taskloop simd' after parsing of + /// the associated statement. 
+ StmtResult ActOnOpenMPMaskedTaskLoopSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp parallel masked taskloop' after + /// parsing of the associated statement. + StmtResult ActOnOpenMPParallelMaskedTaskLoopDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp parallel masked taskloop simd' after + /// parsing of the associated statement. + StmtResult ActOnOpenMPParallelMaskedTaskLoopSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp distribute' after parsing + /// of the associated statement. + StmtResult + ActOnOpenMPDistributeDirective(ArrayRef Clauses, Stmt *AStmt, + SourceLocation StartLoc, SourceLocation EndLoc, + VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp target update'. + StmtResult ActOnOpenMPTargetUpdateDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc, + Stmt *AStmt); + /// Called on well-formed '\#pragma omp distribute parallel for' after + /// parsing of the associated statement. + StmtResult ActOnOpenMPDistributeParallelForDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp distribute parallel for simd' + /// after parsing of the associated statement. + StmtResult ActOnOpenMPDistributeParallelForSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp distribute simd' after + /// parsing of the associated statement. 
+ StmtResult ActOnOpenMPDistributeSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp target parallel for simd' after + /// parsing of the associated statement. + StmtResult ActOnOpenMPTargetParallelForSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp target simd' after parsing of + /// the associated statement. + StmtResult + ActOnOpenMPTargetSimdDirective(ArrayRef Clauses, Stmt *AStmt, + SourceLocation StartLoc, SourceLocation EndLoc, + VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp teams distribute' after parsing of + /// the associated statement. + StmtResult ActOnOpenMPTeamsDistributeDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp teams distribute simd' after parsing + /// of the associated statement. + StmtResult ActOnOpenMPTeamsDistributeSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp teams distribute parallel for simd' + /// after parsing of the associated statement. + StmtResult ActOnOpenMPTeamsDistributeParallelForSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp teams distribute parallel for' + /// after parsing of the associated statement. 
+ StmtResult ActOnOpenMPTeamsDistributeParallelForDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp target teams' after parsing of the + /// associated statement. + StmtResult ActOnOpenMPTargetTeamsDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp target teams distribute' after parsing + /// of the associated statement. + StmtResult ActOnOpenMPTargetTeamsDistributeDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp target teams distribute parallel for' + /// after parsing of the associated statement. + StmtResult ActOnOpenMPTargetTeamsDistributeParallelForDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp target teams distribute parallel for + /// simd' after parsing of the associated statement. + StmtResult ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp target teams distribute simd' after + /// parsing of the associated statement. + StmtResult ActOnOpenMPTargetTeamsDistributeSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp interop'. + StmtResult ActOnOpenMPInteropDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp dispatch' after parsing of the + /// associated statement.
+ StmtResult ActOnOpenMPDispatchDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp masked' after parsing of the + /// associated statement. + StmtResult ActOnOpenMPMaskedDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + + /// Called on well-formed '\#pragma omp loop' after parsing of the + /// associated statement. + StmtResult ActOnOpenMPGenericLoopDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + + /// Checks correctness of linear modifiers. + bool CheckOpenMPLinearModifier(OpenMPLinearClauseKind LinKind, + SourceLocation LinLoc); + /// Checks that the specified declaration matches requirements for the linear + /// decls. + bool CheckOpenMPLinearDecl(const ValueDecl *D, SourceLocation ELoc, + OpenMPLinearClauseKind LinKind, QualType Type, + bool IsDeclareSimd = false); + + /// Called on well-formed '\#pragma omp declare simd' after parsing of + /// the associated method/function. + DeclGroupPtrTy ActOnOpenMPDeclareSimdDirective( + DeclGroupPtrTy DG, OMPDeclareSimdDeclAttr::BranchStateTy BS, + Expr *Simdlen, ArrayRef Uniforms, ArrayRef Aligneds, + ArrayRef Alignments, ArrayRef Linears, + ArrayRef LinModifiers, ArrayRef Steps, SourceRange SR); + + /// Checks '\#pragma omp declare variant' variant function and original + /// functions after parsing of the associated method/function. + /// \param DG Function declaration to which declare variant directive is + /// applied to. + /// \param VariantRef Expression that references the variant function, which + /// must be used instead of the original one, specified in \p DG. + /// \param TI The trait info object representing the match clause. + /// \param NumAppendArgs The number of omp_interop_t arguments to account for + /// in checking.
+ /// \returns std::nullopt, if the function/variant function are not compatible + /// with the pragma, pair of original function/variant ref expression + /// otherwise. + std::optional> + checkOpenMPDeclareVariantFunction(DeclGroupPtrTy DG, Expr *VariantRef, + OMPTraitInfo &TI, unsigned NumAppendArgs, + SourceRange SR); + + /// Called on well-formed '\#pragma omp declare variant' after parsing of + /// the associated method/function. + /// \param FD Function declaration to which declare variant directive is + /// applied to. + /// \param VariantRef Expression that references the variant function, which + /// must be used instead of the original one, specified in \p FD. + /// \param TI The context traits associated with the function variant. + /// \param AdjustArgsNothing The list of 'nothing' arguments. + /// \param AdjustArgsNeedDevicePtr The list of 'need_device_ptr' arguments. + /// \param AppendArgs The list of 'append_args' arguments. + /// \param AdjustArgsLoc The Location of an 'adjust_args' clause. + /// \param AppendArgsLoc The Location of an 'append_args' clause. + /// \param SR The SourceRange of the 'declare variant' directive. + void ActOnOpenMPDeclareVariantDirective( + FunctionDecl *FD, Expr *VariantRef, OMPTraitInfo &TI, + ArrayRef AdjustArgsNothing, + ArrayRef AdjustArgsNeedDevicePtr, + ArrayRef AppendArgs, SourceLocation AdjustArgsLoc, + SourceLocation AppendArgsLoc, SourceRange SR); + + OMPClause *ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'allocator' clause. + OMPClause *ActOnOpenMPAllocatorClause(Expr *Allocator, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'if' clause.
+ OMPClause *ActOnOpenMPIfClause(OpenMPDirectiveKind NameModifier, + Expr *Condition, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation NameModifierLoc, + SourceLocation ColonLoc, + SourceLocation EndLoc); + /// Called on well-formed 'final' clause. + OMPClause *ActOnOpenMPFinalClause(Expr *Condition, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'num_threads' clause. + OMPClause *ActOnOpenMPNumThreadsClause(Expr *NumThreads, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'align' clause. + OMPClause *ActOnOpenMPAlignClause(Expr *Alignment, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'safelen' clause. + OMPClause *ActOnOpenMPSafelenClause(Expr *Length, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'simdlen' clause. + OMPClause *ActOnOpenMPSimdlenClause(Expr *Length, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'sizes' clause. + OMPClause *ActOnOpenMPSizesClause(ArrayRef SizeExprs, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'full' clause. + OMPClause *ActOnOpenMPFullClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'partial' clause. + OMPClause *ActOnOpenMPPartialClause(Expr *FactorExpr, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'collapse' clause. + OMPClause *ActOnOpenMPCollapseClause(Expr *NumForLoops, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'ordered' clause.
+ OMPClause * + ActOnOpenMPOrderedClause(SourceLocation StartLoc, SourceLocation EndLoc, + SourceLocation LParenLoc = SourceLocation(), + Expr *NumForLoops = nullptr); + /// Called on well-formed 'grainsize' clause. + OMPClause *ActOnOpenMPGrainsizeClause(OpenMPGrainsizeClauseModifier Modifier, + Expr *Size, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation ModifierLoc, + SourceLocation EndLoc); + /// Called on well-formed 'num_tasks' clause. + OMPClause *ActOnOpenMPNumTasksClause(OpenMPNumTasksClauseModifier Modifier, + Expr *NumTasks, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation ModifierLoc, + SourceLocation EndLoc); + /// Called on well-formed 'hint' clause. + OMPClause *ActOnOpenMPHintClause(Expr *Hint, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'detach' clause. + OMPClause *ActOnOpenMPDetachClause(Expr *Evt, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + + OMPClause *ActOnOpenMPSimpleClause(OpenMPClauseKind Kind, unsigned Argument, + SourceLocation ArgumentLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'when' clause. + OMPClause *ActOnOpenMPWhenClause(OMPTraitInfo &TI, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'default' clause. + OMPClause *ActOnOpenMPDefaultClause(llvm::omp::DefaultKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'proc_bind' clause. + OMPClause *ActOnOpenMPProcBindClause(llvm::omp::ProcBindKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'order' clause. 
+ OMPClause *ActOnOpenMPOrderClause(OpenMPOrderClauseModifier Modifier, + OpenMPOrderClauseKind Kind, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation MLoc, SourceLocation KindLoc, + SourceLocation EndLoc); + /// Called on well-formed 'update' clause. + OMPClause *ActOnOpenMPUpdateClause(OpenMPDependClauseKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + + OMPClause *ActOnOpenMPSingleExprWithArgClause( + OpenMPClauseKind Kind, ArrayRef Arguments, Expr *Expr, + SourceLocation StartLoc, SourceLocation LParenLoc, + ArrayRef ArgumentsLoc, SourceLocation DelimLoc, + SourceLocation EndLoc); + /// Called on well-formed 'schedule' clause. + OMPClause *ActOnOpenMPScheduleClause( + OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, + OpenMPScheduleClauseKind Kind, Expr *ChunkSize, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation M1Loc, SourceLocation M2Loc, + SourceLocation KindLoc, SourceLocation CommaLoc, SourceLocation EndLoc); + + OMPClause *ActOnOpenMPClause(OpenMPClauseKind Kind, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'nowait' clause. + OMPClause *ActOnOpenMPNowaitClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'untied' clause. + OMPClause *ActOnOpenMPUntiedClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'mergeable' clause. + OMPClause *ActOnOpenMPMergeableClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'read' clause. + OMPClause *ActOnOpenMPReadClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'write' clause. + OMPClause *ActOnOpenMPWriteClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'update' clause. 
+ OMPClause *ActOnOpenMPUpdateClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'capture' clause. + OMPClause *ActOnOpenMPCaptureClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'compare' clause. + OMPClause *ActOnOpenMPCompareClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'fail' clause. + OMPClause *ActOnOpenMPFailClause(SourceLocation StartLoc, + SourceLocation EndLoc); + OMPClause *ActOnOpenMPFailClause(OpenMPClauseKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + + /// Called on well-formed 'seq_cst' clause. + OMPClause *ActOnOpenMPSeqCstClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'acq_rel' clause. + OMPClause *ActOnOpenMPAcqRelClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'acquire' clause. + OMPClause *ActOnOpenMPAcquireClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'release' clause. + OMPClause *ActOnOpenMPReleaseClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'relaxed' clause. + OMPClause *ActOnOpenMPRelaxedClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'weak' clause. + OMPClause *ActOnOpenMPWeakClause(SourceLocation StartLoc, + SourceLocation EndLoc); + + /// Called on well-formed 'init' clause. + OMPClause * + ActOnOpenMPInitClause(Expr *InteropVar, OMPInteropInfo &InteropInfo, + SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation VarLoc, SourceLocation EndLoc); + + /// Called on well-formed 'use' clause. + OMPClause *ActOnOpenMPUseClause(Expr *InteropVar, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation VarLoc, SourceLocation EndLoc); + + /// Called on well-formed 'destroy' clause. 
+ OMPClause *ActOnOpenMPDestroyClause(Expr *InteropVar, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation VarLoc, + SourceLocation EndLoc); + /// Called on well-formed 'novariants' clause. + OMPClause *ActOnOpenMPNovariantsClause(Expr *Condition, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'nocontext' clause. + OMPClause *ActOnOpenMPNocontextClause(Expr *Condition, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'filter' clause. + OMPClause *ActOnOpenMPFilterClause(Expr *ThreadID, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'threads' clause. + OMPClause *ActOnOpenMPThreadsClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'simd' clause. + OMPClause *ActOnOpenMPSIMDClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'nogroup' clause. + OMPClause *ActOnOpenMPNogroupClause(SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed 'unified_address' clause. + OMPClause *ActOnOpenMPUnifiedAddressClause(SourceLocation StartLoc, + SourceLocation EndLoc); + + /// Called on well-formed 'unified_shared_memory' clause. + OMPClause *ActOnOpenMPUnifiedSharedMemoryClause(SourceLocation StartLoc, + SourceLocation EndLoc); + + /// Called on well-formed 'reverse_offload' clause. + OMPClause *ActOnOpenMPReverseOffloadClause(SourceLocation StartLoc, + SourceLocation EndLoc); + + /// Called on well-formed 'dynamic_allocators' clause. + OMPClause *ActOnOpenMPDynamicAllocatorsClause(SourceLocation StartLoc, + SourceLocation EndLoc); + + /// Called on well-formed 'atomic_default_mem_order' clause. 
+ OMPClause *ActOnOpenMPAtomicDefaultMemOrderClause( + OpenMPAtomicDefaultMemOrderClauseKind Kind, SourceLocation KindLoc, + SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); + + /// Called on well-formed 'at' clause. + OMPClause *ActOnOpenMPAtClause(OpenMPAtClauseKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + + /// Called on well-formed 'severity' clause. + OMPClause *ActOnOpenMPSeverityClause(OpenMPSeverityClauseKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + + /// Called on well-formed 'message' clause. + /// passing string for message. + OMPClause *ActOnOpenMPMessageClause(Expr *MS, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + + /// Data used for processing a list of variables in OpenMP clauses. + struct OpenMPVarListDataTy final { + Expr *DepModOrTailExpr = nullptr; + Expr *IteratorExpr = nullptr; + SourceLocation ColonLoc; + SourceLocation RLoc; + CXXScopeSpec ReductionOrMapperIdScopeSpec; + DeclarationNameInfo ReductionOrMapperId; + int ExtraModifier = -1; ///< Additional modifier for linear, map, depend or + ///< lastprivate clause. + SmallVector + MapTypeModifiers; + SmallVector + MapTypeModifiersLoc; + SmallVector + MotionModifiers; + SmallVector MotionModifiersLoc; + bool IsMapTypeImplicit = false; + SourceLocation ExtraModifierLoc; + SourceLocation OmpAllMemoryLoc; + SourceLocation + StepModifierLoc; ///< 'step' modifier location for linear clause + }; + + OMPClause *ActOnOpenMPVarListClause(OpenMPClauseKind Kind, + ArrayRef Vars, + const OMPVarListLocTy &Locs, + OpenMPVarListDataTy &Data); + /// Called on well-formed 'inclusive' clause. + OMPClause *ActOnOpenMPInclusiveClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'exclusive' clause. 
+ OMPClause *ActOnOpenMPExclusiveClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'allocate' clause. + OMPClause * + ActOnOpenMPAllocateClause(Expr *Allocator, ArrayRef VarList, + SourceLocation StartLoc, SourceLocation ColonLoc, + SourceLocation LParenLoc, SourceLocation EndLoc); + /// Called on well-formed 'private' clause. + OMPClause *ActOnOpenMPPrivateClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'firstprivate' clause. + OMPClause *ActOnOpenMPFirstprivateClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'lastprivate' clause. + OMPClause *ActOnOpenMPLastprivateClause( + ArrayRef VarList, OpenMPLastprivateModifier LPKind, + SourceLocation LPKindLoc, SourceLocation ColonLoc, + SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); + /// Called on well-formed 'shared' clause. + OMPClause *ActOnOpenMPSharedClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'reduction' clause. + OMPClause *ActOnOpenMPReductionClause( + ArrayRef VarList, OpenMPReductionClauseModifier Modifier, + SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation ModifierLoc, SourceLocation ColonLoc, + SourceLocation EndLoc, CXXScopeSpec &ReductionIdScopeSpec, + const DeclarationNameInfo &ReductionId, + ArrayRef UnresolvedReductions = std::nullopt); + /// Called on well-formed 'task_reduction' clause. 
+ OMPClause *ActOnOpenMPTaskReductionClause( + ArrayRef VarList, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc, + CXXScopeSpec &ReductionIdScopeSpec, + const DeclarationNameInfo &ReductionId, + ArrayRef UnresolvedReductions = std::nullopt); + /// Called on well-formed 'in_reduction' clause. + OMPClause *ActOnOpenMPInReductionClause( + ArrayRef VarList, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc, + CXXScopeSpec &ReductionIdScopeSpec, + const DeclarationNameInfo &ReductionId, + ArrayRef UnresolvedReductions = std::nullopt); + /// Called on well-formed 'linear' clause. + OMPClause *ActOnOpenMPLinearClause( + ArrayRef VarList, Expr *Step, SourceLocation StartLoc, + SourceLocation LParenLoc, OpenMPLinearClauseKind LinKind, + SourceLocation LinLoc, SourceLocation ColonLoc, + SourceLocation StepModifierLoc, SourceLocation EndLoc); + /// Called on well-formed 'aligned' clause. + OMPClause *ActOnOpenMPAlignedClause(ArrayRef VarList, Expr *Alignment, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation ColonLoc, + SourceLocation EndLoc); + /// Called on well-formed 'copyin' clause. + OMPClause *ActOnOpenMPCopyinClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'copyprivate' clause. + OMPClause *ActOnOpenMPCopyprivateClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'flush' pseudo clause. + OMPClause *ActOnOpenMPFlushClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'depobj' pseudo clause. + OMPClause *ActOnOpenMPDepobjClause(Expr *Depobj, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'depend' clause. 
+ OMPClause *ActOnOpenMPDependClause(const OMPDependClause::DependDataTy &Data, + Expr *DepModifier, + ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'device' clause. + OMPClause *ActOnOpenMPDeviceClause(OpenMPDeviceClauseModifier Modifier, + Expr *Device, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation ModifierLoc, + SourceLocation EndLoc); + /// Called on well-formed 'map' clause. + OMPClause *ActOnOpenMPMapClause( + Expr *IteratorModifier, ArrayRef MapTypeModifiers, + ArrayRef MapTypeModifiersLoc, + CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo &MapperId, + OpenMPMapClauseKind MapType, bool IsMapTypeImplicit, + SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef VarList, + const OMPVarListLocTy &Locs, bool NoDiagnose = false, + ArrayRef UnresolvedMappers = std::nullopt); + /// Called on well-formed 'num_teams' clause. + OMPClause *ActOnOpenMPNumTeamsClause(Expr *NumTeams, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'thread_limit' clause. + OMPClause *ActOnOpenMPThreadLimitClause(Expr *ThreadLimit, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'priority' clause. + OMPClause *ActOnOpenMPPriorityClause(Expr *Priority, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// Called on well-formed 'dist_schedule' clause. + OMPClause *ActOnOpenMPDistScheduleClause( + OpenMPDistScheduleClauseKind Kind, Expr *ChunkSize, + SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation KindLoc, + SourceLocation CommaLoc, SourceLocation EndLoc); + /// Called on well-formed 'defaultmap' clause. 
+ OMPClause *ActOnOpenMPDefaultmapClause( + OpenMPDefaultmapClauseModifier M, OpenMPDefaultmapClauseKind Kind, + SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation MLoc, + SourceLocation KindLoc, SourceLocation EndLoc); + /// Called on well-formed 'to' clause. + OMPClause * + ActOnOpenMPToClause(ArrayRef MotionModifiers, + ArrayRef MotionModifiersLoc, + CXXScopeSpec &MapperIdScopeSpec, + DeclarationNameInfo &MapperId, SourceLocation ColonLoc, + ArrayRef VarList, const OMPVarListLocTy &Locs, + ArrayRef UnresolvedMappers = std::nullopt); + /// Called on well-formed 'from' clause. + OMPClause * + ActOnOpenMPFromClause(ArrayRef MotionModifiers, + ArrayRef MotionModifiersLoc, + CXXScopeSpec &MapperIdScopeSpec, + DeclarationNameInfo &MapperId, SourceLocation ColonLoc, + ArrayRef VarList, const OMPVarListLocTy &Locs, + ArrayRef UnresolvedMappers = std::nullopt); + /// Called on well-formed 'use_device_ptr' clause. + OMPClause *ActOnOpenMPUseDevicePtrClause(ArrayRef VarList, + const OMPVarListLocTy &Locs); + /// Called on well-formed 'use_device_addr' clause. + OMPClause *ActOnOpenMPUseDeviceAddrClause(ArrayRef VarList, + const OMPVarListLocTy &Locs); + /// Called on well-formed 'is_device_ptr' clause. + OMPClause *ActOnOpenMPIsDevicePtrClause(ArrayRef VarList, + const OMPVarListLocTy &Locs); + /// Called on well-formed 'has_device_addr' clause. + OMPClause *ActOnOpenMPHasDeviceAddrClause(ArrayRef VarList, + const OMPVarListLocTy &Locs); + /// Called on well-formed 'nontemporal' clause. + OMPClause *ActOnOpenMPNontemporalClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + + /// Data for list of allocators. + struct UsesAllocatorsData { + /// Allocator. + Expr *Allocator = nullptr; + /// Allocator traits. + Expr *AllocatorTraits = nullptr; + /// Locations of '(' and ')' symbols. + SourceLocation LParenLoc, RParenLoc; + }; + /// Called on well-formed 'uses_allocators' clause. 
+ OMPClause *ActOnOpenMPUsesAllocatorClause(SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc, + ArrayRef Data); + /// Called on well-formed 'affinity' clause. + OMPClause *ActOnOpenMPAffinityClause(SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation ColonLoc, + SourceLocation EndLoc, Expr *Modifier, + ArrayRef Locators); + /// Called on a well-formed 'bind' clause. + OMPClause *ActOnOpenMPBindClause(OpenMPBindClauseKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + + /// Called on a well-formed 'ompx_dyn_cgroup_mem' clause. + OMPClause *ActOnOpenMPXDynCGroupMemClause(Expr *Size, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + + /// Called on well-formed 'doacross' clause. + OMPClause * + ActOnOpenMPDoacrossClause(OpenMPDoacrossClauseModifier DepType, + SourceLocation DepLoc, SourceLocation ColonLoc, + ArrayRef VarList, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation EndLoc); + + /// Called on a well-formed 'ompx_attribute' clause. + OMPClause *ActOnOpenMPXAttributeClause(ArrayRef Attrs, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + + /// Called on a well-formed 'ompx_bare' clause. + OMPClause *ActOnOpenMPXBareClause(SourceLocation StartLoc, + SourceLocation EndLoc); + + ExprResult ActOnOMPArraySectionExpr(Expr *Base, SourceLocation LBLoc, + Expr *LowerBound, + SourceLocation ColonLocFirst, + SourceLocation ColonLocSecond, + Expr *Length, Expr *Stride, + SourceLocation RBLoc); + ExprResult ActOnOMPArrayShapingExpr(Expr *Base, SourceLocation LParenLoc, + SourceLocation RParenLoc, + ArrayRef Dims, + ArrayRef Brackets); + + /// Data structure for iterator expression. 
+ struct OMPIteratorData { + IdentifierInfo *DeclIdent = nullptr; + SourceLocation DeclIdentLoc; + ParsedType Type; + OMPIteratorExpr::IteratorRange Range; + SourceLocation AssignLoc; + SourceLocation ColonLoc; + SourceLocation SecColonLoc; + }; + + ExprResult ActOnOMPIteratorExpr(Scope *S, SourceLocation IteratorKwLoc, + SourceLocation LLoc, SourceLocation RLoc, + ArrayRef Data); + +private: + void *VarDataSharingAttributesStack; + + /// Number of nested '#pragma omp declare target' directives. + SmallVector DeclareTargetNesting; + + /// Initialization of data-sharing attributes stack. + void InitDataSharingAttributesStack(); + void DestroyDataSharingAttributesStack(); + + /// Returns OpenMP nesting level for current directive. + unsigned getOpenMPNestingLevel() const; + + /// Adjusts the function scopes index for the target-based regions. + void adjustOpenMPTargetScopeIndex(unsigned &FunctionScopesIndex, + unsigned Level) const; + + /// Returns the number of scopes associated with the construct on the given + /// OpenMP level. + int getNumberOfConstructScopes(unsigned Level) const; + + /// Push new OpenMP function region for non-capturing function. + void pushOpenMPFunctionRegion(); + + /// Pop OpenMP function region for non-capturing function. + void popOpenMPFunctionRegion(const sema::FunctionScopeInfo *OldFSI); + + /// Analyzes and checks a loop nest for use by a loop transformation. + /// + /// \param Kind The loop transformation directive kind. + /// \param NumLoops How many nested loops the directive is expecting. + /// \param AStmt Associated statement of the transformation directive. + /// \param LoopHelpers [out] The loop analysis result. + /// \param Body [out] The body code nested in \p NumLoops loop. + /// \param OriginalInits [out] Collection of statements and declarations that + /// must have been executed/declared before entering the + /// loop. + /// + /// \return Whether there was any error. 
+ bool checkTransformableLoopNest( + OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, + SmallVectorImpl &LoopHelpers, + Stmt *&Body, + SmallVectorImpl, 0>> + &OriginalInits); + + /// Helper to keep information about the current `omp begin/end declare + /// variant` nesting. + struct OMPDeclareVariantScope { + /// The associated OpenMP context selector. + OMPTraitInfo *TI; + + /// The associated OpenMP context selector mangling. + std::string NameSuffix; + + OMPDeclareVariantScope(OMPTraitInfo &TI); + }; + + /// Return the OMPTraitInfo for the surrounding scope, if any. + OMPTraitInfo *getOMPTraitInfoForSurroundingScope() { + return OMPDeclareVariantScopes.empty() ? nullptr + : OMPDeclareVariantScopes.back().TI; + } + + /// The current `omp begin/end declare variant` scopes. + SmallVector OMPDeclareVariantScopes; + + /// The current `omp begin/end assumes` scopes. + SmallVector OMPAssumeScoped; + + /// All `omp assumes` we encountered so far. + SmallVector OMPAssumeGlobal; + + /// OMPD_loop is mapped to OMPD_for, OMPD_distribute or OMPD_simd depending + /// on the parameter of the bind clause. In the methods for the + /// mapped directives, check the parameters of the lastprivate clause. + bool checkLastPrivateForMappedDirectives(ArrayRef Clauses); + /// Depending on the bind clause of OMPD_loop map the directive to new + /// directives. + /// 1) loop bind(parallel) --> OMPD_for + /// 2) loop bind(teams) --> OMPD_distribute + /// 3) loop bind(thread) --> OMPD_simd + /// This is being handled in Sema instead of Codegen because of the need for + /// rigorous semantic checking in the new mapped directives. 
+ bool mapLoopConstruct(llvm::SmallVector &ClausesWithoutBind, + ArrayRef Clauses, + OpenMPBindClauseKind &BindKind, + OpenMPDirectiveKind &Kind, + OpenMPDirectiveKind &PrevMappedDirective, + SourceLocation StartLoc, SourceLocation EndLoc, + const DeclarationNameInfo &DirName, + OpenMPDirectiveKind CancelRegion); +}; + +} // namespace clang + +#endif // LLVM_CLANG_SEMA_SEMAOPENMP_H diff --git a/clang/include/clang/Serialization/ModuleFileExtension.h b/clang/include/clang/Serialization/ModuleFileExtension.h index d7d456c8b5db8e..50ce401516275c 100644 --- a/clang/include/clang/Serialization/ModuleFileExtension.h +++ b/clang/include/clang/Serialization/ModuleFileExtension.h @@ -9,7 +9,6 @@ #ifndef LLVM_CLANG_SERIALIZATION_MODULEFILEEXTENSION_H #define LLVM_CLANG_SERIALIZATION_MODULEFILEEXTENSION_H -#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/Support/ExtensibleRTTI.h" #include "llvm/Support/HashBuilder.h" #include "llvm/Support/MD5.h" diff --git a/clang/include/clang/Serialization/PCHContainerOperations.h b/clang/include/clang/Serialization/PCHContainerOperations.h index ddfddf2dafadf9..c9a7e334ce6eb3 100644 --- a/clang/include/clang/Serialization/PCHContainerOperations.h +++ b/clang/include/clang/Serialization/PCHContainerOperations.h @@ -12,7 +12,7 @@ #include "clang/Basic/Module.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryBufferRef.h" #include namespace llvm { diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 00c4a9f161304a..6b4b51aac41e84 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -398,6 +398,35 @@ bool ByteCodeExprGen::VisitCastExpr(const CastExpr *CE) { return true; } + case CK_VectorSplat: { + assert(!classify(CE->getType())); + assert(classify(SubExpr->getType())); + assert(CE->getType()->isVectorType()); + + if (DiscardResult) + return 
this->discard(SubExpr); + + assert(Initializing); // FIXME: Not always correct. + const auto *VT = CE->getType()->getAs(); + PrimType ElemT = classifyPrim(SubExpr); + unsigned ElemOffset = allocateLocalPrimitive( + SubExpr, ElemT, /*IsConst=*/true, /*IsExtended=*/false); + + if (!this->visit(SubExpr)) + return false; + if (!this->emitSetLocal(ElemT, ElemOffset, CE)) + return false; + + for (unsigned I = 0; I != VT->getNumElements(); ++I) { + if (!this->emitGetLocal(ElemT, ElemOffset, CE)) + return false; + if (!this->emitInitElem(ElemT, I, CE)) + return false; + } + + return true; + } + case CK_ToVoid: return discard(SubExpr); diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h index db0d73ce23f7c4..7e9dc8631fc0d3 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.h +++ b/clang/lib/AST/Interp/ByteCodeExprGen.h @@ -148,13 +148,20 @@ class ByteCodeExprGen : public ConstStmtVisitor, bool>, return Ctx.classify(Ty); } - /// Classifies a known primitive type + /// Classifies a known primitive type. PrimType classifyPrim(QualType Ty) const { if (auto T = classify(Ty)) { return *T; } llvm_unreachable("not a primitive type"); } + /// Classifies a known primitive expression. + PrimType classifyPrim(const Expr *E) const { + if (auto T = classify(E)) + return *T; + llvm_unreachable("not a primitive type"); + } + /// Evaluates an expression and places the result on the stack. If the /// expression is of composite type, a local variable will be created /// and a pointer to said variable will be placed on the stack. 
diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp index dcb512cb514179..9c259c8f9bd0a1 100644 --- a/clang/lib/AST/OpenACCClause.cpp +++ b/clang/lib/AST/OpenACCClause.cpp @@ -48,6 +48,26 @@ OpenACCIfClause::OpenACCIfClause(SourceLocation BeginLoc, "Condition expression type not scalar/dependent"); } +OpenACCSelfClause *OpenACCSelfClause::Create(const ASTContext &C, + SourceLocation BeginLoc, + SourceLocation LParenLoc, + Expr *ConditionExpr, + SourceLocation EndLoc) { + void *Mem = C.Allocate(sizeof(OpenACCSelfClause), alignof(OpenACCSelfClause)); + return new (Mem) + OpenACCSelfClause(BeginLoc, LParenLoc, ConditionExpr, EndLoc); +} + +OpenACCSelfClause::OpenACCSelfClause(SourceLocation BeginLoc, + SourceLocation LParenLoc, + Expr *ConditionExpr, SourceLocation EndLoc) + : OpenACCClauseWithCondition(OpenACCClauseKind::Self, BeginLoc, LParenLoc, + ConditionExpr, EndLoc) { + assert((!ConditionExpr || ConditionExpr->isInstantiationDependent() || + ConditionExpr->getType()->isScalarType()) && + "Condition expression type not scalar/dependent"); +} + OpenACCClause::child_range OpenACCClause::children() { switch (getClauseKind()) { default: @@ -72,3 +92,9 @@ void OpenACCClausePrinter::VisitDefaultClause(const OpenACCDefaultClause &C) { void OpenACCClausePrinter::VisitIfClause(const OpenACCIfClause &C) { OS << "if(" << C.getConditionExpr() << ")"; } + +void OpenACCClausePrinter::VisitSelfClause(const OpenACCSelfClause &C) { + OS << "self"; + if (const Expr *CondExpr = C.getConditionExpr()) + OS << "(" << CondExpr << ")"; +} diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 789e4634bd293b..b26d804c6f079b 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -2491,6 +2491,11 @@ void OpenACCClauseProfiler::VisitIfClause(const OpenACCIfClause &Clause) { "if clause requires a valid condition expr"); Profiler.VisitStmt(Clause.getConditionExpr()); } + +void
OpenACCClauseProfiler::VisitSelfClause(const OpenACCSelfClause &Clause) { + if (Clause.hasConditionExpr()) + Profiler.VisitStmt(Clause.getConditionExpr()); +} } // namespace void StmtProfiler::VisitOpenACCComputeConstruct( diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 688daa64d61974..ff5b3df2d6dfac 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -398,6 +398,7 @@ void TextNodeDumper::Visit(const OpenACCClause *C) { OS << '(' << cast(C)->getDefaultClauseKind() << ')'; break; case OpenACCClauseKind::If: + case OpenACCClauseKind::Self: // The condition expression will be printed as a part of the 'children', // but print 'clause' here so it is clear what is happening from the dump. OS << " clause"; diff --git a/clang/lib/Analysis/ExprMutationAnalyzer.cpp b/clang/lib/Analysis/ExprMutationAnalyzer.cpp index bb042760d297a7..941322be8f870b 100644 --- a/clang/lib/Analysis/ExprMutationAnalyzer.cpp +++ b/clang/lib/Analysis/ExprMutationAnalyzer.cpp @@ -186,9 +186,10 @@ template <> struct NodeID { static constexpr StringRef value = "decl"; }; constexpr StringRef NodeID::value; constexpr StringRef NodeID::value; -template +template const Stmt *tryEachMatch(ArrayRef Matches, - ExprMutationAnalyzer *Analyzer, F Finder) { + ExprMutationAnalyzer::Analyzer *Analyzer, F Finder) { const StringRef ID = NodeID::value; for (const auto &Nodes : Matches) { if (const Stmt *S = (Analyzer->*Finder)(Nodes.getNodeAs(ID))) @@ -199,33 +200,37 @@ const Stmt *tryEachMatch(ArrayRef Matches, } // namespace -const Stmt *ExprMutationAnalyzer::findMutation(const Expr *Exp) { - return findMutationMemoized(Exp, - {&ExprMutationAnalyzer::findDirectMutation, - &ExprMutationAnalyzer::findMemberMutation, - &ExprMutationAnalyzer::findArrayElementMutation, - &ExprMutationAnalyzer::findCastMutation, - &ExprMutationAnalyzer::findRangeLoopMutation, - &ExprMutationAnalyzer::findReferenceMutation, - 
&ExprMutationAnalyzer::findFunctionArgMutation}, - Results); +const Stmt *ExprMutationAnalyzer::Analyzer::findMutation(const Expr *Exp) { + return findMutationMemoized( + Exp, + {&ExprMutationAnalyzer::Analyzer::findDirectMutation, + &ExprMutationAnalyzer::Analyzer::findMemberMutation, + &ExprMutationAnalyzer::Analyzer::findArrayElementMutation, + &ExprMutationAnalyzer::Analyzer::findCastMutation, + &ExprMutationAnalyzer::Analyzer::findRangeLoopMutation, + &ExprMutationAnalyzer::Analyzer::findReferenceMutation, + &ExprMutationAnalyzer::Analyzer::findFunctionArgMutation}, + Memorized.Results); } -const Stmt *ExprMutationAnalyzer::findMutation(const Decl *Dec) { - return tryEachDeclRef(Dec, &ExprMutationAnalyzer::findMutation); +const Stmt *ExprMutationAnalyzer::Analyzer::findMutation(const Decl *Dec) { + return tryEachDeclRef(Dec, &ExprMutationAnalyzer::Analyzer::findMutation); } -const Stmt *ExprMutationAnalyzer::findPointeeMutation(const Expr *Exp) { - return findMutationMemoized(Exp, {/*TODO*/}, PointeeResults); +const Stmt * +ExprMutationAnalyzer::Analyzer::findPointeeMutation(const Expr *Exp) { + return findMutationMemoized(Exp, {/*TODO*/}, Memorized.PointeeResults); } -const Stmt *ExprMutationAnalyzer::findPointeeMutation(const Decl *Dec) { - return tryEachDeclRef(Dec, &ExprMutationAnalyzer::findPointeeMutation); +const Stmt * +ExprMutationAnalyzer::Analyzer::findPointeeMutation(const Decl *Dec) { + return tryEachDeclRef(Dec, + &ExprMutationAnalyzer::Analyzer::findPointeeMutation); } -const Stmt *ExprMutationAnalyzer::findMutationMemoized( +const Stmt *ExprMutationAnalyzer::Analyzer::findMutationMemoized( const Expr *Exp, llvm::ArrayRef Finders, - ResultMap &MemoizedResults) { + Memoized::ResultMap &MemoizedResults) { const auto Memoized = MemoizedResults.find(Exp); if (Memoized != MemoizedResults.end()) return Memoized->second; @@ -241,8 +246,9 @@ const Stmt *ExprMutationAnalyzer::findMutationMemoized( return MemoizedResults[Exp] = nullptr; } -const Stmt 
*ExprMutationAnalyzer::tryEachDeclRef(const Decl *Dec, - MutationFinder Finder) { +const Stmt * +ExprMutationAnalyzer::Analyzer::tryEachDeclRef(const Decl *Dec, + MutationFinder Finder) { const auto Refs = match( findAll( declRefExpr(to( @@ -261,8 +267,9 @@ const Stmt *ExprMutationAnalyzer::tryEachDeclRef(const Decl *Dec, return nullptr; } -bool ExprMutationAnalyzer::isUnevaluated(const Stmt *Exp, const Stmt &Stm, - ASTContext &Context) { +bool ExprMutationAnalyzer::Analyzer::isUnevaluated(const Stmt *Exp, + const Stmt &Stm, + ASTContext &Context) { return selectFirst( NodeID::value, match( @@ -293,33 +300,36 @@ bool ExprMutationAnalyzer::isUnevaluated(const Stmt *Exp, const Stmt &Stm, Stm, Context)) != nullptr; } -bool ExprMutationAnalyzer::isUnevaluated(const Expr *Exp) { +bool ExprMutationAnalyzer::Analyzer::isUnevaluated(const Expr *Exp) { return isUnevaluated(Exp, Stm, Context); } const Stmt * -ExprMutationAnalyzer::findExprMutation(ArrayRef Matches) { - return tryEachMatch(Matches, this, &ExprMutationAnalyzer::findMutation); +ExprMutationAnalyzer::Analyzer::findExprMutation(ArrayRef Matches) { + return tryEachMatch(Matches, this, + &ExprMutationAnalyzer::Analyzer::findMutation); } const Stmt * -ExprMutationAnalyzer::findDeclMutation(ArrayRef Matches) { - return tryEachMatch(Matches, this, &ExprMutationAnalyzer::findMutation); +ExprMutationAnalyzer::Analyzer::findDeclMutation(ArrayRef Matches) { + return tryEachMatch(Matches, this, + &ExprMutationAnalyzer::Analyzer::findMutation); } -const Stmt *ExprMutationAnalyzer::findExprPointeeMutation( +const Stmt *ExprMutationAnalyzer::Analyzer::findExprPointeeMutation( ArrayRef Matches) { - return tryEachMatch(Matches, this, - &ExprMutationAnalyzer::findPointeeMutation); + return tryEachMatch( + Matches, this, &ExprMutationAnalyzer::Analyzer::findPointeeMutation); } -const Stmt *ExprMutationAnalyzer::findDeclPointeeMutation( +const Stmt *ExprMutationAnalyzer::Analyzer::findDeclPointeeMutation( ArrayRef Matches) { - 
return tryEachMatch(Matches, this, - &ExprMutationAnalyzer::findPointeeMutation); + return tryEachMatch( + Matches, this, &ExprMutationAnalyzer::Analyzer::findPointeeMutation); } -const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { +const Stmt * +ExprMutationAnalyzer::Analyzer::findDirectMutation(const Expr *Exp) { // LHS of any assignment operators. const auto AsAssignmentLhs = binaryOperator(isAssignmentOperator(), hasLHS(canResolveToExpr(Exp))); @@ -426,7 +436,7 @@ const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { const auto AsNonConstRefReturn = returnStmt(hasReturnValue(canResolveToExpr(Exp))); - // It is used as a non-const-reference for initalizing a range-for loop. + // It is used as a non-const-reference for initializing a range-for loop. const auto AsNonConstRefRangeInit = cxxForRangeStmt(hasRangeInit(declRefExpr( allOf(canResolveToExpr(Exp), hasType(nonConstReferenceType()))))); @@ -443,7 +453,8 @@ const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { return selectFirst("stmt", Matches); } -const Stmt *ExprMutationAnalyzer::findMemberMutation(const Expr *Exp) { +const Stmt * +ExprMutationAnalyzer::Analyzer::findMemberMutation(const Expr *Exp) { // Check whether any member of 'Exp' is mutated. const auto MemberExprs = match( findAll(expr(anyOf(memberExpr(hasObjectExpression(canResolveToExpr(Exp))), @@ -456,7 +467,8 @@ const Stmt *ExprMutationAnalyzer::findMemberMutation(const Expr *Exp) { return findExprMutation(MemberExprs); } -const Stmt *ExprMutationAnalyzer::findArrayElementMutation(const Expr *Exp) { +const Stmt * +ExprMutationAnalyzer::Analyzer::findArrayElementMutation(const Expr *Exp) { // Check whether any element of an array is mutated. 
const auto SubscriptExprs = match( findAll(arraySubscriptExpr( @@ -469,7 +481,7 @@ const Stmt *ExprMutationAnalyzer::findArrayElementMutation(const Expr *Exp) { return findExprMutation(SubscriptExprs); } -const Stmt *ExprMutationAnalyzer::findCastMutation(const Expr *Exp) { +const Stmt *ExprMutationAnalyzer::Analyzer::findCastMutation(const Expr *Exp) { // If the 'Exp' is explicitly casted to a non-const reference type the // 'Exp' is considered to be modified. const auto ExplicitCast = @@ -504,7 +516,8 @@ const Stmt *ExprMutationAnalyzer::findCastMutation(const Expr *Exp) { return findExprMutation(Calls); } -const Stmt *ExprMutationAnalyzer::findRangeLoopMutation(const Expr *Exp) { +const Stmt * +ExprMutationAnalyzer::Analyzer::findRangeLoopMutation(const Expr *Exp) { // Keep the ordering for the specific initialization matches to happen first, // because it is cheaper to match all potential modifications of the loop // variable. @@ -567,7 +580,8 @@ const Stmt *ExprMutationAnalyzer::findRangeLoopMutation(const Expr *Exp) { return findDeclMutation(LoopVars); } -const Stmt *ExprMutationAnalyzer::findReferenceMutation(const Expr *Exp) { +const Stmt * +ExprMutationAnalyzer::Analyzer::findReferenceMutation(const Expr *Exp) { // Follow non-const reference returned by `operator*()` of move-only classes. // These are typically smart pointers with unique ownership so we treat // mutation of pointee as mutation of the smart pointer itself. 
@@ -599,7 +613,8 @@ const Stmt *ExprMutationAnalyzer::findReferenceMutation(const Expr *Exp) { return findDeclMutation(Refs); } -const Stmt *ExprMutationAnalyzer::findFunctionArgMutation(const Expr *Exp) { +const Stmt * +ExprMutationAnalyzer::Analyzer::findFunctionArgMutation(const Expr *Exp) { const auto NonConstRefParam = forEachArgumentWithParam( canResolveToExpr(Exp), parmVarDecl(hasType(nonConstReferenceType())).bind("parm")); @@ -637,10 +652,9 @@ const Stmt *ExprMutationAnalyzer::findFunctionArgMutation(const Expr *Exp) { if (const auto *RefType = ParmType->getAs()) { if (!RefType->getPointeeType().getQualifiers() && RefType->getPointeeType()->getAs()) { - std::unique_ptr &Analyzer = - FuncParmAnalyzer[Func]; - if (!Analyzer) - Analyzer.reset(new FunctionParmMutationAnalyzer(*Func, Context)); + FunctionParmMutationAnalyzer *Analyzer = + FunctionParmMutationAnalyzer::getFunctionParmMutationAnalyzer( + *Func, Context, Memorized); if (Analyzer->findMutation(Parm)) return Exp; continue; @@ -653,13 +667,15 @@ const Stmt *ExprMutationAnalyzer::findFunctionArgMutation(const Expr *Exp) { } FunctionParmMutationAnalyzer::FunctionParmMutationAnalyzer( - const FunctionDecl &Func, ASTContext &Context) - : BodyAnalyzer(*Func.getBody(), Context) { + const FunctionDecl &Func, ASTContext &Context, + ExprMutationAnalyzer::Memoized &Memorized) + : BodyAnalyzer(*Func.getBody(), Context, Memorized) { if (const auto *Ctor = dyn_cast(&Func)) { // CXXCtorInitializer might also mutate Param but they're not part of // function body, check them eagerly here since they're typically trivial. 
for (const CXXCtorInitializer *Init : Ctor->inits()) { - ExprMutationAnalyzer InitAnalyzer(*Init->getInit(), Context); + ExprMutationAnalyzer::Analyzer InitAnalyzer(*Init->getInit(), Context, + Memorized); for (const ParmVarDecl *Parm : Ctor->parameters()) { if (Results.contains(Parm)) continue; @@ -675,11 +691,14 @@ FunctionParmMutationAnalyzer::findMutation(const ParmVarDecl *Parm) { const auto Memoized = Results.find(Parm); if (Memoized != Results.end()) return Memoized->second; - + // To handle call A -> call B -> call A. Assume parameters of A is not mutated + // before analyzing parameters of A. Then when analyzing the second "call A", + // FunctionParmMutationAnalyzer can use this memoized value to avoid infinite + // recursion. + Results[Parm] = nullptr; if (const Stmt *S = BodyAnalyzer.findMutation(Parm)) return Results[Parm] = S; - - return Results[Parm] = nullptr; + return Results[Parm]; } } // namespace clang diff --git a/clang/lib/Analysis/FlowSensitive/ASTOps.cpp b/clang/lib/Analysis/FlowSensitive/ASTOps.cpp new file mode 100644 index 00000000000000..75188aef4d1a43 --- /dev/null +++ b/clang/lib/Analysis/FlowSensitive/ASTOps.cpp @@ -0,0 +1,249 @@ +//===-- ASTOps.cc -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Operations on AST nodes that are used in flow-sensitive analysis. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/ASTOps.h" +#include "clang/AST/ComputeDependence.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/FlowSensitive/StorageLocation.h" +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include +#include +#include + +#define DEBUG_TYPE "dataflow" + +namespace clang::dataflow { + +const Expr &ignoreCFGOmittedNodes(const Expr &E) { + const Expr *Current = &E; + if (auto *EWC = dyn_cast(Current)) { + Current = EWC->getSubExpr(); + assert(Current != nullptr); + } + Current = Current->IgnoreParens(); + assert(Current != nullptr); + return *Current; +} + +const Stmt &ignoreCFGOmittedNodes(const Stmt &S) { + if (auto *E = dyn_cast(&S)) + return ignoreCFGOmittedNodes(*E); + return S; +} + +// FIXME: Does not precisely handle non-virtual diamond inheritance. A single +// field decl will be modeled for all instances of the inherited field. +static void getFieldsFromClassHierarchy(QualType Type, FieldSet &Fields) { + if (Type->isIncompleteType() || Type->isDependentType() || + !Type->isRecordType()) + return; + + for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) + Fields.insert(Field); + if (auto *CXXRecord = Type->getAsCXXRecordDecl()) + for (const CXXBaseSpecifier &Base : CXXRecord->bases()) + getFieldsFromClassHierarchy(Base.getType(), Fields); +} + +/// Gets the set of all fields in the type. 
+FieldSet getObjectFields(QualType Type) { + FieldSet Fields; + getFieldsFromClassHierarchy(Type, Fields); + return Fields; +} + +bool containsSameFields(const FieldSet &Fields, + const RecordStorageLocation::FieldToLoc &FieldLocs) { + if (Fields.size() != FieldLocs.size()) + return false; + for ([[maybe_unused]] auto [Field, Loc] : FieldLocs) + if (!Fields.contains(cast_or_null(Field))) + return false; + return true; +} + +/// Returns the fields of a `RecordDecl` that are initialized by an +/// `InitListExpr`, in the order in which they appear in +/// `InitListExpr::inits()`. +/// `Init->getType()` must be a record type. +static std::vector +getFieldsForInitListExpr(const InitListExpr *InitList) { + const RecordDecl *RD = InitList->getType()->getAsRecordDecl(); + assert(RD != nullptr); + + std::vector Fields; + + if (InitList->getType()->isUnionType()) { + Fields.push_back(InitList->getInitializedFieldInUnion()); + return Fields; + } + + // Unnamed bitfields are only used for padding and do not appear in + // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s + // field list, and we thus need to remove them before mapping inits to + // fields to avoid mapping inits to the wrongs fields. + llvm::copy_if( + RD->fields(), std::back_inserter(Fields), + [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); }); + return Fields; +} + +RecordInitListHelper::RecordInitListHelper(const InitListExpr *InitList) { + auto *RD = InitList->getType()->getAsCXXRecordDecl(); + assert(RD != nullptr); + + std::vector Fields = getFieldsForInitListExpr(InitList); + ArrayRef Inits = InitList->inits(); + + // Unions initialized with an empty initializer list need special treatment. + // For structs/classes initialized with an empty initializer list, Clang + // puts `ImplicitValueInitExpr`s in `InitListExpr::inits()`, but for unions, + // it doesn't do this -- so we create an `ImplicitValueInitExpr` ourselves. 
+ SmallVector InitsForUnion; + if (InitList->getType()->isUnionType() && Inits.empty()) { + assert(Fields.size() == 1); + ImplicitValueInitForUnion.emplace(Fields.front()->getType()); + InitsForUnion.push_back(&*ImplicitValueInitForUnion); + Inits = InitsForUnion; + } + + size_t InitIdx = 0; + + assert(Fields.size() + RD->getNumBases() == Inits.size()); + for (const CXXBaseSpecifier &Base : RD->bases()) { + assert(InitIdx < Inits.size()); + Expr *Init = Inits[InitIdx++]; + BaseInits.emplace_back(&Base, Init); + } + + assert(Fields.size() == Inits.size() - InitIdx); + for (const FieldDecl *Field : Fields) { + assert(InitIdx < Inits.size()); + Expr *Init = Inits[InitIdx++]; + FieldInits.emplace_back(Field, Init); + } +} + +static void insertIfGlobal(const Decl &D, + llvm::DenseSet &Globals) { + if (auto *V = dyn_cast(&D)) + if (V->hasGlobalStorage()) + Globals.insert(V); +} + +static void insertIfFunction(const Decl &D, + llvm::DenseSet &Funcs) { + if (auto *FD = dyn_cast(&D)) + Funcs.insert(FD); +} + +static MemberExpr *getMemberForAccessor(const CXXMemberCallExpr &C) { + // Use getCalleeDecl instead of getMethodDecl in order to handle + // pointer-to-member calls. + const auto *MethodDecl = dyn_cast_or_null(C.getCalleeDecl()); + if (!MethodDecl) + return nullptr; + auto *Body = dyn_cast_or_null(MethodDecl->getBody()); + if (!Body || Body->size() != 1) + return nullptr; + if (auto *RS = dyn_cast(*Body->body_begin())) + if (auto *Return = RS->getRetValue()) + return dyn_cast(Return->IgnoreParenImpCasts()); + return nullptr; +} + +static void getReferencedDecls(const Decl &D, ReferencedDecls &Referenced) { + insertIfGlobal(D, Referenced.Globals); + insertIfFunction(D, Referenced.Functions); + if (const auto *Decomp = dyn_cast(&D)) + for (const auto *B : Decomp->bindings()) + if (auto *ME = dyn_cast_or_null(B->getBinding())) + // FIXME: should we be using `E->getFoundDecl()`? 
+ if (const auto *FD = dyn_cast(ME->getMemberDecl())) + Referenced.Fields.insert(FD); +} + +/// Traverses `S` and inserts into `Referenced` any declarations that are +/// declared in or referenced from sub-statements. +static void getReferencedDecls(const Stmt &S, ReferencedDecls &Referenced) { + for (auto *Child : S.children()) + if (Child != nullptr) + getReferencedDecls(*Child, Referenced); + if (const auto *DefaultArg = dyn_cast(&S)) + getReferencedDecls(*DefaultArg->getExpr(), Referenced); + if (const auto *DefaultInit = dyn_cast(&S)) + getReferencedDecls(*DefaultInit->getExpr(), Referenced); + + if (auto *DS = dyn_cast(&S)) { + if (DS->isSingleDecl()) + getReferencedDecls(*DS->getSingleDecl(), Referenced); + else + for (auto *D : DS->getDeclGroup()) + getReferencedDecls(*D, Referenced); + } else if (auto *E = dyn_cast(&S)) { + insertIfGlobal(*E->getDecl(), Referenced.Globals); + insertIfFunction(*E->getDecl(), Referenced.Functions); + } else if (const auto *C = dyn_cast(&S)) { + // If this is a method that returns a member variable but does nothing else, + // model the field of the return value. + if (MemberExpr *E = getMemberForAccessor(*C)) + if (const auto *FD = dyn_cast(E->getMemberDecl())) + Referenced.Fields.insert(FD); + } else if (auto *E = dyn_cast(&S)) { + // FIXME: should we be using `E->getFoundDecl()`? + const ValueDecl *VD = E->getMemberDecl(); + insertIfGlobal(*VD, Referenced.Globals); + insertIfFunction(*VD, Referenced.Functions); + if (const auto *FD = dyn_cast(VD)) + Referenced.Fields.insert(FD); + } else if (auto *InitList = dyn_cast(&S)) { + if (InitList->getType()->isRecordType()) + for (const auto *FD : getFieldsForInitListExpr(InitList)) + Referenced.Fields.insert(FD); + } +} + +ReferencedDecls getReferencedDecls(const FunctionDecl &FD) { + ReferencedDecls Result; + // Look for global variable and field references in the + // constructor-initializers. 
+ if (const auto *CtorDecl = dyn_cast(&FD)) { + for (const auto *Init : CtorDecl->inits()) { + if (Init->isMemberInitializer()) { + Result.Fields.insert(Init->getMember()); + } else if (Init->isIndirectMemberInitializer()) { + for (const auto *I : Init->getIndirectMember()->chain()) + Result.Fields.insert(cast(I)); + } + const Expr *E = Init->getInit(); + assert(E != nullptr); + getReferencedDecls(*E, Result); + } + // Add all fields mentioned in default member initializers. + for (const FieldDecl *F : CtorDecl->getParent()->fields()) + if (const auto *I = F->getInClassInitializer()) + getReferencedDecls(*I, Result); + } + getReferencedDecls(*FD.getBody(), Result); + + return Result; +} + +} // namespace clang::dataflow diff --git a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt index a3b5d9adc24bda..6631fe27f3d901 100644 --- a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt +++ b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt @@ -1,6 +1,7 @@ add_clang_library(clangAnalysisFlowSensitive AdornedCFG.cpp Arena.cpp + ASTOps.cpp DataflowAnalysisContext.cpp DataflowEnvironment.cpp Formula.cpp diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp index d520539dd25355..e94fd39c45dc15 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp @@ -14,6 +14,7 @@ #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/AST/ExprCXX.h" +#include "clang/Analysis/FlowSensitive/ASTOps.h" #include "clang/Analysis/FlowSensitive/DebugSupport.h" #include "clang/Analysis/FlowSensitive/Formula.h" #include "clang/Analysis/FlowSensitive/Logger.h" @@ -359,55 +360,3 @@ DataflowAnalysisContext::~DataflowAnalysisContext() = default; } // namespace dataflow } // namespace clang - -using namespace clang; - -const Expr 
&clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) { - const Expr *Current = &E; - if (auto *EWC = dyn_cast(Current)) { - Current = EWC->getSubExpr(); - assert(Current != nullptr); - } - Current = Current->IgnoreParens(); - assert(Current != nullptr); - return *Current; -} - -const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) { - if (auto *E = dyn_cast(&S)) - return ignoreCFGOmittedNodes(*E); - return S; -} - -// FIXME: Does not precisely handle non-virtual diamond inheritance. A single -// field decl will be modeled for all instances of the inherited field. -static void getFieldsFromClassHierarchy(QualType Type, - clang::dataflow::FieldSet &Fields) { - if (Type->isIncompleteType() || Type->isDependentType() || - !Type->isRecordType()) - return; - - for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) - Fields.insert(Field); - if (auto *CXXRecord = Type->getAsCXXRecordDecl()) - for (const CXXBaseSpecifier &Base : CXXRecord->bases()) - getFieldsFromClassHierarchy(Base.getType(), Fields); -} - -/// Gets the set of all fields in the type. 
-clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) { - FieldSet Fields; - getFieldsFromClassHierarchy(Type, Fields); - return Fields; -} - -bool clang::dataflow::containsSameFields( - const clang::dataflow::FieldSet &Fields, - const clang::dataflow::RecordStorageLocation::FieldToLoc &FieldLocs) { - if (Fields.size() != FieldLocs.size()) - return false; - for ([[maybe_unused]] auto [Field, Loc] : FieldLocs) - if (!Fields.contains(cast_or_null(Field))) - return false; - return true; -} diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index ee2581143e1141..f2b4a67e5bc97b 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -17,6 +17,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Type.h" +#include "clang/Analysis/FlowSensitive/ASTOps.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/DenseMap.h" @@ -304,93 +305,6 @@ widenKeyToValueMap(const llvm::MapVector &CurMap, return WidenedMap; } -/// Initializes a global storage value. -static void insertIfGlobal(const Decl &D, - llvm::DenseSet &Vars) { - if (auto *V = dyn_cast(&D)) - if (V->hasGlobalStorage()) - Vars.insert(V); -} - -static void insertIfFunction(const Decl &D, - llvm::DenseSet &Funcs) { - if (auto *FD = dyn_cast(&D)) - Funcs.insert(FD); -} - -static MemberExpr *getMemberForAccessor(const CXXMemberCallExpr &C) { - // Use getCalleeDecl instead of getMethodDecl in order to handle - // pointer-to-member calls. 
- const auto *MethodDecl = dyn_cast_or_null(C.getCalleeDecl()); - if (!MethodDecl) - return nullptr; - auto *Body = dyn_cast_or_null(MethodDecl->getBody()); - if (!Body || Body->size() != 1) - return nullptr; - if (auto *RS = dyn_cast(*Body->body_begin())) - if (auto *Return = RS->getRetValue()) - return dyn_cast(Return->IgnoreParenImpCasts()); - return nullptr; -} - -static void -getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields, - llvm::DenseSet &Vars, - llvm::DenseSet &Funcs) { - insertIfGlobal(D, Vars); - insertIfFunction(D, Funcs); - if (const auto *Decomp = dyn_cast(&D)) - for (const auto *B : Decomp->bindings()) - if (auto *ME = dyn_cast_or_null(B->getBinding())) - // FIXME: should we be using `E->getFoundDecl()`? - if (const auto *FD = dyn_cast(ME->getMemberDecl())) - Fields.insert(FD); -} - -/// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields, -/// global variables and functions that are declared in or referenced from -/// sub-statements. -static void -getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, - llvm::DenseSet &Vars, - llvm::DenseSet &Funcs) { - for (auto *Child : S.children()) - if (Child != nullptr) - getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs); - if (const auto *DefaultArg = dyn_cast(&S)) - getFieldsGlobalsAndFuncs(*DefaultArg->getExpr(), Fields, Vars, Funcs); - if (const auto *DefaultInit = dyn_cast(&S)) - getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs); - - if (auto *DS = dyn_cast(&S)) { - if (DS->isSingleDecl()) - getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs); - else - for (auto *D : DS->getDeclGroup()) - getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs); - } else if (auto *E = dyn_cast(&S)) { - insertIfGlobal(*E->getDecl(), Vars); - insertIfFunction(*E->getDecl(), Funcs); - } else if (const auto *C = dyn_cast(&S)) { - // If this is a method that returns a member variable but does nothing else, - // model the field of the return value. 
- if (MemberExpr *E = getMemberForAccessor(*C)) - if (const auto *FD = dyn_cast(E->getMemberDecl())) - Fields.insert(FD); - } else if (auto *E = dyn_cast(&S)) { - // FIXME: should we be using `E->getFoundDecl()`? - const ValueDecl *VD = E->getMemberDecl(); - insertIfGlobal(*VD, Vars); - insertIfFunction(*VD, Funcs); - if (const auto *FD = dyn_cast(VD)) - Fields.insert(FD); - } else if (auto *InitList = dyn_cast(&S)) { - if (InitList->getType()->isRecordType()) - for (const auto *FD : getFieldsForInitListExpr(InitList)) - Fields.insert(FD); - } -} - namespace { // Visitor that builds a map from record prvalues to result objects. @@ -556,6 +470,11 @@ class ResultObjectVisitor : public RecursiveASTVisitor { return; } + if (auto *SE = dyn_cast(E)) { + PropagateResultObject(cast(SE->getSubStmt()->body_back()), Loc); + return; + } + // All other expression nodes that propagate a record prvalue should have // exactly one child. SmallVector Children(E->child_begin(), E->child_end()); @@ -653,36 +572,13 @@ void Environment::initialize() { void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { assert(FuncDecl->doesThisDeclarationHaveABody()); - FieldSet Fields; - llvm::DenseSet Vars; - llvm::DenseSet Funcs; - - // Look for global variable and field references in the - // constructor-initializers. - if (const auto *CtorDecl = dyn_cast(FuncDecl)) { - for (const auto *Init : CtorDecl->inits()) { - if (Init->isMemberInitializer()) { - Fields.insert(Init->getMember()); - } else if (Init->isIndirectMemberInitializer()) { - for (const auto *I : Init->getIndirectMember()->chain()) - Fields.insert(cast(I)); - } - const Expr *E = Init->getInit(); - assert(E != nullptr); - getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs); - } - // Add all fields mentioned in default member initializers. 
- for (const FieldDecl *F : CtorDecl->getParent()->fields()) - if (const auto *I = F->getInClassInitializer()) - getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs); - } - getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs); + ReferencedDecls Referenced = getReferencedDecls(*FuncDecl); // These have to be added before the lines that follow to ensure that // `create*` work correctly for structs. - DACtx->addModeledFields(Fields); + DACtx->addModeledFields(Referenced.Fields); - for (const VarDecl *D : Vars) { + for (const VarDecl *D : Referenced.Globals) { if (getStorageLocation(*D) != nullptr) continue; @@ -694,7 +590,7 @@ void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { setStorageLocation(*D, createObject(*D, nullptr)); } - for (const FunctionDecl *FD : Funcs) { + for (const FunctionDecl *FD : Referenced.Functions) { if (getStorageLocation(*FD) != nullptr) continue; auto &Loc = createStorageLocation(*FD); @@ -1354,64 +1250,6 @@ RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME, return Env.get(*Base); } -std::vector -getFieldsForInitListExpr(const InitListExpr *InitList) { - const RecordDecl *RD = InitList->getType()->getAsRecordDecl(); - assert(RD != nullptr); - - std::vector Fields; - - if (InitList->getType()->isUnionType()) { - Fields.push_back(InitList->getInitializedFieldInUnion()); - return Fields; - } - - // Unnamed bitfields are only used for padding and do not appear in - // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s - // field list, and we thus need to remove them before mapping inits to - // fields to avoid mapping inits to the wrongs fields. 
- llvm::copy_if( - RD->fields(), std::back_inserter(Fields), - [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); }); - return Fields; -} - -RecordInitListHelper::RecordInitListHelper(const InitListExpr *InitList) { - auto *RD = InitList->getType()->getAsCXXRecordDecl(); - assert(RD != nullptr); - - std::vector Fields = getFieldsForInitListExpr(InitList); - ArrayRef Inits = InitList->inits(); - - // Unions initialized with an empty initializer list need special treatment. - // For structs/classes initialized with an empty initializer list, Clang - // puts `ImplicitValueInitExpr`s in `InitListExpr::inits()`, but for unions, - // it doesn't do this -- so we create an `ImplicitValueInitExpr` ourselves. - SmallVector InitsForUnion; - if (InitList->getType()->isUnionType() && Inits.empty()) { - assert(Fields.size() == 1); - ImplicitValueInitForUnion.emplace(Fields.front()->getType()); - InitsForUnion.push_back(&*ImplicitValueInitForUnion); - Inits = InitsForUnion; - } - - size_t InitIdx = 0; - - assert(Fields.size() + RD->getNumBases() == Inits.size()); - for (const CXXBaseSpecifier &Base : RD->bases()) { - assert(InitIdx < Inits.size()); - Expr *Init = Inits[InitIdx++]; - BaseInits.emplace_back(&Base, Init); - } - - assert(Fields.size() == Inits.size() - InitIdx); - for (const FieldDecl *Field : Fields) { - assert(InitIdx < Inits.size()); - Expr *Init = Inits[InitIdx++]; - FieldInits.emplace_back(Field, Init); - } -} - RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) { auto &NewVal = Env.create(Loc); Env.setValue(Loc, NewVal); diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp index 88a9c0eccbebc0..1e034771014eaa 100644 --- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -20,7 +20,9 @@ #include "clang/AST/OperationKinds.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" +#include 
"clang/Analysis/FlowSensitive/ASTOps.h" #include "clang/Analysis/FlowSensitive/AdornedCFG.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/Analysis/FlowSensitive/NoopAnalysis.h" #include "clang/Analysis/FlowSensitive/RecordOps.h" diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index e03fe1b6830043..c42e70d5b95ac1 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -1114,7 +1114,7 @@ class UPCAddressofArraySubscriptGadget : public FixableGadget { virtual DeclUseList getClaimedVarUseSites() const override { const auto *ArraySubst = cast(Node->getSubExpr()); const auto *DRE = - cast(ArraySubst->getBase()->IgnoreImpCasts()); + cast(ArraySubst->getBase()->IgnoreParenImpCasts()); return {DRE}; } }; diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 1b1da6a1356f2c..113483db5729b0 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -86,7 +86,7 @@ static const CudaArchToStringMap arch_names[] = { // clang-format off {CudaArch::UNUSED, "", ""}, SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi - SM(30), SM(32), SM(35), SM(37), // Kepler + SM(30), {CudaArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler SM(50), SM(52), SM(53), // Maxwell SM(60), SM(61), SM(62), // Pascal SM(70), SM(72), // Volta @@ -186,7 +186,7 @@ CudaVersion MinVersionForCudaArch(CudaArch A) { case CudaArch::SM_20: case CudaArch::SM_21: case CudaArch::SM_30: - case CudaArch::SM_32: + case CudaArch::SM_32_: case CudaArch::SM_35: case CudaArch::SM_37: case CudaArch::SM_50: @@ -231,7 +231,7 @@ CudaVersion MaxVersionForCudaArch(CudaArch A) { case CudaArch::SM_21: return CudaVersion::CUDA_80; case CudaArch::SM_30: - case CudaArch::SM_32: + case CudaArch::SM_32_: return CudaVersion::CUDA_102; case CudaArch::SM_35: case CudaArch::SM_37: diff --git 
a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index b47c399fef6042..8ad9e6e5f58916 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -239,7 +239,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, return "210"; case CudaArch::SM_30: return "300"; - case CudaArch::SM_32: + case CudaArch::SM_32_: return "320"; case CudaArch::SM_35: return "350"; diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index f3d705e1551fe2..a7ce9dda34bdde 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -353,7 +353,8 @@ bool RISCVTargetInfo::handleTargetFeatures(std::vector &Features, if (ISAInfo->hasExtension("zfh") || ISAInfo->hasExtension("zhinx")) HasLegalHalfType = true; - FastUnalignedAccess = llvm::is_contained(Features, "+fast-unaligned-access"); + FastUnalignedAccess = llvm::is_contained(Features, "+unaligned-scalar-mem") && + llvm::is_contained(Features, "+unaligned-vector-mem"); if (llvm::is_contained(Features, "+experimental")) HasExperimental = true; diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index 9a4a8b501460b6..44265445ff004b 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -315,7 +315,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRVTargetInfo : public BaseSPIRVTargetInfo { // SPIR-V IDs are represented with a single 32-bit word. 
SizeType = TargetInfo::UnsignedInt; resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-" - "v96:128-v192:256-v256:256-v512:512-v1024:1024"); + "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"); } void getTargetDefines(const LangOptions &Opts, diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 9f95697f284c40..a05874e63c73c2 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3436,6 +3436,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD}); return RValue::get(nullptr); } + case Builtin::BI__builtin_allow_runtime_check: { + StringRef Kind = + cast(E->getArg(0)->IgnoreParenCasts())->getString(); + LLVMContext &Ctx = CGM.getLLVMContext(); + llvm::Value *Allow = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check), + llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind))); + return RValue::get(Allow); + } case Builtin::BI__arithmetic_fence: { // Create the builtin call if FastMath is selected, and the target // supports the builtin, otherwise just return the argument. diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 0c860a3ccbd2f0..3f5463a9a70e9d 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -4124,8 +4124,7 @@ static bool isProvablyNull(llvm::Value *addr) { } static bool isProvablyNonNull(Address Addr, CodeGenFunction &CGF) { - return llvm::isKnownNonZero(Addr.getBasePointer(), /*Depth=*/0, - CGF.CGM.getDataLayout()); + return llvm::isKnownNonZero(Addr.getBasePointer(), CGF.CGM.getDataLayout()); } /// Emit the actual writing-back of a writeback. @@ -4694,11 +4693,11 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, AggValueSlot Slot = args.isUsingInAlloca() ? 
createPlaceholderSlot(*this, type) : CreateAggTemp(type, "agg.tmp"); - bool DestroyedInCallee = true, NeedsCleanup = true; + bool DestroyedInCallee = true, NeedsEHCleanup = true; if (const auto *RD = type->getAsCXXRecordDecl()) DestroyedInCallee = RD->hasNonTrivialDestructor(); else - NeedsCleanup = type.isDestructedType(); + NeedsEHCleanup = needsEHCleanup(type.isDestructedType()); if (DestroyedInCallee) Slot.setExternallyDestructed(); @@ -4707,15 +4706,14 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, RValue RV = Slot.asRValue(); args.add(RV, type); - if (DestroyedInCallee && NeedsCleanup) { + if (DestroyedInCallee && NeedsEHCleanup) { // Create a no-op GEP between the placeholder and the cleanup so we can // RAUW it successfully. It also serves as a marker of the first // instruction where the cleanup is active. - pushFullExprCleanup(NormalAndEHCleanup, - Slot.getAddress(), type); + pushFullExprCleanup(EHCleanup, Slot.getAddress(), + type); // This unreachable is a temporary marker which will be removed later. - llvm::Instruction *IsActive = - Builder.CreateFlagLoad(llvm::Constant::getNullValue(Int8PtrTy)); + llvm::Instruction *IsActive = Builder.CreateUnreachable(); args.addArgCleanupDeactivation(EHStack.stable_begin(), IsActive); } return; diff --git a/clang/lib/CodeGen/CGCleanup.cpp b/clang/lib/CodeGen/CGCleanup.cpp index 8683f19d9da28e..e6f8e6873004f2 100644 --- a/clang/lib/CodeGen/CGCleanup.cpp +++ b/clang/lib/CodeGen/CGCleanup.cpp @@ -634,19 +634,12 @@ static void destroyOptimisticNormalEntry(CodeGenFunction &CGF, /// Pops a cleanup block. If the block includes a normal cleanup, the /// current insertion point is threaded through the cleanup, as are /// any branch fixups on the cleanup. 
-void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough, - bool ForDeactivation) { +void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { assert(!EHStack.empty() && "cleanup stack is empty!"); assert(isa(*EHStack.begin()) && "top not a cleanup!"); EHCleanupScope &Scope = cast(*EHStack.begin()); assert(Scope.getFixupDepth() <= EHStack.getNumBranchFixups()); - // If we are deactivating a normal cleanup, we need to pretend that the - // fallthrough is unreachable. We restore this IP before returning. - CGBuilderTy::InsertPoint NormalDeactivateOrigIP; - if (ForDeactivation && (Scope.isNormalCleanup() || !getLangOpts().EHAsynch)) { - NormalDeactivateOrigIP = Builder.saveAndClearIP(); - } // Remember activation information. bool IsActive = Scope.isActive(); Address NormalActiveFlag = @@ -674,8 +667,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough, // - whether there's a fallthrough llvm::BasicBlock *FallthroughSource = Builder.GetInsertBlock(); - bool HasFallthrough = - FallthroughSource != nullptr && (IsActive || HasExistingBranches); + bool HasFallthrough = (FallthroughSource != nullptr && IsActive); // Branch-through fall-throughs leave the insertion point set to the // end of the last cleanup, which points to the current scope. The @@ -700,11 +692,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough, // If we have a prebranched fallthrough into an inactive normal // cleanup, rewrite it so that it leads to the appropriate place. - if (Scope.isNormalCleanup() && HasPrebranchedFallthrough && - !RequiresNormalCleanup) { - // FIXME: Come up with a program which would need forwarding prebranched - // fallthrough and add tests. Otherwise delete this and assert against it. 
- assert(!IsActive); + if (Scope.isNormalCleanup() && HasPrebranchedFallthrough && !IsActive) { llvm::BasicBlock *prebranchDest; // If the prebranch is semantically branching through the next @@ -736,8 +724,6 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough, EHStack.popCleanup(); // safe because there are no fixups assert(EHStack.getNumBranchFixups() == 0 || EHStack.hasNormalCleanups()); - if (NormalDeactivateOrigIP.isSet()) - Builder.restoreIP(NormalDeactivateOrigIP); return; } @@ -774,19 +760,11 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough, if (!RequiresNormalCleanup) { // Mark CPP scope end for passed-by-value Arg temp // per Windows ABI which is "normally" Cleanup in callee - if (IsEHa && getInvokeDest()) { - // If we are deactivating a normal cleanup then we don't have a - // fallthrough. Restore original IP to emit CPP scope ends in the correct - // block. - if (NormalDeactivateOrigIP.isSet()) - Builder.restoreIP(NormalDeactivateOrigIP); - if (Personality.isMSVCXXPersonality() && Builder.GetInsertBlock()) + if (IsEHa && getInvokeDest() && Builder.GetInsertBlock()) { + if (Personality.isMSVCXXPersonality()) EmitSehCppScopeEnd(); - if (NormalDeactivateOrigIP.isSet()) - NormalDeactivateOrigIP = Builder.saveAndClearIP(); } destroyOptimisticNormalEntry(*this, Scope); - Scope.MarkEmitted(); EHStack.popCleanup(); } else { // If we have a fallthrough and no other need for the cleanup, @@ -803,7 +781,6 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough, } destroyOptimisticNormalEntry(*this, Scope); - Scope.MarkEmitted(); EHStack.popCleanup(); EmitCleanup(*this, Fn, cleanupFlags, NormalActiveFlag); @@ -939,7 +916,6 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough, } // IV. Pop the cleanup and emit it. 
- Scope.MarkEmitted(); EHStack.popCleanup(); assert(EHStack.hasNormalCleanups() == HasEnclosingCleanups); @@ -1008,8 +984,6 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough, } } - if (NormalDeactivateOrigIP.isSet()) - Builder.restoreIP(NormalDeactivateOrigIP); assert(EHStack.hasNormalCleanups() || EHStack.getNumBranchFixups() == 0); // Emit the EH cleanup if required. @@ -1299,8 +1273,17 @@ void CodeGenFunction::DeactivateCleanupBlock(EHScopeStack::stable_iterator C, // to the current RunCleanupsScope. if (C == EHStack.stable_begin() && CurrentCleanupScopeDepth.strictlyEncloses(C)) { - PopCleanupBlock(/*FallthroughIsBranchThrough=*/false, - /*ForDeactivation=*/true); + // Per comment below, checking EHAsynch is not really necessary + // it's there to assure zero-impact w/o EHAsynch option + if (!Scope.isNormalCleanup() && getLangOpts().EHAsynch) { + PopCleanupBlock(); + } else { + // If it's a normal cleanup, we need to pretend that the + // fallthrough is unreachable. + CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP(); + PopCleanupBlock(); + Builder.restoreIP(SavedIP); + } return; } diff --git a/clang/lib/CodeGen/CGCleanup.h b/clang/lib/CodeGen/CGCleanup.h index c73c97146abc4d..03e4a29d7b3dbf 100644 --- a/clang/lib/CodeGen/CGCleanup.h +++ b/clang/lib/CodeGen/CGCleanup.h @@ -16,11 +16,8 @@ #include "EHScopeStack.h" #include "Address.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Instruction.h" namespace llvm { class BasicBlock; @@ -269,51 +266,6 @@ class alignas(8) EHCleanupScope : public EHScope { }; mutable struct ExtInfo *ExtInfo; - /// Erases auxillary allocas and their usages for an unused cleanup. - /// Cleanups should mark these allocas as 'used' if the cleanup is - /// emitted, otherwise these instructions would be erased. 
- struct AuxillaryAllocas { - SmallVector AuxAllocas; - bool used = false; - - // Records a potentially unused instruction to be erased later. - void Add(llvm::AllocaInst *Alloca) { AuxAllocas.push_back(Alloca); } - - // Mark all recorded instructions as used. These will not be erased later. - void MarkUsed() { - used = true; - AuxAllocas.clear(); - } - - ~AuxillaryAllocas() { - if (used) - return; - llvm::SetVector Uses; - for (auto *Inst : llvm::reverse(AuxAllocas)) - CollectUses(Inst, Uses); - // Delete uses in the reverse order of insertion. - for (auto *I : llvm::reverse(Uses)) - I->eraseFromParent(); - } - - private: - void CollectUses(llvm::Instruction *I, - llvm::SetVector &Uses) { - if (!I || !Uses.insert(I)) - return; - for (auto *User : I->users()) - CollectUses(cast(User), Uses); - } - }; - mutable struct AuxillaryAllocas *AuxAllocas; - - AuxillaryAllocas &getAuxillaryAllocas() { - if (!AuxAllocas) { - AuxAllocas = new struct AuxillaryAllocas(); - } - return *AuxAllocas; - } - /// The number of fixups required by enclosing scopes (not including /// this one). If this is the top cleanup scope, all the fixups /// from this index onwards belong to this scope. @@ -346,7 +298,7 @@ class alignas(8) EHCleanupScope : public EHScope { EHScopeStack::stable_iterator enclosingEH) : EHScope(EHScope::Cleanup, enclosingEH), EnclosingNormal(enclosingNormal), NormalBlock(nullptr), - ActiveFlag(Address::invalid()), ExtInfo(nullptr), AuxAllocas(nullptr), + ActiveFlag(Address::invalid()), ExtInfo(nullptr), FixupDepth(fixupDepth) { CleanupBits.IsNormalCleanup = isNormal; CleanupBits.IsEHCleanup = isEH; @@ -360,15 +312,8 @@ class alignas(8) EHCleanupScope : public EHScope { } void Destroy() { - if (AuxAllocas) - delete AuxAllocas; delete ExtInfo; } - void AddAuxAllocas(llvm::SmallVector Allocas) { - for (auto *Alloca : Allocas) - getAuxillaryAllocas().Add(Alloca); - } - void MarkEmitted() { getAuxillaryAllocas().MarkUsed(); } // Objects of EHCleanupScope are not destructed. 
Use Destroy(). ~EHCleanupScope() = delete; diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index 3f05ebb561da57..ce6d6d8956076e 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -19,7 +19,6 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" -#include "EHScopeStack.h" #include "PatternInit.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" @@ -2202,27 +2201,6 @@ void CodeGenFunction::pushDestroy(CleanupKind cleanupKind, Address addr, destroyer, useEHCleanupForArray); } -// Pushes a destroy and defers its deactivation until its -// CleanupDeactivationScope is exited. -void CodeGenFunction::pushDestroyAndDeferDeactivation( - QualType::DestructionKind dtorKind, Address addr, QualType type) { - assert(dtorKind && "cannot push destructor for trivial type"); - - CleanupKind cleanupKind = getCleanupKind(dtorKind); - pushDestroyAndDeferDeactivation( - cleanupKind, addr, type, getDestroyer(dtorKind), cleanupKind & EHCleanup); -} - -void CodeGenFunction::pushDestroyAndDeferDeactivation( - CleanupKind cleanupKind, Address addr, QualType type, Destroyer *destroyer, - bool useEHCleanupForArray) { - llvm::Instruction *DominatingIP = - Builder.CreateFlagLoad(llvm::Constant::getNullValue(Int8PtrTy)); - pushDestroy(cleanupKind, addr, type, destroyer, useEHCleanupForArray); - DeferredDeactivationCleanupStack.push_back( - {EHStack.stable_begin(), DominatingIP}); -} - void CodeGenFunction::pushStackRestore(CleanupKind Kind, Address SPMem) { EHStack.pushCleanup(Kind, SPMem); } @@ -2239,19 +2217,16 @@ void CodeGenFunction::pushLifetimeExtendedDestroy(CleanupKind cleanupKind, // If we're not in a conditional branch, we don't need to bother generating a // conditional cleanup. if (!isInConditionalBranch()) { + // Push an EH-only cleanup for the object now. // FIXME: When popping normal cleanups, we need to keep this EH cleanup // around in case a temporary's destructor throws an exception. 
+ if (cleanupKind & EHCleanup) + EHStack.pushCleanup( + static_cast(cleanupKind & ~NormalCleanup), addr, type, + destroyer, useEHCleanupForArray); - // Add the cleanup to the EHStack. After the full-expr, this would be - // deactivated before being popped from the stack. - pushDestroyAndDeferDeactivation(cleanupKind, addr, type, destroyer, - useEHCleanupForArray); - - // Since this is lifetime-extended, push it once again to the EHStack after - // the full expression. return pushCleanupAfterFullExprWithActiveFlag( - cleanupKind, Address::invalid(), addr, type, destroyer, - useEHCleanupForArray); + cleanupKind, Address::invalid(), addr, type, destroyer, useEHCleanupForArray); } // Otherwise, we should only destroy the object if it's been initialized. @@ -2266,12 +2241,13 @@ void CodeGenFunction::pushLifetimeExtendedDestroy(CleanupKind cleanupKind, Address ActiveFlag = createCleanupActiveFlag(); SavedType SavedAddr = saveValueInCond(addr); - pushCleanupAndDeferDeactivation( - cleanupKind, SavedAddr, type, destroyer, useEHCleanupForArray); - initFullExprCleanupWithFlag(ActiveFlag); + if (cleanupKind & EHCleanup) { + EHStack.pushCleanup( + static_cast(cleanupKind & ~NormalCleanup), SavedAddr, type, + destroyer, useEHCleanupForArray); + initFullExprCleanupWithFlag(ActiveFlag); + } - // Since this is lifetime-extended, push it once again to the EHStack after - // the full expression. pushCleanupAfterFullExprWithActiveFlag( cleanupKind, ActiveFlag, SavedAddr, type, destroyer, useEHCleanupForArray); @@ -2466,9 +2442,9 @@ namespace { }; } // end anonymous namespace -/// pushIrregularPartialArrayCleanup - Push a NormalAndEHCleanup to -/// destroy already-constructed elements of the given array. The cleanup may be -/// popped with DeactivateCleanupBlock or PopCleanupBlock. +/// pushIrregularPartialArrayCleanup - Push an EH cleanup to destroy +/// already-constructed elements of the given array. The cleanup +/// may be popped with DeactivateCleanupBlock or PopCleanupBlock. 
/// /// \param elementType - the immediate element type of the array; /// possibly still an array type @@ -2477,9 +2453,10 @@ void CodeGenFunction::pushIrregularPartialArrayCleanup(llvm::Value *arrayBegin, QualType elementType, CharUnits elementAlign, Destroyer *destroyer) { - pushFullExprCleanup( - NormalAndEHCleanup, arrayBegin, arrayEndPointer, elementType, - elementAlign, destroyer); + pushFullExprCleanup(EHCleanup, + arrayBegin, arrayEndPointer, + elementType, elementAlign, + destroyer); } /// pushRegularPartialArrayCleanup - Push an EH cleanup to destroy diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index c85a339f5e3f88..cf696a1c9f560f 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -115,16 +115,10 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, const Twine &Name, llvm::Value *ArraySize) { - llvm::AllocaInst *Alloca; if (ArraySize) - Alloca = Builder.CreateAlloca(Ty, ArraySize, Name); - else - Alloca = new llvm::AllocaInst(Ty, CGM.getDataLayout().getAllocaAddrSpace(), - ArraySize, Name, AllocaInsertPt); - if (Allocas) { - Allocas->Add(Alloca); - } - return Alloca; + return Builder.CreateAlloca(Ty, ArraySize, Name); + return new llvm::AllocaInst(Ty, CGM.getDataLayout().getAllocaAddrSpace(), + ArraySize, Name, AllocaInsertPt); } /// CreateDefaultAlignTempAlloca - This creates an alloca with the diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 560a9e2c5ead5c..1b9287ea239347 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -15,7 +15,6 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" -#include "EHScopeStack.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" @@ -25,7 +24,6 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include 
"llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" using namespace clang; @@ -560,27 +558,24 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, // For that, we'll need an EH cleanup. QualType::DestructionKind dtorKind = elementType.isDestructedType(); Address endOfInit = Address::invalid(); - CodeGenFunction::CleanupDeactivationScope deactivation(CGF); - - if (dtorKind) { - CodeGenFunction::AllocaTrackerRAII allocaTracker(CGF); + EHScopeStack::stable_iterator cleanup; + llvm::Instruction *cleanupDominator = nullptr; + if (CGF.needsEHCleanup(dtorKind)) { // In principle we could tell the cleanup where we are more // directly, but the control flow can get so varied here that it // would actually be quite complex. Therefore we go through an // alloca. - llvm::Instruction *dominatingIP = - Builder.CreateFlagLoad(llvm::ConstantInt::getNullValue(CGF.Int8PtrTy)); endOfInit = CGF.CreateTempAlloca(begin->getType(), CGF.getPointerAlign(), "arrayinit.endOfInit"); - Builder.CreateStore(begin, endOfInit); + cleanupDominator = Builder.CreateStore(begin, endOfInit); CGF.pushIrregularPartialArrayCleanup(begin, endOfInit, elementType, elementAlign, CGF.getDestroyer(dtorKind)); - cast(*CGF.EHStack.find(CGF.EHStack.stable_begin())) - .AddAuxAllocas(allocaTracker.Take()); + cleanup = CGF.EHStack.stable_begin(); - CGF.DeferredDeactivationCleanupStack.push_back( - {CGF.EHStack.stable_begin(), dominatingIP}); + // Otherwise, remember that we didn't need a cleanup. + } else { + dtorKind = QualType::DK_none; } llvm::Value *one = llvm::ConstantInt::get(CGF.SizeTy, 1); @@ -676,6 +671,9 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, CGF.EmitBlock(endBB); } + + // Leave the partial-array cleanup if we entered one. 
+ if (dtorKind) CGF.DeactivateCleanupBlock(cleanup, cleanupDominator); } //===----------------------------------------------------------------------===// @@ -1376,8 +1374,9 @@ AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) { LValue SlotLV = CGF.MakeAddrLValue(Slot.getAddress(), E->getType()); // We'll need to enter cleanup scopes in case any of the element - // initializers throws an exception or contains branch out of the expressions. - CodeGenFunction::CleanupDeactivationScope scope(CGF); + // initializers throws an exception. + SmallVector Cleanups; + llvm::Instruction *CleanupDominator = nullptr; CXXRecordDecl::field_iterator CurField = E->getLambdaClass()->field_begin(); for (LambdaExpr::const_capture_init_iterator i = E->capture_init_begin(), @@ -1396,12 +1395,28 @@ AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) { if (QualType::DestructionKind DtorKind = CurField->getType().isDestructedType()) { assert(LV.isSimple()); - if (DtorKind) - CGF.pushDestroyAndDeferDeactivation( - NormalAndEHCleanup, LV.getAddress(CGF), CurField->getType(), - CGF.getDestroyer(DtorKind), false); + if (CGF.needsEHCleanup(DtorKind)) { + if (!CleanupDominator) + CleanupDominator = CGF.Builder.CreateAlignedLoad( + CGF.Int8Ty, + llvm::Constant::getNullValue(CGF.Int8PtrTy), + CharUnits::One()); // placeholder + + CGF.pushDestroy(EHCleanup, LV.getAddress(CGF), CurField->getType(), + CGF.getDestroyer(DtorKind), false); + Cleanups.push_back(CGF.EHStack.stable_begin()); + } } } + + // Deactivate all the partial cleanups in reverse order, which + // generally means popping them. + for (unsigned i = Cleanups.size(); i != 0; --i) + CGF.DeactivateCleanupBlock(Cleanups[i-1], CleanupDominator); + + // Destroy the placeholder if we made one. 
+ if (CleanupDominator) + CleanupDominator->eraseFromParent(); } void AggExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) { @@ -1690,7 +1705,14 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( // We'll need to enter cleanup scopes in case any of the element // initializers throws an exception. SmallVector cleanups; - CodeGenFunction::CleanupDeactivationScope DeactivateCleanups(CGF); + llvm::Instruction *cleanupDominator = nullptr; + auto addCleanup = [&](const EHScopeStack::stable_iterator &cleanup) { + cleanups.push_back(cleanup); + if (!cleanupDominator) // create placeholder once needed + cleanupDominator = CGF.Builder.CreateAlignedLoad( + CGF.Int8Ty, llvm::Constant::getNullValue(CGF.Int8PtrTy), + CharUnits::One()); + }; unsigned curInitIndex = 0; @@ -1713,8 +1735,10 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( CGF.EmitAggExpr(InitExprs[curInitIndex++], AggSlot); if (QualType::DestructionKind dtorKind = - Base.getType().isDestructedType()) - CGF.pushDestroyAndDeferDeactivation(dtorKind, V, Base.getType()); + Base.getType().isDestructedType()) { + CGF.pushDestroy(dtorKind, V, Base.getType()); + addCleanup(CGF.EHStack.stable_begin()); + } } } @@ -1789,10 +1813,10 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( if (QualType::DestructionKind dtorKind = field->getType().isDestructedType()) { assert(LV.isSimple()); - if (dtorKind) { - CGF.pushDestroyAndDeferDeactivation( - NormalAndEHCleanup, LV.getAddress(CGF), field->getType(), - CGF.getDestroyer(dtorKind), false); + if (CGF.needsEHCleanup(dtorKind)) { + CGF.pushDestroy(EHCleanup, LV.getAddress(CGF), field->getType(), + CGF.getDestroyer(dtorKind), false); + addCleanup(CGF.EHStack.stable_begin()); pushedCleanup = true; } } @@ -1805,6 +1829,17 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( if (GEP->use_empty()) GEP->eraseFromParent(); } + + // Deactivate all the partial cleanups in reverse order, which + // generally means popping them. 
+ assert((cleanupDominator || cleanups.empty()) && + "Missing cleanupDominator before deactivating cleanup blocks"); + for (unsigned i = cleanups.size(); i != 0; --i) + CGF.DeactivateCleanupBlock(cleanups[i-1], cleanupDominator); + + // Destroy the placeholder if we made one. + if (cleanupDominator) + cleanupDominator->eraseFromParent(); } void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E, diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index a88b29b326bb92..a4fb673284ceca 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -1008,8 +1008,8 @@ void CodeGenFunction::EmitNewArrayInitializer( const Expr *Init = E->getInitializer(); Address EndOfInit = Address::invalid(); QualType::DestructionKind DtorKind = ElementType.isDestructedType(); - CleanupDeactivationScope deactivation(*this); - bool pushedCleanup = false; + EHScopeStack::stable_iterator Cleanup; + llvm::Instruction *CleanupDominator = nullptr; CharUnits ElementSize = getContext().getTypeSizeInChars(ElementType); CharUnits ElementAlign = @@ -1105,24 +1105,19 @@ void CodeGenFunction::EmitNewArrayInitializer( } // Enter a partial-destruction Cleanup if necessary. - if (DtorKind) { - AllocaTrackerRAII AllocaTracker(*this); + if (needsEHCleanup(DtorKind)) { // In principle we could tell the Cleanup where we are more // directly, but the control flow can get so varied here that it // would actually be quite complex. Therefore we go through an // alloca. 
- llvm::Instruction *DominatingIP = - Builder.CreateFlagLoad(llvm::ConstantInt::getNullValue(Int8PtrTy)); EndOfInit = CreateTempAlloca(BeginPtr.getType(), getPointerAlign(), "array.init.end"); + CleanupDominator = + Builder.CreateStore(BeginPtr.emitRawPointer(*this), EndOfInit); pushIrregularPartialArrayCleanup(BeginPtr.emitRawPointer(*this), EndOfInit, ElementType, ElementAlign, getDestroyer(DtorKind)); - cast(*EHStack.find(EHStack.stable_begin())) - .AddAuxAllocas(AllocaTracker.Take()); - DeferredDeactivationCleanupStack.push_back( - {EHStack.stable_begin(), DominatingIP}); - pushedCleanup = true; + Cleanup = EHStack.stable_begin(); } CharUnits StartAlign = CurPtr.getAlignment(); @@ -1169,6 +1164,9 @@ void CodeGenFunction::EmitNewArrayInitializer( // initialization. llvm::ConstantInt *ConstNum = dyn_cast(NumElements); if (ConstNum && ConstNum->getZExtValue() <= InitListElements) { + // If there was a Cleanup, deactivate it. + if (CleanupDominator) + DeactivateCleanupBlock(Cleanup, CleanupDominator); return; } @@ -1283,14 +1281,13 @@ void CodeGenFunction::EmitNewArrayInitializer( Builder.CreateStore(CurPtr.emitRawPointer(*this), EndOfInit); // Enter a partial-destruction Cleanup if necessary. - if (!pushedCleanup && needsEHCleanup(DtorKind)) { - llvm::Instruction *DominatingIP = - Builder.CreateFlagLoad(llvm::ConstantInt::getNullValue(Int8PtrTy)); - pushRegularPartialArrayCleanup(BeginPtr.emitRawPointer(*this), - CurPtr.emitRawPointer(*this), ElementType, + if (!CleanupDominator && needsEHCleanup(DtorKind)) { + llvm::Value *BeginPtrRaw = BeginPtr.emitRawPointer(*this); + llvm::Value *CurPtrRaw = CurPtr.emitRawPointer(*this); + pushRegularPartialArrayCleanup(BeginPtrRaw, CurPtrRaw, ElementType, ElementAlign, getDestroyer(DtorKind)); - DeferredDeactivationCleanupStack.push_back( - {EHStack.stable_begin(), DominatingIP}); + Cleanup = EHStack.stable_begin(); + CleanupDominator = Builder.CreateUnreachable(); } // Emit the initializer into this element. 
@@ -1298,7 +1295,10 @@ void CodeGenFunction::EmitNewArrayInitializer( AggValueSlot::DoesNotOverlap); // Leave the Cleanup if we entered one. - deactivation.ForceDeactivate(); + if (CleanupDominator) { + DeactivateCleanupBlock(Cleanup, CleanupDominator); + CleanupDominator->eraseFromParent(); + } // Advance to the next element by adjusting the pointer type as necessary. llvm::Value *NextPtr = Builder.CreateConstInBoundsGEP1_32( diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 59ba03c6b86253..eb716520e5ff56 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -3466,7 +3466,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective( case CudaArch::SM_20: case CudaArch::SM_21: case CudaArch::SM_30: - case CudaArch::SM_32: + case CudaArch::SM_32_: case CudaArch::SM_35: case CudaArch::SM_37: case CudaArch::SM_50: diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 87766a758311d5..86a6ddd80cc114 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -91,8 +91,6 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext) CodeGenFunction::~CodeGenFunction() { assert(LifetimeExtendedCleanupStack.empty() && "failed to emit a cleanup"); - assert(DeferredDeactivationCleanupStack.empty() && - "missed to deactivate a cleanup"); if (getLangOpts().OpenMP && CurFn) CGM.getOpenMPRuntime().functionFinished(*this); @@ -348,10 +346,6 @@ static void EmitIfUsed(CodeGenFunction &CGF, llvm::BasicBlock *BB) { void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { assert(BreakContinueStack.empty() && "mismatched push/pop in break/continue stack!"); - assert(LifetimeExtendedCleanupStack.empty() && - "mismatched push/pop of cleanups in EHStack!"); - assert(DeferredDeactivationCleanupStack.empty() && - "mismatched activate/deactivate of cleanups!"); bool OnlySimpleReturnStmts = 
NumSimpleReturnExprs > 0 && NumSimpleReturnExprs == NumReturnExprs diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index d99188671f1f60..ff1873325d409f 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -39,7 +39,6 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/SanitizerStats.h" @@ -671,51 +670,6 @@ class CodeGenFunction : public CodeGenTypeCache { EHScopeStack EHStack; llvm::SmallVector LifetimeExtendedCleanupStack; - - // A stack of cleanups which were added to EHStack but have to be deactivated - // later before being popped or emitted. These are usually deactivated on - // exiting a `CleanupDeactivationScope` scope. For instance, after a - // full-expr. - // - // These are specially useful for correctly emitting cleanups while - // encountering branches out of expression (through stmt-expr or coroutine - // suspensions). - struct DeferredDeactivateCleanup { - EHScopeStack::stable_iterator Cleanup; - llvm::Instruction *DominatingIP; - }; - llvm::SmallVector DeferredDeactivationCleanupStack; - - // Enters a new scope for capturing cleanups which are deferred to be - // deactivated, all of which will be deactivated once the scope is exited. 
- struct CleanupDeactivationScope { - CodeGenFunction &CGF; - size_t OldDeactivateCleanupStackSize; - bool Deactivated; - CleanupDeactivationScope(CodeGenFunction &CGF) - : CGF(CGF), OldDeactivateCleanupStackSize( - CGF.DeferredDeactivationCleanupStack.size()), - Deactivated(false) {} - - void ForceDeactivate() { - assert(!Deactivated && "Deactivating already deactivated scope"); - auto &Stack = CGF.DeferredDeactivationCleanupStack; - for (size_t I = Stack.size(); I > OldDeactivateCleanupStackSize; I--) { - CGF.DeactivateCleanupBlock(Stack[I - 1].Cleanup, - Stack[I - 1].DominatingIP); - Stack[I - 1].DominatingIP->eraseFromParent(); - } - Stack.resize(OldDeactivateCleanupStackSize); - Deactivated = true; - } - - ~CleanupDeactivationScope() { - if (Deactivated) - return; - ForceDeactivate(); - } - }; - llvm::SmallVector SEHTryEpilogueStack; llvm::Instruction *CurrentFuncletPad = nullptr; @@ -921,19 +875,6 @@ class CodeGenFunction : public CodeGenTypeCache { new (Buffer + sizeof(Header) + sizeof(T)) RawAddress(ActiveFlag); } - // Push a cleanup onto EHStack and deactivate it later. It is usually - // deactivated when exiting a `CleanupDeactivationScope` (for example: after a - // full expression). - template - void pushCleanupAndDeferDeactivation(CleanupKind Kind, As... A) { - // Placeholder dominating IP for this cleanup. - llvm::Instruction *DominatingIP = - Builder.CreateFlagLoad(llvm::Constant::getNullValue(Int8PtrTy)); - EHStack.pushCleanup(Kind, A...); - DeferredDeactivationCleanupStack.push_back( - {EHStack.stable_begin(), DominatingIP}); - } - /// Set up the last cleanup that was pushed as a conditional /// full-expression cleanup. void initFullExprCleanup() { @@ -957,8 +898,7 @@ class CodeGenFunction : public CodeGenTypeCache { /// PopCleanupBlock - Will pop the cleanup entry on the stack and /// process all branch fixups. 
- void PopCleanupBlock(bool FallThroughIsBranchThrough = false, - bool ForDeactivation = false); + void PopCleanupBlock(bool FallThroughIsBranchThrough = false); /// DeactivateCleanupBlock - Deactivates the given cleanup block. /// The block cannot be reactivated. Pops it if it's the top of the @@ -986,7 +926,6 @@ class CodeGenFunction : public CodeGenTypeCache { class RunCleanupsScope { EHScopeStack::stable_iterator CleanupStackDepth, OldCleanupScopeDepth; size_t LifetimeExtendedCleanupStackSize; - CleanupDeactivationScope DeactivateCleanups; bool OldDidCallStackSave; protected: bool PerformCleanup; @@ -1001,7 +940,8 @@ class CodeGenFunction : public CodeGenTypeCache { public: /// Enter a new cleanup scope. explicit RunCleanupsScope(CodeGenFunction &CGF) - : DeactivateCleanups(CGF), PerformCleanup(true), CGF(CGF) { + : PerformCleanup(true), CGF(CGF) + { CleanupStackDepth = CGF.EHStack.stable_begin(); LifetimeExtendedCleanupStackSize = CGF.LifetimeExtendedCleanupStack.size(); @@ -1031,7 +971,6 @@ class CodeGenFunction : public CodeGenTypeCache { void ForceCleanup(std::initializer_list ValuesToReload = {}) { assert(PerformCleanup && "Already forced cleanup"); CGF.DidCallStackSave = OldDidCallStackSave; - DeactivateCleanups.ForceDeactivate(); CGF.PopCleanupBlocks(CleanupStackDepth, LifetimeExtendedCleanupStackSize, ValuesToReload); PerformCleanup = false; @@ -2221,11 +2160,6 @@ class CodeGenFunction : public CodeGenTypeCache { Address addr, QualType type); void pushDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray); - void pushDestroyAndDeferDeactivation(QualType::DestructionKind dtorKind, - Address addr, QualType type); - void pushDestroyAndDeferDeactivation(CleanupKind cleanupKind, Address addr, - QualType type, Destroyer *destroyer, - bool useEHCleanupForArray); void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray); @@ -2764,33 +2698,6 @@ 
class CodeGenFunction : public CodeGenTypeCache { TBAAAccessInfo *TBAAInfo = nullptr); LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy); -private: - struct AllocaTracker { - void Add(llvm::AllocaInst *I) { Allocas.push_back(I); } - llvm::SmallVector Take() { return std::move(Allocas); } - - private: - llvm::SmallVector Allocas; - }; - AllocaTracker *Allocas = nullptr; - -public: - // Captures all the allocas created during the scope of its RAII object. - struct AllocaTrackerRAII { - AllocaTrackerRAII(CodeGenFunction &CGF) - : CGF(CGF), OldTracker(CGF.Allocas) { - CGF.Allocas = &Tracker; - } - ~AllocaTrackerRAII() { CGF.Allocas = OldTracker; } - - llvm::SmallVector Take() { return Tracker.Take(); } - - private: - CodeGenFunction &CGF; - AllocaTracker *OldTracker; - AllocaTracker Tracker; - }; - /// CreateTempAlloca - This creates an alloca and inserts it into the entry /// block if \p ArraySize is nullptr, otherwise inserts it at the current /// insertion point of the builder. The caller is responsible for setting an diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp index b1dd7c4372d475..96b3cc3bb8ffb1 100644 --- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -68,8 +68,10 @@ static void getRISCFeaturesFromMcpu(const Driver &D, const Arg *A, << A->getSpelling() << Mcpu; } - if (llvm::RISCV::hasFastUnalignedAccess(Mcpu)) - Features.push_back("+fast-unaligned-access"); + if (llvm::RISCV::hasFastUnalignedAccess(Mcpu)) { + Features.push_back("+unaligned-scalar-mem"); + Features.push_back("+unaligned-vector-mem"); + } } void riscv::getRISCVTargetFeatures(const Driver &D, const llvm::Triple &Triple, @@ -168,12 +170,16 @@ void riscv::getRISCVTargetFeatures(const Driver &D, const llvm::Triple &Triple, } // Android requires fast unaligned access on RISCV64. 
- if (Triple.isAndroid()) - Features.push_back("+fast-unaligned-access"); + if (Triple.isAndroid()) { + Features.push_back("+unaligned-scalar-mem"); + Features.push_back("+unaligned-vector-mem"); + } // -mstrict-align is default, unless -mno-strict-align is specified. AddTargetFeature(Args, Features, options::OPT_mno_strict_align, - options::OPT_mstrict_align, "fast-unaligned-access"); + options::OPT_mstrict_align, "unaligned-scalar-mem"); + AddTargetFeature(Args, Features, options::OPT_mno_strict_align, + options::OPT_mstrict_align, "unaligned-vector-mem"); // Now add any that the user explicitly requested on the command line, // which may override the defaults. diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 2b934234b7cf5d..c881b37507771a 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -28,6 +28,7 @@ #include "clang/Sema/Scope.h" #include "clang/Sema/SemaCUDA.h" #include "clang/Sema/SemaDiagnostic.h" +#include "clang/Sema/SemaOpenMP.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" @@ -2383,7 +2384,7 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS, } if (getLangOpts().OpenMP) - Actions.startOpenMPCXXRangeFor(); + Actions.OpenMP().startOpenMPCXXRangeFor(); if (Tok.is(tok::l_brace)) FRI->RangeExpr = ParseBraceInitializer(); else diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 473ec9afd60181..32d96f81c4c8de 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -31,6 +31,7 @@ #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" #include "clang/Sema/SemaCUDA.h" +#include "clang/Sema/SemaOpenMP.h" #include "clang/Sema/SemaSYCL.h" #include "clang/Sema/TypoCorrection.h" #include "llvm/ADT/SmallVector.h" @@ -2075,7 +2076,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { // replace this call to ActOnOpenACCArraySectionExpr in the future. 
// Eventually we'll genericize the OPenMPArraySectionExpr type as // well. - LHS = Actions.ActOnOMPArraySectionExpr( + LHS = Actions.OpenMP().ActOnOMPArraySectionExpr( LHS.get(), Loc, ArgExprs.empty() ? nullptr : ArgExprs[0], ColonLocFirst, ColonLocSecond, Length.get(), Stride.get(), RLoc); } else { @@ -3277,7 +3278,7 @@ Parser::ParseParenExpression(ParenParseOption &ExprType, bool stopIfCastExpr, if (ErrorFound) { Result = ExprError(); } else if (!Result.isInvalid()) { - Result = Actions.ActOnOMPArrayShapingExpr( + Result = Actions.OpenMP().ActOnOMPArrayShapingExpr( Result.get(), OpenLoc, RParenLoc, OMPDimensions, OMPBracketsRanges); } return Result; diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp index 91f2b8afcf0c24..123be476e928ee 100644 --- a/clang/lib/Parse/ParseOpenACC.cpp +++ b/clang/lib/Parse/ParseOpenACC.cpp @@ -835,19 +835,23 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams( case OpenACCClauseKind::Default: { Token DefKindTok = getCurToken(); - if (expectIdentifierOrKeyword(*this)) - break; + if (expectIdentifierOrKeyword(*this)) { + Parens.skipToEnd(); + return OpenACCCanContinue(); + } ConsumeToken(); OpenACCDefaultClauseKind DefKind = getOpenACCDefaultClauseKind(DefKindTok); - if (DefKind == OpenACCDefaultClauseKind::Invalid) + if (DefKind == OpenACCDefaultClauseKind::Invalid) { Diag(DefKindTok, diag::err_acc_invalid_default_clause_kind); - else - ParsedClause.setDefaultDetails(DefKind); + Parens.skipToEnd(); + return OpenACCCanContinue(); + } + ParsedClause.setDefaultDetails(DefKind); break; } case OpenACCClauseKind::If: { @@ -977,6 +981,8 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams( case OpenACCClauseKind::Self: { assert(DirKind != OpenACCDirectiveKind::Update); ExprResult CondExpr = ParseOpenACCConditionExpr(); + ParsedClause.setConditionDetails(CondExpr.isUsable() ? 
CondExpr.get() + : nullptr); if (CondExpr.isInvalid()) { Parens.skipToEnd(); diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 814126e321d3bc..480201bc06f613 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -21,6 +21,7 @@ #include "clang/Parse/RAIIObjectsForParser.h" #include "clang/Sema/EnterExpressionEvaluationContext.h" #include "clang/Sema/Scope.h" +#include "clang/Sema/SemaOpenMP.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/UniqueVector.h" @@ -87,7 +88,7 @@ class DeclDirectiveListParserHelper final { DeclDirectiveListParserHelper(Parser *P, OpenMPDirectiveKind Kind) : P(P), Kind(Kind) {} void operator()(CXXScopeSpec &SS, DeclarationNameInfo NameInfo) { - ExprResult Res = P->getActions().ActOnOpenMPIdExpression( + ExprResult Res = P->getActions().OpenMP().ActOnOpenMPIdExpression( P->getCurScope(), SS, NameInfo, Kind); if (Res.isUsable()) Identifiers.push_back(Res.get()); @@ -322,8 +323,8 @@ Parser::ParseOpenMPDeclareReductionDirective(AccessSpecifier AS) { SourceRange Range; TypeResult TR = ParseTypeName(&Range, DeclaratorContext::Prototype, AS); if (TR.isUsable()) { - QualType ReductionType = - Actions.ActOnOpenMPDeclareReductionType(Range.getBegin(), TR); + QualType ReductionType = Actions.OpenMP().ActOnOpenMPDeclareReductionType( + Range.getBegin(), TR); if (!ReductionType.isNull()) { ReductionTypes.push_back( std::make_pair(ReductionType, Range.getBegin())); @@ -363,8 +364,10 @@ Parser::ParseOpenMPDeclareReductionDirective(AccessSpecifier AS) { return DeclGroupPtrTy(); } - DeclGroupPtrTy DRD = Actions.ActOnOpenMPDeclareReductionDirectiveStart( - getCurScope(), Actions.getCurLexicalContext(), Name, ReductionTypes, AS); + DeclGroupPtrTy DRD = + Actions.OpenMP().ActOnOpenMPDeclareReductionDirectiveStart( + getCurScope(), Actions.getCurLexicalContext(), Name, ReductionTypes, + AS); // Parse expression and then parse initializer if any for 
each // correct type. @@ -375,10 +378,11 @@ Parser::ParseOpenMPDeclareReductionDirective(AccessSpecifier AS) { Scope::CompoundStmtScope | Scope::OpenMPDirectiveScope); // Parse expression. - Actions.ActOnOpenMPDeclareReductionCombinerStart(getCurScope(), D); + Actions.OpenMP().ActOnOpenMPDeclareReductionCombinerStart(getCurScope(), D); ExprResult CombinerResult = Actions.ActOnFinishFullExpr( ParseExpression().get(), D->getLocation(), /*DiscardedValue*/ false); - Actions.ActOnOpenMPDeclareReductionCombinerEnd(D, CombinerResult.get()); + Actions.OpenMP().ActOnOpenMPDeclareReductionCombinerEnd( + D, CombinerResult.get()); if (CombinerResult.isInvalid() && Tok.isNot(tok::r_paren) && Tok.isNot(tok::annot_pragma_openmp_end)) { @@ -411,8 +415,8 @@ Parser::ParseOpenMPDeclareReductionDirective(AccessSpecifier AS) { Scope::OpenMPDirectiveScope); // Parse expression. VarDecl *OmpPrivParm = - Actions.ActOnOpenMPDeclareReductionInitializerStart(getCurScope(), - D); + Actions.OpenMP().ActOnOpenMPDeclareReductionInitializerStart( + getCurScope(), D); // Check if initializer is omp_priv or something else. 
if (Tok.is(tok::identifier) && Tok.getIdentifierInfo()->isStr("omp_priv")) { @@ -423,7 +427,7 @@ Parser::ParseOpenMPDeclareReductionDirective(AccessSpecifier AS) { ParseAssignmentExpression().get(), D->getLocation(), /*DiscardedValue*/ false); } - Actions.ActOnOpenMPDeclareReductionInitializerEnd( + Actions.OpenMP().ActOnOpenMPDeclareReductionInitializerEnd( D, InitializerResult.get(), OmpPrivParm); if (InitializerResult.isInvalid() && Tok.isNot(tok::r_paren) && Tok.isNot(tok::annot_pragma_openmp_end)) { @@ -444,8 +448,8 @@ Parser::ParseOpenMPDeclareReductionDirective(AccessSpecifier AS) { else TPA.Commit(); } - return Actions.ActOnOpenMPDeclareReductionDirectiveEnd(getCurScope(), DRD, - IsCorrect); + return Actions.OpenMP().ActOnOpenMPDeclareReductionDirectiveEnd( + getCurScope(), DRD, IsCorrect); } void Parser::ParseOpenMPReductionInitializerForDecl(VarDecl *OmpPrivParm) { @@ -569,8 +573,8 @@ Parser::ParseOpenMPDeclareMapperDirective(AccessSpecifier AS) { SourceRange Range; TypeResult ParsedType = parseOpenMPDeclareMapperVarDecl(Range, VName, AS); if (ParsedType.isUsable()) - MapperType = - Actions.ActOnOpenMPDeclareMapperType(Range.getBegin(), ParsedType); + MapperType = Actions.OpenMP().ActOnOpenMPDeclareMapperType(Range.getBegin(), + ParsedType); if (MapperType.isNull()) IsCorrect = false; if (!IsCorrect) { @@ -591,11 +595,13 @@ Parser::ParseOpenMPDeclareMapperDirective(AccessSpecifier AS) { unsigned ScopeFlags = Scope::FnScope | Scope::DeclScope | Scope::CompoundStmtScope | Scope::OpenMPDirectiveScope; ParseScope OMPDirectiveScope(this, ScopeFlags); - Actions.StartOpenMPDSABlock(OMPD_declare_mapper, DirName, getCurScope(), Loc); + Actions.OpenMP().StartOpenMPDSABlock(OMPD_declare_mapper, DirName, + getCurScope(), Loc); // Add the mapper variable declaration. 
- ExprResult MapperVarRef = Actions.ActOnOpenMPDeclareMapperDirectiveVarDecl( - getCurScope(), MapperType, Range.getBegin(), VName); + ExprResult MapperVarRef = + Actions.OpenMP().ActOnOpenMPDeclareMapperDirectiveVarDecl( + getCurScope(), MapperType, Range.getBegin(), VName); // Parse map clauses. SmallVector Clauses; @@ -603,7 +609,7 @@ Parser::ParseOpenMPDeclareMapperDirective(AccessSpecifier AS) { OpenMPClauseKind CKind = Tok.isAnnotation() ? OMPC_unknown : getOpenMPClauseKind(PP.getSpelling(Tok)); - Actions.StartOpenMPClause(CKind); + Actions.OpenMP().StartOpenMPClause(CKind); OMPClause *Clause = ParseOpenMPClause(OMPD_declare_mapper, CKind, Clauses.empty()); if (Clause) @@ -613,7 +619,7 @@ Parser::ParseOpenMPDeclareMapperDirective(AccessSpecifier AS) { // Skip ',' if any. if (Tok.is(tok::comma)) ConsumeToken(); - Actions.EndOpenMPClause(); + Actions.OpenMP().EndOpenMPClause(); } if (Clauses.empty()) { Diag(Tok, diag::err_omp_expected_clause) @@ -622,9 +628,9 @@ Parser::ParseOpenMPDeclareMapperDirective(AccessSpecifier AS) { } // Exit scope. 
- Actions.EndOpenMPDSABlock(nullptr); + Actions.OpenMP().EndOpenMPDSABlock(nullptr); OMPDirectiveScope.Exit(); - DeclGroupPtrTy DG = Actions.ActOnOpenMPDeclareMapperDirective( + DeclGroupPtrTy DG = Actions.OpenMP().ActOnOpenMPDeclareMapperDirective( getCurScope(), Actions.getCurLexicalContext(), MapperId, MapperType, Range.getBegin(), VName, AS, MapperVarRef.get(), Clauses); if (!IsCorrect) @@ -652,7 +658,8 @@ TypeResult Parser::parseOpenMPDeclareMapperVarDecl(SourceRange &Range, } Name = Actions.GetNameForDeclarator(DeclaratorInfo).getName(); - return Actions.ActOnOpenMPDeclareMapperVarDecl(getCurScope(), DeclaratorInfo); + return Actions.OpenMP().ActOnOpenMPDeclareMapperVarDecl(getCurScope(), + DeclaratorInfo); } namespace { @@ -748,7 +755,7 @@ static bool parseDeclareSimdClauses( OpenMPClauseKind CKind = getOpenMPClauseKind(ClauseName); if (CKind == OMPC_uniform || CKind == OMPC_aligned || CKind == OMPC_linear) { - Sema::OpenMPVarListDataTy Data; + SemaOpenMP::OpenMPVarListDataTy Data; SmallVectorImpl *Vars = &Uniforms; if (CKind == OMPC_aligned) { Vars = &Aligneds; @@ -768,7 +775,7 @@ static bool parseDeclareSimdClauses( assert(0 <= Data.ExtraModifier && Data.ExtraModifier <= OMPC_LINEAR_unknown && "Unexpected linear modifier."); - if (P.getActions().CheckOpenMPLinearModifier( + if (P.getActions().OpenMP().CheckOpenMPLinearModifier( static_cast(Data.ExtraModifier), Data.ExtraModifierLoc)) Data.ExtraModifier = OMPC_LINEAR_val; @@ -816,7 +823,7 @@ Parser::ParseOMPDeclareSimdClauses(Parser::DeclGroupPtrTy Ptr, SourceLocation EndLoc = ConsumeAnnotationToken(); if (IsError) return Ptr; - return Actions.ActOnOpenMPDeclareSimdDirective( + return Actions.OpenMP().ActOnOpenMPDeclareSimdDirective( Ptr, BS, Simdlen.get(), Uniforms, Aligneds, Alignments, Linears, LinModifiers, Steps, SourceRange(Loc, EndLoc)); } @@ -1412,7 +1419,8 @@ void Parser::ParseOMPDeclareVariantClauses(Parser::DeclGroupPtrTy Ptr, return; } - OMPTraitInfo *ParentTI = 
Actions.getOMPTraitInfoForSurroundingScope(); + OMPTraitInfo *ParentTI = + Actions.OpenMP().getOMPTraitInfoForSurroundingScope(); ASTContext &ASTCtx = Actions.getASTContext(); OMPTraitInfo &TI = ASTCtx.getNewOMPTraitInfo(); SmallVector AdjustNothing; @@ -1445,7 +1453,7 @@ void Parser::ParseOMPDeclareVariantClauses(Parser::DeclGroupPtrTy Ptr, case OMPC_adjust_args: { AdjustArgsLoc = Tok.getLocation(); ConsumeToken(); - Sema::OpenMPVarListDataTy Data; + SemaOpenMP::OpenMPVarListDataTy Data; SmallVector Vars; IsError = ParseOpenMPVarList(OMPD_declare_variant, OMPC_adjust_args, Vars, Data); @@ -1486,12 +1494,12 @@ void Parser::ParseOMPDeclareVariantClauses(Parser::DeclGroupPtrTy Ptr, } std::optional> DeclVarData = - Actions.checkOpenMPDeclareVariantFunction( + Actions.OpenMP().checkOpenMPDeclareVariantFunction( Ptr, AssociatedFunction.get(), TI, AppendArgs.size(), SourceRange(Loc, Tok.getLocation())); if (DeclVarData && !TI.Sets.empty()) - Actions.ActOnOpenMPDeclareVariantDirective( + Actions.OpenMP().ActOnOpenMPDeclareVariantDirective( DeclVarData->first, DeclVarData->second, TI, AdjustNothing, AdjustNeedDevicePtr, AppendArgs, AdjustArgsLoc, AppendArgsLoc, SourceRange(Loc, Tok.getLocation())); @@ -1642,7 +1650,7 @@ void Parser::ParseOpenMPClauses(OpenMPDirectiveKind DKind, OpenMPClauseKind CKind = Tok.isAnnotation() ? OMPC_unknown : getOpenMPClauseKind(PP.getSpelling(Tok)); - Actions.StartOpenMPClause(CKind); + Actions.OpenMP().StartOpenMPClause(CKind); OMPClause *Clause = ParseOpenMPClause( DKind, CKind, !FirstClauses[unsigned(CKind)].getInt()); SkipUntil(tok::comma, tok::identifier, tok::annot_pragma_openmp_end, @@ -1651,13 +1659,13 @@ void Parser::ParseOpenMPClauses(OpenMPDirectiveKind DKind, if (Clause != nullptr) Clauses.push_back(Clause); if (Tok.is(tok::annot_pragma_openmp_end)) { - Actions.EndOpenMPClause(); + Actions.OpenMP().EndOpenMPClause(); break; } // Skip ',' if any. 
if (Tok.is(tok::comma)) ConsumeToken(); - Actions.EndOpenMPClause(); + Actions.OpenMP().EndOpenMPClause(); } } @@ -1750,12 +1758,13 @@ void Parser::ParseOpenMPAssumesDirective(OpenMPDirectiveKind DKind, Assumptions.push_back(Assumption); } - Actions.ActOnOpenMPAssumesDirective(Loc, DKind, Assumptions, SkippedClauses); + Actions.OpenMP().ActOnOpenMPAssumesDirective(Loc, DKind, Assumptions, + SkippedClauses); } void Parser::ParseOpenMPEndAssumesDirective(SourceLocation Loc) { - if (Actions.isInOpenMPAssumeScope()) - Actions.ActOnOpenMPEndAssumesDirective(); + if (Actions.OpenMP().isInOpenMPAssumeScope()) + Actions.OpenMP().ActOnOpenMPEndAssumesDirective(); else Diag(Loc, diag::err_expected_begin_assumes); } @@ -1811,7 +1820,7 @@ parseOpenMPSimpleClause(Parser &P, OpenMPClauseKind Kind) { } void Parser::ParseOMPDeclareTargetClauses( - Sema::DeclareTargetContextInfo &DTCI) { + SemaOpenMP::DeclareTargetContextInfo &DTCI) { SourceLocation DeviceTypeLoc; bool RequiresToOrLinkOrIndirectClause = false; bool HasToOrLinkOrIndirectClause = false; @@ -1910,11 +1919,11 @@ void Parser::ParseOMPDeclareTargetClauses( if (DTCI.Kind == OMPD_declare_target || HasIdentifier) { auto &&Callback = [this, MT, &DTCI](CXXScopeSpec &SS, DeclarationNameInfo NameInfo) { - NamedDecl *ND = - Actions.lookupOpenMPDeclareTargetName(getCurScope(), SS, NameInfo); + NamedDecl *ND = Actions.OpenMP().lookupOpenMPDeclareTargetName( + getCurScope(), SS, NameInfo); if (!ND) return; - Sema::DeclareTargetContextInfo::MapInfo MI{MT, NameInfo.getLoc()}; + SemaOpenMP::DeclareTargetContextInfo::MapInfo MI{MT, NameInfo.getLoc()}; bool FirstMapping = DTCI.ExplicitlyMapped.try_emplace(ND, MI).second; if (!FirstMapping) Diag(NameInfo.getLoc(), diag::err_omp_declare_target_multiple) @@ -2090,8 +2099,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( skipUntilPragmaOpenMPEnd(DKind); // Skip the last annot_pragma_openmp_end. 
ConsumeAnnotationToken(); - return Actions.ActOnOpenMPThreadprivateDirective(Loc, - Helper.getIdentifiers()); + return Actions.OpenMP().ActOnOpenMPThreadprivateDirective( + Loc, Helper.getIdentifiers()); } break; } @@ -2109,7 +2118,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( OpenMPClauseKind CKind = Tok.isAnnotation() ? OMPC_unknown : getOpenMPClauseKind(PP.getSpelling(Tok)); - Actions.StartOpenMPClause(CKind); + Actions.OpenMP().StartOpenMPClause(CKind); OMPClause *Clause = ParseOpenMPClause( OMPD_allocate, CKind, !FirstClauses[unsigned(CKind)].getInt()); SkipUntil(tok::comma, tok::identifier, tok::annot_pragma_openmp_end, @@ -2118,20 +2127,20 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( if (Clause != nullptr) Clauses.push_back(Clause); if (Tok.is(tok::annot_pragma_openmp_end)) { - Actions.EndOpenMPClause(); + Actions.OpenMP().EndOpenMPClause(); break; } // Skip ',' if any. if (Tok.is(tok::comma)) ConsumeToken(); - Actions.EndOpenMPClause(); + Actions.OpenMP().EndOpenMPClause(); } skipUntilPragmaOpenMPEnd(DKind); } // Skip the last annot_pragma_openmp_end. ConsumeAnnotationToken(); - return Actions.ActOnOpenMPAllocateDirective(Loc, Helper.getIdentifiers(), - Clauses); + return Actions.OpenMP().ActOnOpenMPAllocateDirective( + Loc, Helper.getIdentifiers(), Clauses); } break; } @@ -2150,7 +2159,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( OpenMPClauseKind CKind = Tok.isAnnotation() ? 
OMPC_unknown : getOpenMPClauseKind(PP.getSpelling(Tok)); - Actions.StartOpenMPClause(CKind); + Actions.OpenMP().StartOpenMPClause(CKind); OMPClause *Clause = ParseOpenMPClause( OMPD_requires, CKind, !FirstClauses[unsigned(CKind)].getInt()); SkipUntil(tok::comma, tok::identifier, tok::annot_pragma_openmp_end, @@ -2159,13 +2168,13 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( if (Clause != nullptr) Clauses.push_back(Clause); if (Tok.is(tok::annot_pragma_openmp_end)) { - Actions.EndOpenMPClause(); + Actions.OpenMP().EndOpenMPClause(); break; } // Skip ',' if any. if (Tok.is(tok::comma)) ConsumeToken(); - Actions.EndOpenMPClause(); + Actions.OpenMP().EndOpenMPClause(); } // Consume final annot_pragma_openmp_end if (Clauses.empty()) { @@ -2175,14 +2184,15 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( return nullptr; } ConsumeAnnotationToken(); - return Actions.ActOnOpenMPRequiresDirective(StartLoc, Clauses); + return Actions.OpenMP().ActOnOpenMPRequiresDirective(StartLoc, Clauses); } case OMPD_error: { SmallVector Clauses; SourceLocation StartLoc = ConsumeToken(); ParseOpenMPClauses(DKind, Clauses, StartLoc); - Actions.ActOnOpenMPErrorDirective(Clauses, StartLoc, SourceLocation(), - /*InExContext = */ false); + Actions.OpenMP().ActOnOpenMPErrorDirective(Clauses, StartLoc, + SourceLocation(), + /*InExContext = */ false); break; } case OMPD_assumes: @@ -2217,7 +2227,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( // { #pragma omp end declare variant } // ConsumeToken(); - OMPTraitInfo *ParentTI = Actions.getOMPTraitInfoForSurroundingScope(); + OMPTraitInfo *ParentTI = + Actions.OpenMP().getOMPTraitInfoForSurroundingScope(); ASTContext &ASTCtx = Actions.getASTContext(); OMPTraitInfo &TI = ASTCtx.getNewOMPTraitInfo(); if (parseOMPDeclareVariantMatchClause(Loc, TI, ParentTI)) { @@ -2248,7 +2259,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( /* 
ConstructTraits */ ArrayRef()); if (isVariantApplicableInContext(VMI, OMPCtx, /* DeviceSetOnly */ true)) { - Actions.ActOnOpenMPBeginDeclareVariant(Loc, TI); + Actions.OpenMP().ActOnOpenMPBeginDeclareVariant(Loc, TI); break; } @@ -2275,8 +2286,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( break; } case OMPD_end_declare_variant: { - if (Actions.isInOpenMPDeclareVariantScope()) - Actions.ActOnOpenMPEndDeclareVariant(); + if (Actions.OpenMP().isInOpenMPDeclareVariantScope()) + Actions.OpenMP().ActOnOpenMPEndDeclareVariant(); else Diag(Loc, diag::err_expected_begin_declare_variant); ConsumeToken(); @@ -2331,7 +2342,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( case OMPD_declare_target: { SourceLocation DTLoc = ConsumeAnyToken(); bool HasClauses = Tok.isNot(tok::annot_pragma_openmp_end); - Sema::DeclareTargetContextInfo DTCI(DKind, DTLoc); + SemaOpenMP::DeclareTargetContextInfo DTCI(DKind, DTLoc); if (HasClauses) ParseOMPDeclareTargetClauses(DTCI); bool HasImplicitMappings = DKind == OMPD_begin_declare_target || @@ -2342,24 +2353,24 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( ConsumeAnyToken(); if (HasImplicitMappings) { - Actions.ActOnStartOpenMPDeclareTargetContext(DTCI); + Actions.OpenMP().ActOnStartOpenMPDeclareTargetContext(DTCI); return nullptr; } - Actions.ActOnFinishedOpenMPDeclareTargetContext(DTCI); + Actions.OpenMP().ActOnFinishedOpenMPDeclareTargetContext(DTCI); llvm::SmallVector Decls; for (auto &It : DTCI.ExplicitlyMapped) Decls.push_back(It.first); return Actions.BuildDeclaratorGroup(Decls); } case OMPD_end_declare_target: { - if (!Actions.isInOpenMPDeclareTargetContext()) { + if (!Actions.OpenMP().isInOpenMPDeclareTargetContext()) { Diag(Tok, diag::err_omp_unexpected_directive) << 1 << getOpenMPDirectiveName(DKind); break; } - const Sema::DeclareTargetContextInfo &DTCI = - Actions.ActOnOpenMPEndDeclareTargetDirective(); + const 
SemaOpenMP::DeclareTargetContextInfo &DTCI = + Actions.OpenMP().ActOnOpenMPEndDeclareTargetDirective(); ParseOMPEndDeclareTargetDirective(DTCI.Kind, DKind, DTCI.Loc); return nullptr; } @@ -2683,7 +2694,7 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( if (!ParseOpenMPSimpleVarList(DKind, Helper, /*AllowScopeSpecifier=*/false)) { skipUntilPragmaOpenMPEnd(DKind); - DeclGroupPtrTy Res = Actions.ActOnOpenMPThreadprivateDirective( + DeclGroupPtrTy Res = Actions.OpenMP().ActOnOpenMPThreadprivateDirective( Loc, Helper.getIdentifiers()); Directive = Actions.ActOnDeclStmt(Res, Loc, Tok.getLocation()); } @@ -2710,7 +2721,7 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( OpenMPClauseKind CKind = Tok.isAnnotation() ? OMPC_unknown : getOpenMPClauseKind(PP.getSpelling(Tok)); - Actions.StartOpenMPClause(CKind); + Actions.OpenMP().StartOpenMPClause(CKind); OMPClause *Clause = ParseOpenMPClause( OMPD_allocate, CKind, !FirstClauses[unsigned(CKind)].getInt()); SkipUntil(tok::comma, tok::identifier, tok::annot_pragma_openmp_end, @@ -2719,17 +2730,17 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( if (Clause != nullptr) Clauses.push_back(Clause); if (Tok.is(tok::annot_pragma_openmp_end)) { - Actions.EndOpenMPClause(); + Actions.OpenMP().EndOpenMPClause(); break; } // Skip ',' if any. 
if (Tok.is(tok::comma)) ConsumeToken(); - Actions.EndOpenMPClause(); + Actions.OpenMP().EndOpenMPClause(); } skipUntilPragmaOpenMPEnd(DKind); } - DeclGroupPtrTy Res = Actions.ActOnOpenMPAllocateDirective( + DeclGroupPtrTy Res = Actions.OpenMP().ActOnOpenMPAllocateDirective( Loc, Helper.getIdentifiers(), Clauses); Directive = Actions.ActOnDeclStmt(Res, Loc, Tok.getLocation()); } @@ -2875,7 +2886,8 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( if (isOpenMPSimdDirective(DKind)) ScopeFlags |= Scope::OpenMPSimdDirectiveScope; ParseScope OMPDirectiveScope(this, ScopeFlags); - Actions.StartOpenMPDSABlock(DKind, DirName, Actions.getCurScope(), Loc); + Actions.OpenMP().StartOpenMPDSABlock(DKind, DirName, Actions.getCurScope(), + Loc); while (Tok.isNot(tok::annot_pragma_openmp_end)) { // If we are parsing for a directive within a metadirective, the directive @@ -2909,7 +2921,7 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( } // No more implicit clauses allowed. ImplicitClauseAllowed = false; - Actions.StartOpenMPClause(CKind); + Actions.OpenMP().StartOpenMPClause(CKind); HasImplicitClause = false; OMPClause *Clause = ParseOpenMPClause( DKind, CKind, !FirstClauses[unsigned(CKind)].getInt()); @@ -2922,7 +2934,7 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( // Skip ',' if any. if (Tok.is(tok::comma)) ConsumeToken(); - Actions.EndOpenMPClause(); + Actions.OpenMP().EndOpenMPClause(); } // End location of the directive. EndLoc = Tok.getLocation(); @@ -2953,7 +2965,7 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( StmtResult AssociatedStmt; if (HasAssociatedStatement) { // The body is a block scope like in Lambdas and Blocks. - Actions.ActOnOpenMPRegionStart(DKind, getCurScope()); + Actions.OpenMP().ActOnOpenMPRegionStart(DKind, getCurScope()); // FIXME: We create a bogus CompoundStmt scope to hold the contents of // the captured region. 
Code elsewhere assumes that any FunctionScopeInfo // should have at least one compound statement scope within it. @@ -2964,30 +2976,33 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( if (AssociatedStmt.isUsable() && isOpenMPLoopDirective(DKind) && getLangOpts().OpenMPIRBuilder) - AssociatedStmt = Actions.ActOnOpenMPLoopnest(AssociatedStmt.get()); + AssociatedStmt = + Actions.OpenMP().ActOnOpenMPLoopnest(AssociatedStmt.get()); } - AssociatedStmt = Actions.ActOnOpenMPRegionEnd(AssociatedStmt, Clauses); + AssociatedStmt = + Actions.OpenMP().ActOnOpenMPRegionEnd(AssociatedStmt, Clauses); } else if (DKind == OMPD_target_update || DKind == OMPD_target_enter_data || DKind == OMPD_target_exit_data) { - Actions.ActOnOpenMPRegionStart(DKind, getCurScope()); + Actions.OpenMP().ActOnOpenMPRegionStart(DKind, getCurScope()); AssociatedStmt = (Sema::CompoundScopeRAII(Actions), Actions.ActOnCompoundStmt(Loc, Loc, std::nullopt, /*isStmtExpr=*/false)); - AssociatedStmt = Actions.ActOnOpenMPRegionEnd(AssociatedStmt, Clauses); + AssociatedStmt = + Actions.OpenMP().ActOnOpenMPRegionEnd(AssociatedStmt, Clauses); } - Directive = Actions.ActOnOpenMPExecutableDirective( + Directive = Actions.OpenMP().ActOnOpenMPExecutableDirective( DKind, DirName, CancelRegion, Clauses, AssociatedStmt.get(), Loc, EndLoc); // Exit scope. - Actions.EndOpenMPDSABlock(Directive.get()); + Actions.OpenMP().EndOpenMPDSABlock(Directive.get()); OMPDirectiveScope.Exit(); break; } case OMPD_declare_target: { SourceLocation DTLoc = ConsumeAnyToken(); bool HasClauses = Tok.isNot(tok::annot_pragma_openmp_end); - Sema::DeclareTargetContextInfo DTCI(DKind, DTLoc); + SemaOpenMP::DeclareTargetContextInfo DTCI(DKind, DTLoc); if (HasClauses) ParseOMPDeclareTargetClauses(DTCI); bool HasImplicitMappings = @@ -3003,7 +3018,7 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( // Skip the last annot_pragma_openmp_end. 
ConsumeAnyToken(); - Actions.ActOnFinishedOpenMPDeclareTargetContext(DTCI); + Actions.OpenMP().ActOnFinishedOpenMPDeclareTargetContext(DTCI); break; } case OMPD_declare_simd: @@ -3118,7 +3133,7 @@ OMPClause *Parser::ParseOpenMPSizesClause() { T.consumeClose(); - return Actions.ActOnOpenMPSizesClause( + return Actions.OpenMP().ActOnOpenMPSizesClause( ValExprs, ClauseNameLoc, T.getOpenLocation(), T.getCloseLocation()); } @@ -3130,7 +3145,7 @@ OMPClause *Parser::ParseOpenMPUsesAllocatorClause(OpenMPDirectiveKind DKind) { BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end); if (T.expectAndConsume(diag::err_expected_lparen_after, "uses_allocator")) return nullptr; - SmallVector Data; + SmallVector Data; do { CXXScopeSpec SS; Token Replacement; @@ -3144,7 +3159,7 @@ OMPClause *Parser::ParseOpenMPUsesAllocatorClause(OpenMPDirectiveKind DKind) { StopBeforeMatch); break; } - Sema::UsesAllocatorsData &D = Data.emplace_back(); + SemaOpenMP::UsesAllocatorsData &D = Data.emplace_back(); D.Allocator = Allocator.get(); if (Tok.is(tok::l_paren)) { BalancedDelimiterTracker T(*this, tok::l_paren, @@ -3169,8 +3184,8 @@ OMPClause *Parser::ParseOpenMPUsesAllocatorClause(OpenMPDirectiveKind DKind) { ConsumeAnyToken(); } while (Tok.isNot(tok::r_paren) && Tok.isNot(tok::annot_pragma_openmp_end)); T.consumeClose(); - return Actions.ActOnOpenMPUsesAllocatorClause(Loc, T.getOpenLocation(), - T.getCloseLocation(), Data); + return Actions.OpenMP().ActOnOpenMPUsesAllocatorClause( + Loc, T.getOpenLocation(), T.getCloseLocation(), Data); } /// Parsing of OpenMP clauses. @@ -3538,15 +3553,16 @@ OMPClause *Parser::ParseOpenMPSingleExprClause(OpenMPClauseKind Kind, if (ParseOnly) return nullptr; - return Actions.ActOnOpenMPSingleExprClause(Kind, Val.get(), Loc, LLoc, RLoc); + return Actions.OpenMP().ActOnOpenMPSingleExprClause(Kind, Val.get(), Loc, + LLoc, RLoc); } /// Parse indirect clause for '#pragma omp declare target' directive. 
/// 'indirect' '[' '(' invoked-by-fptr ')' ']' /// where invoked-by-fptr is a constant boolean expression that evaluates to /// true or false at compile time. -bool Parser::ParseOpenMPIndirectClause(Sema::DeclareTargetContextInfo &DTCI, - bool ParseOnly) { +bool Parser::ParseOpenMPIndirectClause( + SemaOpenMP::DeclareTargetContextInfo &DTCI, bool ParseOnly) { SourceLocation Loc = ConsumeToken(); SourceLocation RLoc; @@ -3721,15 +3737,16 @@ OMPClause *Parser::ParseOpenMPInteropClause(OpenMPClauseKind Kind, return nullptr; if (Kind == OMPC_init) - return Actions.ActOnOpenMPInitClause(InteropVarExpr.get(), InteropInfo, Loc, - T.getOpenLocation(), VarLoc, RLoc); + return Actions.OpenMP().ActOnOpenMPInitClause( + InteropVarExpr.get(), InteropInfo, Loc, T.getOpenLocation(), VarLoc, + RLoc); if (Kind == OMPC_use) - return Actions.ActOnOpenMPUseClause(InteropVarExpr.get(), Loc, - T.getOpenLocation(), VarLoc, RLoc); + return Actions.OpenMP().ActOnOpenMPUseClause( + InteropVarExpr.get(), Loc, T.getOpenLocation(), VarLoc, RLoc); if (Kind == OMPC_destroy) - return Actions.ActOnOpenMPDestroyClause(InteropVarExpr.get(), Loc, - T.getOpenLocation(), VarLoc, RLoc); + return Actions.OpenMP().ActOnOpenMPDestroyClause( + InteropVarExpr.get(), Loc, T.getOpenLocation(), VarLoc, RLoc); llvm_unreachable("Unexpected interop variable clause."); } @@ -3787,8 +3804,8 @@ OMPClause *Parser::ParseOpenMPOMPXAttributesClause(bool ParseOnly) { }; } - return Actions.ActOnOpenMPXAttributeClause(Attrs, Loc, T.getOpenLocation(), - T.getCloseLocation()); + return Actions.OpenMP().ActOnOpenMPXAttributeClause( + Attrs, Loc, T.getOpenLocation(), T.getCloseLocation()); } /// Parsing of simple OpenMP clauses like 'default' or 'proc_bind'. 
@@ -3823,9 +3840,8 @@ OMPClause *Parser::ParseOpenMPSimpleClause(OpenMPClauseKind Kind, << getOpenMPClauseName(OMPC_default) << "5.1"; return nullptr; } - return Actions.ActOnOpenMPSimpleClause(Kind, Val->Type, - Val->TypeLoc, Val->LOpen, - Val->Loc, Val->RLoc); + return Actions.OpenMP().ActOnOpenMPSimpleClause( + Kind, Val->Type, Val->TypeLoc, Val->LOpen, Val->Loc, Val->RLoc); } /// Parsing of OpenMP clauses like 'ordered'. @@ -3860,7 +3876,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPClauseKind Kind, bool ParseOnly) { if (ParseOnly) return nullptr; - return Actions.ActOnOpenMPClause(Kind, Loc, Tok.getLocation()); + return Actions.OpenMP().ActOnOpenMPClause(Kind, Loc, Tok.getLocation()); } /// Parsing of OpenMP clauses with single expressions and some additional @@ -4118,7 +4134,7 @@ OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPDirectiveKind DKind, if (ParseOnly) return nullptr; - return Actions.ActOnOpenMPSingleExprWithArgClause( + return Actions.OpenMP().ActOnOpenMPSingleExprWithArgClause( Kind, Arg, Val.get(), Loc, T.getOpenLocation(), KLoc, DelimLoc, RLoc); } @@ -4184,7 +4200,7 @@ static OpenMPMapModifierKind isMapModifier(Parser &P) { } /// Parse the mapper modifier in map, to, and from clauses. -bool Parser::parseMapperModifier(Sema::OpenMPVarListDataTy &Data) { +bool Parser::parseMapperModifier(SemaOpenMP::OpenMPVarListDataTy &Data) { // Parse '('. BalancedDelimiterTracker T(*this, tok::l_paren, tok::colon); if (T.expectAndConsume(diag::err_expected_lparen_after, "mapper")) { @@ -4216,7 +4232,7 @@ bool Parser::parseMapperModifier(Sema::OpenMPVarListDataTy &Data) { /// map([ [map-type-modifier[,] [map-type-modifier[,] ...] 
map-type : ] list) /// where, map-type-modifier ::= always | close | mapper(mapper-identifier) | /// present -bool Parser::parseMapTypeModifiers(Sema::OpenMPVarListDataTy &Data) { +bool Parser::parseMapTypeModifiers(SemaOpenMP::OpenMPVarListDataTy &Data) { while (getCurToken().isNot(tok::colon)) { OpenMPMapModifierKind TypeModifier = isMapModifier(*this); if (TypeModifier == OMPC_MAP_MODIFIER_always || @@ -4282,7 +4298,7 @@ static OpenMPMapClauseKind isMapType(Parser &P) { /// Parse map-type in map clause. /// map([ [map-type-modifier[,] [map-type-modifier[,] ...] map-type : ] list) /// where, map-type ::= to | from | tofrom | alloc | release | delete -static void parseMapType(Parser &P, Sema::OpenMPVarListDataTy &Data) { +static void parseMapType(Parser &P, SemaOpenMP::OpenMPVarListDataTy &Data) { Token Tok = P.getCurToken(); if (Tok.is(tok::colon)) { P.Diag(Tok, diag::err_omp_map_type_missing); @@ -4306,7 +4322,7 @@ ExprResult Parser::ParseOpenMPIteratorsExpr() { return ExprError(); SourceLocation LLoc = T.getOpenLocation(); - SmallVector Data; + SmallVector Data; while (Tok.isNot(tok::r_paren) && Tok.isNot(tok::annot_pragma_openmp_end)) { // Check if the type parsing is required. 
ParsedType IteratorType; @@ -4380,7 +4396,7 @@ ExprResult Parser::ParseOpenMPIteratorsExpr() { if (Tok.is(tok::comma)) ConsumeToken(); - Sema::OMPIteratorData &D = Data.emplace_back(); + SemaOpenMP::OMPIteratorData &D = Data.emplace_back(); D.DeclIdent = II; D.DeclIdentLoc = IdLoc; D.Type = IteratorType; @@ -4397,12 +4413,12 @@ ExprResult Parser::ParseOpenMPIteratorsExpr() { if (!T.consumeClose()) RLoc = T.getCloseLocation(); - return Actions.ActOnOMPIteratorExpr(getCurScope(), IteratorKwLoc, LLoc, RLoc, - Data); + return Actions.OpenMP().ActOnOMPIteratorExpr(getCurScope(), IteratorKwLoc, + LLoc, RLoc, Data); } bool Parser::ParseOpenMPReservedLocator(OpenMPClauseKind Kind, - Sema::OpenMPVarListDataTy &Data, + SemaOpenMP::OpenMPVarListDataTy &Data, const LangOptions &LangOpts) { // Currently the only reserved locator is 'omp_all_memory' which is only // allowed on a depend clause. @@ -4430,7 +4446,7 @@ bool Parser::ParseOpenMPReservedLocator(OpenMPClauseKind Kind, /// Parse step size expression. Returns true if parsing is successfull, /// otherwise returns false. 
-static bool parseStepSize(Parser &P, Sema::OpenMPVarListDataTy &Data, +static bool parseStepSize(Parser &P, SemaOpenMP::OpenMPVarListDataTy &Data, OpenMPClauseKind CKind, SourceLocation ELoc) { ExprResult Tail = P.ParseAssignmentExpression(); Sema &Actions = P.getActions(); @@ -4451,7 +4467,7 @@ static bool parseStepSize(Parser &P, Sema::OpenMPVarListDataTy &Data, bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind, OpenMPClauseKind Kind, SmallVectorImpl &Vars, - Sema::OpenMPVarListDataTy &Data) { + SemaOpenMP::OpenMPVarListDataTy &Data) { UnqualifiedId UnqualifiedReductionId; bool InvalidReductionId = false; bool IsInvalidMapperModifier = false; @@ -4961,7 +4977,7 @@ OMPClause *Parser::ParseOpenMPVarListClause(OpenMPDirectiveKind DKind, SourceLocation Loc = Tok.getLocation(); SourceLocation LOpen = ConsumeToken(); SmallVector Vars; - Sema::OpenMPVarListDataTy Data; + SemaOpenMP::OpenMPVarListDataTy Data; if (ParseOpenMPVarList(DKind, Kind, Vars, Data)) return nullptr; @@ -4969,5 +4985,5 @@ OMPClause *Parser::ParseOpenMPVarListClause(OpenMPDirectiveKind DKind, if (ParseOnly) return nullptr; OMPVarListLocTy Locs(Loc, LOpen, Data.RLoc); - return Actions.ActOnOpenMPVarListClause(Kind, Vars, Locs, Data); + return Actions.OpenMP().ActOnOpenMPVarListClause(Kind, Vars, Locs, Data); } diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp index 76a3fa8f2627de..629421c01d17d2 100644 --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -22,6 +22,7 @@ #include "clang/Sema/DeclSpec.h" #include "clang/Sema/EnterExpressionEvaluationContext.h" #include "clang/Sema/Scope.h" +#include "clang/Sema/SemaOpenMP.h" #include "clang/Sema/TypoCorrection.h" #include "llvm/ADT/STLExtras.h" #include @@ -2301,7 +2302,7 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) { // In OpenMP loop region loop control variable must be captured and be // private. Perform analysis of first part (if any). 
if (getLangOpts().OpenMP && FirstPart.isUsable()) { - Actions.ActOnOpenMPLoopInitialization(ForLoc, FirstPart.get()); + Actions.OpenMP().ActOnOpenMPLoopInitialization(ForLoc, FirstPart.get()); } } diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 8de202f4f7a0c3..a1e32d391ed0cc 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -46,6 +46,7 @@ #include "clang/Sema/SemaHLSL.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/SemaOpenACC.h" +#include "clang/Sema/SemaOpenMP.h" #include "clang/Sema/SemaSYCL.h" #include "clang/Sema/TemplateDeduction.h" #include "clang/Sema/TemplateInstCallback.h" @@ -203,6 +204,7 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer, CUDAPtr(std::make_unique(*this)), HLSLPtr(std::make_unique(*this)), OpenACCPtr(std::make_unique(*this)), + OpenMPPtr(std::make_unique(*this)), SYCLPtr(std::make_unique(*this)), MSPointerToMemberRepresentationMethod( LangOpts.getMSPointerToMemberRepresentationMethod()), @@ -226,8 +228,7 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer, StringWithUTF8StringMethod(nullptr), ValueWithBytesObjCTypeMethod(nullptr), NSArrayDecl(nullptr), ArrayWithObjectsMethod(nullptr), NSDictionaryDecl(nullptr), - DictionaryWithObjectsMethod(nullptr), CodeCompleter(CodeCompleter), - VarDataSharingAttributesStack(nullptr) { + DictionaryWithObjectsMethod(nullptr), CodeCompleter(CodeCompleter) { assert(pp.TUKind == TUKind); TUScope = nullptr; @@ -252,7 +253,7 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer, nullptr, ExpressionEvaluationContextRecord::EK_Other); // Initialization of data sharing attributes stack for OpenMP - InitDataSharingAttributesStack(); + OpenMP().InitDataSharingAttributesStack(); std::unique_ptr Callbacks = std::make_unique(); @@ -501,7 +502,7 @@ Sema::~Sema() { threadSafety::threadSafetyCleanup(ThreadSafetyDeclCache); // Destroys data sharing attributes stack for OpenMP - 
DestroyDataSharingAttributesStack(); + OpenMP().DestroyDataSharingAttributesStack(); // Detach from the PP callback handler which outlives Sema since it's owned // by the preprocessor. @@ -1159,7 +1160,7 @@ void Sema::ActOnEndOfTranslationUnit() { DiagnoseUnterminatedPragmaAlignPack(); DiagnoseUnterminatedPragmaAttribute(); - DiagnoseUnterminatedOpenMPDeclareTarget(); + OpenMP().DiagnoseUnterminatedOpenMPDeclareTarget(); // All delayed member exception specs should be checked or we end up accepting // incompatible declarations. @@ -1747,7 +1748,7 @@ class DeferredDiagnosticsEmitter // Finalize analysis of OpenMP-specific constructs. if (Caller && S.LangOpts.OpenMP && UsePath.size() == 1 && (ShouldEmitRootNode || InOMPDeviceContext)) - S.finalizeOpenMPDelayedAnalysis(Caller, FD, Loc); + S.OpenMP().finalizeOpenMPDelayedAnalysis(Caller, FD, Loc); if (Caller) S.CUDA().DeviceKnownEmittedFns[FD] = {Caller, Loc}; // Always emit deferred diagnostics for the direct users. This does not @@ -1899,8 +1900,8 @@ Sema::targetDiag(SourceLocation Loc, unsigned DiagID, const FunctionDecl *FD) { FD = FD ? FD : getCurFunctionDecl(); if (LangOpts.OpenMP) return LangOpts.OpenMPIsTargetDevice - ? diagIfOpenMPDeviceCode(Loc, DiagID, FD) - : diagIfOpenMPHostCode(Loc, DiagID, FD); + ? OpenMP().diagIfOpenMPDeviceCode(Loc, DiagID, FD) + : OpenMP().diagIfOpenMPHostCode(Loc, DiagID, FD); if (getLangOpts().CUDA) return getLangOpts().CUDAIsDevice ? 
CUDA().DiagIfDeviceCode(Loc, DiagID) : CUDA().DiagIfHostCode(Loc, DiagID); @@ -2131,7 +2132,7 @@ void Sema::PushFunctionScope() { FunctionScopes.push_back(new FunctionScopeInfo(getDiagnostics())); } if (LangOpts.OpenMP) - pushOpenMPFunctionRegion(); + OpenMP().pushOpenMPFunctionRegion(); } void Sema::PushBlockScope(Scope *BlockScope, BlockDecl *Block) { @@ -2251,7 +2252,7 @@ Sema::PopFunctionScopeInfo(const AnalysisBasedWarnings::Policy *WP, PoppedFunctionScopeDeleter(this)); if (LangOpts.OpenMP) - popOpenMPFunctionRegion(Scope.get()); + OpenMP().popOpenMPFunctionRegion(Scope.get()); // Issue any analysis-based warnings. if (WP && D) @@ -2687,7 +2688,9 @@ void Sema::PushCapturedRegionScope(Scope *S, CapturedDecl *CD, RecordDecl *RD, unsigned OpenMPCaptureLevel) { auto *CSI = new CapturedRegionScopeInfo( getDiagnostics(), S, CD, RD, CD->getContextParam(), K, - (getLangOpts().OpenMP && K == CR_OpenMP) ? getOpenMPNestingLevel() : 0, + (getLangOpts().OpenMP && K == CR_OpenMP) + ? OpenMP().getOpenMPNestingLevel() + : 0, OpenMPCaptureLevel); CSI->ReturnType = Context.VoidTy; FunctionScopes.push_back(CSI); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 8e21811b67d900..99b0a00083535e 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3233,6 +3233,17 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, if (BuiltinCountZeroBitsGeneric(*this, TheCall)) return ExprError(); break; + + case Builtin::BI__builtin_allow_runtime_check: { + Expr *Arg = TheCall->getArg(0); + // Check if the argument is a string literal. 
+ if (!isa(Arg->IgnoreParenImpCasts())) { + Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal) + << Arg->getSourceRange(); + return ExprError(); + } + break; + } } if (getLangOpts().HLSL && CheckHLSLBuiltinFunctionCall(BuiltinID, TheCall)) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 8b3b9d020db572..745cf41e204e7a 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -48,6 +48,7 @@ #include "clang/Sema/SemaCUDA.h" #include "clang/Sema/SemaHLSL.h" #include "clang/Sema/SemaInternal.h" +#include "clang/Sema/SemaOpenMP.h" #include "clang/Sema/Template.h" #include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/SmallString.h" @@ -6168,11 +6169,12 @@ Decl *Sema::ActOnDeclarator(Scope *S, Declarator &D) { // Check if we are in an `omp begin/end declare variant` scope. Handle this // declaration only if the `bind_to_declaration` extension is set. SmallVector Bases; - if (LangOpts.OpenMP && isInOpenMPDeclareVariantScope()) - if (getOMPTraitInfoForSurroundingScope()->isExtensionActive(llvm::omp::TraitProperty:: - implementation_extension_bind_to_declaration)) - ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( - S, D, MultiTemplateParamsArg(), Bases); + if (LangOpts.OpenMP && OpenMP().isInOpenMPDeclareVariantScope()) + if (OpenMP().getOMPTraitInfoForSurroundingScope()->isExtensionActive( + llvm::omp::TraitProperty:: + implementation_extension_bind_to_declaration)) + OpenMP().ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( + S, D, MultiTemplateParamsArg(), Bases); Decl *Dcl = HandleDeclarator(S, D, MultiTemplateParamsArg()); @@ -6181,7 +6183,8 @@ Decl *Sema::ActOnDeclarator(Scope *S, Declarator &D) { Dcl->setTopLevelDeclInObjCContainer(); if (!Bases.empty()) - ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope(Dcl, Bases); + OpenMP().ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope(Dcl, + Bases); return Dcl; } @@ -6568,8 +6571,8 @@ NamedDecl 
*Sema::HandleDeclarator(Scope *S, Declarator &D, if (New->getDeclName() && AddToScope) PushOnScopeChains(New, S); - if (isInOpenMPDeclareTargetContext()) - checkDeclIsAllowedInOpenMPTarget(nullptr, New); + if (OpenMP().isInOpenMPDeclareTargetContext()) + OpenMP().checkDeclIsAllowedInOpenMPTarget(nullptr, New); return New; } @@ -12268,7 +12271,7 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD, } if (LangOpts.OpenMP) - ActOnFinishedFunctionDefinitionInOpenMPAssumeScope(NewFD); + OpenMP().ActOnFinishedFunctionDefinitionInOpenMPAssumeScope(NewFD); // Semantic checking for this function declaration (in isolation). @@ -12668,7 +12671,7 @@ void Sema::CheckMSVCRTEntryPoint(FunctionDecl *FD) { } } -bool Sema::CheckForConstantInitializer(Expr *Init, QualType DclT) { +bool Sema::CheckForConstantInitializer(Expr *Init, unsigned DiagID) { // FIXME: Need strict checking. In C89, we need to check for // any assignment, increment, decrement, function-calls, or // commas outside of a sizeof. In C99, it's the same list, @@ -12686,8 +12689,7 @@ bool Sema::CheckForConstantInitializer(Expr *Init, QualType DclT) { const Expr *Culprit; if (Init->isConstantInitializer(Context, false, &Culprit)) return false; - Diag(Culprit->getExprLoc(), diag::err_init_element_not_constant) - << Culprit->getSourceRange(); + Diag(Culprit->getExprLoc(), DiagID) << Culprit->getSourceRange(); return true; } @@ -13805,29 +13807,24 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) { // OpenCL v1.2 s6.5.3: __constant locals must be constant-initialized. // This is true even in C++ for OpenCL. } else if (VDecl->getType().getAddressSpace() == LangAS::opencl_constant) { - CheckForConstantInitializer(Init, DclT); + CheckForConstantInitializer(Init); - // Otherwise, C++ does not restrict the initializer. + // Otherwise, C++ does not restrict the initializer. 
} else if (getLangOpts().CPlusPlus) { // do nothing // C99 6.7.8p4: All the expressions in an initializer for an object that has // static storage duration shall be constant expressions or string literals. } else if (VDecl->getStorageClass() == SC_Static) { - CheckForConstantInitializer(Init, DclT); + CheckForConstantInitializer(Init); - // C89 is stricter than C99 for aggregate initializers. - // C89 6.5.7p3: All the expressions [...] in an initializer list - // for an object that has aggregate or union type shall be - // constant expressions. + // C89 is stricter than C99 for aggregate initializers. + // C89 6.5.7p3: All the expressions [...] in an initializer list + // for an object that has aggregate or union type shall be + // constant expressions. } else if (!getLangOpts().C99 && VDecl->getType()->isAggregateType() && isa(Init)) { - const Expr *Culprit; - if (!Init->isConstantInitializer(Context, false, &Culprit)) { - Diag(Culprit->getExprLoc(), - diag::ext_aggregate_init_not_constant) - << Culprit->getSourceRange(); - } + CheckForConstantInitializer(Init, diag::ext_aggregate_init_not_constant); } if (auto *E = dyn_cast(Init)) @@ -13960,7 +13957,7 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) { // Avoid duplicate diagnostics for constexpr variables. if (!getLangOpts().CPlusPlus && !VDecl->isInvalidDecl() && !VDecl->isConstexpr()) - CheckForConstantInitializer(Init, DclT); + CheckForConstantInitializer(Init); } QualType InitType = Init->getType(); @@ -14956,7 +14953,7 @@ Sema::DeclGroupPtrTy Sema::FinalizeDeclaratorGroup(Scope *S, const DeclSpec &DS, if (auto *VD = dyn_cast(D); LangOpts.OpenMP && VD && VD->hasAttr() && VD->hasGlobalStorage()) - ActOnOpenMPDeclareTargetInitializer(D); + OpenMP().ActOnOpenMPDeclareTargetInitializer(D); // For declarators, there are some additional syntactic-ish checks we need // to perform. 
if (auto *DD = dyn_cast(D)) { @@ -15495,8 +15492,8 @@ Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Declarator &D, // specialization function under the OpenMP context defined as part of the // `omp begin declare variant`. SmallVector Bases; - if (LangOpts.OpenMP && isInOpenMPDeclareVariantScope()) - ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( + if (LangOpts.OpenMP && OpenMP().isInOpenMPDeclareVariantScope()) + OpenMP().ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( ParentScope, D, TemplateParameterLists, Bases); D.setFunctionDefinitionKind(FunctionDefinitionKind::Definition); @@ -15504,7 +15501,8 @@ Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Declarator &D, Decl *Dcl = ActOnStartOfFunctionDef(FnBodyScope, DP, SkipBody, BodyKind); if (!Bases.empty()) - ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope(Dcl, Bases); + OpenMP().ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope(Dcl, + Bases); return Dcl; } @@ -20651,7 +20649,7 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD, return FunctionEmissionStatus::OMPDiscarded; // If we have an explicit value for the device type, or we are in a target // declare context, we need to emit all extern and used symbols. 
- if (isInOpenMPDeclareTargetContext() || DevTy) + if (OpenMP().isInOpenMPDeclareTargetContext() || DevTy) if (IsEmittedForExternalSymbol()) return FunctionEmissionStatus::Emitted; // Device mode only emits what it must, if it wasn't tagged yet and needed, diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index d26f130b5774ce..c3bf18a3f79e23 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -2001,6 +2001,8 @@ static void markUsedForAliasOrIfunc(Sema &S, Decl *D, const ParsedAttr &AL, LookupResult LR(S, Target, Sema::LookupOrdinaryName); if (S.LookupName(LR, S.TUScope)) { for (NamedDecl *ND : LR) { + if (!isa(ND) && !isa(ND)) + continue; if (MC->shouldMangleDeclName(ND)) { llvm::raw_svector_ostream Out(Name); Name.clear(); diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 7669171fea56ff..8c6bae545bfd15 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -44,6 +44,7 @@ #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaCUDA.h" #include "clang/Sema/SemaInternal.h" +#include "clang/Sema/SemaOpenMP.h" #include "clang/Sema/Template.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" @@ -962,8 +963,8 @@ Sema::ActOnDecompositionDeclarator(Scope *S, Declarator &D, CurContext->addHiddenDecl(New); } - if (isInOpenMPDeclareTargetContext()) - checkDeclIsAllowedInOpenMPTarget(nullptr, New); + if (OpenMP().isInOpenMPDeclareTargetContext()) + OpenMP().checkDeclIsAllowedInOpenMPTarget(nullptr, New); return New; } @@ -18654,8 +18655,8 @@ void Sema::MarkVTableUsed(SourceLocation Loc, CXXRecordDecl *Class, // Do not mark as used if compiling for the device outside of the target // region. 
if (TUKind != TU_Prefix && LangOpts.OpenMP && LangOpts.OpenMPIsTargetDevice && - !isInOpenMPDeclareTargetContext() && - !isInOpenMPTargetExecutionDirective()) { + !OpenMP().isInOpenMPDeclareTargetContext() && + !OpenMP().isInOpenMPTargetExecutionDirective()) { if (!DefinitionRequired) MarkVirtualMembersReferenced(Loc, Class); return; diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 189764cb4b6b08..7c3faba0f78819 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -52,6 +52,7 @@ #include "clang/Sema/SemaCUDA.h" #include "clang/Sema/SemaFixItUtils.h" #include "clang/Sema/SemaInternal.h" +#include "clang/Sema/SemaOpenMP.h" #include "clang/Sema/Template.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLForwardCompat.h" @@ -360,9 +361,9 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef Locs, // at the same location. // [OpenMP 5.2] Also allow iterator declared variables. if (LangOpts.OpenMP && isa(D) && - !isOpenMPDeclareMapperVarDeclAllowed(cast(D))) { + !OpenMP().isOpenMPDeclareMapperVarDeclAllowed(cast(D))) { Diag(Loc, diag::err_omp_declare_mapper_wrong_var) - << getOpenMPDeclareMapperVarName(); + << OpenMP().getOpenMPDeclareMapperVarName(); Diag(D->getLocation(), diag::note_entity_declared_at) << D; return true; } @@ -2267,7 +2268,7 @@ NonOdrUseReason Sema::getNonOdrUseReasonInCurrentContext(ValueDecl *D) { // be loaded from the captured. 
if (VarDecl *VD = dyn_cast(D)) { if (VD->getType()->isReferenceType() && - !(getLangOpts().OpenMP && isOpenMPCapturedDecl(D)) && + !(getLangOpts().OpenMP && OpenMP().isOpenMPCapturedDecl(D)) && !isCapturingReferenceToHostVarInCUDADeviceLambda(*this, VD) && VD->isUsableInConstantExpressions(Context)) return NOUR_Constant; @@ -5080,9 +5081,10 @@ ExprResult Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, if (base && !base->getType().isNull() && base->hasPlaceholderType(BuiltinType::OMPArraySection)) - return ActOnOMPArraySectionExpr(base, lbLoc, ArgExprs.front(), SourceLocation(), - SourceLocation(), /*Length*/ nullptr, - /*Stride=*/nullptr, rbLoc); + return OpenMP().ActOnOMPArraySectionExpr(base, lbLoc, ArgExprs.front(), + SourceLocation(), SourceLocation(), + /*Length*/ nullptr, + /*Stride=*/nullptr, rbLoc); // Since this might be a postfix expression, get rid of ParenListExprs. if (isa(base)) { @@ -5354,558 +5356,6 @@ void Sema::CheckSubscriptAccessOfNoDeref(const ArraySubscriptExpr *E) { } } -ExprResult Sema::ActOnOMPArraySectionExpr(Expr *Base, SourceLocation LBLoc, - Expr *LowerBound, - SourceLocation ColonLocFirst, - SourceLocation ColonLocSecond, - Expr *Length, Expr *Stride, - SourceLocation RBLoc) { - if (Base->hasPlaceholderType() && - !Base->hasPlaceholderType(BuiltinType::OMPArraySection)) { - ExprResult Result = CheckPlaceholderExpr(Base); - if (Result.isInvalid()) - return ExprError(); - Base = Result.get(); - } - if (LowerBound && LowerBound->getType()->isNonOverloadPlaceholderType()) { - ExprResult Result = CheckPlaceholderExpr(LowerBound); - if (Result.isInvalid()) - return ExprError(); - Result = DefaultLvalueConversion(Result.get()); - if (Result.isInvalid()) - return ExprError(); - LowerBound = Result.get(); - } - if (Length && Length->getType()->isNonOverloadPlaceholderType()) { - ExprResult Result = CheckPlaceholderExpr(Length); - if (Result.isInvalid()) - return ExprError(); - Result = DefaultLvalueConversion(Result.get()); - if 
(Result.isInvalid()) - return ExprError(); - Length = Result.get(); - } - if (Stride && Stride->getType()->isNonOverloadPlaceholderType()) { - ExprResult Result = CheckPlaceholderExpr(Stride); - if (Result.isInvalid()) - return ExprError(); - Result = DefaultLvalueConversion(Result.get()); - if (Result.isInvalid()) - return ExprError(); - Stride = Result.get(); - } - - // Build an unanalyzed expression if either operand is type-dependent. - if (Base->isTypeDependent() || - (LowerBound && - (LowerBound->isTypeDependent() || LowerBound->isValueDependent())) || - (Length && (Length->isTypeDependent() || Length->isValueDependent())) || - (Stride && (Stride->isTypeDependent() || Stride->isValueDependent()))) { - return new (Context) OMPArraySectionExpr( - Base, LowerBound, Length, Stride, Context.DependentTy, VK_LValue, - OK_Ordinary, ColonLocFirst, ColonLocSecond, RBLoc); - } - - // Perform default conversions. - QualType OriginalTy = OMPArraySectionExpr::getBaseOriginalType(Base); - QualType ResultTy; - if (OriginalTy->isAnyPointerType()) { - ResultTy = OriginalTy->getPointeeType(); - } else if (OriginalTy->isArrayType()) { - ResultTy = OriginalTy->getAsArrayTypeUnsafe()->getElementType(); - } else { - return ExprError( - Diag(Base->getExprLoc(), diag::err_omp_typecheck_section_value) - << Base->getSourceRange()); - } - // C99 6.5.2.1p1 - if (LowerBound) { - auto Res = PerformOpenMPImplicitIntegerConversion(LowerBound->getExprLoc(), - LowerBound); - if (Res.isInvalid()) - return ExprError(Diag(LowerBound->getExprLoc(), - diag::err_omp_typecheck_section_not_integer) - << 0 << LowerBound->getSourceRange()); - LowerBound = Res.get(); - - if (LowerBound->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || - LowerBound->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) - Diag(LowerBound->getExprLoc(), diag::warn_omp_section_is_char) - << 0 << LowerBound->getSourceRange(); - } - if (Length) { - auto Res = - 
PerformOpenMPImplicitIntegerConversion(Length->getExprLoc(), Length); - if (Res.isInvalid()) - return ExprError(Diag(Length->getExprLoc(), - diag::err_omp_typecheck_section_not_integer) - << 1 << Length->getSourceRange()); - Length = Res.get(); - - if (Length->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || - Length->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) - Diag(Length->getExprLoc(), diag::warn_omp_section_is_char) - << 1 << Length->getSourceRange(); - } - if (Stride) { - ExprResult Res = - PerformOpenMPImplicitIntegerConversion(Stride->getExprLoc(), Stride); - if (Res.isInvalid()) - return ExprError(Diag(Stride->getExprLoc(), - diag::err_omp_typecheck_section_not_integer) - << 1 << Stride->getSourceRange()); - Stride = Res.get(); - - if (Stride->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || - Stride->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) - Diag(Stride->getExprLoc(), diag::warn_omp_section_is_char) - << 1 << Stride->getSourceRange(); - } - - // C99 6.5.2.1p1: "shall have type "pointer to *object* type". Similarly, - // C++ [expr.sub]p1: The type "T" shall be a completely-defined object - // type. Note that functions are not objects, and that (in C99 parlance) - // incomplete types are not object types. - if (ResultTy->isFunctionType()) { - Diag(Base->getExprLoc(), diag::err_omp_section_function_type) - << ResultTy << Base->getSourceRange(); - return ExprError(); - } - - if (RequireCompleteType(Base->getExprLoc(), ResultTy, - diag::err_omp_section_incomplete_type, Base)) - return ExprError(); - - if (LowerBound && !OriginalTy->isAnyPointerType()) { - Expr::EvalResult Result; - if (LowerBound->EvaluateAsInt(Result, Context)) { - // OpenMP 5.0, [2.1.5 Array Sections] - // The array section must be a subset of the original array. 
- llvm::APSInt LowerBoundValue = Result.Val.getInt(); - if (LowerBoundValue.isNegative()) { - Diag(LowerBound->getExprLoc(), diag::err_omp_section_not_subset_of_array) - << LowerBound->getSourceRange(); - return ExprError(); - } - } - } - - if (Length) { - Expr::EvalResult Result; - if (Length->EvaluateAsInt(Result, Context)) { - // OpenMP 5.0, [2.1.5 Array Sections] - // The length must evaluate to non-negative integers. - llvm::APSInt LengthValue = Result.Val.getInt(); - if (LengthValue.isNegative()) { - Diag(Length->getExprLoc(), diag::err_omp_section_length_negative) - << toString(LengthValue, /*Radix=*/10, /*Signed=*/true) - << Length->getSourceRange(); - return ExprError(); - } - } - } else if (ColonLocFirst.isValid() && - (OriginalTy.isNull() || (!OriginalTy->isConstantArrayType() && - !OriginalTy->isVariableArrayType()))) { - // OpenMP 5.0, [2.1.5 Array Sections] - // When the size of the array dimension is not known, the length must be - // specified explicitly. - Diag(ColonLocFirst, diag::err_omp_section_length_undefined) - << (!OriginalTy.isNull() && OriginalTy->isArrayType()); - return ExprError(); - } - - if (Stride) { - Expr::EvalResult Result; - if (Stride->EvaluateAsInt(Result, Context)) { - // OpenMP 5.0, [2.1.5 Array Sections] - // The stride must evaluate to a positive integer. 
- llvm::APSInt StrideValue = Result.Val.getInt(); - if (!StrideValue.isStrictlyPositive()) { - Diag(Stride->getExprLoc(), diag::err_omp_section_stride_non_positive) - << toString(StrideValue, /*Radix=*/10, /*Signed=*/true) - << Stride->getSourceRange(); - return ExprError(); - } - } - } - - if (!Base->hasPlaceholderType(BuiltinType::OMPArraySection)) { - ExprResult Result = DefaultFunctionArrayLvalueConversion(Base); - if (Result.isInvalid()) - return ExprError(); - Base = Result.get(); - } - return new (Context) OMPArraySectionExpr( - Base, LowerBound, Length, Stride, Context.OMPArraySectionTy, VK_LValue, - OK_Ordinary, ColonLocFirst, ColonLocSecond, RBLoc); -} - -ExprResult Sema::ActOnOMPArrayShapingExpr(Expr *Base, SourceLocation LParenLoc, - SourceLocation RParenLoc, - ArrayRef Dims, - ArrayRef Brackets) { - if (Base->hasPlaceholderType()) { - ExprResult Result = CheckPlaceholderExpr(Base); - if (Result.isInvalid()) - return ExprError(); - Result = DefaultLvalueConversion(Result.get()); - if (Result.isInvalid()) - return ExprError(); - Base = Result.get(); - } - QualType BaseTy = Base->getType(); - // Delay analysis of the types/expressions if instantiation/specialization is - // required. 
- if (!BaseTy->isPointerType() && Base->isTypeDependent()) - return OMPArrayShapingExpr::Create(Context, Context.DependentTy, Base, - LParenLoc, RParenLoc, Dims, Brackets); - if (!BaseTy->isPointerType() || - (!Base->isTypeDependent() && - BaseTy->getPointeeType()->isIncompleteType())) - return ExprError(Diag(Base->getExprLoc(), - diag::err_omp_non_pointer_type_array_shaping_base) - << Base->getSourceRange()); - - SmallVector NewDims; - bool ErrorFound = false; - for (Expr *Dim : Dims) { - if (Dim->hasPlaceholderType()) { - ExprResult Result = CheckPlaceholderExpr(Dim); - if (Result.isInvalid()) { - ErrorFound = true; - continue; - } - Result = DefaultLvalueConversion(Result.get()); - if (Result.isInvalid()) { - ErrorFound = true; - continue; - } - Dim = Result.get(); - } - if (!Dim->isTypeDependent()) { - ExprResult Result = - PerformOpenMPImplicitIntegerConversion(Dim->getExprLoc(), Dim); - if (Result.isInvalid()) { - ErrorFound = true; - Diag(Dim->getExprLoc(), diag::err_omp_typecheck_shaping_not_integer) - << Dim->getSourceRange(); - continue; - } - Dim = Result.get(); - Expr::EvalResult EvResult; - if (!Dim->isValueDependent() && Dim->EvaluateAsInt(EvResult, Context)) { - // OpenMP 5.0, [2.1.4 Array Shaping] - // Each si is an integral type expression that must evaluate to a - // positive integer. 
- llvm::APSInt Value = EvResult.Val.getInt(); - if (!Value.isStrictlyPositive()) { - Diag(Dim->getExprLoc(), diag::err_omp_shaping_dimension_not_positive) - << toString(Value, /*Radix=*/10, /*Signed=*/true) - << Dim->getSourceRange(); - ErrorFound = true; - continue; - } - } - } - NewDims.push_back(Dim); - } - if (ErrorFound) - return ExprError(); - return OMPArrayShapingExpr::Create(Context, Context.OMPArrayShapingTy, Base, - LParenLoc, RParenLoc, NewDims, Brackets); -} - -ExprResult Sema::ActOnOMPIteratorExpr(Scope *S, SourceLocation IteratorKwLoc, - SourceLocation LLoc, SourceLocation RLoc, - ArrayRef Data) { - SmallVector ID; - bool IsCorrect = true; - for (const OMPIteratorData &D : Data) { - TypeSourceInfo *TInfo = nullptr; - SourceLocation StartLoc; - QualType DeclTy; - if (!D.Type.getAsOpaquePtr()) { - // OpenMP 5.0, 2.1.6 Iterators - // In an iterator-specifier, if the iterator-type is not specified then - // the type of that iterator is of int type. - DeclTy = Context.IntTy; - StartLoc = D.DeclIdentLoc; - } else { - DeclTy = GetTypeFromParser(D.Type, &TInfo); - StartLoc = TInfo->getTypeLoc().getBeginLoc(); - } - - bool IsDeclTyDependent = DeclTy->isDependentType() || - DeclTy->containsUnexpandedParameterPack() || - DeclTy->isInstantiationDependentType(); - if (!IsDeclTyDependent) { - if (!DeclTy->isIntegralType(Context) && !DeclTy->isAnyPointerType()) { - // OpenMP 5.0, 2.1.6 Iterators, Restrictions, C/C++ - // The iterator-type must be an integral or pointer type. - Diag(StartLoc, diag::err_omp_iterator_not_integral_or_pointer) - << DeclTy; - IsCorrect = false; - continue; - } - if (DeclTy.isConstant(Context)) { - // OpenMP 5.0, 2.1.6 Iterators, Restrictions, C/C++ - // The iterator-type must not be const qualified. - Diag(StartLoc, diag::err_omp_iterator_not_integral_or_pointer) - << DeclTy; - IsCorrect = false; - continue; - } - } - - // Iterator declaration. 
- assert(D.DeclIdent && "Identifier expected."); - // Always try to create iterator declarator to avoid extra error messages - // about unknown declarations use. - auto *VD = VarDecl::Create(Context, CurContext, StartLoc, D.DeclIdentLoc, - D.DeclIdent, DeclTy, TInfo, SC_None); - VD->setImplicit(); - if (S) { - // Check for conflicting previous declaration. - DeclarationNameInfo NameInfo(VD->getDeclName(), D.DeclIdentLoc); - LookupResult Previous(*this, NameInfo, LookupOrdinaryName, - ForVisibleRedeclaration); - Previous.suppressDiagnostics(); - LookupName(Previous, S); - - FilterLookupForScope(Previous, CurContext, S, /*ConsiderLinkage=*/false, - /*AllowInlineNamespace=*/false); - if (!Previous.empty()) { - NamedDecl *Old = Previous.getRepresentativeDecl(); - Diag(D.DeclIdentLoc, diag::err_redefinition) << VD->getDeclName(); - Diag(Old->getLocation(), diag::note_previous_definition); - } else { - PushOnScopeChains(VD, S); - } - } else { - CurContext->addDecl(VD); - } - - /// Act on the iterator variable declaration. - ActOnOpenMPIteratorVarDecl(VD); - - Expr *Begin = D.Range.Begin; - if (!IsDeclTyDependent && Begin && !Begin->isTypeDependent()) { - ExprResult BeginRes = - PerformImplicitConversion(Begin, DeclTy, AA_Converting); - Begin = BeginRes.get(); - } - Expr *End = D.Range.End; - if (!IsDeclTyDependent && End && !End->isTypeDependent()) { - ExprResult EndRes = PerformImplicitConversion(End, DeclTy, AA_Converting); - End = EndRes.get(); - } - Expr *Step = D.Range.Step; - if (!IsDeclTyDependent && Step && !Step->isTypeDependent()) { - if (!Step->getType()->isIntegralType(Context)) { - Diag(Step->getExprLoc(), diag::err_omp_iterator_step_not_integral) - << Step << Step->getSourceRange(); - IsCorrect = false; - continue; - } - std::optional Result = - Step->getIntegerConstantExpr(Context); - // OpenMP 5.0, 2.1.6 Iterators, Restrictions - // If the step expression of a range-specification equals zero, the - // behavior is unspecified. 
- if (Result && Result->isZero()) { - Diag(Step->getExprLoc(), diag::err_omp_iterator_step_constant_zero) - << Step << Step->getSourceRange(); - IsCorrect = false; - continue; - } - } - if (!Begin || !End || !IsCorrect) { - IsCorrect = false; - continue; - } - OMPIteratorExpr::IteratorDefinition &IDElem = ID.emplace_back(); - IDElem.IteratorDecl = VD; - IDElem.AssignmentLoc = D.AssignLoc; - IDElem.Range.Begin = Begin; - IDElem.Range.End = End; - IDElem.Range.Step = Step; - IDElem.ColonLoc = D.ColonLoc; - IDElem.SecondColonLoc = D.SecColonLoc; - } - if (!IsCorrect) { - // Invalidate all created iterator declarations if error is found. - for (const OMPIteratorExpr::IteratorDefinition &D : ID) { - if (Decl *ID = D.IteratorDecl) - ID->setInvalidDecl(); - } - return ExprError(); - } - SmallVector Helpers; - if (!CurContext->isDependentContext()) { - // Build number of ityeration for each iteration range. - // Ni = ((Stepi > 0) ? ((Endi + Stepi -1 - Begini)/Stepi) : - // ((Begini-Stepi-1-Endi) / -Stepi); - for (OMPIteratorExpr::IteratorDefinition &D : ID) { - // (Endi - Begini) - ExprResult Res = CreateBuiltinBinOp(D.AssignmentLoc, BO_Sub, D.Range.End, - D.Range.Begin); - if(!Res.isUsable()) { - IsCorrect = false; - continue; - } - ExprResult St, St1; - if (D.Range.Step) { - St = D.Range.Step; - // (Endi - Begini) + Stepi - Res = CreateBuiltinBinOp(D.AssignmentLoc, BO_Add, Res.get(), St.get()); - if (!Res.isUsable()) { - IsCorrect = false; - continue; - } - // (Endi - Begini) + Stepi - 1 - Res = - CreateBuiltinBinOp(D.AssignmentLoc, BO_Sub, Res.get(), - ActOnIntegerConstant(D.AssignmentLoc, 1).get()); - if (!Res.isUsable()) { - IsCorrect = false; - continue; - } - // ((Endi - Begini) + Stepi - 1) / Stepi - Res = CreateBuiltinBinOp(D.AssignmentLoc, BO_Div, Res.get(), St.get()); - if (!Res.isUsable()) { - IsCorrect = false; - continue; - } - St1 = CreateBuiltinUnaryOp(D.AssignmentLoc, UO_Minus, D.Range.Step); - // (Begini - Endi) - ExprResult Res1 = 
CreateBuiltinBinOp(D.AssignmentLoc, BO_Sub, - D.Range.Begin, D.Range.End); - if (!Res1.isUsable()) { - IsCorrect = false; - continue; - } - // (Begini - Endi) - Stepi - Res1 = - CreateBuiltinBinOp(D.AssignmentLoc, BO_Add, Res1.get(), St1.get()); - if (!Res1.isUsable()) { - IsCorrect = false; - continue; - } - // (Begini - Endi) - Stepi - 1 - Res1 = - CreateBuiltinBinOp(D.AssignmentLoc, BO_Sub, Res1.get(), - ActOnIntegerConstant(D.AssignmentLoc, 1).get()); - if (!Res1.isUsable()) { - IsCorrect = false; - continue; - } - // ((Begini - Endi) - Stepi - 1) / (-Stepi) - Res1 = - CreateBuiltinBinOp(D.AssignmentLoc, BO_Div, Res1.get(), St1.get()); - if (!Res1.isUsable()) { - IsCorrect = false; - continue; - } - // Stepi > 0. - ExprResult CmpRes = - CreateBuiltinBinOp(D.AssignmentLoc, BO_GT, D.Range.Step, - ActOnIntegerConstant(D.AssignmentLoc, 0).get()); - if (!CmpRes.isUsable()) { - IsCorrect = false; - continue; - } - Res = ActOnConditionalOp(D.AssignmentLoc, D.AssignmentLoc, CmpRes.get(), - Res.get(), Res1.get()); - if (!Res.isUsable()) { - IsCorrect = false; - continue; - } - } - Res = ActOnFinishFullExpr(Res.get(), /*DiscardedValue=*/false); - if (!Res.isUsable()) { - IsCorrect = false; - continue; - } - - // Build counter update. - // Build counter. - auto *CounterVD = - VarDecl::Create(Context, CurContext, D.IteratorDecl->getBeginLoc(), - D.IteratorDecl->getBeginLoc(), nullptr, - Res.get()->getType(), nullptr, SC_None); - CounterVD->setImplicit(); - ExprResult RefRes = - BuildDeclRefExpr(CounterVD, CounterVD->getType(), VK_LValue, - D.IteratorDecl->getBeginLoc()); - // Build counter update. 
- // I = Begini + counter * Stepi; - ExprResult UpdateRes; - if (D.Range.Step) { - UpdateRes = CreateBuiltinBinOp( - D.AssignmentLoc, BO_Mul, - DefaultLvalueConversion(RefRes.get()).get(), St.get()); - } else { - UpdateRes = DefaultLvalueConversion(RefRes.get()); - } - if (!UpdateRes.isUsable()) { - IsCorrect = false; - continue; - } - UpdateRes = CreateBuiltinBinOp(D.AssignmentLoc, BO_Add, D.Range.Begin, - UpdateRes.get()); - if (!UpdateRes.isUsable()) { - IsCorrect = false; - continue; - } - ExprResult VDRes = - BuildDeclRefExpr(cast(D.IteratorDecl), - cast(D.IteratorDecl)->getType(), VK_LValue, - D.IteratorDecl->getBeginLoc()); - UpdateRes = CreateBuiltinBinOp(D.AssignmentLoc, BO_Assign, VDRes.get(), - UpdateRes.get()); - if (!UpdateRes.isUsable()) { - IsCorrect = false; - continue; - } - UpdateRes = - ActOnFinishFullExpr(UpdateRes.get(), /*DiscardedValue=*/true); - if (!UpdateRes.isUsable()) { - IsCorrect = false; - continue; - } - ExprResult CounterUpdateRes = - CreateBuiltinUnaryOp(D.AssignmentLoc, UO_PreInc, RefRes.get()); - if (!CounterUpdateRes.isUsable()) { - IsCorrect = false; - continue; - } - CounterUpdateRes = - ActOnFinishFullExpr(CounterUpdateRes.get(), /*DiscardedValue=*/true); - if (!CounterUpdateRes.isUsable()) { - IsCorrect = false; - continue; - } - OMPIteratorHelperData &HD = Helpers.emplace_back(); - HD.CounterVD = CounterVD; - HD.Upper = Res.get(); - HD.Update = UpdateRes.get(); - HD.CounterUpdate = CounterUpdateRes.get(); - } - } else { - Helpers.assign(ID.size(), {}); - } - if (!IsCorrect) { - // Invalidate all created iterator declarations if error is found. 
- for (const OMPIteratorExpr::IteratorDefinition &D : ID) { - if (Decl *ID = D.IteratorDecl) - ID->setInvalidDecl(); - } - return ExprError(); - } - return OMPIteratorExpr::Create(Context, Context.OMPIteratorTy, IteratorKwLoc, - LLoc, RLoc, ID, Helpers); -} - ExprResult Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, Expr *Idx, SourceLocation RLoc) { @@ -7190,8 +6640,8 @@ ExprResult Sema::ActOnCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc, } if (LangOpts.OpenMP) - Call = ActOnOpenMPCall(Call, Scope, LParenLoc, ArgExprs, RParenLoc, - ExecConfig); + Call = OpenMP().ActOnOpenMPCall(Call, Scope, LParenLoc, ArgExprs, RParenLoc, + ExecConfig); if (LangOpts.CPlusPlus) { if (const auto *CE = dyn_cast(Call.get())) DiagnosedUnqualifiedCallsToStdFunctions(*this, CE); @@ -7881,7 +7331,7 @@ Sema::BuildCompoundLiteralExpr(SourceLocation LParenLoc, TypeSourceInfo *TInfo, if (!LiteralExpr->isTypeDependent() && !LiteralExpr->isValueDependent() && !literalType->isDependentType()) // C99 6.5.2.5p3 - if (CheckForConstantInitializer(LiteralExpr, literalType)) + if (CheckForConstantInitializer(LiteralExpr)) return ExprError(); } else if (literalType.getAddressSpace() != LangAS::opencl_private && literalType.getAddressSpace() != LangAS::Default) { @@ -19193,7 +18643,7 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, Sema &SemaRef, } QualType CaptureType, DeclRefType; if (SemaRef.LangOpts.OpenMP) - SemaRef.tryCaptureOpenMPLambdas(V); + SemaRef.OpenMP().tryCaptureOpenMPLambdas(V); SemaRef.tryCaptureVariable(V, Loc, Sema::TryCapture_Implicit, /*EllipsisLoc*/ SourceLocation(), /*BuildAndDiagnose*/ true, CaptureType, @@ -19474,7 +18924,7 @@ static bool captureInBlock(BlockScopeInfo *BSI, ValueDecl *Var, const bool HasBlocksAttr = Var->hasAttr(); if (HasBlocksAttr || CaptureType->isReferenceType() || - (S.getLangOpts().OpenMP && S.isOpenMPCapturedDecl(Var))) { + (S.getLangOpts().OpenMP && S.OpenMP().isOpenMPCapturedDecl(Var))) { // Block capture by 
reference does not change the capture or // declaration reference types. ByRef = true; @@ -19504,7 +18954,7 @@ static bool captureInCapturedRegion( ByRef = (Kind == Sema::TryCapture_ExplicitByRef); } else if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) { // Using an LValue reference type is consistent with Lambdas (see below). - if (S.isOpenMPCapturedDecl(Var)) { + if (S.OpenMP().isOpenMPCapturedDecl(Var)) { bool HasConst = DeclRefType.isConstQualified(); DeclRefType = DeclRefType.getUnqualifiedType(); // Don't lose diagnostics about assignments to const. @@ -19512,11 +18962,11 @@ static bool captureInCapturedRegion( DeclRefType.addConst(); } // Do not capture firstprivates in tasks. - if (S.isOpenMPPrivateDecl(Var, RSI->OpenMPLevel, RSI->OpenMPCaptureLevel) != - OMPC_unknown) + if (S.OpenMP().isOpenMPPrivateDecl(Var, RSI->OpenMPLevel, + RSI->OpenMPCaptureLevel) != OMPC_unknown) return true; - ByRef = S.isOpenMPCapturedByRef(Var, RSI->OpenMPLevel, - RSI->OpenMPCaptureLevel); + ByRef = S.OpenMP().isOpenMPCapturedByRef(Var, RSI->OpenMPLevel, + RSI->OpenMPCaptureLevel); } if (ByRef) @@ -19777,9 +19227,9 @@ bool Sema::tryCaptureVariable( // Capture global variables if it is required to use private copy of this // variable. bool IsGlobal = !VD->hasLocalStorage(); - if (IsGlobal && - !(LangOpts.OpenMP && isOpenMPCapturedDecl(Var, /*CheckScopeInfo=*/true, - MaxFunctionScopesIndex))) + if (IsGlobal && !(LangOpts.OpenMP && + OpenMP().isOpenMPCapturedDecl(Var, /*CheckScopeInfo=*/true, + MaxFunctionScopesIndex))) return true; if (isa(Var)) @@ -19897,7 +19347,7 @@ bool Sema::tryCaptureVariable( } return true; } - OpenMPClauseKind IsOpenMPPrivateDecl = isOpenMPPrivateDecl( + OpenMPClauseKind IsOpenMPPrivateDecl = OpenMP().isOpenMPPrivateDecl( Var, RSI->OpenMPLevel, RSI->OpenMPCaptureLevel); // If the variable is private (i.e. 
not captured) and has variably // modified type, we still need to capture the type for correct @@ -19908,7 +19358,8 @@ bool Sema::tryCaptureVariable( QualType QTy = Var->getType(); if (ParmVarDecl *PVD = dyn_cast_or_null(Var)) QTy = PVD->getOriginalType(); - for (int I = 1, E = getNumberOfConstructScopes(RSI->OpenMPLevel); + for (int I = 1, + E = OpenMP().getNumberOfConstructScopes(RSI->OpenMPLevel); I < E; ++I) { auto *OuterRSI = cast( FunctionScopes[FunctionScopesIndex - I]); @@ -19920,18 +19371,19 @@ bool Sema::tryCaptureVariable( } bool IsTargetCap = IsOpenMPPrivateDecl != OMPC_private && - isOpenMPTargetCapturedDecl(Var, RSI->OpenMPLevel, - RSI->OpenMPCaptureLevel); + OpenMP().isOpenMPTargetCapturedDecl(Var, RSI->OpenMPLevel, + RSI->OpenMPCaptureLevel); // Do not capture global if it is not privatized in outer regions. bool IsGlobalCap = - IsGlobal && isOpenMPGlobalCapturedDecl(Var, RSI->OpenMPLevel, - RSI->OpenMPCaptureLevel); + IsGlobal && OpenMP().isOpenMPGlobalCapturedDecl( + Var, RSI->OpenMPLevel, RSI->OpenMPCaptureLevel); // When we detect target captures we are looking from inside the // target region, therefore we need to propagate the capture from the // enclosing region. Therefore, the capture is not initially nested. 
if (IsTargetCap) - adjustOpenMPTargetScopeIndex(FunctionScopesIndex, RSI->OpenMPLevel); + OpenMP().adjustOpenMPTargetScopeIndex(FunctionScopesIndex, + RSI->OpenMPLevel); if (IsTargetCap || IsOpenMPPrivateDecl == OMPC_private || (IsGlobal && !IsGlobalCap)) { @@ -20753,8 +20205,8 @@ static void MarkExprReferenced(Sema &SemaRef, SourceLocation Loc, Decl *D, Expr *E, bool MightBeOdrUse, llvm::DenseMap &RefsMinusAssignments) { - if (SemaRef.isInOpenMPDeclareTargetContext()) - SemaRef.checkDeclIsAllowedInOpenMPTarget(E, D); + if (SemaRef.OpenMP().isInOpenMPDeclareTargetContext()) + SemaRef.OpenMP().checkDeclIsAllowedInOpenMPTarget(E, D); if (VarDecl *Var = dyn_cast(D)) { DoMarkVarDeclReferenced(SemaRef, Loc, Var, E, RefsMinusAssignments); diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp index 32998ae60eafe2..7ea6d733fe5a2d 100644 --- a/clang/lib/Sema/SemaExprMember.cpp +++ b/clang/lib/Sema/SemaExprMember.cpp @@ -9,7 +9,6 @@ // This file implements semantic analysis member access expressions. 
// //===----------------------------------------------------------------------===// -#include "clang/Sema/Overload.h" #include "clang/AST/ASTLambda.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" @@ -18,9 +17,11 @@ #include "clang/AST/ExprObjC.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/Lookup.h" +#include "clang/Sema/Overload.h" #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaInternal.h" +#include "clang/Sema/SemaOpenMP.h" using namespace clang; using namespace sema; @@ -1900,9 +1901,9 @@ Sema::BuildFieldReferenceExpr(Expr *BaseExpr, bool IsArrow, if (getLangOpts().OpenMP && IsArrow && !CurContext->isDependentContext() && isa(Base.get()->IgnoreParenImpCasts())) { - if (auto *PrivateCopy = isOpenMPCapturedDecl(Field)) { - return getOpenMPCapturedExpr(PrivateCopy, VK, OK, - MemberNameInfo.getLoc()); + if (auto *PrivateCopy = OpenMP().isOpenMPCapturedDecl(Field)) { + return OpenMP().getOpenMPCapturedExpr(PrivateCopy, VK, OK, + MemberNameInfo.getLoc()); } } diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp index 35a51c6c2328db..1743afaf15287f 100644 --- a/clang/lib/Sema/SemaLambda.cpp +++ b/clang/lib/Sema/SemaLambda.cpp @@ -21,6 +21,7 @@ #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaCUDA.h" #include "clang/Sema/SemaInternal.h" +#include "clang/Sema/SemaOpenMP.h" #include "clang/Sema/Template.h" #include "llvm/ADT/STLExtras.h" #include @@ -1398,7 +1399,7 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro, // OpenMP lambdas might get assumumption attributes. 
if (LangOpts.OpenMP) - ActOnFinishedFunctionDefinitionInOpenMPAssumeScope(Method); + OpenMP().ActOnFinishedFunctionDefinitionInOpenMPAssumeScope(Method); handleLambdaNumbering(Class, Method); diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index 1249136c87650b..59f65eaf47a6da 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -15,6 +15,7 @@ #include "clang/AST/StmtOpenACC.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Sema/Sema.h" +#include "llvm/Support/Casting.h" using namespace clang; @@ -76,6 +77,19 @@ bool doesClauseApplyToDirective(OpenACCDirectiveKind DirectiveKind, default: return false; } + case OpenACCClauseKind::Self: + switch (DirectiveKind) { + case OpenACCDirectiveKind::Parallel: + case OpenACCDirectiveKind::Serial: + case OpenACCDirectiveKind::Kernels: + case OpenACCDirectiveKind::Update: + case OpenACCDirectiveKind::ParallelLoop: + case OpenACCDirectiveKind::SerialLoop: + case OpenACCDirectiveKind::KernelsLoop: + return true; + default: + return false; + } default: // Do nothing so we can go to the 'unimplemented' diagnostic instead. return true; @@ -121,9 +135,7 @@ SemaOpenACC::ActOnClause(ArrayRef ExistingClauses, // Restrictions only properly implemented on 'compute' constructs, and // 'compute' constructs are the only construct that can do anything with // this yet, so skip/treat as unimplemented in this case. - if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Parallel && - Clause.getDirectiveKind() != OpenACCDirectiveKind::Serial && - Clause.getDirectiveKind() != OpenACCDirectiveKind::Kernels) + if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind())) break; // Don't add an invalid clause to the AST. 
@@ -146,9 +158,7 @@ SemaOpenACC::ActOnClause(ArrayRef ExistingClauses, // Restrictions only properly implemented on 'compute' constructs, and // 'compute' constructs are the only construct that can do anything with // this yet, so skip/treat as unimplemented in this case. - if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Parallel && - Clause.getDirectiveKind() != OpenACCDirectiveKind::Serial && - Clause.getDirectiveKind() != OpenACCDirectiveKind::Kernels) + if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind())) break; // There is no prose in the standard that says duplicates aren't allowed, @@ -160,12 +170,54 @@ SemaOpenACC::ActOnClause(ArrayRef ExistingClauses, // The parser has ensured that we have a proper condition expr, so there // isn't really much to do here. - // TODO OpenACC: When we implement 'self', this clauses causes us to - // 'ignore' the self clause, so we should implement a warning here. + // If the 'if' clause is true, it makes the 'self' clause have no effect, + // diagnose that here. + // TODO OpenACC: When we add these two to other constructs, we might not + // want to warn on this (for example, 'update'). + const auto *Itr = + llvm::find_if(ExistingClauses, llvm::IsaPred); + if (Itr != ExistingClauses.end()) { + Diag(Clause.getBeginLoc(), diag::warn_acc_if_self_conflict); + Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here); + } + return OpenACCIfClause::Create( getASTContext(), Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.getConditionExpr(), Clause.getEndLoc()); } + + case OpenACCClauseKind::Self: { + // Restrictions only properly implemented on 'compute' constructs, and + // 'compute' constructs are the only construct that can do anything with + // this yet, so skip/treat as unimplemented in this case. 
+ if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind())) + break; + + // TODO OpenACC: When we implement this for 'update', this takes a + // 'var-list' instead of a condition expression, so semantics/handling has + // to happen differently here. + + // There is no prose in the standard that says duplicates aren't allowed, + // but this diagnostic is present in other compilers, as well as makes + // sense. + if (checkAlreadyHasClauseOfKind(*this, ExistingClauses, Clause)) + return nullptr; + + // If the 'if' clause is true, it makes the 'self' clause have no effect, + // diagnose that here. + // TODO OpenACC: When we add these two to other constructs, we might not + // want to warn on this (for example, 'update'). + const auto *Itr = + llvm::find_if(ExistingClauses, llvm::IsaPred); + if (Itr != ExistingClauses.end()) { + Diag(Clause.getBeginLoc(), diag::warn_acc_if_self_conflict); + Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here); + } + + return OpenACCSelfClause::Create( + getASTContext(), Clause.getBeginLoc(), Clause.getLParenLoc(), + Clause.getConditionExpr(), Clause.getEndLoc()); + } default: break; } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index e9efb4721133fe..d229ef650bccb0 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -11,6 +11,7 @@ /// //===----------------------------------------------------------------------===// +#include "clang/Sema/SemaOpenMP.h" #include "TreeTransform.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTMutationListener.h" @@ -33,6 +34,7 @@ #include "clang/Sema/ParsedAttr.h" #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" +#include "clang/Sema/Sema.h" #include "clang/Sema/SemaInternal.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/PointerEmbeddedInt.h" @@ -1808,9 +1810,9 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, return DVar; } const_iterator End = end(); - if 
(!SemaRef.isOpenMPCapturedByRef(D, - std::distance(ParentIterTarget, End), - /*OpenMPCaptureLevel=*/0)) { + if (!SemaRef.OpenMP().isOpenMPCapturedByRef( + D, std::distance(ParentIterTarget, End), + /*OpenMPCaptureLevel=*/0)) { DVar.RefExpr = buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(), IterTarget->ConstructLoc); @@ -2018,22 +2020,22 @@ bool DSAStackTy::hasDirective( return false; } -void Sema::InitDataSharingAttributesStack() { - VarDataSharingAttributesStack = new DSAStackTy(*this); +void SemaOpenMP::InitDataSharingAttributesStack() { + VarDataSharingAttributesStack = new DSAStackTy(SemaRef); } #define DSAStack static_cast(VarDataSharingAttributesStack) -void Sema::pushOpenMPFunctionRegion() { DSAStack->pushFunction(); } +void SemaOpenMP::pushOpenMPFunctionRegion() { DSAStack->pushFunction(); } -void Sema::popOpenMPFunctionRegion(const FunctionScopeInfo *OldFSI) { +void SemaOpenMP::popOpenMPFunctionRegion(const FunctionScopeInfo *OldFSI) { DSAStack->popFunction(OldFSI); } static bool isOpenMPDeviceDelayedContext(Sema &S) { assert(S.LangOpts.OpenMP && S.LangOpts.OpenMPIsTargetDevice && "Expected OpenMP device compilation."); - return !S.isInOpenMPTargetExecutionDirective(); + return !S.OpenMP().isInOpenMPTargetExecutionDirective(); } namespace { @@ -2045,20 +2047,20 @@ enum class FunctionEmissionStatus { }; } // anonymous namespace -Sema::SemaDiagnosticBuilder -Sema::diagIfOpenMPDeviceCode(SourceLocation Loc, unsigned DiagID, - const FunctionDecl *FD) { - assert(LangOpts.OpenMP && LangOpts.OpenMPIsTargetDevice && +SemaBase::SemaDiagnosticBuilder +SemaOpenMP::diagIfOpenMPDeviceCode(SourceLocation Loc, unsigned DiagID, + const FunctionDecl *FD) { + assert(getLangOpts().OpenMP && getLangOpts().OpenMPIsTargetDevice && "Expected OpenMP device compilation."); SemaDiagnosticBuilder::Kind Kind = SemaDiagnosticBuilder::K_Nop; if (FD) { - FunctionEmissionStatus FES = getEmissionStatus(FD); + Sema::FunctionEmissionStatus FES = 
SemaRef.getEmissionStatus(FD); switch (FES) { - case FunctionEmissionStatus::Emitted: + case Sema::FunctionEmissionStatus::Emitted: Kind = SemaDiagnosticBuilder::K_Immediate; break; - case FunctionEmissionStatus::Unknown: + case Sema::FunctionEmissionStatus::Unknown: // TODO: We should always delay diagnostics here in case a target // region is in a function we do not emit. However, as the // current diagnostics are associated with the function containing @@ -2066,48 +2068,48 @@ Sema::diagIfOpenMPDeviceCode(SourceLocation Loc, unsigned DiagID, // on diagnostics for the target region itself. We need to anchor // the diagnostics with the new generated function *or* ensure we // emit diagnostics associated with the surrounding function. - Kind = isOpenMPDeviceDelayedContext(*this) + Kind = isOpenMPDeviceDelayedContext(SemaRef) ? SemaDiagnosticBuilder::K_Deferred : SemaDiagnosticBuilder::K_Immediate; break; - case FunctionEmissionStatus::TemplateDiscarded: - case FunctionEmissionStatus::OMPDiscarded: + case Sema::FunctionEmissionStatus::TemplateDiscarded: + case Sema::FunctionEmissionStatus::OMPDiscarded: Kind = SemaDiagnosticBuilder::K_Nop; break; - case FunctionEmissionStatus::CUDADiscarded: + case Sema::FunctionEmissionStatus::CUDADiscarded: llvm_unreachable("CUDADiscarded unexpected in OpenMP device compilation"); break; } } - return SemaDiagnosticBuilder(Kind, Loc, DiagID, FD, *this); + return SemaDiagnosticBuilder(Kind, Loc, DiagID, FD, SemaRef); } -Sema::SemaDiagnosticBuilder Sema::diagIfOpenMPHostCode(SourceLocation Loc, - unsigned DiagID, - const FunctionDecl *FD) { - assert(LangOpts.OpenMP && !LangOpts.OpenMPIsTargetDevice && +SemaBase::SemaDiagnosticBuilder +SemaOpenMP::diagIfOpenMPHostCode(SourceLocation Loc, unsigned DiagID, + const FunctionDecl *FD) { + assert(getLangOpts().OpenMP && !getLangOpts().OpenMPIsTargetDevice && "Expected OpenMP host compilation."); SemaDiagnosticBuilder::Kind Kind = SemaDiagnosticBuilder::K_Nop; if (FD) { - 
FunctionEmissionStatus FES = getEmissionStatus(FD); + Sema::FunctionEmissionStatus FES = SemaRef.getEmissionStatus(FD); switch (FES) { - case FunctionEmissionStatus::Emitted: + case Sema::FunctionEmissionStatus::Emitted: Kind = SemaDiagnosticBuilder::K_Immediate; break; - case FunctionEmissionStatus::Unknown: + case Sema::FunctionEmissionStatus::Unknown: Kind = SemaDiagnosticBuilder::K_Deferred; break; - case FunctionEmissionStatus::TemplateDiscarded: - case FunctionEmissionStatus::OMPDiscarded: - case FunctionEmissionStatus::CUDADiscarded: + case Sema::FunctionEmissionStatus::TemplateDiscarded: + case Sema::FunctionEmissionStatus::OMPDiscarded: + case Sema::FunctionEmissionStatus::CUDADiscarded: Kind = SemaDiagnosticBuilder::K_Nop; break; } } - return SemaDiagnosticBuilder(Kind, Loc, DiagID, FD, *this); + return SemaDiagnosticBuilder(Kind, Loc, DiagID, FD, SemaRef); } static OpenMPDefaultmapClauseKind @@ -2124,9 +2126,9 @@ getVariableCategoryFromDecl(const LangOptions &LO, const ValueDecl *VD) { return OMPC_DEFAULTMAP_aggregate; } -bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, - unsigned OpenMPCaptureLevel) const { - assert(LangOpts.OpenMP && "OpenMP is not allowed"); +bool SemaOpenMP::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, + unsigned OpenMPCaptureLevel) const { + assert(getLangOpts().OpenMP && "OpenMP is not allowed"); ASTContext &Ctx = getASTContext(); bool IsByRef = true; @@ -2252,7 +2254,7 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, !Ty->isAnyPointerType()) || !Ty->isScalarType() || DSAStack->isDefaultmapCapturedByRef( - Level, getVariableCategoryFromDecl(LangOpts, D)) || + Level, getVariableCategoryFromDecl(getLangOpts(), D)) || DSAStack->hasExplicitDSA( D, [](OpenMPClauseKind K, bool AppliedToPointee) { @@ -2303,17 +2305,17 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, return IsByRef; } -unsigned Sema::getOpenMPNestingLevel() const { +unsigned 
SemaOpenMP::getOpenMPNestingLevel() const { assert(getLangOpts().OpenMP); return DSAStack->getNestingLevel(); } -bool Sema::isInOpenMPTaskUntiedContext() const { +bool SemaOpenMP::isInOpenMPTaskUntiedContext() const { return isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) && DSAStack->isUntiedRegion(); } -bool Sema::isInOpenMPTargetExecutionDirective() const { +bool SemaOpenMP::isInOpenMPTargetExecutionDirective() const { return (isOpenMPTargetExecutionDirective(DSAStack->getCurrentDirective()) && !DSAStack->isClauseParsingMode()) || DSAStack->hasDirective( @@ -2324,7 +2326,7 @@ bool Sema::isInOpenMPTargetExecutionDirective() const { false); } -bool Sema::isOpenMPRebuildMemberExpr(ValueDecl *D) { +bool SemaOpenMP::isOpenMPRebuildMemberExpr(ValueDecl *D) { // Only rebuild for Field. if (!dyn_cast(D)) return false; @@ -2347,9 +2349,9 @@ static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id, DeclContext *CurContext, bool AsExpression); -VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, - unsigned StopAt) { - assert(LangOpts.OpenMP && "OpenMP is not allowed"); +VarDecl *SemaOpenMP::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, + unsigned StopAt) { + assert(getLangOpts().OpenMP && "OpenMP is not allowed"); D = getCanonicalDecl(D); auto *VD = dyn_cast(D); @@ -2368,7 +2370,8 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, // 'target' we return true so that this global is also mapped to the device. 
// if (VD && !VD->hasLocalStorage() && - (getCurCapturedRegion() || getCurBlock() || getCurLambda())) { + (SemaRef.getCurCapturedRegion() || SemaRef.getCurBlock() || + SemaRef.getCurLambda())) { if (isInOpenMPTargetExecutionDirective()) { DSAStackTy::DSAVarData DVarTop = DSAStack->getTopDSA(D, DSAStack->isClauseParsingMode()); @@ -2381,8 +2384,9 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, return nullptr; CapturedRegionScopeInfo *CSI = nullptr; for (FunctionScopeInfo *FSI : llvm::drop_begin( - llvm::reverse(FunctionScopes), - CheckScopeInfo ? (FunctionScopes.size() - (StopAt + 1)) : 0)) { + llvm::reverse(SemaRef.FunctionScopes), + CheckScopeInfo ? (SemaRef.FunctionScopes.size() - (StopAt + 1)) + : 0)) { if (!isa(FSI)) return nullptr; if (auto *RSI = dyn_cast(FSI)) @@ -2401,7 +2405,7 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, if (isInOpenMPDeclareTargetContext()) { // Try to mark variable as declare target if it is used in capturing // regions. 
- if (LangOpts.OpenMP <= 45 && + if (getLangOpts().OpenMP <= 45 && !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) checkDeclIsAllowedInOpenMPTarget(nullptr, VD); return nullptr; @@ -2411,7 +2415,7 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, if (CheckScopeInfo) { bool OpenMPFound = false; for (unsigned I = StopAt + 1; I > 0; --I) { - FunctionScopeInfo *FSI = FunctionScopes[I - 1]; + FunctionScopeInfo *FSI = SemaRef.FunctionScopes[I - 1]; if (!isa(FSI)) return nullptr; if (auto *RSI = dyn_cast(FSI)) @@ -2476,22 +2480,23 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, VarDecl *VD = DSAStack->getImplicitFDCapExprDecl(FD); if (VD) return VD; - if (getCurrentThisType().isNull()) + if (SemaRef.getCurrentThisType().isNull()) return nullptr; - Expr *ThisExpr = BuildCXXThisExpr(SourceLocation(), getCurrentThisType(), - /*IsImplicit=*/true); + Expr *ThisExpr = SemaRef.BuildCXXThisExpr(SourceLocation(), + SemaRef.getCurrentThisType(), + /*IsImplicit=*/true); const CXXScopeSpec CS = CXXScopeSpec(); - Expr *ME = BuildMemberExpr(ThisExpr, /*IsArrow=*/true, SourceLocation(), - NestedNameSpecifierLoc(), SourceLocation(), FD, - DeclAccessPair::make(FD, FD->getAccess()), - /*HadMultipleCandidates=*/false, - DeclarationNameInfo(), FD->getType(), - VK_LValue, OK_Ordinary); + Expr *ME = SemaRef.BuildMemberExpr( + ThisExpr, /*IsArrow=*/true, SourceLocation(), + NestedNameSpecifierLoc(), SourceLocation(), FD, + DeclAccessPair::make(FD, FD->getAccess()), + /*HadMultipleCandidates=*/false, DeclarationNameInfo(), FD->getType(), + VK_LValue, OK_Ordinary); OMPCapturedExprDecl *CD = buildCaptureDecl( - *this, FD->getIdentifier(), ME, DVarPrivate.CKind != OMPC_private, - CurContext->getParent(), /*AsExpression=*/false); + SemaRef, FD->getIdentifier(), ME, DVarPrivate.CKind != OMPC_private, + SemaRef.CurContext->getParent(), /*AsExpression=*/false); DeclRefExpr *VDPrivateRefExpr = buildDeclRefExpr( - *this, CD, 
CD->getType().getNonReferenceType(), SourceLocation()); + SemaRef, CD, CD->getType().getNonReferenceType(), SourceLocation()); VD = cast(VDPrivateRefExpr->getDecl()); DSAStack->addImplicitDefaultFirstprivateFD(FD, VD); return VD; @@ -2505,28 +2510,28 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, return nullptr; } -void Sema::adjustOpenMPTargetScopeIndex(unsigned &FunctionScopesIndex, - unsigned Level) const { +void SemaOpenMP::adjustOpenMPTargetScopeIndex(unsigned &FunctionScopesIndex, + unsigned Level) const { FunctionScopesIndex -= getOpenMPCaptureLevels(DSAStack->getDirective(Level)); } -void Sema::startOpenMPLoop() { - assert(LangOpts.OpenMP && "OpenMP must be enabled."); +void SemaOpenMP::startOpenMPLoop() { + assert(getLangOpts().OpenMP && "OpenMP must be enabled."); if (isOpenMPLoopDirective(DSAStack->getCurrentDirective())) DSAStack->loopInit(); } -void Sema::startOpenMPCXXRangeFor() { - assert(LangOpts.OpenMP && "OpenMP must be enabled."); +void SemaOpenMP::startOpenMPCXXRangeFor() { + assert(getLangOpts().OpenMP && "OpenMP must be enabled."); if (isOpenMPLoopDirective(DSAStack->getCurrentDirective())) { DSAStack->resetPossibleLoopCounter(); DSAStack->loopStart(); } } -OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, - unsigned CapLevel) const { - assert(LangOpts.OpenMP && "OpenMP is not allowed"); +OpenMPClauseKind SemaOpenMP::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, + unsigned CapLevel) const { + assert(getLangOpts().OpenMP && "OpenMP is not allowed"); if (DSAStack->getCurrentDirective() != OMPD_unknown && (!DSAStack->isClauseParsingMode() || DSAStack->getParentDirective() != OMPD_unknown)) { @@ -2546,7 +2551,8 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, } if (DSAStack->hasExplicitDirective(isOpenMPTaskingDirective, Level)) { bool IsTriviallyCopyable = - D->getType().getNonReferenceType().isTriviallyCopyableType(Context) && + 
D->getType().getNonReferenceType().isTriviallyCopyableType( + getASTContext()) && !D->getType() .getNonReferenceType() .getCanonicalType() @@ -2620,9 +2626,9 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, : OMPC_unknown; } -void Sema::setOpenMPCaptureKind(FieldDecl *FD, const ValueDecl *D, - unsigned Level) { - assert(LangOpts.OpenMP && "OpenMP is not allowed"); +void SemaOpenMP::setOpenMPCaptureKind(FieldDecl *FD, const ValueDecl *D, + unsigned Level) { + assert(getLangOpts().OpenMP && "OpenMP is not allowed"); D = getCanonicalDecl(D); OpenMPClauseKind OMPC = OMPC_unknown; for (unsigned I = DSAStack->getNestingLevel() + 1; I > Level; --I) { @@ -2649,18 +2655,19 @@ void Sema::setOpenMPCaptureKind(FieldDecl *FD, const ValueDecl *D, NewLevel)) { OMPC = OMPC_map; if (DSAStack->mustBeFirstprivateAtLevel( - NewLevel, getVariableCategoryFromDecl(LangOpts, D))) + NewLevel, getVariableCategoryFromDecl(getLangOpts(), D))) OMPC = OMPC_firstprivate; break; } } if (OMPC != OMPC_unknown) - FD->addAttr(OMPCaptureKindAttr::CreateImplicit(Context, unsigned(OMPC))); + FD->addAttr( + OMPCaptureKindAttr::CreateImplicit(getASTContext(), unsigned(OMPC))); } -bool Sema::isOpenMPTargetCapturedDecl(const ValueDecl *D, unsigned Level, - unsigned CaptureLevel) const { - assert(LangOpts.OpenMP && "OpenMP is not allowed"); +bool SemaOpenMP::isOpenMPTargetCapturedDecl(const ValueDecl *D, unsigned Level, + unsigned CaptureLevel) const { + assert(getLangOpts().OpenMP && "OpenMP is not allowed"); // Return true if the current level is no longer enclosed in a target region. 
SmallVector Regions; @@ -2672,9 +2679,9 @@ bool Sema::isOpenMPTargetCapturedDecl(const ValueDecl *D, unsigned Level, Regions[CaptureLevel] != OMPD_task; } -bool Sema::isOpenMPGlobalCapturedDecl(ValueDecl *D, unsigned Level, - unsigned CaptureLevel) const { - assert(LangOpts.OpenMP && "OpenMP is not allowed"); +bool SemaOpenMP::isOpenMPGlobalCapturedDecl(ValueDecl *D, unsigned Level, + unsigned CaptureLevel) const { + assert(getLangOpts().OpenMP && "OpenMP is not allowed"); // Return true if the current level is no longer enclosed in a target region. if (const auto *VD = dyn_cast(D)) { @@ -2702,37 +2709,37 @@ bool Sema::isOpenMPGlobalCapturedDecl(ValueDecl *D, unsigned Level, return true; } -void Sema::DestroyDataSharingAttributesStack() { delete DSAStack; } +void SemaOpenMP::DestroyDataSharingAttributesStack() { delete DSAStack; } -void Sema::ActOnOpenMPBeginDeclareVariant(SourceLocation Loc, - OMPTraitInfo &TI) { +void SemaOpenMP::ActOnOpenMPBeginDeclareVariant(SourceLocation Loc, + OMPTraitInfo &TI) { OMPDeclareVariantScopes.push_back(OMPDeclareVariantScope(TI)); } -void Sema::ActOnOpenMPEndDeclareVariant() { +void SemaOpenMP::ActOnOpenMPEndDeclareVariant() { assert(isInOpenMPDeclareVariantScope() && "Not in OpenMP declare variant scope!"); OMPDeclareVariantScopes.pop_back(); } -void Sema::finalizeOpenMPDelayedAnalysis(const FunctionDecl *Caller, - const FunctionDecl *Callee, - SourceLocation Loc) { - assert(LangOpts.OpenMP && "Expected OpenMP compilation mode."); +void SemaOpenMP::finalizeOpenMPDelayedAnalysis(const FunctionDecl *Caller, + const FunctionDecl *Callee, + SourceLocation Loc) { + assert(getLangOpts().OpenMP && "Expected OpenMP compilation mode."); std::optional DevTy = OMPDeclareTargetDeclAttr::getDeviceType(Caller->getMostRecentDecl()); // Ignore host functions during device analyzis. 
- if (LangOpts.OpenMPIsTargetDevice && + if (getLangOpts().OpenMPIsTargetDevice && (!DevTy || *DevTy == OMPDeclareTargetDeclAttr::DT_Host)) return; // Ignore nohost functions during host analyzis. - if (!LangOpts.OpenMPIsTargetDevice && DevTy && + if (!getLangOpts().OpenMPIsTargetDevice && DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) return; const FunctionDecl *FD = Callee->getMostRecentDecl(); DevTy = OMPDeclareTargetDeclAttr::getDeviceType(FD); - if (LangOpts.OpenMPIsTargetDevice && DevTy && + if (getLangOpts().OpenMPIsTargetDevice && DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) { // Diagnose host function called during device codegen. StringRef HostDevTy = @@ -2743,8 +2750,9 @@ void Sema::finalizeOpenMPDelayedAnalysis(const FunctionDecl *Caller, << HostDevTy; return; } - if (!LangOpts.OpenMPIsTargetDevice && !LangOpts.OpenMPOffloadMandatory && - DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) { + if (!getLangOpts().OpenMPIsTargetDevice && + !getLangOpts().OpenMPOffloadMandatory && DevTy && + *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) { // In OpenMP 5.2 or later, if the function has a host variant then allow // that to be called instead auto &&HasHostAttr = [](const FunctionDecl *Callee) { @@ -2773,21 +2781,21 @@ void Sema::finalizeOpenMPDelayedAnalysis(const FunctionDecl *Caller, } } -void Sema::StartOpenMPDSABlock(OpenMPDirectiveKind DKind, - const DeclarationNameInfo &DirName, - Scope *CurScope, SourceLocation Loc) { +void SemaOpenMP::StartOpenMPDSABlock(OpenMPDirectiveKind DKind, + const DeclarationNameInfo &DirName, + Scope *CurScope, SourceLocation Loc) { DSAStack->push(DKind, DirName, CurScope, Loc); - PushExpressionEvaluationContext( - ExpressionEvaluationContext::PotentiallyEvaluated); + SemaRef.PushExpressionEvaluationContext( + Sema::ExpressionEvaluationContext::PotentiallyEvaluated); } -void Sema::StartOpenMPClause(OpenMPClauseKind K) { +void SemaOpenMP::StartOpenMPClause(OpenMPClauseKind K) { 
DSAStack->setClauseParsingMode(K); } -void Sema::EndOpenMPClause() { +void SemaOpenMP::EndOpenMPClause() { DSAStack->setClauseParsingMode(/*K=*/OMPC_unknown); - CleanupVarDeclMarking(); + SemaRef.CleanupVarDeclMarking(); } static std::pair @@ -2871,7 +2879,7 @@ static void reportOriginalDsa(Sema &SemaRef, const DSAStackTy *Stack, const DSAStackTy::DSAVarData &DVar, bool IsLoopIterVar = false); -void Sema::EndOpenMPDSABlock(Stmt *CurDirective) { +void SemaOpenMP::EndOpenMPDSABlock(Stmt *CurDirective) { // OpenMP [2.14.3.5, Restrictions, C/C++, p.1] // A variable of class type (or array thereof) that appears in a lastprivate // clause requires an accessible, unambiguous default constructor for the @@ -2898,15 +2906,15 @@ void Sema::EndOpenMPDSABlock(Stmt *CurDirective) { // variable is not added to IdResolver, so the code in the OpenMP // region uses original variable for proper diagnostics. VarDecl *VDPrivate = buildVarDecl( - *this, DE->getExprLoc(), Type.getUnqualifiedType(), + SemaRef, DE->getExprLoc(), Type.getUnqualifiedType(), VD->getName(), VD->hasAttrs() ? &VD->getAttrs() : nullptr, DRE); - ActOnUninitializedDecl(VDPrivate); + SemaRef.ActOnUninitializedDecl(VDPrivate); if (VDPrivate->isInvalidDecl()) { PrivateCopies.push_back(nullptr); continue; } PrivateCopies.push_back(buildDeclRefExpr( - *this, VDPrivate, DE->getType(), DE->getExprLoc())); + SemaRef, VDPrivate, DE->getType(), DE->getExprLoc())); } else { // The variable is also a firstprivate, so initialization sequence // for private copy is generated already. @@ -2924,7 +2932,7 @@ void Sema::EndOpenMPDSABlock(Stmt *CurDirective) { SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange); + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange); if (Res.second) // It will be analyzed later. 
PrivateRefs.push_back(RefExpr); @@ -2977,7 +2985,7 @@ void Sema::EndOpenMPDSABlock(Stmt *CurDirective) { diag::err_omp_allocator_used_in_clauses) << D.Allocator->getSourceRange(); if (DVar.RefExpr) - reportOriginalDsa(*this, DSAStack, VD, DVar); + reportOriginalDsa(SemaRef, DSAStack, VD, DVar); else Diag(MapExpr->getExprLoc(), diag::note_used_here) << MapExpr->getSourceRange(); @@ -2987,14 +2995,14 @@ void Sema::EndOpenMPDSABlock(Stmt *CurDirective) { } } // Check allocate clauses. - if (!CurContext->isDependentContext()) - checkAllocateClauses(*this, DSAStack, D->clauses()); - checkReductionClauses(*this, DSAStack, D->clauses()); + if (!SemaRef.CurContext->isDependentContext()) + checkAllocateClauses(SemaRef, DSAStack, D->clauses()); + checkReductionClauses(SemaRef, DSAStack, D->clauses()); } DSAStack->pop(); - DiscardCleanupsInEvaluationContext(); - PopExpressionEvaluationContext(); + SemaRef.DiscardCleanupsInEvaluationContext(); + SemaRef.PopExpressionEvaluationContext(); } static bool FinishOpenMPLinearClause(OMPLinearClause &Clause, DeclRefExpr *IV, @@ -3047,27 +3055,28 @@ class VarOrFuncDeclFilterCCC final : public CorrectionCandidateCallback { } // namespace -ExprResult Sema::ActOnOpenMPIdExpression(Scope *CurScope, - CXXScopeSpec &ScopeSpec, - const DeclarationNameInfo &Id, - OpenMPDirectiveKind Kind) { - LookupResult Lookup(*this, Id, LookupOrdinaryName); - LookupParsedName(Lookup, CurScope, &ScopeSpec, true); +ExprResult SemaOpenMP::ActOnOpenMPIdExpression(Scope *CurScope, + CXXScopeSpec &ScopeSpec, + const DeclarationNameInfo &Id, + OpenMPDirectiveKind Kind) { + ASTContext &Context = getASTContext(); + LookupResult Lookup(SemaRef, Id, Sema::LookupOrdinaryName); + SemaRef.LookupParsedName(Lookup, CurScope, &ScopeSpec, true); if (Lookup.isAmbiguous()) return ExprError(); VarDecl *VD; if (!Lookup.isSingleResult()) { - VarDeclFilterCCC CCC(*this); + VarDeclFilterCCC CCC(SemaRef); if (TypoCorrection Corrected = - CorrectTypo(Id, LookupOrdinaryName, CurScope, 
nullptr, CCC, - CTK_ErrorRecovery)) { - diagnoseTypo(Corrected, - PDiag(Lookup.empty() - ? diag::err_undeclared_var_use_suggest - : diag::err_omp_expected_var_arg_suggest) - << Id.getName()); + SemaRef.CorrectTypo(Id, Sema::LookupOrdinaryName, CurScope, nullptr, + CCC, Sema::CTK_ErrorRecovery)) { + SemaRef.diagnoseTypo( + Corrected, + SemaRef.PDiag(Lookup.empty() ? diag::err_undeclared_var_use_suggest + : diag::err_omp_expected_var_arg_suggest) + << Id.getName()); VD = Corrected.getCorrectionDeclAs(); } else { Diag(Id.getLoc(), Lookup.empty() ? diag::err_undeclared_var_use @@ -3101,7 +3110,7 @@ ExprResult Sema::ActOnOpenMPIdExpression(Scope *CurScope, // A threadprivate directive for file-scope variables must appear outside // any definition or declaration. if (CanonicalVD->getDeclContext()->isTranslationUnit() && - !getCurLexicalContext()->isTranslationUnit()) { + !SemaRef.getCurLexicalContext()->isTranslationUnit()) { Diag(Id.getLoc(), diag::err_omp_var_scope) << getOpenMPDirectiveName(Kind) << VD; bool IsDecl = @@ -3116,7 +3125,7 @@ ExprResult Sema::ActOnOpenMPIdExpression(Scope *CurScope, // in the class definition, in the same scope in which the member // variables are declared. if (CanonicalVD->isStaticDataMember() && - !CanonicalVD->getDeclContext()->Equals(getCurLexicalContext())) { + !CanonicalVD->getDeclContext()->Equals(SemaRef.getCurLexicalContext())) { Diag(Id.getLoc(), diag::err_omp_var_scope) << getOpenMPDirectiveName(Kind) << VD; bool IsDecl = @@ -3131,8 +3140,9 @@ ExprResult Sema::ActOnOpenMPIdExpression(Scope *CurScope, // outside any definition or declaration other than the namespace // definition itself. 
if (CanonicalVD->getDeclContext()->isNamespace() && - (!getCurLexicalContext()->isFileContext() || - !getCurLexicalContext()->Encloses(CanonicalVD->getDeclContext()))) { + (!SemaRef.getCurLexicalContext()->isFileContext() || + !SemaRef.getCurLexicalContext()->Encloses( + CanonicalVD->getDeclContext()))) { Diag(Id.getLoc(), diag::err_omp_var_scope) << getOpenMPDirectiveName(Kind) << VD; bool IsDecl = @@ -3146,7 +3156,7 @@ ExprResult Sema::ActOnOpenMPIdExpression(Scope *CurScope, // A threadprivate directive for static block-scope variables must appear // in the scope of the variable and not in a nested scope. if (CanonicalVD->isLocalVarDecl() && CurScope && - !isDeclInScope(ND, getCurLexicalContext(), CurScope)) { + !SemaRef.isDeclInScope(ND, SemaRef.getCurLexicalContext(), CurScope)) { Diag(Id.getLoc(), diag::err_omp_var_scope) << getOpenMPDirectiveName(Kind) << VD; bool IsDecl = @@ -3174,11 +3184,11 @@ ExprResult Sema::ActOnOpenMPIdExpression(Scope *CurScope, Id.getLoc(), ExprType, VK_LValue); } -Sema::DeclGroupPtrTy -Sema::ActOnOpenMPThreadprivateDirective(SourceLocation Loc, - ArrayRef VarList) { +SemaOpenMP::DeclGroupPtrTy +SemaOpenMP::ActOnOpenMPThreadprivateDirective(SourceLocation Loc, + ArrayRef VarList) { if (OMPThreadPrivateDecl *D = CheckOMPThreadPrivateDecl(Loc, VarList)) { - CurContext->addDecl(D); + SemaRef.CurContext->addDecl(D); return DeclGroupPtrTy::make(DeclGroupRef(D)); } return nullptr; @@ -3215,7 +3225,9 @@ class LocalVarRefChecker final } // namespace OMPThreadPrivateDecl * -Sema::CheckOMPThreadPrivateDecl(SourceLocation Loc, ArrayRef VarList) { +SemaOpenMP::CheckOMPThreadPrivateDecl(SourceLocation Loc, + ArrayRef VarList) { + ASTContext &Context = getASTContext(); SmallVector Vars; for (Expr *RefExpr : VarList) { auto *DE = cast(RefExpr); @@ -3235,8 +3247,8 @@ Sema::CheckOMPThreadPrivateDecl(SourceLocation Loc, ArrayRef VarList) { // OpenMP [2.9.2, Restrictions, C/C++, p.10] // A threadprivate variable must not have an incomplete type. 
- if (RequireCompleteType(ILoc, VD->getType(), - diag::err_omp_threadprivate_incomplete_type)) { + if (SemaRef.RequireCompleteType( + ILoc, VD->getType(), diag::err_omp_threadprivate_incomplete_type)) { continue; } @@ -3274,7 +3286,7 @@ Sema::CheckOMPThreadPrivateDecl(SourceLocation Loc, ArrayRef VarList) { // Check if initial value of threadprivate variable reference variable with // local storage (it is not supported by runtime). if (const Expr *Init = VD->getAnyInitializer()) { - LocalVarRefChecker Checker(*this); + LocalVarRefChecker Checker(SemaRef); if (Checker.Visit(Init)) continue; } @@ -3288,8 +3300,8 @@ Sema::CheckOMPThreadPrivateDecl(SourceLocation Loc, ArrayRef VarList) { } OMPThreadPrivateDecl *D = nullptr; if (!Vars.empty()) { - D = OMPThreadPrivateDecl::Create(Context, getCurLexicalContext(), Loc, - Vars); + D = OMPThreadPrivateDecl::Create(Context, SemaRef.getCurLexicalContext(), + Loc, Vars); D->setAccess(AS_public); } return D; @@ -3395,10 +3407,9 @@ applyOMPAllocateAttribute(Sema &S, VarDecl *VD, ML->DeclarationMarkedOpenMPAllocate(VD, A); } -Sema::DeclGroupPtrTy -Sema::ActOnOpenMPAllocateDirective(SourceLocation Loc, ArrayRef VarList, - ArrayRef Clauses, - DeclContext *Owner) { +SemaOpenMP::DeclGroupPtrTy SemaOpenMP::ActOnOpenMPAllocateDirective( + SourceLocation Loc, ArrayRef VarList, ArrayRef Clauses, + DeclContext *Owner) { assert(Clauses.size() <= 2 && "Expected at most two clauses."); Expr *Alignment = nullptr; Expr *Allocator = nullptr; @@ -3407,9 +3418,9 @@ Sema::ActOnOpenMPAllocateDirective(SourceLocation Loc, ArrayRef VarList, // allocate directives that appear in a target region must specify an // allocator clause unless a requires directive with the dynamic_allocators // clause is present in the same compilation unit. 
- if (LangOpts.OpenMPIsTargetDevice && + if (getLangOpts().OpenMPIsTargetDevice && !DSAStack->hasRequiresDeclWithClause()) - targetDiag(Loc, diag::err_expected_allocator_clause); + SemaRef.targetDiag(Loc, diag::err_expected_allocator_clause); } else { for (const OMPClause *C : Clauses) if (const auto *AC = dyn_cast(C)) @@ -3420,7 +3431,7 @@ Sema::ActOnOpenMPAllocateDirective(SourceLocation Loc, ArrayRef VarList, llvm_unreachable("Unexpected clause on allocate directive"); } OMPAllocateDeclAttr::AllocatorTypeTy AllocatorKind = - getAllocatorKind(*this, DSAStack, Allocator); + getAllocatorKind(SemaRef, DSAStack, Allocator); SmallVector Vars; for (Expr *RefExpr : VarList) { auto *DE = cast(RefExpr); @@ -3435,7 +3446,7 @@ Sema::ActOnOpenMPAllocateDirective(SourceLocation Loc, ArrayRef VarList, // If the used several times in the allocate directive, the same allocator // must be used. - if (checkPreviousOMPAllocateAttribute(*this, DSAStack, RefExpr, VD, + if (checkPreviousOMPAllocateAttribute(SemaRef, DSAStack, RefExpr, VD, AllocatorKind, Allocator)) continue; @@ -3448,7 +3459,7 @@ Sema::ActOnOpenMPAllocateDirective(SourceLocation Loc, ArrayRef VarList, Diag(Allocator->getExprLoc(), diag::err_omp_expected_predefined_allocator) << Allocator->getSourceRange(); - bool IsDecl = VD->isThisDeclarationADefinition(Context) == + bool IsDecl = VD->isThisDeclarationADefinition(getASTContext()) == VarDecl::DeclarationOnly; Diag(VD->getLocation(), IsDecl ? 
diag::note_previous_decl : diag::note_defined_here) @@ -3458,45 +3469,46 @@ Sema::ActOnOpenMPAllocateDirective(SourceLocation Loc, ArrayRef VarList, } Vars.push_back(RefExpr); - applyOMPAllocateAttribute(*this, VD, AllocatorKind, Allocator, Alignment, + applyOMPAllocateAttribute(SemaRef, VD, AllocatorKind, Allocator, Alignment, DE->getSourceRange()); } if (Vars.empty()) return nullptr; if (!Owner) - Owner = getCurLexicalContext(); - auto *D = OMPAllocateDecl::Create(Context, Owner, Loc, Vars, Clauses); + Owner = SemaRef.getCurLexicalContext(); + auto *D = OMPAllocateDecl::Create(getASTContext(), Owner, Loc, Vars, Clauses); D->setAccess(AS_public); Owner->addDecl(D); return DeclGroupPtrTy::make(DeclGroupRef(D)); } -Sema::DeclGroupPtrTy -Sema::ActOnOpenMPRequiresDirective(SourceLocation Loc, - ArrayRef ClauseList) { +SemaOpenMP::DeclGroupPtrTy +SemaOpenMP::ActOnOpenMPRequiresDirective(SourceLocation Loc, + ArrayRef ClauseList) { OMPRequiresDecl *D = nullptr; - if (!CurContext->isFileContext()) { + if (!SemaRef.CurContext->isFileContext()) { Diag(Loc, diag::err_omp_invalid_scope) << "requires"; } else { D = CheckOMPRequiresDecl(Loc, ClauseList); if (D) { - CurContext->addDecl(D); + SemaRef.CurContext->addDecl(D); DSAStack->addRequiresDecl(D); } } return DeclGroupPtrTy::make(DeclGroupRef(D)); } -void Sema::ActOnOpenMPAssumesDirective(SourceLocation Loc, - OpenMPDirectiveKind DKind, - ArrayRef Assumptions, - bool SkippedClauses) { +void SemaOpenMP::ActOnOpenMPAssumesDirective(SourceLocation Loc, + OpenMPDirectiveKind DKind, + ArrayRef Assumptions, + bool SkippedClauses) { if (!SkippedClauses && Assumptions.empty()) Diag(Loc, diag::err_omp_no_clause_for_directive) << llvm::omp::getAllAssumeClauseOptions() << llvm::omp::getOpenMPDirectiveName(DKind); - auto *AA = OMPAssumeAttr::Create(Context, llvm::join(Assumptions, ","), Loc); + auto *AA = + OMPAssumeAttr::Create(getASTContext(), llvm::join(Assumptions, ","), Loc); if (DKind == llvm::omp::Directive::OMPD_begin_assumes) 
{ OMPAssumeScoped.push_back(AA); return; @@ -3515,7 +3527,7 @@ void Sema::ActOnOpenMPAssumesDirective(SourceLocation Loc, // declarations in included headers. To this end, we traverse all existing // declaration contexts and annotate function declarations here. SmallVector DeclContexts; - auto *Ctx = CurContext; + auto *Ctx = SemaRef.CurContext; while (Ctx->getLexicalParent()) Ctx = Ctx->getLexicalParent(); DeclContexts.push_back(Ctx); @@ -3539,13 +3551,14 @@ void Sema::ActOnOpenMPAssumesDirective(SourceLocation Loc, } } -void Sema::ActOnOpenMPEndAssumesDirective() { +void SemaOpenMP::ActOnOpenMPEndAssumesDirective() { assert(isInOpenMPAssumeScope() && "Not in OpenMP assumes scope!"); OMPAssumeScoped.pop_back(); } -OMPRequiresDecl *Sema::CheckOMPRequiresDecl(SourceLocation Loc, - ArrayRef ClauseList) { +OMPRequiresDecl * +SemaOpenMP::CheckOMPRequiresDecl(SourceLocation Loc, + ArrayRef ClauseList) { /// For target specific clauses, the requires directive cannot be /// specified after the handling of any of the target regions in the /// current compilation unit. 
@@ -3576,8 +3589,8 @@ OMPRequiresDecl *Sema::CheckOMPRequiresDecl(SourceLocation Loc, } if (!DSAStack->hasDuplicateRequiresClause(ClauseList)) - return OMPRequiresDecl::Create(Context, getCurLexicalContext(), Loc, - ClauseList); + return OMPRequiresDecl::Create( + getASTContext(), SemaRef.getCurLexicalContext(), Loc, ClauseList); return nullptr; } @@ -3695,7 +3708,7 @@ class DSAAttrChecker final : public StmtVisitor { llvm::SmallVector ImplicitMap[DefaultmapKindNum][OMPC_MAP_delete]; llvm::SmallVector ImplicitMapModifier[DefaultmapKindNum]; - Sema::VarsWithInheritedDSAType VarsWithInheritedDSA; + SemaOpenMP::VarsWithInheritedDSAType VarsWithInheritedDSA; llvm::SmallDenseSet ImplicitDeclarations; void VisitSubCaptures(OMPExecutableDirective *S) { @@ -4161,7 +4174,7 @@ class DSAAttrChecker final : public StmtVisitor { getImplicitMapModifier(OpenMPDefaultmapClauseKind Kind) const { return ImplicitMapModifier[Kind]; } - const Sema::VarsWithInheritedDSAType &getVarsWithInheritedDSA() const { + const SemaOpenMP::VarsWithInheritedDSAType &getVarsWithInheritedDSA() const { return VarsWithInheritedDSA; } @@ -4193,7 +4206,9 @@ static void handleDeclareVariantConstructTrait(DSAStackTy *Stack, Stack->handleConstructTrait(Traits, ScopeEntry); } -void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { +void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, + Scope *CurScope) { + ASTContext &Context = getASTContext(); switch (DKind) { case OMPD_parallel: case OMPD_parallel_for: @@ -4208,13 +4223,13 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1).withConst(); QualType KmpInt32PtrTy = Context.getPointerType(KmpInt32Ty).withConst().withRestrict(); - Sema::CapturedParamNameType Params[] = { + SemaOpenMP::CapturedParamNameType Params[] = { std::make_pair(".global_tid.", KmpInt32PtrTy), std::make_pair(".bound_tid.", KmpInt32PtrTy), 
std::make_pair(StringRef(), QualType()) // __context with shared vars }; - ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - Params); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, Params); break; } case OMPD_target_teams: @@ -4232,7 +4247,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { FunctionProtoType::ExtProtoInfo EPI; EPI.Variadic = true; QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI); - Sema::CapturedParamNameType Params[] = { + SemaOpenMP::CapturedParamNameType Params[] = { std::make_pair(".global_tid.", KmpInt32Ty), std::make_pair(".part_id.", KmpInt32PtrTy), std::make_pair(".privates.", VoidPtrTy), @@ -4242,31 +4257,33 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { std::make_pair(".task_t.", Context.VoidPtrTy.withConst()), std::make_pair(StringRef(), QualType()) // __context with shared vars }; - ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - Params, /*OpenMPCaptureLevel=*/0); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, Params, + /*OpenMPCaptureLevel=*/0); // Mark this captured region as inlined, because we don't use outlined // function directly. - getCurCapturedRegion()->TheCapturedDecl->addAttr( + SemaRef.getCurCapturedRegion()->TheCapturedDecl->addAttr( AlwaysInlineAttr::CreateImplicit( Context, {}, AlwaysInlineAttr::Keyword_forceinline)); - SmallVector ParamsTarget; + SmallVector ParamsTarget; if (getLangOpts().OpenMPIsTargetDevice) ParamsTarget.push_back(std::make_pair(StringRef("dyn_ptr"), VoidPtrTy)); ParamsTarget.push_back( std::make_pair(StringRef(), QualType())); // __context with shared vars; // Start a captured region for 'target' with no implicit parameters. 
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - ParamsTarget, - /*OpenMPCaptureLevel=*/1); - Sema::CapturedParamNameType ParamsTeamsOrParallel[] = { + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, ParamsTarget, + /*OpenMPCaptureLevel=*/1); + SemaOpenMP::CapturedParamNameType ParamsTeamsOrParallel[] = { std::make_pair(".global_tid.", KmpInt32PtrTy), std::make_pair(".bound_tid.", KmpInt32PtrTy), std::make_pair(StringRef(), QualType()) // __context with shared vars }; // Start a captured region for 'teams' or 'parallel'. Both regions have // the same implicit parameters. - ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - ParamsTeamsOrParallel, /*OpenMPCaptureLevel=*/2); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, ParamsTeamsOrParallel, + /*OpenMPCaptureLevel=*/2); break; } case OMPD_target: @@ -4279,7 +4296,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { FunctionProtoType::ExtProtoInfo EPI; EPI.Variadic = true; QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI); - Sema::CapturedParamNameType Params[] = { + SemaOpenMP::CapturedParamNameType Params[] = { std::make_pair(".global_tid.", KmpInt32Ty), std::make_pair(".part_id.", KmpInt32PtrTy), std::make_pair(".privates.", VoidPtrTy), @@ -4289,21 +4306,22 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { std::make_pair(".task_t.", Context.VoidPtrTy.withConst()), std::make_pair(StringRef(), QualType()) // __context with shared vars }; - ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - Params, /*OpenMPCaptureLevel=*/0); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, Params, + /*OpenMPCaptureLevel=*/0); // Mark this captured region as inlined, because we don't use outlined // function directly. 
- getCurCapturedRegion()->TheCapturedDecl->addAttr( + SemaRef.getCurCapturedRegion()->TheCapturedDecl->addAttr( AlwaysInlineAttr::CreateImplicit( Context, {}, AlwaysInlineAttr::Keyword_forceinline)); - SmallVector ParamsTarget; + SmallVector ParamsTarget; if (getLangOpts().OpenMPIsTargetDevice) ParamsTarget.push_back(std::make_pair(StringRef("dyn_ptr"), VoidPtrTy)); ParamsTarget.push_back( std::make_pair(StringRef(), QualType())); // __context with shared vars; - ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - ParamsTarget, - /*OpenMPCaptureLevel=*/1); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, ParamsTarget, + /*OpenMPCaptureLevel=*/1); break; } case OMPD_atomic: @@ -4329,11 +4347,11 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { case OMPD_scope: case OMPD_target_data: case OMPD_dispatch: { - Sema::CapturedParamNameType Params[] = { + SemaOpenMP::CapturedParamNameType Params[] = { std::make_pair(StringRef(), QualType()) // __context with shared vars }; - ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - Params); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, Params); break; } case OMPD_task: { @@ -4345,7 +4363,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { FunctionProtoType::ExtProtoInfo EPI; EPI.Variadic = true; QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI); - Sema::CapturedParamNameType Params[] = { + SemaOpenMP::CapturedParamNameType Params[] = { std::make_pair(".global_tid.", KmpInt32Ty), std::make_pair(".part_id.", KmpInt32PtrTy), std::make_pair(".privates.", VoidPtrTy), @@ -4355,11 +4373,11 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { std::make_pair(".task_t.", Context.VoidPtrTy.withConst()), std::make_pair(StringRef(), QualType()) // __context with shared vars }; - 
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - Params); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, Params); // Mark this captured region as inlined, because we don't use outlined // function directly. - getCurCapturedRegion()->TheCapturedDecl->addAttr( + SemaRef.getCurCapturedRegion()->TheCapturedDecl->addAttr( AlwaysInlineAttr::CreateImplicit( Context, {}, AlwaysInlineAttr::Keyword_forceinline)); break; @@ -4386,7 +4404,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { FunctionProtoType::ExtProtoInfo EPI; EPI.Variadic = true; QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI); - Sema::CapturedParamNameType Params[] = { + SemaOpenMP::CapturedParamNameType Params[] = { std::make_pair(".global_tid.", KmpInt32Ty), std::make_pair(".part_id.", KmpInt32PtrTy), std::make_pair(".privates.", VoidPtrTy), @@ -4401,11 +4419,11 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { std::make_pair(".reductions.", VoidPtrTy), std::make_pair(StringRef(), QualType()) // __context with shared vars }; - ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - Params); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, Params); // Mark this captured region as inlined, because we don't use outlined // function directly. 
- getCurCapturedRegion()->TheCapturedDecl->addAttr( + SemaRef.getCurCapturedRegion()->TheCapturedDecl->addAttr( AlwaysInlineAttr::CreateImplicit( Context, {}, AlwaysInlineAttr::Keyword_forceinline)); break; @@ -4426,19 +4444,20 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { QualType VoidPtrTy = Context.VoidPtrTy.withConst().withRestrict(); QualType KmpInt32PtrTy = Context.getPointerType(KmpInt32Ty).withConst().withRestrict(); - Sema::CapturedParamNameType ParamsParallel[] = { + SemaOpenMP::CapturedParamNameType ParamsParallel[] = { std::make_pair(".global_tid.", KmpInt32PtrTy), std::make_pair(".bound_tid.", KmpInt32PtrTy), std::make_pair(StringRef(), QualType()) // __context with shared vars }; // Start a captured region for 'parallel'. - ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - ParamsParallel, /*OpenMPCaptureLevel=*/0); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, ParamsParallel, + /*OpenMPCaptureLevel=*/0); QualType Args[] = {VoidPtrTy}; FunctionProtoType::ExtProtoInfo EPI; EPI.Variadic = true; QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI); - Sema::CapturedParamNameType Params[] = { + SemaOpenMP::CapturedParamNameType Params[] = { std::make_pair(".global_tid.", KmpInt32Ty), std::make_pair(".part_id.", KmpInt32PtrTy), std::make_pair(".privates.", VoidPtrTy), @@ -4453,11 +4472,12 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { std::make_pair(".reductions.", VoidPtrTy), std::make_pair(StringRef(), QualType()) // __context with shared vars }; - ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - Params, /*OpenMPCaptureLevel=*/1); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, Params, + /*OpenMPCaptureLevel=*/1); // Mark this captured region as inlined, because we don't use outlined // function directly. 
- getCurCapturedRegion()->TheCapturedDecl->addAttr( + SemaRef.getCurCapturedRegion()->TheCapturedDecl->addAttr( AlwaysInlineAttr::CreateImplicit( Context, {}, AlwaysInlineAttr::Keyword_forceinline)); break; @@ -4467,15 +4487,15 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1).withConst(); QualType KmpInt32PtrTy = Context.getPointerType(KmpInt32Ty).withConst().withRestrict(); - Sema::CapturedParamNameType Params[] = { + SemaOpenMP::CapturedParamNameType Params[] = { std::make_pair(".global_tid.", KmpInt32PtrTy), std::make_pair(".bound_tid.", KmpInt32PtrTy), std::make_pair(".previous.lb.", Context.getSizeType().withConst()), std::make_pair(".previous.ub.", Context.getSizeType().withConst()), std::make_pair(StringRef(), QualType()) // __context with shared vars }; - ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - Params); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, Params); break; } // For 'target teams loop', collect all captured regions so codegen can @@ -4492,7 +4512,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { FunctionProtoType::ExtProtoInfo EPI; EPI.Variadic = true; QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI); - Sema::CapturedParamNameType Params[] = { + SemaOpenMP::CapturedParamNameType Params[] = { std::make_pair(".global_tid.", KmpInt32Ty), std::make_pair(".part_id.", KmpInt32PtrTy), std::make_pair(".privates.", VoidPtrTy), @@ -4502,32 +4522,35 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { std::make_pair(".task_t.", Context.VoidPtrTy.withConst()), std::make_pair(StringRef(), QualType()) // __context with shared vars }; - ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - Params, /*OpenMPCaptureLevel=*/0); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), 
CurScope, + CR_OpenMP, Params, + /*OpenMPCaptureLevel=*/0); // Mark this captured region as inlined, because we don't use outlined // function directly. - getCurCapturedRegion()->TheCapturedDecl->addAttr( + SemaRef.getCurCapturedRegion()->TheCapturedDecl->addAttr( AlwaysInlineAttr::CreateImplicit( Context, {}, AlwaysInlineAttr::Keyword_forceinline)); - SmallVector ParamsTarget; + SmallVector ParamsTarget; if (getLangOpts().OpenMPIsTargetDevice) ParamsTarget.push_back(std::make_pair(StringRef("dyn_ptr"), VoidPtrTy)); ParamsTarget.push_back( std::make_pair(StringRef(), QualType())); // __context with shared vars; // Start a captured region for 'target' with no implicit parameters. - ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - ParamsTarget, /*OpenMPCaptureLevel=*/1); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, ParamsTarget, + /*OpenMPCaptureLevel=*/1); - Sema::CapturedParamNameType ParamsTeams[] = { + SemaOpenMP::CapturedParamNameType ParamsTeams[] = { std::make_pair(".global_tid.", KmpInt32PtrTy), std::make_pair(".bound_tid.", KmpInt32PtrTy), std::make_pair(StringRef(), QualType()) // __context with shared vars }; // Start a captured region for 'target' with no implicit parameters. - ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - ParamsTeams, /*OpenMPCaptureLevel=*/2); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, ParamsTeams, + /*OpenMPCaptureLevel=*/2); - Sema::CapturedParamNameType ParamsParallel[] = { + SemaOpenMP::CapturedParamNameType ParamsParallel[] = { std::make_pair(".global_tid.", KmpInt32PtrTy), std::make_pair(".bound_tid.", KmpInt32PtrTy), std::make_pair(".previous.lb.", Context.getSizeType().withConst()), @@ -4536,8 +4559,9 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { }; // Start a captured region for 'teams' or 'parallel'. 
Both regions have // the same implicit parameters. - ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - ParamsParallel, /*OpenMPCaptureLevel=*/3); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, ParamsParallel, + /*OpenMPCaptureLevel=*/3); break; } @@ -4548,16 +4572,17 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { QualType KmpInt32PtrTy = Context.getPointerType(KmpInt32Ty).withConst().withRestrict(); - Sema::CapturedParamNameType ParamsTeams[] = { + SemaOpenMP::CapturedParamNameType ParamsTeams[] = { std::make_pair(".global_tid.", KmpInt32PtrTy), std::make_pair(".bound_tid.", KmpInt32PtrTy), std::make_pair(StringRef(), QualType()) // __context with shared vars }; // Start a captured region for 'target' with no implicit parameters. - ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - ParamsTeams, /*OpenMPCaptureLevel=*/0); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, ParamsTeams, + /*OpenMPCaptureLevel=*/0); - Sema::CapturedParamNameType ParamsParallel[] = { + SemaOpenMP::CapturedParamNameType ParamsParallel[] = { std::make_pair(".global_tid.", KmpInt32PtrTy), std::make_pair(".bound_tid.", KmpInt32PtrTy), std::make_pair(".previous.lb.", Context.getSizeType().withConst()), @@ -4566,8 +4591,9 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { }; // Start a captured region for 'teams' or 'parallel'. Both regions have // the same implicit parameters. 
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - ParamsParallel, /*OpenMPCaptureLevel=*/1); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, ParamsParallel, + /*OpenMPCaptureLevel=*/1); break; } case OMPD_target_update: @@ -4581,7 +4607,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { FunctionProtoType::ExtProtoInfo EPI; EPI.Variadic = true; QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI); - Sema::CapturedParamNameType Params[] = { + SemaOpenMP::CapturedParamNameType Params[] = { std::make_pair(".global_tid.", KmpInt32Ty), std::make_pair(".part_id.", KmpInt32PtrTy), std::make_pair(".privates.", VoidPtrTy), @@ -4591,11 +4617,11 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { std::make_pair(".task_t.", Context.VoidPtrTy.withConst()), std::make_pair(StringRef(), QualType()) // __context with shared vars }; - ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - Params); + SemaRef.ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP, Params); // Mark this captured region as inlined, because we don't use outlined // function directly. 
- getCurCapturedRegion()->TheCapturedDecl->addAttr( + SemaRef.getCurCapturedRegion()->TheCapturedDecl->addAttr( AlwaysInlineAttr::CreateImplicit( Context, {}, AlwaysInlineAttr::Keyword_forceinline)); break; @@ -4626,15 +4652,15 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { default: llvm_unreachable("Unknown OpenMP directive"); } - DSAStack->setContext(CurContext); + DSAStack->setContext(SemaRef.CurContext); handleDeclareVariantConstructTrait(DSAStack, DKind, /* ScopeEntry */ true); } -int Sema::getNumberOfConstructScopes(unsigned Level) const { +int SemaOpenMP::getNumberOfConstructScopes(unsigned Level) const { return getOpenMPCaptureLevels(DSAStack->getDirective(Level)); } -int Sema::getOpenMPCaptureLevels(OpenMPDirectiveKind DKind) { +int SemaOpenMP::getOpenMPCaptureLevels(OpenMPDirectiveKind DKind) { SmallVector CaptureRegions; getOpenMPCaptureRegions(CaptureRegions, DKind); return CaptureRegions.size(); @@ -4674,7 +4700,7 @@ static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id, static DeclRefExpr *buildCapture(Sema &S, ValueDecl *D, Expr *CaptureExpr, bool WithInit) { OMPCapturedExprDecl *CD; - if (VarDecl *VD = S.isOpenMPCapturedDecl(D)) + if (VarDecl *VD = S.OpenMP().isOpenMPCapturedDecl(D)) CD = cast(VD); else CD = buildCaptureDecl(S, D->getIdentifier(), CaptureExpr, WithInit, @@ -4726,7 +4752,7 @@ class CaptureRegionUnwinderRAII { : S(S), ErrorFound(ErrorFound), DKind(DKind) {} ~CaptureRegionUnwinderRAII() { if (ErrorFound) { - int ThisCaptureLevel = S.getOpenMPCaptureLevels(DKind); + int ThisCaptureLevel = S.OpenMP().getOpenMPCaptureLevels(DKind); while (--ThisCaptureLevel >= 0) S.ActOnCapturedRegionError(); } @@ -4734,10 +4760,10 @@ class CaptureRegionUnwinderRAII { }; } // namespace -void Sema::tryCaptureOpenMPLambdas(ValueDecl *V) { +void SemaOpenMP::tryCaptureOpenMPLambdas(ValueDecl *V) { // Capture variables captured by reference in lambdas for target-based // directives. 
- if (!CurContext->isDependentContext() && + if (!SemaRef.CurContext->isDependentContext() && (isOpenMPTargetExecutionDirective(DSAStack->getCurrentDirective()) || isOpenMPTargetDataManagementDirective( DSAStack->getCurrentDirective()))) { @@ -4757,14 +4783,14 @@ void Sema::tryCaptureOpenMPLambdas(ValueDecl *V) { if (LC.getCaptureKind() == LCK_ByRef) { VarDecl *VD = cast(LC.getCapturedVar()); DeclContext *VDC = VD->getDeclContext(); - if (!VDC->Encloses(CurContext)) + if (!VDC->Encloses(SemaRef.CurContext)) continue; - MarkVariableReferenced(LC.getLocation(), VD); + SemaRef.MarkVariableReferenced(LC.getLocation(), VD); } else if (LC.getCaptureKind() == LCK_This) { - QualType ThisTy = getCurrentThisType(); - if (!ThisTy.isNull() && - Context.typesAreCompatible(ThisTy, ThisCapture->getType())) - CheckCXXThisCapture(LC.getLocation()); + QualType ThisTy = SemaRef.getCurrentThisType(); + if (!ThisTy.isNull() && getASTContext().typesAreCompatible( + ThisTy, ThisCapture->getType())) + SemaRef.CheckCXXThisCapture(LC.getLocation()); } } } @@ -4804,8 +4830,8 @@ static bool checkOrderedOrderSpecified(Sema &S, return false; } -StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S, - ArrayRef Clauses) { +StmtResult SemaOpenMP::ActOnOpenMPRegionEnd(StmtResult S, + ArrayRef Clauses) { handleDeclareVariantConstructTrait(DSAStack, DSAStack->getCurrentDirective(), /* ScopeEntry */ false); if (DSAStack->getCurrentDirective() == OMPD_atomic || @@ -4817,7 +4843,7 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S, bool ErrorFound = false; CaptureRegionUnwinderRAII CaptureRegionUnwinder( - *this, ErrorFound, DSAStack->getCurrentDirective()); + SemaRef, ErrorFound, DSAStack->getCurrentDirective()); if (!S.isUsable()) { ErrorFound = true; return StmtError(); @@ -4831,7 +4857,7 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S, SmallVector PICs; // This is required for proper codegen. 
for (OMPClause *Clause : Clauses) { - if (!LangOpts.OpenMPSimd && + if (!getLangOpts().OpenMPSimd && (isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) || DSAStack->getCurrentDirective() == OMPD_target) && Clause->getClauseKind() == OMPC_in_reduction) { @@ -4840,7 +4866,7 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S, auto *IRC = cast(Clause); for (Expr *E : IRC->taskgroup_descriptors()) if (E) - MarkDeclarationsReferencedInExpr(E); + SemaRef.MarkDeclarationsReferencedInExpr(E); } if (isOpenMPPrivate(Clause->getClauseKind()) || Clause->getClauseKind() == OMPC_copyprivate || @@ -4851,7 +4877,7 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S, // Mark all variables in private list clauses as used in inner region. for (Stmt *VarRef : Clause->children()) { if (auto *E = cast_or_null(VarRef)) { - MarkDeclarationsReferencedInExpr(E); + SemaRef.MarkDeclarationsReferencedInExpr(E); } } DSAStack->setForceVarCapturing(/*V=*/false); @@ -4865,7 +4891,7 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S, PICs.push_back(C); if (auto *C = OMPClauseWithPostUpdate::get(Clause)) { if (Expr *E = C->getPostUpdateExpr()) - MarkDeclarationsReferencedInExpr(E); + SemaRef.MarkDeclarationsReferencedInExpr(E); } } if (Clause->getClauseKind() == OMPC_schedule) @@ -4877,7 +4903,7 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S, } // Capture allocator expressions if used. for (Expr *E : DSAStack->getInnerAllocators()) - MarkDeclarationsReferencedInExpr(E); + SemaRef.MarkDeclarationsReferencedInExpr(E); // OpenMP, 2.7.1 Loop Construct, Restrictions // The nonmonotonic modifier cannot be specified if an ordered clause is // specified. @@ -4899,7 +4925,7 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S, // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Restrictions. // If an order(concurrent) clause is present, an ordered clause may not appear // on the same directive. 
- if (checkOrderedOrderSpecified(*this, Clauses)) + if (checkOrderedOrderSpecified(SemaRef, Clauses)) ErrorFound = true; if (!LCs.empty() && OC && OC->getNumForLoops()) { for (const OMPLinearClause *C : LCs) { @@ -4936,7 +4962,8 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S, CaptureRegion == OMPD_unknown) { if (auto *DS = cast_or_null(C->getPreInitStmt())) { for (Decl *D : DS->decls()) - MarkVariableReferenced(D->getLocation(), cast(D)); + SemaRef.MarkVariableReferenced(D->getLocation(), + cast(D)); } } } @@ -4950,7 +4977,7 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S, ++I) { OMPUsesAllocatorsClause::Data D = UAC->getAllocatorData(I); if (Expr *E = D.AllocatorTraits) - MarkDeclarationsReferencedInExpr(E); + SemaRef.MarkDeclarationsReferencedInExpr(E); } continue; } @@ -4965,17 +4992,17 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S, continue; for (Expr *E : RC->copy_array_temps()) if (E) - MarkDeclarationsReferencedInExpr(E); + SemaRef.MarkDeclarationsReferencedInExpr(E); } if (auto *AC = dyn_cast(C)) { for (Expr *E : AC->varlists()) - MarkDeclarationsReferencedInExpr(E); + SemaRef.MarkDeclarationsReferencedInExpr(E); } } } if (++CompletedRegions == CaptureRegions.size()) DSAStack->setBodyComplete(); - SR = ActOnCapturedRegionEnd(SR.get()); + SR = SemaRef.ActOnCapturedRegionEnd(SR.get()); } return SR; } @@ -5782,9 +5809,9 @@ static CapturedStmt *buildLoopVarFunc(Sema &Actions, QualType LoopVarTy, // the OpenMPIRBuilder to know additional C/C++ semantics, such as how to // invoke a copy constructor. 
QualType TargetParamTy = Ctx.getLValueReferenceType(LoopVarTy); - Sema::CapturedParamNameType Params[] = {{"LoopVar", TargetParamTy}, - {"Logical", LogicalTy}, - {StringRef(), QualType()}}; + SemaOpenMP::CapturedParamNameType Params[] = {{"LoopVar", TargetParamTy}, + {"Logical", LogicalTy}, + {StringRef(), QualType()}}; Actions.ActOnCapturedRegionStart({}, nullptr, CR_Default, Params); // Capture the initial iterator which represents the LoopVar value at the @@ -5835,7 +5862,7 @@ static CapturedStmt *buildLoopVarFunc(Sema &Actions, QualType LoopVarTy, AssertSuccess(Actions.ActOnCapturedRegionEnd(Body))); } -StmtResult Sema::ActOnOpenMPCanonicalLoop(Stmt *AStmt) { +StmtResult SemaOpenMP::ActOnOpenMPCanonicalLoop(Stmt *AStmt) { ASTContext &Ctx = getASTContext(); // Extract the common elements of ForStmt and CXXForRangeStmt: @@ -5946,8 +5973,8 @@ StmtResult Sema::ActOnOpenMPCanonicalLoop(Stmt *AStmt) { if (IncBin->getOpcode() == BO_AddAssign) { Step = IncBin->getRHS(); } else if (IncBin->getOpcode() == BO_SubAssign) { - Step = - AssertSuccess(BuildUnaryOp(nullptr, {}, UO_Minus, IncBin->getRHS())); + Step = AssertSuccess( + SemaRef.BuildUnaryOp(nullptr, {}, UO_Minus, IncBin->getRHS())); } else llvm_unreachable("unhandled binary increment operator"); } else if (auto *CondCXXOp = dyn_cast(Inc)) { @@ -5965,7 +5992,7 @@ StmtResult Sema::ActOnOpenMPCanonicalLoop(Stmt *AStmt) { break; case OO_MinusEqual: Step = AssertSuccess( - BuildUnaryOp(nullptr, {}, UO_Minus, CondCXXOp->getArg(1))); + SemaRef.BuildUnaryOp(nullptr, {}, UO_Minus, CondCXXOp->getArg(1))); break; default: llvm_unreachable("unhandled overloaded increment operator"); @@ -5974,16 +6001,17 @@ StmtResult Sema::ActOnOpenMPCanonicalLoop(Stmt *AStmt) { llvm_unreachable("unknown increment expression"); CapturedStmt *DistanceFunc = - buildDistanceFunc(*this, LogicalTy, CondRel, LHS, RHS, Step); + buildDistanceFunc(SemaRef, LogicalTy, CondRel, LHS, RHS, Step); CapturedStmt *LoopVarFunc = buildLoopVarFunc( - *this, LVTy, 
LogicalTy, CounterRef, Step, isa(AStmt)); - DeclRefExpr *LVRef = BuildDeclRefExpr(LUVDecl, LUVDecl->getType(), VK_LValue, - {}, nullptr, nullptr, {}, nullptr); + SemaRef, LVTy, LogicalTy, CounterRef, Step, isa(AStmt)); + DeclRefExpr *LVRef = + SemaRef.BuildDeclRefExpr(LUVDecl, LUVDecl->getType(), VK_LValue, {}, + nullptr, nullptr, {}, nullptr); return OMPCanonicalLoop::create(getASTContext(), AStmt, DistanceFunc, LoopVarFunc, LVRef); } -StmtResult Sema::ActOnOpenMPLoopnest(Stmt *AStmt) { +StmtResult SemaOpenMP::ActOnOpenMPLoopnest(Stmt *AStmt) { // Handle a literal loop. if (isa(AStmt) || isa(AStmt)) return ActOnOpenMPCanonicalLoop(AStmt); @@ -6128,7 +6156,7 @@ processImplicitMapsWithDefaultMappers(Sema &S, DSAStackTy *Stack, continue; CXXScopeSpec MapperIdScopeSpec; DeclarationNameInfo MapperId; - if (OMPClause *NewClause = S.ActOnOpenMPMapClause( + if (OMPClause *NewClause = S.OpenMP().ActOnOpenMPMapClause( nullptr, C->getMapTypeModifiers(), C->getMapTypeModifiersLoc(), MapperIdScopeSpec, MapperId, C->getMapType(), /*IsMapTypeImplicit=*/true, SourceLocation(), SourceLocation(), @@ -6210,14 +6238,12 @@ static bool teamsLoopCanBeParallelFor(Stmt *AStmt, Sema &SemaRef) { return Checker.teamsLoopCanBeParallelFor(); } -bool Sema::mapLoopConstruct(llvm::SmallVector &ClausesWithoutBind, - ArrayRef Clauses, - OpenMPBindClauseKind &BindKind, - OpenMPDirectiveKind &Kind, - OpenMPDirectiveKind &PrevMappedDirective, - SourceLocation StartLoc, SourceLocation EndLoc, - const DeclarationNameInfo &DirName, - OpenMPDirectiveKind CancelRegion) { +bool SemaOpenMP::mapLoopConstruct( + llvm::SmallVector &ClausesWithoutBind, + ArrayRef Clauses, OpenMPBindClauseKind &BindKind, + OpenMPDirectiveKind &Kind, OpenMPDirectiveKind &PrevMappedDirective, + SourceLocation StartLoc, SourceLocation EndLoc, + const DeclarationNameInfo &DirName, OpenMPDirectiveKind CancelRegion) { bool UseClausesWithoutBind = false; @@ -6299,7 +6325,7 @@ bool Sema::mapLoopConstruct(llvm::SmallVector 
&ClausesWithoutBind, return UseClausesWithoutBind; } -StmtResult Sema::ActOnOpenMPExecutableDirective( +StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective( OpenMPDirectiveKind Kind, const DeclarationNameInfo &DirName, OpenMPDirectiveKind CancelRegion, ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, @@ -6324,8 +6350,8 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( } // First check CancelRegion which is then used in checkNestingOfRegions. - if (checkCancelRegion(*this, Kind, CancelRegion, StartLoc) || - checkNestingOfRegions(*this, DSAStack, DK, DirName, CancelRegion, + if (checkCancelRegion(SemaRef, Kind, CancelRegion, StartLoc) || + checkNestingOfRegions(SemaRef, DSAStack, DK, DirName, CancelRegion, BindKind, StartLoc)) { return StmtError(); } @@ -6344,13 +6370,14 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( } else { ClausesWithImplicit.append(Clauses.begin(), Clauses.end()); } - if (AStmt && !CurContext->isDependentContext() && Kind != OMPD_atomic && - Kind != OMPD_critical && Kind != OMPD_section && Kind != OMPD_master && - Kind != OMPD_masked && !isOpenMPLoopTransformationDirective(Kind)) { + if (AStmt && !SemaRef.CurContext->isDependentContext() && + Kind != OMPD_atomic && Kind != OMPD_critical && Kind != OMPD_section && + Kind != OMPD_master && Kind != OMPD_masked && + !isOpenMPLoopTransformationDirective(Kind)) { assert(isa(AStmt) && "Captured statement expected"); // Check default data sharing attributes for referenced variables. - DSAAttrChecker DSAChecker(DSAStack, *this, cast(AStmt)); + DSAAttrChecker DSAChecker(DSAStack, SemaRef, cast(AStmt)); int ThisCaptureLevel = getOpenMPCaptureLevels(Kind); Stmt *S = AStmt; while (--ThisCaptureLevel >= 0) @@ -6490,8 +6517,8 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( } // Build expressions for implicit maps of data members with 'default' // mappers. 
- if (LangOpts.OpenMP >= 50) - processImplicitMapsWithDefaultMappers(*this, DSAStack, + if (getLangOpts().OpenMP >= 50) + processImplicitMapsWithDefaultMappers(SemaRef, DSAStack, ClausesWithImplicit); } @@ -6505,7 +6532,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( case OMPD_simd: Res = ActOnOpenMPSimdDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); - if (LangOpts.OpenMP >= 50) + if (getLangOpts().OpenMP >= 50) AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_tile: @@ -6523,7 +6550,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( case OMPD_for_simd: Res = ActOnOpenMPForSimdDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); - if (LangOpts.OpenMP >= 50) + if (getLangOpts().OpenMP >= 50) AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_sections: @@ -6561,7 +6588,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPParallelForSimdDirective( ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); AllowedNameModifiers.push_back(OMPD_parallel); - if (LangOpts.OpenMP >= 50) + if (getLangOpts().OpenMP >= 50) AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_scope: @@ -6698,7 +6725,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPTaskLoopSimdDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); AllowedNameModifiers.push_back(OMPD_taskloop); - if (LangOpts.OpenMP >= 50) + if (getLangOpts().OpenMP >= 50) AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_master_taskloop: @@ -6715,13 +6742,13 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPMasterTaskLoopSimdDirective( ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); AllowedNameModifiers.push_back(OMPD_taskloop); - if (LangOpts.OpenMP >= 50) + if (getLangOpts().OpenMP >= 50) AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_masked_taskloop_simd: Res = ActOnOpenMPMaskedTaskLoopSimdDirective( 
ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); - if (LangOpts.OpenMP >= 51) { + if (getLangOpts().OpenMP >= 51) { AllowedNameModifiers.push_back(OMPD_taskloop); AllowedNameModifiers.push_back(OMPD_simd); } @@ -6735,7 +6762,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( case OMPD_parallel_masked_taskloop: Res = ActOnOpenMPParallelMaskedTaskLoopDirective( ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); - if (LangOpts.OpenMP >= 51) { + if (getLangOpts().OpenMP >= 51) { AllowedNameModifiers.push_back(OMPD_taskloop); AllowedNameModifiers.push_back(OMPD_parallel); } @@ -6745,13 +6772,13 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); AllowedNameModifiers.push_back(OMPD_taskloop); AllowedNameModifiers.push_back(OMPD_parallel); - if (LangOpts.OpenMP >= 50) + if (getLangOpts().OpenMP >= 50) AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_parallel_masked_taskloop_simd: Res = ActOnOpenMPParallelMaskedTaskLoopSimdDirective( ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); - if (LangOpts.OpenMP >= 51) { + if (getLangOpts().OpenMP >= 51) { AllowedNameModifiers.push_back(OMPD_taskloop); AllowedNameModifiers.push_back(OMPD_parallel); AllowedNameModifiers.push_back(OMPD_simd); @@ -6775,13 +6802,13 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPDistributeParallelForSimdDirective( ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); AllowedNameModifiers.push_back(OMPD_parallel); - if (LangOpts.OpenMP >= 50) + if (getLangOpts().OpenMP >= 50) AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_distribute_simd: Res = ActOnOpenMPDistributeSimdDirective( ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); - if (LangOpts.OpenMP >= 50) + if (getLangOpts().OpenMP >= 50) AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_target_parallel_for_simd: @@ -6789,14 +6816,14 @@ 
StmtResult Sema::ActOnOpenMPExecutableDirective( ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); AllowedNameModifiers.push_back(OMPD_target); AllowedNameModifiers.push_back(OMPD_parallel); - if (LangOpts.OpenMP >= 50) + if (getLangOpts().OpenMP >= 50) AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_target_simd: Res = ActOnOpenMPTargetSimdDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); AllowedNameModifiers.push_back(OMPD_target); - if (LangOpts.OpenMP >= 50) + if (getLangOpts().OpenMP >= 50) AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_teams_distribute: @@ -6806,14 +6833,14 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( case OMPD_teams_distribute_simd: Res = ActOnOpenMPTeamsDistributeSimdDirective( ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); - if (LangOpts.OpenMP >= 50) + if (getLangOpts().OpenMP >= 50) AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_teams_distribute_parallel_for_simd: Res = ActOnOpenMPTeamsDistributeParallelForSimdDirective( ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); AllowedNameModifiers.push_back(OMPD_parallel); - if (LangOpts.OpenMP >= 50) + if (getLangOpts().OpenMP >= 50) AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_teams_distribute_parallel_for: @@ -6842,14 +6869,14 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); AllowedNameModifiers.push_back(OMPD_target); AllowedNameModifiers.push_back(OMPD_parallel); - if (LangOpts.OpenMP >= 50) + if (getLangOpts().OpenMP >= 50) AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_target_teams_distribute_simd: Res = ActOnOpenMPTargetTeamsDistributeSimdDirective( ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); AllowedNameModifiers.push_back(OMPD_target); - if (LangOpts.OpenMP >= 50) + if (getLangOpts().OpenMP >= 50) 
AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_interop: @@ -6906,7 +6933,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( if (DSAStack->getDefaultDSA() == DSA_none || DSAStack->getDefaultDSA() == DSA_private || DSAStack->getDefaultDSA() == DSA_firstprivate) { - DSAAttrChecker DSAChecker(DSAStack, *this, nullptr); + DSAAttrChecker DSAChecker(DSAStack, SemaRef, nullptr); for (OMPClause *C : Clauses) { switch (C->getClauseKind()) { case OMPC_num_threads: @@ -7043,13 +7070,13 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( } if (!AllowedNameModifiers.empty()) - ErrorFound = checkIfClauses(*this, Kind, Clauses, AllowedNameModifiers) || + ErrorFound = checkIfClauses(SemaRef, Kind, Clauses, AllowedNameModifiers) || ErrorFound; if (ErrorFound) return StmtError(); - if (!CurContext->isDependentContext() && + if (!SemaRef.CurContext->isDependentContext() && isOpenMPTargetExecutionDirective(Kind) && !(DSAStack->hasRequiresDeclWithClause() || DSAStack->hasRequiresDeclWithClause() || @@ -7062,7 +7089,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( return Res; } -Sema::DeclGroupPtrTy Sema::ActOnOpenMPDeclareSimdDirective( +SemaOpenMP::DeclGroupPtrTy SemaOpenMP::ActOnOpenMPDeclareSimdDirective( DeclGroupPtrTy DG, OMPDeclareSimdDeclAttr::BranchStateTy BS, Expr *Simdlen, ArrayRef Uniforms, ArrayRef Aligneds, ArrayRef Alignments, ArrayRef Linears, @@ -7297,13 +7324,15 @@ Sema::DeclGroupPtrTy Sema::ActOnOpenMPDeclareSimdDirective( NewStep = PerformOpenMPImplicitIntegerConversion(Step->getExprLoc(), Step) .get(); if (NewStep) - NewStep = - VerifyIntegerConstantExpression(NewStep, /*FIXME*/ AllowFold).get(); + NewStep = SemaRef + .VerifyIntegerConstantExpression( + NewStep, /*FIXME*/ Sema::AllowFold) + .get(); } NewSteps.push_back(NewStep); } auto *NewAttr = OMPDeclareSimdDeclAttr::CreateImplicit( - Context, BS, SL.get(), const_cast(Uniforms.data()), + getASTContext(), BS, SL.get(), const_cast(Uniforms.data()), Uniforms.size(), const_cast(Aligneds.data()), 
Aligneds.size(), const_cast(NewAligns.data()), NewAligns.size(), const_cast(Linears.data()), Linears.size(), @@ -7336,7 +7365,7 @@ static void setPrototype(Sema &S, FunctionDecl *FD, FunctionDecl *FDWithProto, FD->setParams(Params); } -void Sema::ActOnFinishedFunctionDefinitionInOpenMPAssumeScope(Decl *D) { +void SemaOpenMP::ActOnFinishedFunctionDefinitionInOpenMPAssumeScope(Decl *D) { if (D->isInvalidDecl()) return; FunctionDecl *FD = nullptr; @@ -7349,7 +7378,7 @@ void Sema::ActOnFinishedFunctionDefinitionInOpenMPAssumeScope(Decl *D) { // If we are instantiating templates we do *not* apply scoped assumptions but // only global ones. We apply scoped assumption to the template definition // though. - if (!inTemplateInstantiation()) { + if (!SemaRef.inTemplateInstantiation()) { for (OMPAssumeAttr *AA : OMPAssumeScoped) FD->addAttr(AA); } @@ -7357,10 +7386,10 @@ void Sema::ActOnFinishedFunctionDefinitionInOpenMPAssumeScope(Decl *D) { FD->addAttr(AA); } -Sema::OMPDeclareVariantScope::OMPDeclareVariantScope(OMPTraitInfo &TI) +SemaOpenMP::OMPDeclareVariantScope::OMPDeclareVariantScope(OMPTraitInfo &TI) : TI(&TI), NameSuffix(TI.getMangledName()) {} -void Sema::ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( +void SemaOpenMP::ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( Scope *S, Declarator &D, MultiTemplateParamsArg TemplateParamLists, SmallVectorImpl &Bases) { if (!D.getIdentifier()) @@ -7376,11 +7405,11 @@ void Sema::ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( return; const IdentifierInfo *BaseII = D.getIdentifier(); - LookupResult Lookup(*this, DeclarationName(BaseII), D.getIdentifierLoc(), - LookupOrdinaryName); - LookupParsedName(Lookup, S, &D.getCXXScopeSpec()); + LookupResult Lookup(SemaRef, DeclarationName(BaseII), D.getIdentifierLoc(), + Sema::LookupOrdinaryName); + SemaRef.LookupParsedName(Lookup, S, &D.getCXXScopeSpec()); - TypeSourceInfo *TInfo = GetTypeForDeclarator(D); + TypeSourceInfo *TInfo = 
SemaRef.GetTypeForDeclarator(D); QualType FType = TInfo->getType(); bool IsConstexpr = @@ -7409,7 +7438,7 @@ void Sema::ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( QualType UDeclTy = UDecl->getType(); if (!UDeclTy->isDependentType()) { - QualType NewType = Context.mergeFunctionTypes( + QualType NewType = getASTContext().mergeFunctionTypes( FType, UDeclTy, /* OfBlockPointer */ false, /* Unqualified */ false, /* AllowCXX */ true); if (NewType.isNull()) @@ -7425,7 +7454,7 @@ void Sema::ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( // If no base was found we create a declaration that we use as base. if (Bases.empty() && UseImplicitBase) { D.setFunctionDefinitionKind(FunctionDefinitionKind::Declaration); - Decl *BaseD = HandleDeclarator(S, D, TemplateParamLists); + Decl *BaseD = SemaRef.HandleDeclarator(S, D, TemplateParamLists); BaseD->setImplicit(true); if (auto *BaseTemplD = dyn_cast(BaseD)) Bases.push_back(BaseTemplD->getTemplatedDecl()); @@ -7437,18 +7466,18 @@ void Sema::ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope( MangledName += D.getIdentifier()->getName(); MangledName += getOpenMPVariantManglingSeparatorStr(); MangledName += DVScope.NameSuffix; - IdentifierInfo &VariantII = Context.Idents.get(MangledName); + IdentifierInfo &VariantII = getASTContext().Idents.get(MangledName); VariantII.setMangledOpenMPVariantName(true); D.SetIdentifier(&VariantII, D.getBeginLoc()); } -void Sema::ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope( +void SemaOpenMP::ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope( Decl *D, SmallVectorImpl &Bases) { // Do not mark function as is used to prevent its emission if this is the // only place where it is used. 
EnterExpressionEvaluationContext Unevaluated( - *this, Sema::ExpressionEvaluationContext::Unevaluated); + SemaRef, Sema::ExpressionEvaluationContext::Unevaluated); FunctionDecl *FD = nullptr; if (auto *UTemplDecl = dyn_cast(D)) @@ -7456,14 +7485,14 @@ void Sema::ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope( else FD = cast(D); auto *VariantFuncRef = DeclRefExpr::Create( - Context, NestedNameSpecifierLoc(), SourceLocation(), FD, + getASTContext(), NestedNameSpecifierLoc(), SourceLocation(), FD, /* RefersToEnclosingVariableOrCapture */ false, /* NameLoc */ FD->getLocation(), FD->getType(), ExprValueKind::VK_PRValue); OMPDeclareVariantScope &DVScope = OMPDeclareVariantScopes.back(); auto *OMPDeclareVariantA = OMPDeclareVariantAttr::CreateImplicit( - Context, VariantFuncRef, DVScope.TI, + getASTContext(), VariantFuncRef, DVScope.TI, /*NothingArgs=*/nullptr, /*NothingArgsSize=*/0, /*NeedDevicePtrArgs=*/nullptr, /*NeedDevicePtrArgsSize=*/0, /*AppendArgs=*/nullptr, /*AppendArgsSize=*/0); @@ -7471,10 +7500,11 @@ void Sema::ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope( BaseFD->addAttr(OMPDeclareVariantA); } -ExprResult Sema::ActOnOpenMPCall(ExprResult Call, Scope *Scope, - SourceLocation LParenLoc, - MultiExprArg ArgExprs, - SourceLocation RParenLoc, Expr *ExecConfig) { +ExprResult SemaOpenMP::ActOnOpenMPCall(ExprResult Call, Scope *Scope, + SourceLocation LParenLoc, + MultiExprArg ArgExprs, + SourceLocation RParenLoc, + Expr *ExecConfig) { // The common case is a regular call we do not want to specialize at all. Try // to make that case fast by bailing early. 
CallExpr *CE = dyn_cast(Call.get()); @@ -7485,7 +7515,7 @@ ExprResult Sema::ActOnOpenMPCall(ExprResult Call, Scope *Scope, if (!CalleeFnDecl) return Call; - if (LangOpts.OpenMP >= 51 && CalleeFnDecl->getIdentifier() && + if (getLangOpts().OpenMP >= 51 && CalleeFnDecl->getIdentifier() && CalleeFnDecl->getName().starts_with_insensitive("omp_")) { // checking for any calls inside an Order region if (Scope && Scope->isOpenMPOrderClauseScope()) @@ -7504,7 +7534,8 @@ ExprResult Sema::ActOnOpenMPCall(ExprResult Call, Scope *Scope, << ISATrait; }; TargetOMPContext OMPCtx(Context, std::move(DiagUnknownTrait), - getCurFunctionDecl(), DSAStack->getConstructTraits()); + SemaRef.getCurFunctionDecl(), + DSAStack->getConstructTraits()); QualType CalleeFnType = CalleeFnDecl->getType(); @@ -7549,7 +7580,7 @@ ExprResult Sema::ActOnOpenMPCall(ExprResult Call, Scope *Scope, // different type than the base function. This is intended and OK but if // we cannot create a call the difference is not in the "implementation // defined range" we allow. 
- Sema::TentativeAnalysisScope Trap(*this); + Sema::TentativeAnalysisScope Trap(SemaRef); if (auto *SpecializedMethod = dyn_cast(BestDecl)) { auto *MemberCall = dyn_cast(CE); @@ -7558,12 +7589,12 @@ ExprResult Sema::ActOnOpenMPCall(ExprResult Call, Scope *Scope, /* IsArrow */ false, SpecializedMethod, Context.BoundMemberTy, MemberCall->getValueKind(), MemberCall->getObjectKind()); } - NewCall = BuildCallExpr(Scope, BestExpr, LParenLoc, ArgExprs, RParenLoc, - ExecConfig); + NewCall = SemaRef.BuildCallExpr(Scope, BestExpr, LParenLoc, ArgExprs, + RParenLoc, ExecConfig); if (NewCall.isUsable()) { if (CallExpr *NCE = dyn_cast(NewCall.get())) { FunctionDecl *NewCalleeFnDecl = NCE->getDirectCallee(); - QualType NewType = Context.mergeFunctionTypes( + QualType NewType = getASTContext().mergeFunctionTypes( CalleeFnType, NewCalleeFnDecl->getType(), /* OfBlockPointer */ false, /* Unqualified */ false, /* AllowCXX */ true); @@ -7581,14 +7612,16 @@ ExprResult Sema::ActOnOpenMPCall(ExprResult Call, Scope *Scope, if (!NewCall.isUsable()) return Call; - return PseudoObjectExpr::Create(Context, CE, {NewCall.get()}, 0); + return PseudoObjectExpr::Create(getASTContext(), CE, {NewCall.get()}, 0); } std::optional> -Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, - Expr *VariantRef, OMPTraitInfo &TI, - unsigned NumAppendArgs, - SourceRange SR) { +SemaOpenMP::checkOpenMPDeclareVariantFunction(SemaOpenMP::DeclGroupPtrTy DG, + Expr *VariantRef, + OMPTraitInfo &TI, + unsigned NumAppendArgs, + SourceRange SR) { + ASTContext &Context = getASTContext(); if (!DG || DG.get().isNull()) return std::nullopt; @@ -7631,7 +7664,7 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, // Check if the function was emitted already. 
const FunctionDecl *Definition; if (!FD->isThisDeclarationADefinition() && FD->isDefined(Definition) && - (LangOpts.EmitAllDecls || Context.DeclMustBeEmitted(Definition))) + (getLangOpts().EmitAllDecls || Context.DeclMustBeEmitted(Definition))) Diag(SR.getBegin(), diag::warn_omp_declare_variant_after_emitted) << FD->getLocation(); @@ -7654,7 +7687,7 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, // Deal with non-constant score and user condition expressions. auto HandleNonConstantScoresAndConditions = [this](Expr *&E, bool IsScore) -> bool { - if (!E || E->isIntegerConstantExpr(Context)) + if (!E || E->isIntegerConstantExpr(getASTContext())) return false; if (IsScore) { @@ -7686,9 +7719,9 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, // Adjust the function type to account for an extra omp_interop_t for each // specified in the append_args clause. const TypeDecl *TD = nullptr; - LookupResult Result(*this, &Context.Idents.get("omp_interop_t"), + LookupResult Result(SemaRef, &Context.Idents.get("omp_interop_t"), SR.getBegin(), Sema::LookupOrdinaryName); - if (LookupName(Result, getCurScope())) { + if (SemaRef.LookupName(Result, SemaRef.getCurScope())) { NamedDecl *ND = Result.getFoundDecl(); TD = dyn_cast_or_null(ND); } @@ -7711,7 +7744,7 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, // Convert VariantRef expression to the type of the original function to // resolve possible conflicts. ExprResult VariantRefCast = VariantRef; - if (LangOpts.CPlusPlus) { + if (getLangOpts().CPlusPlus) { QualType FnPtrType; auto *Method = dyn_cast(FD); if (Method && !Method->isStatic()) { @@ -7722,9 +7755,9 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, { // Build adrr_of unary op to correctly handle type checks for member // functions. 
- Sema::TentativeAnalysisScope Trap(*this); - ER = CreateBuiltinUnaryOp(VariantRef->getBeginLoc(), UO_AddrOf, - VariantRef); + Sema::TentativeAnalysisScope Trap(SemaRef); + ER = SemaRef.CreateBuiltinUnaryOp(VariantRef->getBeginLoc(), UO_AddrOf, + VariantRef); } if (!ER.isUsable()) { Diag(VariantRef->getExprLoc(), diag::err_omp_function_expected) @@ -7737,9 +7770,9 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, } QualType VarianPtrType = Context.getPointerType(VariantRef->getType()); if (VarianPtrType.getUnqualifiedType() != FnPtrType.getUnqualifiedType()) { - ImplicitConversionSequence ICS = TryImplicitConversion( + ImplicitConversionSequence ICS = SemaRef.TryImplicitConversion( VariantRef, FnPtrType.getUnqualifiedType(), - /*SuppressUserConversions=*/false, AllowedExplicit::None, + /*SuppressUserConversions=*/false, Sema::AllowedExplicit::None, /*InOverloadResolution=*/false, /*CStyle=*/false, /*AllowObjCWritebackConversion=*/false); @@ -7751,8 +7784,8 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, << (NumAppendArgs ? 1 : 0) << VariantRef->getSourceRange(); return std::nullopt; } - VariantRefCast = PerformImplicitConversion( - VariantRef, FnPtrType.getUnqualifiedType(), AA_Converting); + VariantRefCast = SemaRef.PerformImplicitConversion( + VariantRef, FnPtrType.getUnqualifiedType(), Sema::AA_Converting); if (!VariantRefCast.isUsable()) return std::nullopt; } @@ -7765,7 +7798,7 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, } } - ExprResult ER = CheckPlaceholderExpr(VariantRefCast.get()); + ExprResult ER = SemaRef.CheckPlaceholderExpr(VariantRefCast.get()); if (!ER.isUsable() || !ER.get()->IgnoreParenImpCasts()->getType()->isFunctionType()) { Diag(VariantRef->getExprLoc(), diag::err_omp_function_expected) @@ -7795,7 +7828,7 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, } // Check if function types are compatible in C. 
- if (!LangOpts.CPlusPlus) { + if (!getLangOpts().CPlusPlus) { QualType NewType = Context.mergeFunctionTypes(AdjustedFnType, NewFD->getType()); if (NewType.isNull()) { @@ -7807,9 +7840,9 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, } if (NewType->isFunctionProtoType()) { if (FD->getType()->isFunctionNoProtoType()) - setPrototype(*this, FD, NewFD, NewType); + setPrototype(SemaRef, FD, NewFD, NewType); else if (NewFD->getType()->isFunctionNoProtoType()) - setPrototype(*this, NewFD, FD, NewType); + setPrototype(SemaRef, NewFD, FD, NewType); } } @@ -7872,15 +7905,15 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, } // Check general compatibility. - if (areMultiversionVariantFunctionsCompatible( + if (SemaRef.areMultiversionVariantFunctionsCompatible( FD, NewFD, PartialDiagnostic::NullDiagnostic(), PartialDiagnosticAt(SourceLocation(), PartialDiagnostic::NullDiagnostic()), PartialDiagnosticAt( VariantRef->getExprLoc(), - PDiag(diag::err_omp_declare_variant_doesnt_support)), + SemaRef.PDiag(diag::err_omp_declare_variant_doesnt_support)), PartialDiagnosticAt(VariantRef->getExprLoc(), - PDiag(diag::err_omp_declare_variant_diff) + SemaRef.PDiag(diag::err_omp_declare_variant_diff) << FD->getLocation()), /*TemplatesSupported=*/true, /*ConstexprSupported=*/false, /*CLinkageMayDiffer=*/true)) @@ -7888,7 +7921,7 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, return std::make_pair(FD, cast(DRE)); } -void Sema::ActOnOpenMPDeclareVariantDirective( +void SemaOpenMP::ActOnOpenMPDeclareVariantDirective( FunctionDecl *FD, Expr *VariantRef, OMPTraitInfo &TI, ArrayRef AdjustArgsNothing, ArrayRef AdjustArgsNeedDevicePtr, @@ -7906,7 +7939,7 @@ void Sema::ActOnOpenMPDeclareVariantDirective( if (!AllAdjustArgs.empty() || !AppendArgs.empty()) { VariantMatchInfo VMI; - TI.getAsVariantMatchInfo(Context, VMI); + TI.getAsVariantMatchInfo(getASTContext(), VMI); if (!llvm::is_contained( VMI.ConstructTraits, 
llvm::omp::TraitProperty::construct_dispatch_dispatch)) { @@ -7949,18 +7982,18 @@ void Sema::ActOnOpenMPDeclareVariantDirective( } auto *NewAttr = OMPDeclareVariantAttr::CreateImplicit( - Context, VariantRef, &TI, const_cast(AdjustArgsNothing.data()), - AdjustArgsNothing.size(), + getASTContext(), VariantRef, &TI, + const_cast(AdjustArgsNothing.data()), AdjustArgsNothing.size(), const_cast(AdjustArgsNeedDevicePtr.data()), AdjustArgsNeedDevicePtr.size(), const_cast(AppendArgs.data()), AppendArgs.size(), SR); FD->addAttr(NewAttr); } -StmtResult Sema::ActOnOpenMPParallelDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult +SemaOpenMP::ActOnOpenMPParallelDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); @@ -7972,11 +8005,11 @@ StmtResult Sema::ActOnOpenMPParallelDirective(ArrayRef Clauses, // longjmp() and throw() must not violate the entry/exit criteria. CS->getCapturedDecl()->setNothrow(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); - return OMPParallelDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt, - DSAStack->getTaskgroupReductionRef(), - DSAStack->isCancelRegion()); + return OMPParallelDirective::Create( + getASTContext(), StartLoc, EndLoc, Clauses, AStmt, + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } namespace { @@ -8226,7 +8259,7 @@ bool OpenMPIterationSpaceChecker::setStep(Expr *NewStep, bool Subtract) { if (!NewStep->isValueDependent()) { // Check that the step is integer expression. 
SourceLocation StepLoc = NewStep->getBeginLoc(); - ExprResult Val = SemaRef.PerformOpenMPImplicitIntegerConversion( + ExprResult Val = SemaRef.OpenMP().PerformOpenMPImplicitIntegerConversion( StepLoc, getExprAsWritten(NewStep)); if (Val.isInvalid()) return true; @@ -9248,7 +9281,7 @@ DeclRefExpr *OpenMPIterationSpaceChecker::buildCounterVar( DSAStackTy &DSA) const { auto *VD = dyn_cast(LCDecl); if (!VD) { - VD = SemaRef.isOpenMPCapturedDecl(LCDecl); + VD = SemaRef.OpenMP().isOpenMPCapturedDecl(LCDecl); DeclRefExpr *Ref = buildDeclRefExpr( SemaRef, VD, VD->getType().getNonReferenceType(), DefaultLoc); const DSAStackTy::DSAVarData Data = @@ -9321,14 +9354,15 @@ Expr *OpenMPIterationSpaceChecker::buildOrderedLoopData( } } // namespace -void Sema::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init) { +void SemaOpenMP::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, + Stmt *Init) { assert(getLangOpts().OpenMP && "OpenMP is not active."); assert(Init && "Expected loop in canonical form."); unsigned AssociatedLoops = DSAStack->getAssociatedLoops(); if (AssociatedLoops > 0 && isOpenMPLoopDirective(DSAStack->getCurrentDirective())) { DSAStack->loopStart(); - OpenMPIterationSpaceChecker ISC(*this, /*SupportsNonRectangular=*/true, + OpenMPIterationSpaceChecker ISC(SemaRef, /*SupportsNonRectangular=*/true, *DSAStack, ForLoc); if (!ISC.checkAndSetInit(Init, /*EmitDiags=*/false)) { if (ValueDecl *D = ISC.getLoopDecl()) { @@ -9338,7 +9372,7 @@ void Sema::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init) { if (VarDecl *Private = isOpenMPCapturedDecl(D)) { VD = Private; } else { - PrivateRef = buildCapture(*this, D, ISC.getLoopDeclRefExpr(), + PrivateRef = buildCapture(SemaRef, D, ISC.getLoopDeclRefExpr(), /*WithInit=*/false); VD = cast(PrivateRef->getDecl()); } @@ -9348,10 +9382,10 @@ void Sema::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init) { if (LD != D->getCanonicalDecl()) { DSAStack->resetPossibleLoopCounter(); if (auto *Var = 
dyn_cast_or_null(LD)) - MarkDeclarationsReferencedInExpr( - buildDeclRefExpr(*this, const_cast(Var), - Var->getType().getNonLValueExprType(Context), - ForLoc, /*RefersToCapture=*/true)); + SemaRef.MarkDeclarationsReferencedInExpr(buildDeclRefExpr( + SemaRef, const_cast(Var), + Var->getType().getNonLValueExprType(getASTContext()), ForLoc, + /*RefersToCapture=*/true)); } OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); // OpenMP [2.14.1.1, Data-sharing Attribute Rules for Variables @@ -9372,8 +9406,8 @@ void Sema::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init) { : OMPC_private; if (((isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown && DVar.CKind != PredeterminedCKind && DVar.RefExpr && - (LangOpts.OpenMP <= 45 || (DVar.CKind != OMPC_lastprivate && - DVar.CKind != OMPC_private))) || + (getLangOpts().OpenMP <= 45 || (DVar.CKind != OMPC_lastprivate && + DVar.CKind != OMPC_private))) || ((isOpenMPWorksharingDirective(DKind) || DKind == OMPD_taskloop || DKind == OMPD_master_taskloop || DKind == OMPD_masked_taskloop || DKind == OMPD_parallel_master_taskloop || @@ -9388,7 +9422,7 @@ void Sema::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init) { << getOpenMPClauseName(PredeterminedCKind); if (DVar.RefExpr == nullptr) DVar.CKind = PredeterminedCKind; - reportOriginalDsa(*this, DSAStack, D, DVar, + reportOriginalDsa(SemaRef, DSAStack, D, DVar, /*IsLoopIterVar=*/true); } else if (LoopDeclRefExpr) { // Make the loop iteration variable private (for worksharing @@ -9428,7 +9462,7 @@ static bool checkOpenMPIterationSpace( unsigned CurrentNestedLoopCount, unsigned NestedLoopCount, unsigned TotalNestedLoopCount, Expr *CollapseLoopCountExpr, Expr *OrderedLoopCountExpr, - Sema::VarsWithInheritedDSAType &VarsWithImplicitDSA, + SemaOpenMP::VarsWithInheritedDSAType &VarsWithImplicitDSA, llvm::MutableArrayRef ResultIterSpaces, llvm::MapVector &Captures) { bool SupportsNonRectangular = !isOpenMPLoopTransformationDirective(DKind); @@ 
-9817,7 +9851,7 @@ static unsigned checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, Expr *OrderedLoopCountExpr, Stmt *AStmt, Sema &SemaRef, DSAStackTy &DSA, - Sema::VarsWithInheritedDSAType &VarsWithImplicitDSA, + SemaOpenMP::VarsWithInheritedDSAType &VarsWithImplicitDSA, OMPLoopBasedDirective::HelperExprs &Built) { unsigned NestedLoopCount = 1; bool SupportsNonPerfectlyNested = (SemaRef.LangOpts.OpenMP >= 50) && @@ -10566,7 +10600,8 @@ static bool checkGenericLoopLastprivate(Sema &S, ArrayRef Clauses, OpenMPDirectiveKind K, DSAStackTy *Stack); -bool Sema::checkLastPrivateForMappedDirectives(ArrayRef Clauses) { +bool SemaOpenMP::checkLastPrivateForMappedDirectives( + ArrayRef Clauses) { // Check for syntax of lastprivate // Param of the lastprivate have different meanings in the mapped directives @@ -10574,16 +10609,15 @@ bool Sema::checkLastPrivateForMappedDirectives(ArrayRef Clauses) { // "omp for" lastprivate vars must be shared if (getLangOpts().OpenMP >= 50 && DSAStack->getMappedDirective() == OMPD_loop && - checkGenericLoopLastprivate(*this, Clauses, OMPD_loop, DSAStack)) { + checkGenericLoopLastprivate(SemaRef, Clauses, OMPD_loop, DSAStack)) { return false; } return true; } -StmtResult -Sema::ActOnOpenMPSimdDirective(ArrayRef Clauses, Stmt *AStmt, - SourceLocation StartLoc, SourceLocation EndLoc, - VarsWithInheritedDSAType &VarsWithImplicitDSA) { +StmtResult SemaOpenMP::ActOnOpenMPSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) return StmtError(); @@ -10596,38 +10630,37 @@ Sema::ActOnOpenMPSimdDirective(ArrayRef Clauses, Stmt *AStmt, // define the nested loops number. 
unsigned NestedLoopCount = checkOpenMPLoop( OMPD_simd, getCollapseNumberExpr(Clauses), getOrderedNumberExpr(Clauses), - AStmt, *this, *DSAStack, VarsWithImplicitDSA, B); + AStmt, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp simd loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } - if (checkSimdlenSafelenSpecified(*this, Clauses)) + if (checkSimdlenSafelenSpecified(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); auto *SimdDirective = OMPSimdDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, DSAStack->getMappedDirective()); return SimdDirective; } -StmtResult -Sema::ActOnOpenMPForDirective(ArrayRef Clauses, Stmt *AStmt, - SourceLocation StartLoc, SourceLocation EndLoc, - VarsWithInheritedDSAType &VarsWithImplicitDSA) { +StmtResult SemaOpenMP::ActOnOpenMPForDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) return StmtError(); @@ -10640,32 +10673,32 @@ Sema::ActOnOpenMPForDirective(ArrayRef Clauses, Stmt *AStmt, // define the nested loops number. 
unsigned NestedLoopCount = checkOpenMPLoop( OMPD_for, getCollapseNumberExpr(Clauses), getOrderedNumberExpr(Clauses), - AStmt, *this, *DSAStack, VarsWithImplicitDSA, B); + AStmt, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } auto *ForDirective = OMPForDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion(), DSAStack->getMappedDirective()); return ForDirective; } -StmtResult Sema::ActOnOpenMPForSimdDirective( +StmtResult SemaOpenMP::ActOnOpenMPForSimdDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -10677,37 +10710,37 @@ StmtResult Sema::ActOnOpenMPForSimdDirective( // define the nested loops number. 
unsigned NestedLoopCount = checkOpenMPLoop(OMPD_for_simd, getCollapseNumberExpr(Clauses), - getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack, + getOrderedNumberExpr(Clauses), AStmt, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for simd loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } - if (checkSimdlenSafelenSpecified(*this, Clauses)) + if (checkSimdlenSafelenSpecified(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); - return OMPForSimdDirective::Create(Context, StartLoc, EndLoc, NestedLoopCount, - Clauses, AStmt, B); + SemaRef.setFunctionHasBranchProtectedScope(); + return OMPForSimdDirective::Create(getASTContext(), StartLoc, EndLoc, + NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPSectionsDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult +SemaOpenMP::ActOnOpenMPSectionsDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); @@ -10736,23 +10769,23 @@ StmtResult Sema::ActOnOpenMPSectionsDirective(ArrayRef Clauses, return StmtError(); } - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); - return OMPSectionsDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt, - DSAStack->getTaskgroupReductionRef(), - DSAStack->isCancelRegion()); + return OMPSectionsDirective::Create( 
+ getASTContext(), StartLoc, EndLoc, Clauses, AStmt, + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } -StmtResult Sema::ActOnOpenMPSectionDirective(Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPSectionDirective(Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); DSAStack->setParentCancelRegion(DSAStack->isCancelRegion()); - return OMPSectionDirective::Create(Context, StartLoc, EndLoc, AStmt, + return OMPSectionDirective::Create(getASTContext(), StartLoc, EndLoc, AStmt, DSAStack->isCancelRegion()); } @@ -10764,10 +10797,10 @@ static Expr *getDirectCallExpr(Expr *E) { return nullptr; } -StmtResult Sema::ActOnOpenMPDispatchDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult +SemaOpenMP::ActOnOpenMPDispatchDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); @@ -10780,7 +10813,7 @@ StmtResult Sema::ActOnOpenMPDispatchDirective(ArrayRef Clauses, SourceLocation TargetCallLoc; - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { Expr *TargetCall = nullptr; auto *E = dyn_cast(S); @@ -10808,10 +10841,10 @@ StmtResult Sema::ActOnOpenMPDispatchDirective(ArrayRef Clauses, TargetCallLoc = TargetCall->getExprLoc(); } - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); - return OMPDispatchDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt, - TargetCallLoc); + return OMPDispatchDirective::Create(getASTContext(), StartLoc, EndLoc, + Clauses, AStmt, TargetCallLoc); } static bool checkGenericLoopLastprivate(Sema &S, ArrayRef Clauses, @@ -10839,7 +10872,7 @@ static bool checkGenericLoopLastprivate(Sema &S, ArrayRef Clauses, return ErrorFound; } -StmtResult 
Sema::ActOnOpenMPGenericLoopDirective( +StmtResult SemaOpenMP::ActOnOpenMPGenericLoopDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -10848,7 +10881,7 @@ StmtResult Sema::ActOnOpenMPGenericLoopDirective( // OpenMP 5.1 [2.11.7, loop construct, Restrictions] // A list item may not appear in a lastprivate clause unless it is the // loop iteration variable of a loop that is associated with the construct. - if (checkGenericLoopLastprivate(*this, Clauses, OMPD_loop, DSAStack)) + if (checkGenericLoopLastprivate(SemaRef, Clauses, OMPD_loop, DSAStack)) return StmtError(); auto *CS = cast(AStmt); @@ -10863,19 +10896,19 @@ StmtResult Sema::ActOnOpenMPGenericLoopDirective( // In presence of clause 'collapse', it will define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( OMPD_loop, getCollapseNumberExpr(Clauses), getOrderedNumberExpr(Clauses), - AStmt, *this, *DSAStack, VarsWithImplicitDSA, B); + AStmt, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp loop exprs were not built"); - setFunctionHasBranchProtectedScope(); - return OMPGenericLoopDirective::Create(Context, StartLoc, EndLoc, + SemaRef.setFunctionHasBranchProtectedScope(); + return OMPGenericLoopDirective::Create(getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPTeamsGenericLoopDirective( +StmtResult SemaOpenMP::ActOnOpenMPTeamsGenericLoopDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -10884,7 +10917,7 @@ StmtResult Sema::ActOnOpenMPTeamsGenericLoopDirective( // OpenMP 5.1 [2.11.7, loop construct, Restrictions] // A list item may not appear in a lastprivate 
clause unless it is the // loop iteration variable of a loop that is associated with the construct. - if (checkGenericLoopLastprivate(*this, Clauses, OMPD_teams_loop, DSAStack)) + if (checkGenericLoopLastprivate(SemaRef, Clauses, OMPD_teams_loop, DSAStack)) return StmtError(); auto *CS = cast(AStmt); @@ -10909,22 +10942,22 @@ StmtResult Sema::ActOnOpenMPTeamsGenericLoopDirective( // In presence of clause 'collapse', it will define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop(OMPD_teams_loop, getCollapseNumberExpr(Clauses), - /*OrderedLoopCountExpr=*/nullptr, CS, *this, *DSAStack, + /*OrderedLoopCountExpr=*/nullptr, CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp loop exprs were not built"); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); DSAStack->setParentTeamsRegionLoc(StartLoc); return OMPTeamsGenericLoopDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPTargetTeamsGenericLoopDirective( +StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsGenericLoopDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -10933,7 +10966,7 @@ StmtResult Sema::ActOnOpenMPTargetTeamsGenericLoopDirective( // OpenMP 5.1 [2.11.7, loop construct, Restrictions] // A list item may not appear in a lastprivate clause unless it is the // loop iteration variable of a loop that is associated with the construct. 
- if (checkGenericLoopLastprivate(*this, Clauses, OMPD_target_teams_loop, + if (checkGenericLoopLastprivate(SemaRef, Clauses, OMPD_target_teams_loop, DSAStack)) return StmtError(); @@ -10959,22 +10992,22 @@ StmtResult Sema::ActOnOpenMPTargetTeamsGenericLoopDirective( // In presence of clause 'collapse', it will define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop(OMPD_target_teams_loop, getCollapseNumberExpr(Clauses), - /*OrderedLoopCountExpr=*/nullptr, CS, *this, *DSAStack, + /*OrderedLoopCountExpr=*/nullptr, CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp loop exprs were not built"); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPTargetTeamsGenericLoopDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, - teamsLoopCanBeParallelFor(AStmt, *this)); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + teamsLoopCanBeParallelFor(AStmt, SemaRef)); } -StmtResult Sema::ActOnOpenMPParallelGenericLoopDirective( +StmtResult SemaOpenMP::ActOnOpenMPParallelGenericLoopDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -10983,7 +11016,8 @@ StmtResult Sema::ActOnOpenMPParallelGenericLoopDirective( // OpenMP 5.1 [2.11.7, loop construct, Restrictions] // A list item may not appear in a lastprivate clause unless it is the // loop iteration variable of a loop that is associated with the construct. 
- if (checkGenericLoopLastprivate(*this, Clauses, OMPD_parallel_loop, DSAStack)) + if (checkGenericLoopLastprivate(SemaRef, Clauses, OMPD_parallel_loop, + DSAStack)) return StmtError(); auto *CS = cast(AStmt); @@ -11008,21 +11042,21 @@ StmtResult Sema::ActOnOpenMPParallelGenericLoopDirective( // In presence of clause 'collapse', it will define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop(OMPD_parallel_loop, getCollapseNumberExpr(Clauses), - /*OrderedLoopCountExpr=*/nullptr, CS, *this, *DSAStack, + /*OrderedLoopCountExpr=*/nullptr, CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp loop exprs were not built"); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPParallelGenericLoopDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPTargetParallelGenericLoopDirective( +StmtResult SemaOpenMP::ActOnOpenMPTargetParallelGenericLoopDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -11031,7 +11065,7 @@ StmtResult Sema::ActOnOpenMPTargetParallelGenericLoopDirective( // OpenMP 5.1 [2.11.7, loop construct, Restrictions] // A list item may not appear in a lastprivate clause unless it is the // loop iteration variable of a loop that is associated with the construct. 
- if (checkGenericLoopLastprivate(*this, Clauses, OMPD_target_parallel_loop, + if (checkGenericLoopLastprivate(SemaRef, Clauses, OMPD_target_parallel_loop, DSAStack)) return StmtError(); @@ -11057,30 +11091,30 @@ StmtResult Sema::ActOnOpenMPTargetParallelGenericLoopDirective( // In presence of clause 'collapse', it will define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop(OMPD_target_parallel_loop, getCollapseNumberExpr(Clauses), - /*OrderedLoopCountExpr=*/nullptr, CS, *this, *DSAStack, + /*OrderedLoopCountExpr=*/nullptr, CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp loop exprs were not built"); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPTargetParallelGenericLoopDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPSingleDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPSingleDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); assert(isa(AStmt) && "Captured statement expected"); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); // OpenMP [2.7.3, single Construct, Restrictions] // The copyprivate clause must not be used with the nowait clause. 
@@ -11099,33 +11133,35 @@ StmtResult Sema::ActOnOpenMPSingleDirective(ArrayRef Clauses, } } - return OMPSingleDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt); + return OMPSingleDirective::Create(getASTContext(), StartLoc, EndLoc, Clauses, + AStmt); } -StmtResult Sema::ActOnOpenMPMasterDirective(Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPMasterDirective(Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); - return OMPMasterDirective::Create(Context, StartLoc, EndLoc, AStmt); + return OMPMasterDirective::Create(getASTContext(), StartLoc, EndLoc, AStmt); } -StmtResult Sema::ActOnOpenMPMaskedDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPMaskedDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); - return OMPMaskedDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt); + return OMPMaskedDirective::Create(getASTContext(), StartLoc, EndLoc, Clauses, + AStmt); } -StmtResult Sema::ActOnOpenMPCriticalDirective( +StmtResult SemaOpenMP::ActOnOpenMPCriticalDirective( const DeclarationNameInfo &DirName, ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc) { if (!AStmt) @@ -11146,7 +11182,7 @@ StmtResult Sema::ActOnOpenMPCriticalDirective( E->isInstantiationDependent()) { DependentHint = true; } else { - Hint = E->EvaluateKnownConstInt(Context); + Hint = E->EvaluateKnownConstInt(getASTContext()); HintLoc = C->getBeginLoc(); } } @@ -11165,7 +11201,7 @@ StmtResult Sema::ActOnOpenMPCriticalDirective( if (const auto *C = Pair.first->getSingleClause()) { Diag(C->getBeginLoc(), 
diag::note_omp_critical_hint_here) << 1 - << toString(C->getHint()->EvaluateKnownConstInt(Context), + << toString(C->getHint()->EvaluateKnownConstInt(getASTContext()), /*Radix=*/10, /*Signed=*/false); } else { Diag(Pair.first->getBeginLoc(), diag::note_omp_critical_no_hint) << 1; @@ -11173,16 +11209,16 @@ StmtResult Sema::ActOnOpenMPCriticalDirective( } } - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); - auto *Dir = OMPCriticalDirective::Create(Context, DirName, StartLoc, EndLoc, - Clauses, AStmt); + auto *Dir = OMPCriticalDirective::Create(getASTContext(), DirName, StartLoc, + EndLoc, Clauses, AStmt); if (!Pair.first && DirName.getName() && !DependentHint) DSAStack->addCriticalWithHint(Dir, Hint); return Dir; } -StmtResult Sema::ActOnOpenMPParallelForDirective( +StmtResult SemaOpenMP::ActOnOpenMPParallelForDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -11201,32 +11237,32 @@ StmtResult Sema::ActOnOpenMPParallelForDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop(OMPD_parallel_for, getCollapseNumberExpr(Clauses), - getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack, + getOrderedNumberExpr(Clauses), AStmt, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp parallel for loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. 
for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPParallelForDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } -StmtResult Sema::ActOnOpenMPParallelForSimdDirective( +StmtResult SemaOpenMP::ActOnOpenMPParallelForSimdDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -11245,34 +11281,33 @@ StmtResult Sema::ActOnOpenMPParallelForSimdDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop(OMPD_parallel_for_simd, getCollapseNumberExpr(Clauses), - getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack, + getOrderedNumberExpr(Clauses), AStmt, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. 
for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } - if (checkSimdlenSafelenSpecified(*this, Clauses)) + if (checkSimdlenSafelenSpecified(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPParallelForSimdDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult -Sema::ActOnOpenMPParallelMasterDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPParallelMasterDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); @@ -11285,17 +11320,16 @@ Sema::ActOnOpenMPParallelMasterDirective(ArrayRef Clauses, // longjmp() and throw() must not violate the entry/exit criteria. CS->getCapturedDecl()->setNothrow(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPParallelMasterDirective::Create( - Context, StartLoc, EndLoc, Clauses, AStmt, + getASTContext(), StartLoc, EndLoc, Clauses, AStmt, DSAStack->getTaskgroupReductionRef()); } -StmtResult -Sema::ActOnOpenMPParallelMaskedDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPParallelMaskedDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); @@ -11308,17 +11342,16 @@ Sema::ActOnOpenMPParallelMaskedDirective(ArrayRef Clauses, // longjmp() and throw() must not violate the entry/exit criteria. 
CS->getCapturedDecl()->setNothrow(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPParallelMaskedDirective::Create( - Context, StartLoc, EndLoc, Clauses, AStmt, + getASTContext(), StartLoc, EndLoc, Clauses, AStmt, DSAStack->getTaskgroupReductionRef()); } -StmtResult -Sema::ActOnOpenMPParallelSectionsDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPParallelSectionsDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); @@ -11348,10 +11381,10 @@ Sema::ActOnOpenMPParallelSectionsDirective(ArrayRef Clauses, return StmtError(); } - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPParallelSectionsDirective::Create( - Context, StartLoc, EndLoc, Clauses, AStmt, + getASTContext(), StartLoc, EndLoc, Clauses, AStmt, DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } @@ -11378,16 +11411,17 @@ static bool checkMutuallyExclusiveClauses( return ErrorFound; } -StmtResult Sema::ActOnOpenMPTaskDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPTaskDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); // OpenMP 5.0, 2.10.1 task Construct // If a detach clause appears on the directive, then a mergeable clause cannot // appear on the same directive. - if (checkMutuallyExclusiveClauses(*this, Clauses, + if (checkMutuallyExclusiveClauses(SemaRef, Clauses, {OMPC_detach, OMPC_mergeable})) return StmtError(); @@ -11399,26 +11433,26 @@ StmtResult Sema::ActOnOpenMPTaskDirective(ArrayRef Clauses, // longjmp() and throw() must not violate the entry/exit criteria. 
CS->getCapturedDecl()->setNothrow(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); - return OMPTaskDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt, - DSAStack->isCancelRegion()); + return OMPTaskDirective::Create(getASTContext(), StartLoc, EndLoc, Clauses, + AStmt, DSAStack->isCancelRegion()); } -StmtResult Sema::ActOnOpenMPTaskyieldDirective(SourceLocation StartLoc, - SourceLocation EndLoc) { - return OMPTaskyieldDirective::Create(Context, StartLoc, EndLoc); +StmtResult SemaOpenMP::ActOnOpenMPTaskyieldDirective(SourceLocation StartLoc, + SourceLocation EndLoc) { + return OMPTaskyieldDirective::Create(getASTContext(), StartLoc, EndLoc); } -StmtResult Sema::ActOnOpenMPBarrierDirective(SourceLocation StartLoc, - SourceLocation EndLoc) { - return OMPBarrierDirective::Create(Context, StartLoc, EndLoc); +StmtResult SemaOpenMP::ActOnOpenMPBarrierDirective(SourceLocation StartLoc, + SourceLocation EndLoc) { + return OMPBarrierDirective::Create(getASTContext(), StartLoc, EndLoc); } -StmtResult Sema::ActOnOpenMPErrorDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc, - bool InExContext) { +StmtResult SemaOpenMP::ActOnOpenMPErrorDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc, + bool InExContext) { const OMPAtClause *AtC = OMPExecutableDirective::getSingleClause(Clauses); @@ -11443,12 +11477,13 @@ StmtResult Sema::ActOnOpenMPErrorDirective(ArrayRef Clauses, if (!SeverityC || SeverityC->getSeverityKind() != OMPC_SEVERITY_warning) return StmtError(); } - return OMPErrorDirective::Create(Context, StartLoc, EndLoc, Clauses); + return OMPErrorDirective::Create(getASTContext(), StartLoc, EndLoc, Clauses); } -StmtResult Sema::ActOnOpenMPTaskwaitDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult +SemaOpenMP::ActOnOpenMPTaskwaitDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc) { const OMPNowaitClause 
*NowaitC = OMPExecutableDirective::getSingleClause(Clauses); bool HasDependC = @@ -11459,28 +11494,29 @@ StmtResult Sema::ActOnOpenMPTaskwaitDirective(ArrayRef Clauses, return StmtError(); } - return OMPTaskwaitDirective::Create(Context, StartLoc, EndLoc, Clauses); + return OMPTaskwaitDirective::Create(getASTContext(), StartLoc, EndLoc, + Clauses); } -StmtResult Sema::ActOnOpenMPTaskgroupDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult +SemaOpenMP::ActOnOpenMPTaskgroupDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); assert(isa(AStmt) && "Captured statement expected"); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); - return OMPTaskgroupDirective::Create(Context, StartLoc, EndLoc, Clauses, - AStmt, + return OMPTaskgroupDirective::Create(getASTContext(), StartLoc, EndLoc, + Clauses, AStmt, DSAStack->getTaskgroupReductionRef()); } -StmtResult Sema::ActOnOpenMPFlushDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPFlushDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc) { OMPFlushClause *FC = nullptr; OMPClause *OrderClause = nullptr; for (OMPClause *C : Clauses) { @@ -11514,12 +11550,12 @@ StmtResult Sema::ActOnOpenMPFlushDirective(ArrayRef Clauses, << getOpenMPClauseName(OrderClause->getClauseKind()); return StmtError(); } - return OMPFlushDirective::Create(Context, StartLoc, EndLoc, Clauses); + return OMPFlushDirective::Create(getASTContext(), StartLoc, EndLoc, Clauses); } -StmtResult Sema::ActOnOpenMPDepobjDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPDepobjDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc) { if (Clauses.empty()) { Diag(StartLoc, diag::err_omp_depobj_expected); return StmtError(); 
@@ -11536,12 +11572,12 @@ StmtResult Sema::ActOnOpenMPDepobjDirective(ArrayRef Clauses, Diag(Clauses[0]->getEndLoc(), diag::err_omp_depobj_single_clause_expected); return StmtError(); } - return OMPDepobjDirective::Create(Context, StartLoc, EndLoc, Clauses); + return OMPDepobjDirective::Create(getASTContext(), StartLoc, EndLoc, Clauses); } -StmtResult Sema::ActOnOpenMPScanDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPScanDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc) { // Check that exactly one clause is specified. if (Clauses.size() != 1) { Diag(Clauses.empty() ? EndLoc : Clauses[1]->getBeginLoc(), @@ -11566,13 +11602,13 @@ StmtResult Sema::ActOnOpenMPScanDirective(ArrayRef Clauses, return StmtError(); } DSAStack->setParentHasScanDirective(StartLoc); - return OMPScanDirective::Create(Context, StartLoc, EndLoc, Clauses); + return OMPScanDirective::Create(getASTContext(), StartLoc, EndLoc, Clauses); } -StmtResult Sema::ActOnOpenMPOrderedDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult +SemaOpenMP::ActOnOpenMPOrderedDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc) { const OMPClause *DependFound = nullptr; const OMPClause *DependSourceClause = nullptr; const OMPClause *DependSinkClause = nullptr; @@ -11631,7 +11667,7 @@ StmtResult Sema::ActOnOpenMPOrderedDirective(ArrayRef Clauses, // An ordered construct with the simd clause is the only OpenMP construct // that can appear in the simd region. Diag(StartLoc, diag::err_omp_prohibited_region_simd) - << (LangOpts.OpenMP >= 50 ? 1 : 0); + << (getLangOpts().OpenMP >= 50 ? 
1 : 0); ErrorFound = true; } else if ((DependFound || DoacrossFound) && (TC || SC)) { SourceLocation Loc = @@ -11678,10 +11714,11 @@ StmtResult Sema::ActOnOpenMPOrderedDirective(ArrayRef Clauses, if (AStmt) { assert(isa(AStmt) && "Captured statement expected"); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); } - return OMPOrderedDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt); + return OMPOrderedDirective::Create(getASTContext(), StartLoc, EndLoc, Clauses, + AStmt); } namespace { @@ -12739,10 +12776,11 @@ bool OpenMPAtomicCompareCaptureChecker::checkStmt(Stmt *S, } } // namespace -StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPAtomicDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { + ASTContext &Context = getASTContext(); // Register location of the first atomic directive. 
DSAStack->addAtomicDirectiveLoc(StartLoc); if (!AStmt) @@ -12945,7 +12983,7 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, << ErrorFound << NoteRange; return StmtError(); } - if (CurContext->isDependentContext()) + if (SemaRef.CurContext->isDependentContext()) V = X = nullptr; } else if (AtomicKind == OMPC_write) { enum { @@ -13007,7 +13045,7 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, << ErrorFound << NoteRange; return StmtError(); } - if (CurContext->isDependentContext()) + if (SemaRef.CurContext->isDependentContext()) E = X = nullptr; } else if (AtomicKind == OMPC_update || AtomicKind == OMPC_unknown) { // If clause is update: @@ -13018,7 +13056,7 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, // x binop= expr; // x = x binop expr; // x = expr binop x; - OpenMPAtomicUpdateChecker Checker(*this); + OpenMPAtomicUpdateChecker Checker(SemaRef); if (Checker.checkStatement( Body, (AtomicKind == OMPC_update) @@ -13026,7 +13064,7 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, : diag::err_omp_atomic_not_expression_statement, diag::note_omp_atomic_update)) return StmtError(); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { E = Checker.getExpr(); X = Checker.getX(); UE = Checker.getUpdateExpr(); @@ -13056,7 +13094,7 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, if (AtomicBinOp && AtomicBinOp->getOpcode() == BO_Assign) { V = AtomicBinOp->getLHS(); Body = AtomicBinOp->getRHS()->IgnoreParenImpCasts(); - OpenMPAtomicUpdateChecker Checker(*this); + OpenMPAtomicUpdateChecker Checker(SemaRef); if (Checker.checkStatement( Body, diag::err_omp_atomic_capture_not_expression_statement, diag::note_omp_atomic_update)) @@ -13081,7 +13119,7 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, Diag(NoteLoc, diag::note_omp_atomic_capture) << ErrorFound << NoteRange; return StmtError(); } - if (CurContext->isDependentContext()) + if 
(SemaRef.CurContext->isDependentContext()) UE = V = E = X = nullptr; } else { // If clause is a capture: @@ -13110,14 +13148,14 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, if (auto *EWC = dyn_cast(Second)) Second = EWC->getSubExpr()->IgnoreParenImpCasts(); // Need to find what subexpression is 'v' and what is 'x'. - OpenMPAtomicUpdateChecker Checker(*this); + OpenMPAtomicUpdateChecker Checker(SemaRef); bool IsUpdateExprFound = !Checker.checkStatement(Second); BinaryOperator *BinOp = nullptr; if (IsUpdateExprFound) { BinOp = dyn_cast(First); IsUpdateExprFound = BinOp && BinOp->getOpcode() == BO_Assign; } - if (IsUpdateExprFound && !CurContext->isDependentContext()) { + if (IsUpdateExprFound && !SemaRef.CurContext->isDependentContext()) { // { v = x; x++; } // { v = x; x--; } // { v = x; ++x; } @@ -13147,7 +13185,8 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, BinOp = dyn_cast(Second); IsUpdateExprFound = BinOp && BinOp->getOpcode() == BO_Assign; } - if (IsUpdateExprFound && !CurContext->isDependentContext()) { + if (IsUpdateExprFound && + !SemaRef.CurContext->isDependentContext()) { // { x++; v = x; } // { x--; v = x; } // { ++x; v = x; } @@ -13244,12 +13283,12 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, Diag(NoteLoc, diag::note_omp_atomic_capture) << ErrorFound << NoteRange; return StmtError(); } - if (CurContext->isDependentContext()) + if (SemaRef.CurContext->isDependentContext()) UE = V = E = X = nullptr; } else if (AtomicKind == OMPC_compare) { if (IsCompareCapture) { OpenMPAtomicCompareCaptureChecker::ErrorInfoTy ErrorInfo; - OpenMPAtomicCompareCaptureChecker Checker(*this); + OpenMPAtomicCompareCaptureChecker Checker(SemaRef); if (!Checker.checkStmt(Body, ErrorInfo)) { Diag(ErrorInfo.ErrorLoc, diag::err_omp_atomic_compare_capture) << ErrorInfo.ErrorRange; @@ -13269,7 +13308,7 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, IsPostfixUpdate = Checker.isPostfixUpdate(); } else { 
OpenMPAtomicCompareChecker::ErrorInfoTy ErrorInfo; - OpenMPAtomicCompareChecker Checker(*this); + OpenMPAtomicCompareChecker Checker(SemaRef); if (!Checker.checkStmt(Body, ErrorInfo)) { Diag(ErrorInfo.ErrorLoc, diag::err_omp_atomic_compare) << ErrorInfo.ErrorRange; @@ -13307,17 +13346,17 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, } } - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPAtomicDirective::Create( Context, StartLoc, EndLoc, Clauses, AStmt, {X, V, R, E, UE, D, CE, IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly}); } -StmtResult Sema::ActOnOpenMPTargetDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPTargetDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); @@ -13374,15 +13413,15 @@ StmtResult Sema::ActOnOpenMPTargetDirective(ArrayRef Clauses, } } - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); - return OMPTargetDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt); + return OMPTargetDirective::Create(getASTContext(), StartLoc, EndLoc, Clauses, + AStmt); } -StmtResult -Sema::ActOnOpenMPTargetParallelDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPTargetParallelDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); @@ -13404,14 +13443,14 @@ Sema::ActOnOpenMPTargetParallelDirective(ArrayRef Clauses, CS->getCapturedDecl()->setNothrow(); } - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPTargetParallelDirective::Create( - Context, StartLoc, EndLoc, Clauses, AStmt, + getASTContext(), StartLoc, EndLoc, Clauses, AStmt, DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } 
-StmtResult Sema::ActOnOpenMPTargetParallelForDirective( +StmtResult SemaOpenMP::ActOnOpenMPTargetParallelForDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -13440,28 +13479,28 @@ StmtResult Sema::ActOnOpenMPTargetParallelForDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop(OMPD_target_parallel_for, getCollapseNumberExpr(Clauses), - getOrderedNumberExpr(Clauses), CS, *this, *DSAStack, + getOrderedNumberExpr(Clauses), CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp target parallel for loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. 
for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPTargetParallelForDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } @@ -13498,10 +13537,10 @@ static bool isClauseMappable(ArrayRef Clauses) { return true; } -StmtResult Sema::ActOnOpenMPTargetDataDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult +SemaOpenMP::ActOnOpenMPTargetDataDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); @@ -13511,9 +13550,10 @@ StmtResult Sema::ActOnOpenMPTargetDataDirective(ArrayRef Clauses, // At least one map, use_device_addr or use_device_ptr clause must appear on // the directive. 
if (!hasClauses(Clauses, OMPC_map, OMPC_use_device_ptr) && - (LangOpts.OpenMP < 50 || !hasClauses(Clauses, OMPC_use_device_addr))) { + (getLangOpts().OpenMP < 50 || + !hasClauses(Clauses, OMPC_use_device_addr))) { StringRef Expected; - if (LangOpts.OpenMP < 50) + if (getLangOpts().OpenMP < 50) Expected = "'map' or 'use_device_ptr'"; else Expected = "'map', 'use_device_ptr', or 'use_device_addr'"; @@ -13522,16 +13562,15 @@ StmtResult Sema::ActOnOpenMPTargetDataDirective(ArrayRef Clauses, return StmtError(); } - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); - return OMPTargetDataDirective::Create(Context, StartLoc, EndLoc, Clauses, - AStmt); + return OMPTargetDataDirective::Create(getASTContext(), StartLoc, EndLoc, + Clauses, AStmt); } -StmtResult -Sema::ActOnOpenMPTargetEnterDataDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc, Stmt *AStmt) { +StmtResult SemaOpenMP::ActOnOpenMPTargetEnterDataDirective( + ArrayRef Clauses, SourceLocation StartLoc, + SourceLocation EndLoc, Stmt *AStmt) { if (!AStmt) return StmtError(); @@ -13561,14 +13600,13 @@ Sema::ActOnOpenMPTargetEnterDataDirective(ArrayRef Clauses, return StmtError(); } - return OMPTargetEnterDataDirective::Create(Context, StartLoc, EndLoc, Clauses, - AStmt); + return OMPTargetEnterDataDirective::Create(getASTContext(), StartLoc, EndLoc, + Clauses, AStmt); } -StmtResult -Sema::ActOnOpenMPTargetExitDataDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc, Stmt *AStmt) { +StmtResult SemaOpenMP::ActOnOpenMPTargetExitDataDirective( + ArrayRef Clauses, SourceLocation StartLoc, + SourceLocation EndLoc, Stmt *AStmt) { if (!AStmt) return StmtError(); @@ -13598,14 +13636,13 @@ Sema::ActOnOpenMPTargetExitDataDirective(ArrayRef Clauses, return StmtError(); } - return OMPTargetExitDataDirective::Create(Context, StartLoc, EndLoc, Clauses, - AStmt); + return OMPTargetExitDataDirective::Create(getASTContext(), StartLoc, EndLoc, + 
Clauses, AStmt); } -StmtResult Sema::ActOnOpenMPTargetUpdateDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc, - Stmt *AStmt) { +StmtResult SemaOpenMP::ActOnOpenMPTargetUpdateDirective( + ArrayRef Clauses, SourceLocation StartLoc, + SourceLocation EndLoc, Stmt *AStmt) { if (!AStmt) return StmtError(); @@ -13637,13 +13674,14 @@ StmtResult Sema::ActOnOpenMPTargetUpdateDirective(ArrayRef Clauses, return StmtError(); } - return OMPTargetUpdateDirective::Create(Context, StartLoc, EndLoc, Clauses, - AStmt); + return OMPTargetUpdateDirective::Create(getASTContext(), StartLoc, EndLoc, + Clauses, AStmt); } -StmtResult Sema::ActOnOpenMPTeamsDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPTeamsDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); @@ -13659,17 +13697,17 @@ StmtResult Sema::ActOnOpenMPTeamsDirective(ArrayRef Clauses, // longjmp() and throw() must not violate the entry/exit criteria. 
CS->getCapturedDecl()->setNothrow(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); DSAStack->setParentTeamsRegionLoc(StartLoc); - return OMPTeamsDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt); + return OMPTeamsDirective::Create(getASTContext(), StartLoc, EndLoc, Clauses, + AStmt); } -StmtResult -Sema::ActOnOpenMPCancellationPointDirective(SourceLocation StartLoc, - SourceLocation EndLoc, - OpenMPDirectiveKind CancelRegion) { +StmtResult SemaOpenMP::ActOnOpenMPCancellationPointDirective( + SourceLocation StartLoc, SourceLocation EndLoc, + OpenMPDirectiveKind CancelRegion) { if (DSAStack->isParentNowaitRegion()) { Diag(StartLoc, diag::err_omp_parent_cancel_region_nowait) << 0; return StmtError(); @@ -13678,14 +13716,13 @@ Sema::ActOnOpenMPCancellationPointDirective(SourceLocation StartLoc, Diag(StartLoc, diag::err_omp_parent_cancel_region_ordered) << 0; return StmtError(); } - return OMPCancellationPointDirective::Create(Context, StartLoc, EndLoc, - CancelRegion); + return OMPCancellationPointDirective::Create(getASTContext(), StartLoc, + EndLoc, CancelRegion); } -StmtResult Sema::ActOnOpenMPCancelDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc, - OpenMPDirectiveKind CancelRegion) { +StmtResult SemaOpenMP::ActOnOpenMPCancelDirective( + ArrayRef Clauses, SourceLocation StartLoc, + SourceLocation EndLoc, OpenMPDirectiveKind CancelRegion) { if (DSAStack->isParentNowaitRegion()) { Diag(StartLoc, diag::err_omp_parent_cancel_region_nowait) << 1; return StmtError(); @@ -13695,7 +13732,7 @@ StmtResult Sema::ActOnOpenMPCancelDirective(ArrayRef Clauses, return StmtError(); } DSAStack->setParentCancelRegion(/*Cancel=*/true); - return OMPCancelDirective::Create(Context, StartLoc, EndLoc, Clauses, + return OMPCancelDirective::Create(getASTContext(), StartLoc, EndLoc, Clauses, CancelRegion); } @@ -13726,7 +13763,7 @@ static bool checkReductionClauseWithNogroup(Sema &S, return false; } 
-StmtResult Sema::ActOnOpenMPTaskLoopDirective( +StmtResult SemaOpenMP::ActOnOpenMPTaskLoopDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -13738,33 +13775,33 @@ StmtResult Sema::ActOnOpenMPTaskLoopDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop(OMPD_taskloop, getCollapseNumberExpr(Clauses), - /*OrderedLoopCountExpr=*/nullptr, AStmt, *this, *DSAStack, - VarsWithImplicitDSA, B); + /*OrderedLoopCountExpr=*/nullptr, AStmt, SemaRef, + *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); // OpenMP, [2.9.2 taskloop Construct, Restrictions] // The grainsize clause and num_tasks clause are mutually exclusive and may // not appear on the same taskloop directive. - if (checkMutuallyExclusiveClauses(*this, Clauses, + if (checkMutuallyExclusiveClauses(SemaRef, Clauses, {OMPC_grainsize, OMPC_num_tasks})) return StmtError(); // OpenMP, [2.9.2 taskloop Construct, Restrictions] // If a reduction clause is present on the taskloop directive, the nogroup // clause must not be specified. 
- if (checkReductionClauseWithNogroup(*this, Clauses)) + if (checkReductionClauseWithNogroup(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); - return OMPTaskLoopDirective::Create(Context, StartLoc, EndLoc, + SemaRef.setFunctionHasBranchProtectedScope(); + return OMPTaskLoopDirective::Create(getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, DSAStack->isCancelRegion()); } -StmtResult Sema::ActOnOpenMPTaskLoopSimdDirective( +StmtResult SemaOpenMP::ActOnOpenMPTaskLoopSimdDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -13776,21 +13813,21 @@ StmtResult Sema::ActOnOpenMPTaskLoopSimdDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop(OMPD_taskloop_simd, getCollapseNumberExpr(Clauses), - /*OrderedLoopCountExpr=*/nullptr, AStmt, *this, *DSAStack, - VarsWithImplicitDSA, B); + /*OrderedLoopCountExpr=*/nullptr, AStmt, SemaRef, + *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } @@ -13798,23 +13835,23 @@ StmtResult Sema::ActOnOpenMPTaskLoopSimdDirective( // OpenMP, [2.9.2 taskloop Construct, Restrictions] // The grainsize clause and num_tasks clause are mutually exclusive and may // not appear on the same taskloop directive. 
- if (checkMutuallyExclusiveClauses(*this, Clauses, + if (checkMutuallyExclusiveClauses(SemaRef, Clauses, {OMPC_grainsize, OMPC_num_tasks})) return StmtError(); // OpenMP, [2.9.2 taskloop Construct, Restrictions] // If a reduction clause is present on the taskloop directive, the nogroup // clause must not be specified. - if (checkReductionClauseWithNogroup(*this, Clauses)) + if (checkReductionClauseWithNogroup(SemaRef, Clauses)) return StmtError(); - if (checkSimdlenSafelenSpecified(*this, Clauses)) + if (checkSimdlenSafelenSpecified(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); - return OMPTaskLoopSimdDirective::Create(Context, StartLoc, EndLoc, + SemaRef.setFunctionHasBranchProtectedScope(); + return OMPTaskLoopSimdDirective::Create(getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPMasterTaskLoopDirective( +StmtResult SemaOpenMP::ActOnOpenMPMasterTaskLoopDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -13826,33 +13863,33 @@ StmtResult Sema::ActOnOpenMPMasterTaskLoopDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop(OMPD_master_taskloop, getCollapseNumberExpr(Clauses), - /*OrderedLoopCountExpr=*/nullptr, AStmt, *this, *DSAStack, - VarsWithImplicitDSA, B); + /*OrderedLoopCountExpr=*/nullptr, AStmt, SemaRef, + *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); // OpenMP, [2.9.2 taskloop Construct, Restrictions] // The grainsize clause and num_tasks clause are mutually exclusive and may // not appear on the same taskloop directive. 
- if (checkMutuallyExclusiveClauses(*this, Clauses, + if (checkMutuallyExclusiveClauses(SemaRef, Clauses, {OMPC_grainsize, OMPC_num_tasks})) return StmtError(); // OpenMP, [2.9.2 taskloop Construct, Restrictions] // If a reduction clause is present on the taskloop directive, the nogroup // clause must not be specified. - if (checkReductionClauseWithNogroup(*this, Clauses)) + if (checkReductionClauseWithNogroup(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); - return OMPMasterTaskLoopDirective::Create(Context, StartLoc, EndLoc, + SemaRef.setFunctionHasBranchProtectedScope(); + return OMPMasterTaskLoopDirective::Create(getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, DSAStack->isCancelRegion()); } -StmtResult Sema::ActOnOpenMPMaskedTaskLoopDirective( +StmtResult SemaOpenMP::ActOnOpenMPMaskedTaskLoopDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -13864,33 +13901,33 @@ StmtResult Sema::ActOnOpenMPMaskedTaskLoopDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop(OMPD_masked_taskloop, getCollapseNumberExpr(Clauses), - /*OrderedLoopCountExpr=*/nullptr, AStmt, *this, *DSAStack, - VarsWithImplicitDSA, B); + /*OrderedLoopCountExpr=*/nullptr, AStmt, SemaRef, + *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); // OpenMP, [2.9.2 taskloop Construct, Restrictions] // The grainsize clause and num_tasks clause are mutually exclusive and may // not appear on the same taskloop directive. 
- if (checkMutuallyExclusiveClauses(*this, Clauses, + if (checkMutuallyExclusiveClauses(SemaRef, Clauses, {OMPC_grainsize, OMPC_num_tasks})) return StmtError(); // OpenMP, [2.9.2 taskloop Construct, Restrictions] // If a reduction clause is present on the taskloop directive, the nogroup // clause must not be specified. - if (checkReductionClauseWithNogroup(*this, Clauses)) + if (checkReductionClauseWithNogroup(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); - return OMPMaskedTaskLoopDirective::Create(Context, StartLoc, EndLoc, + SemaRef.setFunctionHasBranchProtectedScope(); + return OMPMaskedTaskLoopDirective::Create(getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, DSAStack->isCancelRegion()); } -StmtResult Sema::ActOnOpenMPMasterTaskLoopSimdDirective( +StmtResult SemaOpenMP::ActOnOpenMPMasterTaskLoopSimdDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -13902,21 +13939,21 @@ StmtResult Sema::ActOnOpenMPMasterTaskLoopSimdDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop(OMPD_master_taskloop_simd, getCollapseNumberExpr(Clauses), - /*OrderedLoopCountExpr=*/nullptr, AStmt, *this, *DSAStack, - VarsWithImplicitDSA, B); + /*OrderedLoopCountExpr=*/nullptr, AStmt, SemaRef, + *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. 
for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } @@ -13924,23 +13961,23 @@ StmtResult Sema::ActOnOpenMPMasterTaskLoopSimdDirective( // OpenMP, [2.9.2 taskloop Construct, Restrictions] // The grainsize clause and num_tasks clause are mutually exclusive and may // not appear on the same taskloop directive. - if (checkMutuallyExclusiveClauses(*this, Clauses, + if (checkMutuallyExclusiveClauses(SemaRef, Clauses, {OMPC_grainsize, OMPC_num_tasks})) return StmtError(); // OpenMP, [2.9.2 taskloop Construct, Restrictions] // If a reduction clause is present on the taskloop directive, the nogroup // clause must not be specified. - if (checkReductionClauseWithNogroup(*this, Clauses)) + if (checkReductionClauseWithNogroup(SemaRef, Clauses)) return StmtError(); - if (checkSimdlenSafelenSpecified(*this, Clauses)) + if (checkSimdlenSafelenSpecified(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPMasterTaskLoopSimdDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPMaskedTaskLoopSimdDirective( +StmtResult SemaOpenMP::ActOnOpenMPMaskedTaskLoopSimdDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -13952,21 +13989,21 @@ StmtResult Sema::ActOnOpenMPMaskedTaskLoopSimdDirective( // define the nested loops number. 
unsigned NestedLoopCount = checkOpenMPLoop(OMPD_masked_taskloop_simd, getCollapseNumberExpr(Clauses), - /*OrderedLoopCountExpr=*/nullptr, AStmt, *this, *DSAStack, - VarsWithImplicitDSA, B); + /*OrderedLoopCountExpr=*/nullptr, AStmt, SemaRef, + *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } @@ -13974,23 +14011,23 @@ StmtResult Sema::ActOnOpenMPMaskedTaskLoopSimdDirective( // OpenMP, [2.9.2 taskloop Construct, Restrictions] // The grainsize clause and num_tasks clause are mutually exclusive and may // not appear on the same taskloop directive. - if (checkMutuallyExclusiveClauses(*this, Clauses, + if (checkMutuallyExclusiveClauses(SemaRef, Clauses, {OMPC_grainsize, OMPC_num_tasks})) return StmtError(); // OpenMP, [2.9.2 taskloop Construct, Restrictions] // If a reduction clause is present on the taskloop directive, the nogroup // clause must not be specified. 
- if (checkReductionClauseWithNogroup(*this, Clauses)) + if (checkReductionClauseWithNogroup(SemaRef, Clauses)) return StmtError(); - if (checkSimdlenSafelenSpecified(*this, Clauses)) + if (checkSimdlenSafelenSpecified(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPMaskedTaskLoopSimdDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPParallelMasterTaskLoopDirective( +StmtResult SemaOpenMP::ActOnOpenMPParallelMasterTaskLoopDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -14021,33 +14058,33 @@ StmtResult Sema::ActOnOpenMPParallelMasterTaskLoopDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( OMPD_parallel_master_taskloop, getCollapseNumberExpr(Clauses), - /*OrderedLoopCountExpr=*/nullptr, CS, *this, *DSAStack, + /*OrderedLoopCountExpr=*/nullptr, CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); // OpenMP, [2.9.2 taskloop Construct, Restrictions] // The grainsize clause and num_tasks clause are mutually exclusive and may // not appear on the same taskloop directive. - if (checkMutuallyExclusiveClauses(*this, Clauses, + if (checkMutuallyExclusiveClauses(SemaRef, Clauses, {OMPC_grainsize, OMPC_num_tasks})) return StmtError(); // OpenMP, [2.9.2 taskloop Construct, Restrictions] // If a reduction clause is present on the taskloop directive, the nogroup // clause must not be specified. 
- if (checkReductionClauseWithNogroup(*this, Clauses)) + if (checkReductionClauseWithNogroup(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPParallelMasterTaskLoopDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, DSAStack->isCancelRegion()); } -StmtResult Sema::ActOnOpenMPParallelMaskedTaskLoopDirective( +StmtResult SemaOpenMP::ActOnOpenMPParallelMaskedTaskLoopDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -14078,33 +14115,33 @@ StmtResult Sema::ActOnOpenMPParallelMaskedTaskLoopDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( OMPD_parallel_masked_taskloop, getCollapseNumberExpr(Clauses), - /*OrderedLoopCountExpr=*/nullptr, CS, *this, *DSAStack, + /*OrderedLoopCountExpr=*/nullptr, CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); // OpenMP, [2.9.2 taskloop Construct, Restrictions] // The grainsize clause and num_tasks clause are mutually exclusive and may // not appear on the same taskloop directive. - if (checkMutuallyExclusiveClauses(*this, Clauses, + if (checkMutuallyExclusiveClauses(SemaRef, Clauses, {OMPC_grainsize, OMPC_num_tasks})) return StmtError(); // OpenMP, [2.9.2 taskloop Construct, Restrictions] // If a reduction clause is present on the taskloop directive, the nogroup // clause must not be specified. 
- if (checkReductionClauseWithNogroup(*this, Clauses)) + if (checkReductionClauseWithNogroup(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPParallelMaskedTaskLoopDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, DSAStack->isCancelRegion()); } -StmtResult Sema::ActOnOpenMPParallelMasterTaskLoopSimdDirective( +StmtResult SemaOpenMP::ActOnOpenMPParallelMasterTaskLoopSimdDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -14135,21 +14172,21 @@ StmtResult Sema::ActOnOpenMPParallelMasterTaskLoopSimdDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( OMPD_parallel_master_taskloop_simd, getCollapseNumberExpr(Clauses), - /*OrderedLoopCountExpr=*/nullptr, CS, *this, *DSAStack, + /*OrderedLoopCountExpr=*/nullptr, CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } @@ -14157,23 +14194,23 @@ StmtResult Sema::ActOnOpenMPParallelMasterTaskLoopSimdDirective( // OpenMP, [2.9.2 taskloop Construct, Restrictions] // The grainsize clause and num_tasks clause are mutually exclusive and may // not appear on the same taskloop directive. 
- if (checkMutuallyExclusiveClauses(*this, Clauses, + if (checkMutuallyExclusiveClauses(SemaRef, Clauses, {OMPC_grainsize, OMPC_num_tasks})) return StmtError(); // OpenMP, [2.9.2 taskloop Construct, Restrictions] // If a reduction clause is present on the taskloop directive, the nogroup // clause must not be specified. - if (checkReductionClauseWithNogroup(*this, Clauses)) + if (checkReductionClauseWithNogroup(SemaRef, Clauses)) return StmtError(); - if (checkSimdlenSafelenSpecified(*this, Clauses)) + if (checkSimdlenSafelenSpecified(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPParallelMasterTaskLoopSimdDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPParallelMaskedTaskLoopSimdDirective( +StmtResult SemaOpenMP::ActOnOpenMPParallelMaskedTaskLoopSimdDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -14204,21 +14241,21 @@ StmtResult Sema::ActOnOpenMPParallelMaskedTaskLoopSimdDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( OMPD_parallel_masked_taskloop_simd, getCollapseNumberExpr(Clauses), - /*OrderedLoopCountExpr=*/nullptr, CS, *this, *DSAStack, + /*OrderedLoopCountExpr=*/nullptr, CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. 
for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } @@ -14226,23 +14263,23 @@ StmtResult Sema::ActOnOpenMPParallelMaskedTaskLoopSimdDirective( // OpenMP, [2.9.2 taskloop Construct, Restrictions] // The grainsize clause and num_tasks clause are mutually exclusive and may // not appear on the same taskloop directive. - if (checkMutuallyExclusiveClauses(*this, Clauses, + if (checkMutuallyExclusiveClauses(SemaRef, Clauses, {OMPC_grainsize, OMPC_num_tasks})) return StmtError(); // OpenMP, [2.9.2 taskloop Construct, Restrictions] // If a reduction clause is present on the taskloop directive, the nogroup // clause must not be specified. - if (checkReductionClauseWithNogroup(*this, Clauses)) + if (checkReductionClauseWithNogroup(SemaRef, Clauses)) return StmtError(); - if (checkSimdlenSafelenSpecified(*this, Clauses)) + if (checkSimdlenSafelenSpecified(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPParallelMaskedTaskLoopSimdDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPDistributeDirective( +StmtResult SemaOpenMP::ActOnOpenMPDistributeDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -14258,21 +14295,21 @@ StmtResult Sema::ActOnOpenMPDistributeDirective( unsigned NestedLoopCount = checkOpenMPLoop(OMPD_distribute, getCollapseNumberExpr(Clauses), nullptr /*ordered not a clause on distribute*/, AStmt, - *this, *DSAStack, VarsWithImplicitDSA, B); + SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - 
assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); auto *DistributeDirective = OMPDistributeDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, DSAStack->getMappedDirective()); return DistributeDirective; } -StmtResult Sema::ActOnOpenMPDistributeParallelForDirective( +StmtResult SemaOpenMP::ActOnOpenMPDistributeParallelForDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -14302,21 +14339,21 @@ StmtResult Sema::ActOnOpenMPDistributeParallelForDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( OMPD_distribute_parallel_for, getCollapseNumberExpr(Clauses), - nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack, + nullptr /*ordered not a clause on distribute*/, CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPDistributeParallelForDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } -StmtResult Sema::ActOnOpenMPDistributeParallelForSimdDirective( +StmtResult SemaOpenMP::ActOnOpenMPDistributeParallelForSimdDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType 
&VarsWithImplicitDSA) { if (!AStmt) @@ -14346,34 +14383,34 @@ StmtResult Sema::ActOnOpenMPDistributeParallelForSimdDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( OMPD_distribute_parallel_for_simd, getCollapseNumberExpr(Clauses), - nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack, + nullptr /*ordered not a clause on distribute*/, CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } - if (checkSimdlenSafelenSpecified(*this, Clauses)) + if (checkSimdlenSafelenSpecified(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPDistributeParallelForSimdDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPDistributeSimdDirective( +StmtResult SemaOpenMP::ActOnOpenMPDistributeSimdDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -14402,34 +14439,34 @@ StmtResult Sema::ActOnOpenMPDistributeSimdDirective( // define the nested loops number. 
unsigned NestedLoopCount = checkOpenMPLoop(OMPD_distribute_simd, getCollapseNumberExpr(Clauses), - nullptr /*ordered not a clause on distribute*/, CS, *this, - *DSAStack, VarsWithImplicitDSA, B); + nullptr /*ordered not a clause on distribute*/, CS, + SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } - if (checkSimdlenSafelenSpecified(*this, Clauses)) + if (checkSimdlenSafelenSpecified(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); - return OMPDistributeSimdDirective::Create(Context, StartLoc, EndLoc, + SemaRef.setFunctionHasBranchProtectedScope(); + return OMPDistributeSimdDirective::Create(getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPTargetParallelForSimdDirective( +StmtResult SemaOpenMP::ActOnOpenMPTargetParallelForSimdDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -14459,33 +14496,33 @@ StmtResult Sema::ActOnOpenMPTargetParallelForSimdDirective( // define the nested loops number. 
unsigned NestedLoopCount = checkOpenMPLoop( OMPD_target_parallel_for_simd, getCollapseNumberExpr(Clauses), - getOrderedNumberExpr(Clauses), CS, *this, *DSAStack, VarsWithImplicitDSA, - B); + getOrderedNumberExpr(Clauses), CS, SemaRef, *DSAStack, + VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp target parallel for simd loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } - if (checkSimdlenSafelenSpecified(*this, Clauses)) + if (checkSimdlenSafelenSpecified(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPTargetParallelForSimdDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPTargetSimdDirective( +StmtResult SemaOpenMP::ActOnOpenMPTargetSimdDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -14514,34 +14551,34 @@ StmtResult Sema::ActOnOpenMPTargetSimdDirective( // nested loops number. 
unsigned NestedLoopCount = checkOpenMPLoop(OMPD_target_simd, getCollapseNumberExpr(Clauses), - getOrderedNumberExpr(Clauses), CS, *this, *DSAStack, + getOrderedNumberExpr(Clauses), CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp target simd loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } - if (checkSimdlenSafelenSpecified(*this, Clauses)) + if (checkSimdlenSafelenSpecified(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); - return OMPTargetSimdDirective::Create(Context, StartLoc, EndLoc, + SemaRef.setFunctionHasBranchProtectedScope(); + return OMPTargetSimdDirective::Create(getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPTeamsDistributeDirective( +StmtResult SemaOpenMP::ActOnOpenMPTeamsDistributeDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -14570,23 +14607,23 @@ StmtResult Sema::ActOnOpenMPTeamsDistributeDirective( // define the nested loops number. 
unsigned NestedLoopCount = checkOpenMPLoop(OMPD_teams_distribute, getCollapseNumberExpr(Clauses), - nullptr /*ordered not a clause on distribute*/, CS, *this, - *DSAStack, VarsWithImplicitDSA, B); + nullptr /*ordered not a clause on distribute*/, CS, + SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp teams distribute loop exprs were not built"); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); DSAStack->setParentTeamsRegionLoc(StartLoc); return OMPTeamsDistributeDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPTeamsDistributeSimdDirective( +StmtResult SemaOpenMP::ActOnOpenMPTeamsDistributeSimdDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -14616,38 +14653,38 @@ StmtResult Sema::ActOnOpenMPTeamsDistributeSimdDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( OMPD_teams_distribute_simd, getCollapseNumberExpr(Clauses), - nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack, + nullptr /*ordered not a clause on distribute*/, CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp teams distribute simd loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. 
for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } - if (checkSimdlenSafelenSpecified(*this, Clauses)) + if (checkSimdlenSafelenSpecified(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); DSAStack->setParentTeamsRegionLoc(StartLoc); return OMPTeamsDistributeSimdDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPTeamsDistributeParallelForSimdDirective( +StmtResult SemaOpenMP::ActOnOpenMPTeamsDistributeParallelForSimdDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -14678,38 +14715,38 @@ StmtResult Sema::ActOnOpenMPTeamsDistributeParallelForSimdDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( OMPD_teams_distribute_parallel_for_simd, getCollapseNumberExpr(Clauses), - nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack, + nullptr /*ordered not a clause on distribute*/, CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. 
for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } - if (checkSimdlenSafelenSpecified(*this, Clauses)) + if (checkSimdlenSafelenSpecified(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); DSAStack->setParentTeamsRegionLoc(StartLoc); return OMPTeamsDistributeParallelForSimdDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPTeamsDistributeParallelForDirective( +StmtResult SemaOpenMP::ActOnOpenMPTeamsDistributeParallelForDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -14740,28 +14777,27 @@ StmtResult Sema::ActOnOpenMPTeamsDistributeParallelForDirective( // define the nested loops number. 
unsigned NestedLoopCount = checkOpenMPLoop( OMPD_teams_distribute_parallel_for, getCollapseNumberExpr(Clauses), - nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack, + nullptr /*ordered not a clause on distribute*/, CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp for loop exprs were not built"); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); DSAStack->setParentTeamsRegionLoc(StartLoc); return OMPTeamsDistributeParallelForDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } -StmtResult Sema::ActOnOpenMPTargetTeamsDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); @@ -14783,7 +14819,7 @@ StmtResult Sema::ActOnOpenMPTargetTeamsDirective(ArrayRef Clauses, // longjmp() and throw() must not violate the entry/exit criteria. 
CS->getCapturedDecl()->setNothrow(); } - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); const OMPClause *BareClause = nullptr; bool HasThreadLimitAndNumTeamsClause = hasClauses(Clauses, OMPC_num_teams) && @@ -14798,11 +14834,11 @@ StmtResult Sema::ActOnOpenMPTargetTeamsDirective(ArrayRef Clauses, return StmtError(); } - return OMPTargetTeamsDirective::Create(Context, StartLoc, EndLoc, Clauses, - AStmt); + return OMPTargetTeamsDirective::Create(getASTContext(), StartLoc, EndLoc, + Clauses, AStmt); } -StmtResult Sema::ActOnOpenMPTargetTeamsDistributeDirective( +StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -14832,20 +14868,20 @@ StmtResult Sema::ActOnOpenMPTargetTeamsDistributeDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( OMPD_target_teams_distribute, getCollapseNumberExpr(Clauses), - nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack, + nullptr /*ordered not a clause on distribute*/, CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp target teams distribute loop exprs were not built"); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPTargetTeamsDistributeDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForDirective( +StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeParallelForDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType 
&VarsWithImplicitDSA) { if (!AStmt) @@ -14875,32 +14911,32 @@ StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForDirective( // define the nested loops number. unsigned NestedLoopCount = checkOpenMPLoop( OMPD_target_teams_distribute_parallel_for, getCollapseNumberExpr(Clauses), - nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack, + nullptr /*ordered not a clause on distribute*/, CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp target teams distribute parallel for loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPTargetTeamsDistributeParallelForDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } -StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective( +StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -14931,35 +14967,35 @@ StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective( unsigned NestedLoopCount = checkOpenMPLoop(OMPD_target_teams_distribute_parallel_for_simd, 
getCollapseNumberExpr(Clauses), - nullptr /*ordered not a clause on distribute*/, CS, *this, - *DSAStack, VarsWithImplicitDSA, B); + nullptr /*ordered not a clause on distribute*/, CS, + SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp target teams distribute parallel for simd loop exprs were not " "built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } - if (checkSimdlenSafelenSpecified(*this, Clauses)) + if (checkSimdlenSafelenSpecified(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPTargetTeamsDistributeParallelForSimdDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -StmtResult Sema::ActOnOpenMPTargetTeamsDistributeSimdDirective( +StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { if (!AStmt) @@ -14989,34 +15025,34 @@ StmtResult Sema::ActOnOpenMPTargetTeamsDistributeSimdDirective( // define the nested loops number. 
unsigned NestedLoopCount = checkOpenMPLoop( OMPD_target_teams_distribute_simd, getCollapseNumberExpr(Clauses), - nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack, + nullptr /*ordered not a clause on distribute*/, CS, SemaRef, *DSAStack, VarsWithImplicitDSA, B); if (NestedLoopCount == 0) return StmtError(); - assert((CurContext->isDependentContext() || B.builtAll()) && + assert((SemaRef.CurContext->isDependentContext() || B.builtAll()) && "omp target teams distribute simd loop exprs were not built"); - if (!CurContext->isDependentContext()) { + if (!SemaRef.CurContext->isDependentContext()) { // Finalize the clauses that need pre-built expressions for CodeGen. for (OMPClause *C : Clauses) { if (auto *LC = dyn_cast(C)) if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), - B.NumIterations, *this, CurScope, - DSAStack)) + B.NumIterations, SemaRef, + SemaRef.getCurScope(), DSAStack)) return StmtError(); } } - if (checkSimdlenSafelenSpecified(*this, Clauses)) + if (checkSimdlenSafelenSpecified(SemaRef, Clauses)) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); return OMPTargetTeamsDistributeSimdDirective::Create( - Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); + getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -bool Sema::checkTransformableLoopNest( +bool SemaOpenMP::checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl &LoopHelpers, Stmt *&Body, @@ -15029,7 +15065,7 @@ bool Sema::checkTransformableLoopNest( Stmt *CurStmt) { VarsWithInheritedDSAType TmpDSA; unsigned SingleNumLoops = - checkOpenMPLoop(Kind, nullptr, nullptr, CurStmt, *this, *DSAStack, + checkOpenMPLoop(Kind, nullptr, nullptr, CurStmt, SemaRef, *DSAStack, TmpDSA, LoopHelpers[Cnt]); if (SingleNumLoops == 0) return true; @@ -15065,9 +15101,11 @@ bool Sema::checkTransformableLoopNest( return Result; } -StmtResult 
Sema::ActOnOpenMPTileDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { + ASTContext &Context = getASTContext(); auto SizesClauses = OMPExecutableDirective::getClausesOfKind(Clauses); if (SizesClauses.empty()) { @@ -15091,7 +15129,7 @@ StmtResult Sema::ActOnOpenMPTileDirective(ArrayRef Clauses, return StmtError(); // Delay tiling to when template is completely instantiated. - if (CurContext->isDependentContext()) + if (SemaRef.CurContext->isDependentContext()) return OMPTileDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops, AStmt, nullptr, nullptr); @@ -15117,7 +15155,7 @@ StmtResult Sema::ActOnOpenMPTileDirective(ArrayRef Clauses, std::string FloorCntName = (Twine(".floor_") + llvm::utostr(I) + ".iv." + OrigVarName).str(); VarDecl *FloorCntDecl = - buildVarDecl(*this, {}, CntTy, FloorCntName, nullptr, OrigCntVar); + buildVarDecl(SemaRef, {}, CntTy, FloorCntName, nullptr, OrigCntVar); FloorIndVars[I] = FloorCntDecl; } @@ -15130,7 +15168,8 @@ StmtResult Sema::ActOnOpenMPTileDirective(ArrayRef Clauses, // used by the expressions to derive the original iteration variable's // value from the logical iteration number. auto *TileCntDecl = cast(IterVarRef->getDecl()); - TileCntDecl->setDeclName(&PP.getIdentifierTable().get(TileCntName)); + TileCntDecl->setDeclName( + &SemaRef.PP.getIdentifierTable().get(TileCntName)); TileIndVars[I] = TileCntDecl; } for (auto &P : OriginalInits[I]) { @@ -15159,17 +15198,18 @@ StmtResult Sema::ActOnOpenMPTileDirective(ArrayRef Clauses, auto *OrigCntVar = cast(LoopHelper.Counters[0]); QualType CntTy = OrigCntVar->getType(); Expr *DimTileSize = SizesClause->getSizesRefs()[I]; - Scope *CurScope = getCurScope(); + Scope *CurScope = SemaRef.getCurScope(); // Commonly used variables. 
- DeclRefExpr *TileIV = buildDeclRefExpr(*this, TileIndVars[I], CntTy, + DeclRefExpr *TileIV = buildDeclRefExpr(SemaRef, TileIndVars[I], CntTy, OrigCntVar->getExprLoc()); - DeclRefExpr *FloorIV = buildDeclRefExpr(*this, FloorIndVars[I], CntTy, + DeclRefExpr *FloorIV = buildDeclRefExpr(SemaRef, FloorIndVars[I], CntTy, OrigCntVar->getExprLoc()); // For init-statement: auto .tile.iv = .floor.iv - AddInitializerToDecl(TileIndVars[I], DefaultLvalueConversion(FloorIV).get(), - /*DirectInit=*/false); + SemaRef.AddInitializerToDecl(TileIndVars[I], + SemaRef.DefaultLvalueConversion(FloorIV).get(), + /*DirectInit=*/false); Decl *CounterDecl = TileIndVars[I]; StmtResult InitStmt = new (Context) DeclStmt(DeclGroupRef::Create(Context, &CounterDecl, 1), @@ -15179,28 +15219,29 @@ StmtResult Sema::ActOnOpenMPTileDirective(ArrayRef Clauses, // For cond-expression: .tile.iv < min(.floor.iv + DimTileSize, // NumIterations) - ExprResult EndOfTile = BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), - BO_Add, FloorIV, DimTileSize); + ExprResult EndOfTile = SemaRef.BuildBinOp( + CurScope, LoopHelper.Cond->getExprLoc(), BO_Add, FloorIV, DimTileSize); if (!EndOfTile.isUsable()) return StmtError(); ExprResult IsPartialTile = - BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, - NumIterations, EndOfTile.get()); + SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, + NumIterations, EndOfTile.get()); if (!IsPartialTile.isUsable()) return StmtError(); - ExprResult MinTileAndIterSpace = ActOnConditionalOp( + ExprResult MinTileAndIterSpace = SemaRef.ActOnConditionalOp( LoopHelper.Cond->getBeginLoc(), LoopHelper.Cond->getEndLoc(), IsPartialTile.get(), NumIterations, EndOfTile.get()); if (!MinTileAndIterSpace.isUsable()) return StmtError(); - ExprResult CondExpr = BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), - BO_LT, TileIV, MinTileAndIterSpace.get()); + ExprResult CondExpr = + SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, + TileIV, 
MinTileAndIterSpace.get()); if (!CondExpr.isUsable()) return StmtError(); // For incr-statement: ++.tile.iv - ExprResult IncrStmt = - BuildUnaryOp(CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, TileIV); + ExprResult IncrStmt = SemaRef.BuildUnaryOp( + CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, TileIV); if (!IncrStmt.isUsable()) return StmtError(); @@ -15235,16 +15276,16 @@ StmtResult Sema::ActOnOpenMPTileDirective(ArrayRef Clauses, DeclRefExpr *OrigCntVar = cast(LoopHelper.Counters[0]); QualType CntTy = OrigCntVar->getType(); Expr *DimTileSize = SizesClause->getSizesRefs()[I]; - Scope *CurScope = getCurScope(); + Scope *CurScope = SemaRef.getCurScope(); // Commonly used variables. - DeclRefExpr *FloorIV = buildDeclRefExpr(*this, FloorIndVars[I], CntTy, + DeclRefExpr *FloorIV = buildDeclRefExpr(SemaRef, FloorIndVars[I], CntTy, OrigCntVar->getExprLoc()); // For init-statement: auto .floor.iv = 0 - AddInitializerToDecl( + SemaRef.AddInitializerToDecl( FloorIndVars[I], - ActOnIntegerConstant(LoopHelper.Init->getExprLoc(), 0).get(), + SemaRef.ActOnIntegerConstant(LoopHelper.Init->getExprLoc(), 0).get(), /*DirectInit=*/false); Decl *CounterDecl = FloorIndVars[I]; StmtResult InitStmt = new (Context) @@ -15254,14 +15295,15 @@ StmtResult Sema::ActOnOpenMPTileDirective(ArrayRef Clauses, return StmtError(); // For cond-expression: .floor.iv < NumIterations - ExprResult CondExpr = BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), - BO_LT, FloorIV, NumIterations); + ExprResult CondExpr = SemaRef.BuildBinOp( + CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, FloorIV, NumIterations); if (!CondExpr.isUsable()) return StmtError(); // For incr-statement: .floor.iv += DimTileSize - ExprResult IncrStmt = BuildBinOp(CurScope, LoopHelper.Inc->getExprLoc(), - BO_AddAssign, FloorIV, DimTileSize); + ExprResult IncrStmt = + SemaRef.BuildBinOp(CurScope, LoopHelper.Inc->getExprLoc(), BO_AddAssign, + FloorIV, DimTileSize); if (!IncrStmt.isUsable()) return StmtError(); @@ -15276,15 
+15318,18 @@ StmtResult Sema::ActOnOpenMPTileDirective(ArrayRef Clauses, buildPreInits(Context, PreInits)); } -StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef Clauses, - Stmt *AStmt, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { + ASTContext &Context = getASTContext(); + Scope *CurScope = SemaRef.getCurScope(); // Empty statement should only be possible if there already was an error. if (!AStmt) return StmtError(); - if (checkMutuallyExclusiveClauses(*this, Clauses, {OMPC_partial, OMPC_full})) + if (checkMutuallyExclusiveClauses(SemaRef, Clauses, + {OMPC_partial, OMPC_full})) return StmtError(); const OMPFullClause *FullClause = @@ -15307,7 +15352,7 @@ StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef Clauses, unsigned NumGeneratedLoops = PartialClause ? 1 : 0; // Delay unrolling to when template is completely instantiated. - if (CurContext->isDependentContext()) + if (SemaRef.CurContext->isDependentContext()) return OMPUnrollDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt, NumGeneratedLoops, nullptr, nullptr); @@ -15412,8 +15457,8 @@ StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef Clauses, assert(Factor > 0 && "Expected positive unroll factor"); auto MakeFactorExpr = [this, Factor, IVTy, FactorLoc]() { return IntegerLiteral::Create( - Context, llvm::APInt(Context.getIntWidth(IVTy), Factor), IVTy, - FactorLoc); + getASTContext(), llvm::APInt(getASTContext().getIntWidth(IVTy), Factor), + IVTy, FactorLoc); }; // Iteration variable SourceLocations. @@ -15430,30 +15475,31 @@ StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef Clauses, // Create the iteration variable for the unrolled loop. 
VarDecl *OuterIVDecl = - buildVarDecl(*this, {}, IVTy, OuterIVName, nullptr, OrigVar); + buildVarDecl(SemaRef, {}, IVTy, OuterIVName, nullptr, OrigVar); auto MakeOuterRef = [this, OuterIVDecl, IVTy, OrigVarLoc]() { - return buildDeclRefExpr(*this, OuterIVDecl, IVTy, OrigVarLoc); + return buildDeclRefExpr(SemaRef, OuterIVDecl, IVTy, OrigVarLoc); }; // Iteration variable for the inner loop: Reuse the iteration variable created // by checkOpenMPLoop. auto *InnerIVDecl = cast(IterationVarRef->getDecl()); - InnerIVDecl->setDeclName(&PP.getIdentifierTable().get(InnerIVName)); + InnerIVDecl->setDeclName(&SemaRef.PP.getIdentifierTable().get(InnerIVName)); auto MakeInnerRef = [this, InnerIVDecl, IVTy, OrigVarLoc]() { - return buildDeclRefExpr(*this, InnerIVDecl, IVTy, OrigVarLoc); + return buildDeclRefExpr(SemaRef, InnerIVDecl, IVTy, OrigVarLoc); }; // Make a copy of the NumIterations expression for each use: By the AST // constraints, every expression object in a DeclContext must be unique. - CaptureVars CopyTransformer(*this); + CaptureVars CopyTransformer(SemaRef); auto MakeNumIterations = [&CopyTransformer, &LoopHelper]() -> Expr * { return AssertSuccess( CopyTransformer.TransformExpr(LoopHelper.NumIterations)); }; // Inner For init-statement: auto .unroll_inner.iv = .unrolled.iv - ExprResult LValueConv = DefaultLvalueConversion(MakeOuterRef()); - AddInitializerToDecl(InnerIVDecl, LValueConv.get(), /*DirectInit=*/false); + ExprResult LValueConv = SemaRef.DefaultLvalueConversion(MakeOuterRef()); + SemaRef.AddInitializerToDecl(InnerIVDecl, LValueConv.get(), + /*DirectInit=*/false); StmtResult InnerInit = new (Context) DeclStmt(DeclGroupRef(InnerIVDecl), OrigVarLocBegin, OrigVarLocEnd); if (!InnerInit.isUsable()) @@ -15466,28 +15512,30 @@ StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef Clauses, // \endcode // This conjunction of two conditions allows ScalarEvolution to derive the // maximum trip count of the inner loop. 
- ExprResult EndOfTile = BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), - BO_Add, MakeOuterRef(), MakeFactorExpr()); + ExprResult EndOfTile = + SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_Add, + MakeOuterRef(), MakeFactorExpr()); if (!EndOfTile.isUsable()) return StmtError(); - ExprResult InnerCond1 = BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), - BO_LT, MakeInnerRef(), EndOfTile.get()); + ExprResult InnerCond1 = + SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, + MakeInnerRef(), EndOfTile.get()); if (!InnerCond1.isUsable()) return StmtError(); ExprResult InnerCond2 = - BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, MakeInnerRef(), - MakeNumIterations()); + SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, + MakeInnerRef(), MakeNumIterations()); if (!InnerCond2.isUsable()) return StmtError(); ExprResult InnerCond = - BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LAnd, - InnerCond1.get(), InnerCond2.get()); + SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LAnd, + InnerCond1.get(), InnerCond2.get()); if (!InnerCond.isUsable()) return StmtError(); // Inner For incr-statement: ++.unroll_inner.iv - ExprResult InnerIncr = BuildUnaryOp(CurScope, LoopHelper.Inc->getExprLoc(), - UO_PreInc, MakeInnerRef()); + ExprResult InnerIncr = SemaRef.BuildUnaryOp( + CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, MakeInnerRef()); if (!InnerIncr.isUsable()) return StmtError(); @@ -15496,7 +15544,7 @@ StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef Clauses, InnerBodyStmts.append(LoopHelper.Updates.begin(), LoopHelper.Updates.end()); InnerBodyStmts.push_back(Body); CompoundStmt *InnerBody = - CompoundStmt::Create(Context, InnerBodyStmts, FPOptionsOverride(), + CompoundStmt::Create(getASTContext(), InnerBodyStmts, FPOptionsOverride(), Body->getBeginLoc(), Body->getEndLoc()); ForStmt *InnerFor = new (Context) ForStmt(Context, InnerInit.get(), InnerCond.get(), nullptr, @@ -15518,12 
+15566,13 @@ StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef Clauses, LoopHintAttr *UnrollHintAttr = LoopHintAttr::CreateImplicit(Context, LoopHintAttr::UnrollCount, LoopHintAttr::Numeric, MakeFactorExpr()); - AttributedStmt *InnerUnrolled = - AttributedStmt::Create(Context, StartLoc, {UnrollHintAttr}, InnerFor); + AttributedStmt *InnerUnrolled = AttributedStmt::Create( + getASTContext(), StartLoc, {UnrollHintAttr}, InnerFor); // Outer For init-statement: auto .unrolled.iv = 0 - AddInitializerToDecl( - OuterIVDecl, ActOnIntegerConstant(LoopHelper.Init->getExprLoc(), 0).get(), + SemaRef.AddInitializerToDecl( + OuterIVDecl, + SemaRef.ActOnIntegerConstant(LoopHelper.Init->getExprLoc(), 0).get(), /*DirectInit=*/false); StmtResult OuterInit = new (Context) DeclStmt(DeclGroupRef(OuterIVDecl), OrigVarLocBegin, OrigVarLocEnd); @@ -15532,15 +15581,15 @@ StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef Clauses, // Outer For cond-expression: .unrolled.iv < NumIterations ExprResult OuterConde = - BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, MakeOuterRef(), - MakeNumIterations()); + SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, + MakeOuterRef(), MakeNumIterations()); if (!OuterConde.isUsable()) return StmtError(); // Outer For incr-statement: .unrolled.iv += Factor ExprResult OuterIncr = - BuildBinOp(CurScope, LoopHelper.Inc->getExprLoc(), BO_AddAssign, - MakeOuterRef(), MakeFactorExpr()); + SemaRef.BuildBinOp(CurScope, LoopHelper.Inc->getExprLoc(), BO_AddAssign, + MakeOuterRef(), MakeFactorExpr()); if (!OuterIncr.isUsable()) return StmtError(); @@ -15555,10 +15604,11 @@ StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef Clauses, buildPreInits(Context, PreInits)); } -OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, + Expr *Expr, + SourceLocation 
StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { OMPClause *Res = nullptr; switch (Kind) { case OMPC_final: @@ -16646,19 +16696,17 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( return CaptureRegion; } -OMPClause *Sema::ActOnOpenMPIfClause(OpenMPDirectiveKind NameModifier, - Expr *Condition, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation NameModifierLoc, - SourceLocation ColonLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPIfClause( + OpenMPDirectiveKind NameModifier, Expr *Condition, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation NameModifierLoc, + SourceLocation ColonLoc, SourceLocation EndLoc) { Expr *ValExpr = Condition; Stmt *HelperValStmt = nullptr; OpenMPDirectiveKind CaptureRegion = OMPD_unknown; if (!Condition->isValueDependent() && !Condition->isTypeDependent() && !Condition->isInstantiationDependent() && !Condition->containsUnexpandedParameterPack()) { - ExprResult Val = CheckBooleanCondition(StartLoc, Condition); + ExprResult Val = SemaRef.CheckBooleanCondition(StartLoc, Condition); if (Val.isInvalid()) return nullptr; @@ -16666,57 +16714,60 @@ OMPClause *Sema::ActOnOpenMPIfClause(OpenMPDirectiveKind NameModifier, OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); CaptureRegion = getOpenMPCaptureRegionForClause( - DKind, OMPC_if, LangOpts.OpenMP, NameModifier); - if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { - ValExpr = MakeFullExpr(ValExpr).get(); + DKind, OMPC_if, getLangOpts().OpenMP, NameModifier); + if (CaptureRegion != OMPD_unknown && + !SemaRef.CurContext->isDependentContext()) { + ValExpr = SemaRef.MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; - ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); - HelperValStmt = buildPreInits(Context, Captures); + ValExpr = tryBuildCapture(SemaRef, ValExpr, Captures).get(); + HelperValStmt = buildPreInits(getASTContext(), Captures); } } - return new (Context) + 
return new (getASTContext()) OMPIfClause(NameModifier, ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, NameModifierLoc, ColonLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPFinalClause(Expr *Condition, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPFinalClause(Expr *Condition, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { Expr *ValExpr = Condition; Stmt *HelperValStmt = nullptr; OpenMPDirectiveKind CaptureRegion = OMPD_unknown; if (!Condition->isValueDependent() && !Condition->isTypeDependent() && !Condition->isInstantiationDependent() && !Condition->containsUnexpandedParameterPack()) { - ExprResult Val = CheckBooleanCondition(StartLoc, Condition); + ExprResult Val = SemaRef.CheckBooleanCondition(StartLoc, Condition); if (Val.isInvalid()) return nullptr; - ValExpr = MakeFullExpr(Val.get()).get(); + ValExpr = SemaRef.MakeFullExpr(Val.get()).get(); OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); - CaptureRegion = - getOpenMPCaptureRegionForClause(DKind, OMPC_final, LangOpts.OpenMP); - if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { - ValExpr = MakeFullExpr(ValExpr).get(); + CaptureRegion = getOpenMPCaptureRegionForClause(DKind, OMPC_final, + getLangOpts().OpenMP); + if (CaptureRegion != OMPD_unknown && + !SemaRef.CurContext->isDependentContext()) { + ValExpr = SemaRef.MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; - ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); - HelperValStmt = buildPreInits(Context, Captures); + ValExpr = tryBuildCapture(SemaRef, ValExpr, Captures).get(); + HelperValStmt = buildPreInits(getASTContext(), Captures); } } - return new (Context) OMPFinalClause(ValExpr, HelperValStmt, CaptureRegion, - StartLoc, LParenLoc, EndLoc); + return new (getASTContext()) OMPFinalClause( + ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, EndLoc); } -ExprResult 
Sema::PerformOpenMPImplicitIntegerConversion(SourceLocation Loc, - Expr *Op) { +ExprResult +SemaOpenMP::PerformOpenMPImplicitIntegerConversion(SourceLocation Loc, + Expr *Op) { if (!Op) return ExprError(); - class IntConvertDiagnoser : public ICEConvertDiagnoser { + class IntConvertDiagnoser : public Sema::ICEConvertDiagnoser { public: IntConvertDiagnoser() : ICEConvertDiagnoser(/*AllowScopedEnumerations*/ false, false, true) {} @@ -16752,7 +16803,7 @@ ExprResult Sema::PerformOpenMPImplicitIntegerConversion(SourceLocation Loc, llvm_unreachable("conversion functions are permitted"); } } ConvertDiagnoser; - return PerformContextualImplicitConversion(Loc, Op, ConvertDiagnoser); + return SemaRef.PerformContextualImplicitConversion(Loc, Op, ConvertDiagnoser); } static bool @@ -16765,7 +16816,7 @@ isNonNegativeIntegerValue(Expr *&ValExpr, Sema &SemaRef, OpenMPClauseKind CKind, !ValExpr->isInstantiationDependent()) { SourceLocation Loc = ValExpr->getExprLoc(); ExprResult Value = - SemaRef.PerformOpenMPImplicitIntegerConversion(Loc, ValExpr); + SemaRef.OpenMP().PerformOpenMPImplicitIntegerConversion(Loc, ValExpr); if (Value.isInvalid()) return false; @@ -16797,37 +16848,37 @@ isNonNegativeIntegerValue(Expr *&ValExpr, Sema &SemaRef, OpenMPClauseKind CKind, return true; } -OMPClause *Sema::ActOnOpenMPNumThreadsClause(Expr *NumThreads, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPNumThreadsClause(Expr *NumThreads, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { Expr *ValExpr = NumThreads; Stmt *HelperValStmt = nullptr; // OpenMP [2.5, Restrictions] // The num_threads expression must evaluate to a positive integer value. 
- if (!isNonNegativeIntegerValue(ValExpr, *this, OMPC_num_threads, + if (!isNonNegativeIntegerValue(ValExpr, SemaRef, OMPC_num_threads, /*StrictlyPositive=*/true)) return nullptr; OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); - OpenMPDirectiveKind CaptureRegion = - getOpenMPCaptureRegionForClause(DKind, OMPC_num_threads, LangOpts.OpenMP); - if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { - ValExpr = MakeFullExpr(ValExpr).get(); + OpenMPDirectiveKind CaptureRegion = getOpenMPCaptureRegionForClause( + DKind, OMPC_num_threads, getLangOpts().OpenMP); + if (CaptureRegion != OMPD_unknown && + !SemaRef.CurContext->isDependentContext()) { + ValExpr = SemaRef.MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; - ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); - HelperValStmt = buildPreInits(Context, Captures); + ValExpr = tryBuildCapture(SemaRef, ValExpr, Captures).get(); + HelperValStmt = buildPreInits(getASTContext(), Captures); } - return new (Context) OMPNumThreadsClause( + return new (getASTContext()) OMPNumThreadsClause( ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, EndLoc); } -ExprResult Sema::VerifyPositiveIntegerConstantInClause(Expr *E, - OpenMPClauseKind CKind, - bool StrictlyPositive, - bool SuppressExprDiags) { +ExprResult SemaOpenMP::VerifyPositiveIntegerConstantInClause( + Expr *E, OpenMPClauseKind CKind, bool StrictlyPositive, + bool SuppressExprDiags) { if (!E) return ExprError(); if (E->isValueDependent() || E->isTypeDependent() || @@ -16841,14 +16892,16 @@ ExprResult Sema::VerifyPositiveIntegerConstantInClause(Expr *E, // expression. 
struct SuppressedDiagnoser : public Sema::VerifyICEDiagnoser { SuppressedDiagnoser() : VerifyICEDiagnoser(/*Suppress=*/true) {} - Sema::SemaDiagnosticBuilder diagnoseNotICE(Sema &S, - SourceLocation Loc) override { + SemaBase::SemaDiagnosticBuilder + diagnoseNotICE(Sema &S, SourceLocation Loc) override { llvm_unreachable("Diagnostic suppressed"); } } Diagnoser; - ICE = VerifyIntegerConstantExpression(E, &Result, Diagnoser, AllowFold); + ICE = SemaRef.VerifyIntegerConstantExpression(E, &Result, Diagnoser, + Sema::AllowFold); } else { - ICE = VerifyIntegerConstantExpression(E, &Result, /*FIXME*/ AllowFold); + ICE = SemaRef.VerifyIntegerConstantExpression(E, &Result, + /*FIXME*/ Sema::AllowFold); } if (ICE.isInvalid()) return ExprError(); @@ -16872,29 +16925,31 @@ ExprResult Sema::VerifyPositiveIntegerConstantInClause(Expr *E, return ICE; } -OMPClause *Sema::ActOnOpenMPSafelenClause(Expr *Len, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPSafelenClause(Expr *Len, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { // OpenMP [2.8.1, simd construct, Description] // The parameter of the safelen clause must be a constant // positive integer expression. ExprResult Safelen = VerifyPositiveIntegerConstantInClause(Len, OMPC_safelen); if (Safelen.isInvalid()) return nullptr; - return new (Context) + return new (getASTContext()) OMPSafelenClause(Safelen.get(), StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPSimdlenClause(Expr *Len, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPSimdlenClause(Expr *Len, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { // OpenMP [2.8.1, simd construct, Description] // The parameter of the simdlen clause must be a constant // positive integer expression. 
ExprResult Simdlen = VerifyPositiveIntegerConstantInClause(Len, OMPC_simdlen); if (Simdlen.isInvalid()) return nullptr; - return new (Context) + return new (getASTContext()) OMPSimdlenClause(Simdlen.get(), StartLoc, LParenLoc, EndLoc); } @@ -16954,31 +17009,32 @@ static bool findOMPAllocatorHandleT(Sema &S, SourceLocation Loc, return true; } -OMPClause *Sema::ActOnOpenMPAllocatorClause(Expr *A, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPAllocatorClause(Expr *A, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { // OpenMP [2.11.3, allocate Directive, Description] // allocator is an expression of omp_allocator_handle_t type. - if (!findOMPAllocatorHandleT(*this, A->getExprLoc(), DSAStack)) + if (!findOMPAllocatorHandleT(SemaRef, A->getExprLoc(), DSAStack)) return nullptr; - ExprResult Allocator = DefaultLvalueConversion(A); + ExprResult Allocator = SemaRef.DefaultLvalueConversion(A); if (Allocator.isInvalid()) return nullptr; - Allocator = PerformImplicitConversion(Allocator.get(), - DSAStack->getOMPAllocatorHandleT(), - Sema::AA_Initializing, - /*AllowExplicit=*/true); + Allocator = SemaRef.PerformImplicitConversion( + Allocator.get(), DSAStack->getOMPAllocatorHandleT(), + Sema::AA_Initializing, + /*AllowExplicit=*/true); if (Allocator.isInvalid()) return nullptr; - return new (Context) + return new (getASTContext()) OMPAllocatorClause(Allocator.get(), StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPCollapseClause(Expr *NumForLoops, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPCollapseClause(Expr *NumForLoops, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { // OpenMP [2.7.1, loop construct, Description] // OpenMP [2.8.1, simd construct, Description] // OpenMP [2.9.6, distribute construct, Description] @@ -16988,14 +17044,14 @@ OMPClause 
*Sema::ActOnOpenMPCollapseClause(Expr *NumForLoops, VerifyPositiveIntegerConstantInClause(NumForLoops, OMPC_collapse); if (NumForLoopsResult.isInvalid()) return nullptr; - return new (Context) + return new (getASTContext()) OMPCollapseClause(NumForLoopsResult.get(), StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPOrderedClause(SourceLocation StartLoc, - SourceLocation EndLoc, - SourceLocation LParenLoc, - Expr *NumForLoops) { +OMPClause *SemaOpenMP::ActOnOpenMPOrderedClause(SourceLocation StartLoc, + SourceLocation EndLoc, + SourceLocation LParenLoc, + Expr *NumForLoops) { // OpenMP [2.7.1, loop construct, Description] // OpenMP [2.8.1, simd construct, Description] // OpenMP [2.9.6, distribute construct, Description] @@ -17010,14 +17066,15 @@ OMPClause *Sema::ActOnOpenMPOrderedClause(SourceLocation StartLoc, } else { NumForLoops = nullptr; } - auto *Clause = OMPOrderedClause::Create( - Context, NumForLoops, NumForLoops ? DSAStack->getAssociatedLoops() : 0, - StartLoc, LParenLoc, EndLoc); + auto *Clause = + OMPOrderedClause::Create(getASTContext(), NumForLoops, + NumForLoops ? 
DSAStack->getAssociatedLoops() : 0, + StartLoc, LParenLoc, EndLoc); DSAStack->setOrderedRegion(/*IsOrdered=*/true, NumForLoops, Clause); return Clause; } -OMPClause *Sema::ActOnOpenMPSimpleClause( +OMPClause *SemaOpenMP::ActOnOpenMPSimpleClause( OpenMPClauseKind Kind, unsigned Argument, SourceLocation ArgumentLoc, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { OMPClause *Res = nullptr; @@ -17159,11 +17216,11 @@ getListOfPossibleValues(OpenMPClauseKind K, unsigned First, unsigned Last, return std::string(Out.str()); } -OMPClause *Sema::ActOnOpenMPDefaultClause(DefaultKind Kind, - SourceLocation KindKwLoc, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPDefaultClause(DefaultKind Kind, + SourceLocation KindKwLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { if (Kind == OMP_DEFAULT_unknown) { Diag(KindKwLoc, diag::err_omp_unexpected_clause_value) << getListOfPossibleValues(OMPC_default, /*First=*/0, @@ -17189,39 +17246,39 @@ OMPClause *Sema::ActOnOpenMPDefaultClause(DefaultKind Kind, llvm_unreachable("DSA unexpected in OpenMP default clause"); } - return new (Context) + return new (getASTContext()) OMPDefaultClause(Kind, KindKwLoc, StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPProcBindClause(ProcBindKind Kind, - SourceLocation KindKwLoc, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPProcBindClause(ProcBindKind Kind, + SourceLocation KindKwLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { if (Kind == OMP_PROC_BIND_unknown) { Diag(KindKwLoc, diag::err_omp_unexpected_clause_value) << getListOfPossibleValues(OMPC_proc_bind, /*First=*/unsigned(OMP_PROC_BIND_master), /*Last=*/ - unsigned(LangOpts.OpenMP > 50 + unsigned(getLangOpts().OpenMP > 50 ? 
OMP_PROC_BIND_primary : OMP_PROC_BIND_spread) + 1) << getOpenMPClauseName(OMPC_proc_bind); return nullptr; } - if (Kind == OMP_PROC_BIND_primary && LangOpts.OpenMP < 51) + if (Kind == OMP_PROC_BIND_primary && getLangOpts().OpenMP < 51) Diag(KindKwLoc, diag::err_omp_unexpected_clause_value) << getListOfPossibleValues(OMPC_proc_bind, /*First=*/unsigned(OMP_PROC_BIND_master), /*Last=*/ unsigned(OMP_PROC_BIND_spread) + 1) << getOpenMPClauseName(OMPC_proc_bind); - return new (Context) + return new (getASTContext()) OMPProcBindClause(Kind, KindKwLoc, StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPAtomicDefaultMemOrderClause( +OMPClause *SemaOpenMP::ActOnOpenMPAtomicDefaultMemOrderClause( OpenMPAtomicDefaultMemOrderClauseKind Kind, SourceLocation KindKwLoc, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { if (Kind == OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown) { @@ -17232,15 +17289,15 @@ OMPClause *Sema::ActOnOpenMPAtomicDefaultMemOrderClause( << getOpenMPClauseName(OMPC_atomic_default_mem_order); return nullptr; } - return new (Context) OMPAtomicDefaultMemOrderClause(Kind, KindKwLoc, StartLoc, - LParenLoc, EndLoc); + return new (getASTContext()) OMPAtomicDefaultMemOrderClause( + Kind, KindKwLoc, StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPAtClause(OpenMPAtClauseKind Kind, - SourceLocation KindKwLoc, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPAtClause(OpenMPAtClauseKind Kind, + SourceLocation KindKwLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { if (Kind == OMPC_AT_unknown) { Diag(KindKwLoc, diag::err_omp_unexpected_clause_value) << getListOfPossibleValues(OMPC_at, /*First=*/0, @@ -17248,15 +17305,15 @@ OMPClause *Sema::ActOnOpenMPAtClause(OpenMPAtClauseKind Kind, << getOpenMPClauseName(OMPC_at); return nullptr; } - return new (Context) + return new (getASTContext()) OMPAtClause(Kind, KindKwLoc, StartLoc, 
LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPSeverityClause(OpenMPSeverityClauseKind Kind, - SourceLocation KindKwLoc, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPSeverityClause(OpenMPSeverityClauseKind Kind, + SourceLocation KindKwLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { if (Kind == OMPC_SEVERITY_unknown) { Diag(KindKwLoc, diag::err_omp_unexpected_clause_value) << getListOfPossibleValues(OMPC_severity, /*First=*/0, @@ -17264,28 +17321,30 @@ OMPClause *Sema::ActOnOpenMPSeverityClause(OpenMPSeverityClauseKind Kind, << getOpenMPClauseName(OMPC_severity); return nullptr; } - return new (Context) + return new (getASTContext()) OMPSeverityClause(Kind, KindKwLoc, StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPMessageClause(Expr *ME, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPMessageClause(Expr *ME, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { assert(ME && "NULL expr in Message clause"); if (!isa(ME)) { Diag(ME->getBeginLoc(), diag::warn_clause_expected_string) << getOpenMPClauseName(OMPC_message); return nullptr; } - return new (Context) OMPMessageClause(ME, StartLoc, LParenLoc, EndLoc); + return new (getASTContext()) + OMPMessageClause(ME, StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPOrderClause( +OMPClause *SemaOpenMP::ActOnOpenMPOrderClause( OpenMPOrderClauseModifier Modifier, OpenMPOrderClauseKind Kind, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation MLoc, SourceLocation KindLoc, SourceLocation EndLoc) { if (Kind != OMPC_ORDER_concurrent || - (LangOpts.OpenMP < 51 && MLoc.isValid())) { + (getLangOpts().OpenMP < 51 && MLoc.isValid())) { // Kind should be concurrent, // Modifiers introduced in OpenMP 5.1 static_assert(OMPC_ORDER_unknown > 0, @@ -17298,7 +17357,7 @@ OMPClause 
*Sema::ActOnOpenMPOrderClause( << getOpenMPClauseName(OMPC_order); return nullptr; } - if (LangOpts.OpenMP >= 51) { + if (getLangOpts().OpenMP >= 51) { if (Modifier == OMPC_ORDER_MODIFIER_unknown && MLoc.isValid()) { Diag(MLoc, diag::err_omp_unexpected_clause_value) << getListOfPossibleValues(OMPC_order, @@ -17315,21 +17374,21 @@ OMPClause *Sema::ActOnOpenMPOrderClause( } } } - return new (Context) OMPOrderClause(Kind, KindLoc, StartLoc, LParenLoc, - EndLoc, Modifier, MLoc); + return new (getASTContext()) OMPOrderClause( + Kind, KindLoc, StartLoc, LParenLoc, EndLoc, Modifier, MLoc); } -OMPClause *Sema::ActOnOpenMPUpdateClause(OpenMPDependClauseKind Kind, - SourceLocation KindKwLoc, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPUpdateClause(OpenMPDependClauseKind Kind, + SourceLocation KindKwLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { if (Kind == OMPC_DEPEND_unknown || Kind == OMPC_DEPEND_source || Kind == OMPC_DEPEND_sink || Kind == OMPC_DEPEND_depobj) { SmallVector Except = { OMPC_DEPEND_source, OMPC_DEPEND_sink, OMPC_DEPEND_depobj, OMPC_DEPEND_outallmemory, OMPC_DEPEND_inoutallmemory}; - if (LangOpts.OpenMP < 51) + if (getLangOpts().OpenMP < 51) Except.push_back(OMPC_DEPEND_inoutset); Diag(KindKwLoc, diag::err_omp_unexpected_clause_value) << getListOfPossibleValues(OMPC_depend, /*First=*/0, @@ -17337,14 +17396,14 @@ OMPClause *Sema::ActOnOpenMPUpdateClause(OpenMPDependClauseKind Kind, << getOpenMPClauseName(OMPC_update); return nullptr; } - return OMPUpdateClause::Create(Context, StartLoc, LParenLoc, KindKwLoc, Kind, - EndLoc); + return OMPUpdateClause::Create(getASTContext(), StartLoc, LParenLoc, + KindKwLoc, Kind, EndLoc); } -OMPClause *Sema::ActOnOpenMPSizesClause(ArrayRef SizeExprs, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPSizesClause(ArrayRef SizeExprs, + SourceLocation 
StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { for (Expr *SizeExpr : SizeExprs) { ExprResult NumForLoopsResult = VerifyPositiveIntegerConstantInClause( SizeExpr, OMPC_sizes, /*StrictlyPositive=*/true); @@ -17353,19 +17412,19 @@ OMPClause *Sema::ActOnOpenMPSizesClause(ArrayRef SizeExprs, } DSAStack->setAssociatedLoops(SizeExprs.size()); - return OMPSizesClause::Create(Context, StartLoc, LParenLoc, EndLoc, + return OMPSizesClause::Create(getASTContext(), StartLoc, LParenLoc, EndLoc, SizeExprs); } -OMPClause *Sema::ActOnOpenMPFullClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return OMPFullClause::Create(Context, StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPFullClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return OMPFullClause::Create(getASTContext(), StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPPartialClause(Expr *FactorExpr, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPPartialClause(Expr *FactorExpr, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { if (FactorExpr) { // If an argument is specified, it must be a constant (or an unevaluated // template expression). 
@@ -17376,22 +17435,22 @@ OMPClause *Sema::ActOnOpenMPPartialClause(Expr *FactorExpr, FactorExpr = FactorResult.get(); } - return OMPPartialClause::Create(Context, StartLoc, LParenLoc, EndLoc, + return OMPPartialClause::Create(getASTContext(), StartLoc, LParenLoc, EndLoc, FactorExpr); } -OMPClause *Sema::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { ExprResult AlignVal; AlignVal = VerifyPositiveIntegerConstantInClause(A, OMPC_align); if (AlignVal.isInvalid()) return nullptr; - return OMPAlignClause::Create(Context, AlignVal.get(), StartLoc, LParenLoc, - EndLoc); + return OMPAlignClause::Create(getASTContext(), AlignVal.get(), StartLoc, + LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause( +OMPClause *SemaOpenMP::ActOnOpenMPSingleExprWithArgClause( OpenMPClauseKind Kind, ArrayRef Argument, Expr *Expr, SourceLocation StartLoc, SourceLocation LParenLoc, ArrayRef ArgumentLoc, SourceLocation DelimLoc, @@ -17559,13 +17618,13 @@ static bool checkScheduleModifiers(Sema &S, OpenMPScheduleClauseModifier M1, return false; } -OMPClause *Sema::ActOnOpenMPScheduleClause( +OMPClause *SemaOpenMP::ActOnOpenMPScheduleClause( OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, OpenMPScheduleClauseKind Kind, Expr *ChunkSize, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation M1Loc, SourceLocation M2Loc, SourceLocation KindLoc, SourceLocation CommaLoc, SourceLocation EndLoc) { - if (checkScheduleModifiers(*this, M1, M2, M1Loc, M2Loc) || - checkScheduleModifiers(*this, M2, M1, M2Loc, M1Loc)) + if (checkScheduleModifiers(SemaRef, M1, M2, M1Loc, M2Loc) || + checkScheduleModifiers(SemaRef, M2, M1, M2Loc, M1Loc)) return nullptr; // OpenMP, 2.7.1, Loop Construct, Restrictions // Either the monotonic modifier or the nonmonotonic modifier can 
be specified @@ -17599,7 +17658,7 @@ OMPClause *Sema::ActOnOpenMPScheduleClause( // The nonmonotonic modifier can only be specified with schedule(dynamic) or // schedule(guided). // OpenMP 5.0 does not have this restriction. - if (LangOpts.OpenMP < 50 && + if (getLangOpts().OpenMP < 50 && (M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic) && Kind != OMPC_SCHEDULE_dynamic && Kind != OMPC_SCHEDULE_guided) { @@ -17625,7 +17684,7 @@ OMPClause *Sema::ActOnOpenMPScheduleClause( // chunk_size must be a loop invariant integer expression with a positive // value. if (std::optional Result = - ValExpr->getIntegerConstantExpr(Context)) { + ValExpr->getIntegerConstantExpr(getASTContext())) { if (Result->isSigned() && !Result->isStrictlyPositive()) { Diag(ChunkSizeLoc, diag::err_omp_negative_expression_in_clause) << "schedule" << 1 << ChunkSize->getSourceRange(); @@ -17633,24 +17692,24 @@ OMPClause *Sema::ActOnOpenMPScheduleClause( } } else if (getOpenMPCaptureRegionForClause( DSAStack->getCurrentDirective(), OMPC_schedule, - LangOpts.OpenMP) != OMPD_unknown && - !CurContext->isDependentContext()) { - ValExpr = MakeFullExpr(ValExpr).get(); + getLangOpts().OpenMP) != OMPD_unknown && + !SemaRef.CurContext->isDependentContext()) { + ValExpr = SemaRef.MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; - ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); - HelperValStmt = buildPreInits(Context, Captures); + ValExpr = tryBuildCapture(SemaRef, ValExpr, Captures).get(); + HelperValStmt = buildPreInits(getASTContext(), Captures); } } } - return new (Context) + return new (getASTContext()) OMPScheduleClause(StartLoc, LParenLoc, KindLoc, CommaLoc, EndLoc, Kind, ValExpr, HelperValStmt, M1, M1Loc, M2, M2Loc); } -OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind, - SourceLocation StartLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPClause(OpenMPClauseKind Kind, + SourceLocation StartLoc, + SourceLocation EndLoc) { 
OMPClause *Res = nullptr; switch (Kind) { case OMPC_ordered: @@ -17804,134 +17863,138 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind, return Res; } -OMPClause *Sema::ActOnOpenMPNowaitClause(SourceLocation StartLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPNowaitClause(SourceLocation StartLoc, + SourceLocation EndLoc) { DSAStack->setNowaitRegion(); - return new (Context) OMPNowaitClause(StartLoc, EndLoc); + return new (getASTContext()) OMPNowaitClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPUntiedClause(SourceLocation StartLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPUntiedClause(SourceLocation StartLoc, + SourceLocation EndLoc) { DSAStack->setUntiedRegion(); - return new (Context) OMPUntiedClause(StartLoc, EndLoc); + return new (getASTContext()) OMPUntiedClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPMergeableClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPMergeableClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPMergeableClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPMergeableClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPReadClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPReadClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPReadClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPReadClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPWriteClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPWriteClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPWriteClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPWriteClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPUpdateClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return OMPUpdateClause::Create(Context, StartLoc, EndLoc); +OMPClause 
*SemaOpenMP::ActOnOpenMPUpdateClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return OMPUpdateClause::Create(getASTContext(), StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPCaptureClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPCaptureClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPCaptureClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPCaptureClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPCompareClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPCompareClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPCompareClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPCompareClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPFailClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPFailClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPFailClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPFailClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPFailClause( - OpenMPClauseKind Parameter, SourceLocation KindLoc, - SourceLocation StartLoc, SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPFailClause(OpenMPClauseKind Parameter, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { if (!checkFailClauseParameter(Parameter)) { Diag(KindLoc, diag::err_omp_atomic_fail_wrong_or_no_clauses); return nullptr; } - return new (Context) + return new (getASTContext()) OMPFailClause(Parameter, KindLoc, StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPSeqCstClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPSeqCstClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPSeqCstClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new 
(getASTContext()) OMPSeqCstClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPAcqRelClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPAcqRelClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPAcqRelClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPAcqRelClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPAcquireClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPAcquireClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPAcquireClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPAcquireClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPReleaseClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPReleaseClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPReleaseClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPReleaseClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPRelaxedClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPRelaxedClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPRelaxedClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPRelaxedClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPWeakClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPWeakClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPWeakClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPWeakClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPThreadsClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPThreadsClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPThreadsClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPThreadsClause(StartLoc, EndLoc); } -OMPClause 
*Sema::ActOnOpenMPSIMDClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPSIMDClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPSIMDClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPSIMDClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPNogroupClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPNogroupClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPNogroupClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPNogroupClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPUnifiedAddressClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPUnifiedAddressClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPUnifiedAddressClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPUnifiedAddressClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPUnifiedSharedMemoryClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPUnifiedSharedMemoryClause(StartLoc, EndLoc); +OMPClause * +SemaOpenMP::ActOnOpenMPUnifiedSharedMemoryClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPUnifiedSharedMemoryClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPReverseOffloadClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPReverseOffloadClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPReverseOffloadClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPReverseOffloadClause(StartLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPDynamicAllocatorsClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPDynamicAllocatorsClause(StartLoc, EndLoc); +OMPClause * +SemaOpenMP::ActOnOpenMPDynamicAllocatorsClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return 
new (getASTContext()) OMPDynamicAllocatorsClause(StartLoc, EndLoc); } -StmtResult Sema::ActOnOpenMPInteropDirective(ArrayRef Clauses, - SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult +SemaOpenMP::ActOnOpenMPInteropDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc) { // OpenMP 5.1 [2.15.1, interop Construct, Restrictions] // At least one action-clause must appear on a directive. @@ -17981,13 +18044,13 @@ StmtResult Sema::ActOnOpenMPInteropDirective(ArrayRef Clauses, if (ClauseKind == OMPC_init) { auto *E = cast(C)->getInteropVar(); - DeclResult = getPrivateItem(*this, E, ELoc, ERange); + DeclResult = getPrivateItem(SemaRef, E, ELoc, ERange); } else if (ClauseKind == OMPC_use) { auto *E = cast(C)->getInteropVar(); - DeclResult = getPrivateItem(*this, E, ELoc, ERange); + DeclResult = getPrivateItem(SemaRef, E, ELoc, ERange); } else if (ClauseKind == OMPC_destroy) { auto *E = cast(C)->getInteropVar(); - DeclResult = getPrivateItem(*this, E, ELoc, ERange); + DeclResult = getPrivateItem(SemaRef, E, ELoc, ERange); } if (DeclResult.first) { @@ -17999,7 +18062,8 @@ StmtResult Sema::ActOnOpenMPInteropDirective(ArrayRef Clauses, } } - return OMPInteropDirective::Create(Context, StartLoc, EndLoc, Clauses); + return OMPInteropDirective::Create(getASTContext(), StartLoc, EndLoc, + Clauses); } static bool isValidInteropVariable(Sema &SemaRef, Expr *InteropVarExpr, @@ -18059,12 +18123,11 @@ static bool isValidInteropVariable(Sema &SemaRef, Expr *InteropVarExpr, return true; } -OMPClause * -Sema::ActOnOpenMPInitClause(Expr *InteropVar, OMPInteropInfo &InteropInfo, - SourceLocation StartLoc, SourceLocation LParenLoc, - SourceLocation VarLoc, SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPInitClause( + Expr *InteropVar, OMPInteropInfo &InteropInfo, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation VarLoc, SourceLocation EndLoc) { - if (!isValidInteropVariable(*this, InteropVar, VarLoc, OMPC_init)) + if 
(!isValidInteropVariable(SemaRef, InteropVar, VarLoc, OMPC_init)) return nullptr; // Check prefer_type values. These foreign-runtime-id values are either @@ -18073,7 +18136,7 @@ Sema::ActOnOpenMPInitClause(Expr *InteropVar, OMPInteropInfo &InteropInfo, if (E->isValueDependent() || E->isTypeDependent() || E->isInstantiationDependent() || E->containsUnexpandedParameterPack()) continue; - if (E->isIntegerConstantExpr(Context)) + if (E->isIntegerConstantExpr(getASTContext())) continue; if (isa(E)) continue; @@ -18081,28 +18144,29 @@ Sema::ActOnOpenMPInitClause(Expr *InteropVar, OMPInteropInfo &InteropInfo, return nullptr; } - return OMPInitClause::Create(Context, InteropVar, InteropInfo, StartLoc, - LParenLoc, VarLoc, EndLoc); + return OMPInitClause::Create(getASTContext(), InteropVar, InteropInfo, + StartLoc, LParenLoc, VarLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPUseClause(Expr *InteropVar, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation VarLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPUseClause(Expr *InteropVar, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation VarLoc, + SourceLocation EndLoc) { - if (!isValidInteropVariable(*this, InteropVar, VarLoc, OMPC_use)) + if (!isValidInteropVariable(SemaRef, InteropVar, VarLoc, OMPC_use)) return nullptr; - return new (Context) + return new (getASTContext()) OMPUseClause(InteropVar, StartLoc, LParenLoc, VarLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPDestroyClause(Expr *InteropVar, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation VarLoc, - SourceLocation EndLoc) { - if (!InteropVar && LangOpts.OpenMP >= 52 && +OMPClause *SemaOpenMP::ActOnOpenMPDestroyClause(Expr *InteropVar, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation VarLoc, + SourceLocation EndLoc) { + if (!InteropVar && getLangOpts().OpenMP >= 52 && DSAStack->getCurrentDirective() == OMPD_depobj) { Diag(StartLoc, diag::err_omp_expected_clause_argument) << 
getOpenMPClauseName(OMPC_destroy) @@ -18110,100 +18174,103 @@ OMPClause *Sema::ActOnOpenMPDestroyClause(Expr *InteropVar, return nullptr; } if (InteropVar && - !isValidInteropVariable(*this, InteropVar, VarLoc, OMPC_destroy)) + !isValidInteropVariable(SemaRef, InteropVar, VarLoc, OMPC_destroy)) return nullptr; - return new (Context) + return new (getASTContext()) OMPDestroyClause(InteropVar, StartLoc, LParenLoc, VarLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPNovariantsClause(Expr *Condition, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPNovariantsClause(Expr *Condition, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { Expr *ValExpr = Condition; Stmt *HelperValStmt = nullptr; OpenMPDirectiveKind CaptureRegion = OMPD_unknown; if (!Condition->isValueDependent() && !Condition->isTypeDependent() && !Condition->isInstantiationDependent() && !Condition->containsUnexpandedParameterPack()) { - ExprResult Val = CheckBooleanCondition(StartLoc, Condition); + ExprResult Val = SemaRef.CheckBooleanCondition(StartLoc, Condition); if (Val.isInvalid()) return nullptr; - ValExpr = MakeFullExpr(Val.get()).get(); + ValExpr = SemaRef.MakeFullExpr(Val.get()).get(); OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); CaptureRegion = getOpenMPCaptureRegionForClause(DKind, OMPC_novariants, - LangOpts.OpenMP); - if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { - ValExpr = MakeFullExpr(ValExpr).get(); + getLangOpts().OpenMP); + if (CaptureRegion != OMPD_unknown && + !SemaRef.CurContext->isDependentContext()) { + ValExpr = SemaRef.MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; - ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); - HelperValStmt = buildPreInits(Context, Captures); + ValExpr = tryBuildCapture(SemaRef, ValExpr, Captures).get(); + HelperValStmt = buildPreInits(getASTContext(), Captures); } } - return new (Context) 
OMPNovariantsClause( + return new (getASTContext()) OMPNovariantsClause( ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPNocontextClause(Expr *Condition, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPNocontextClause(Expr *Condition, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { Expr *ValExpr = Condition; Stmt *HelperValStmt = nullptr; OpenMPDirectiveKind CaptureRegion = OMPD_unknown; if (!Condition->isValueDependent() && !Condition->isTypeDependent() && !Condition->isInstantiationDependent() && !Condition->containsUnexpandedParameterPack()) { - ExprResult Val = CheckBooleanCondition(StartLoc, Condition); + ExprResult Val = SemaRef.CheckBooleanCondition(StartLoc, Condition); if (Val.isInvalid()) return nullptr; - ValExpr = MakeFullExpr(Val.get()).get(); + ValExpr = SemaRef.MakeFullExpr(Val.get()).get(); OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); - CaptureRegion = - getOpenMPCaptureRegionForClause(DKind, OMPC_nocontext, LangOpts.OpenMP); - if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { - ValExpr = MakeFullExpr(ValExpr).get(); + CaptureRegion = getOpenMPCaptureRegionForClause(DKind, OMPC_nocontext, + getLangOpts().OpenMP); + if (CaptureRegion != OMPD_unknown && + !SemaRef.CurContext->isDependentContext()) { + ValExpr = SemaRef.MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; - ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); - HelperValStmt = buildPreInits(Context, Captures); + ValExpr = tryBuildCapture(SemaRef, ValExpr, Captures).get(); + HelperValStmt = buildPreInits(getASTContext(), Captures); } } - return new (Context) OMPNocontextClause(ValExpr, HelperValStmt, CaptureRegion, - StartLoc, LParenLoc, EndLoc); + return new (getASTContext()) OMPNocontextClause( + ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, EndLoc); } -OMPClause 
*Sema::ActOnOpenMPFilterClause(Expr *ThreadID, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPFilterClause(Expr *ThreadID, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { Expr *ValExpr = ThreadID; Stmt *HelperValStmt = nullptr; OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); OpenMPDirectiveKind CaptureRegion = - getOpenMPCaptureRegionForClause(DKind, OMPC_filter, LangOpts.OpenMP); - if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { - ValExpr = MakeFullExpr(ValExpr).get(); + getOpenMPCaptureRegionForClause(DKind, OMPC_filter, getLangOpts().OpenMP); + if (CaptureRegion != OMPD_unknown && + !SemaRef.CurContext->isDependentContext()) { + ValExpr = SemaRef.MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; - ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); - HelperValStmt = buildPreInits(Context, Captures); + ValExpr = tryBuildCapture(SemaRef, ValExpr, Captures).get(); + HelperValStmt = buildPreInits(getASTContext(), Captures); } - return new (Context) OMPFilterClause(ValExpr, HelperValStmt, CaptureRegion, - StartLoc, LParenLoc, EndLoc); + return new (getASTContext()) OMPFilterClause( + ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPVarListClause(OpenMPClauseKind Kind, - ArrayRef VarList, - const OMPVarListLocTy &Locs, - OpenMPVarListDataTy &Data) { +OMPClause *SemaOpenMP::ActOnOpenMPVarListClause(OpenMPClauseKind Kind, + ArrayRef VarList, + const OMPVarListLocTy &Locs, + OpenMPVarListDataTy &Data) { SourceLocation StartLoc = Locs.StartLoc; SourceLocation LParenLoc = Locs.LParenLoc; SourceLocation EndLoc = Locs.EndLoc; @@ -18395,29 +18462,30 @@ OMPClause *Sema::ActOnOpenMPVarListClause(OpenMPClauseKind Kind, return Res; } -ExprResult Sema::getOpenMPCapturedExpr(VarDecl *Capture, ExprValueKind VK, - ExprObjectKind OK, SourceLocation Loc) { - ExprResult Res = 
BuildDeclRefExpr( +ExprResult SemaOpenMP::getOpenMPCapturedExpr(VarDecl *Capture, ExprValueKind VK, + ExprObjectKind OK, + SourceLocation Loc) { + ExprResult Res = SemaRef.BuildDeclRefExpr( Capture, Capture->getType().getNonReferenceType(), VK_LValue, Loc); if (!Res.isUsable()) return ExprError(); if (OK == OK_Ordinary && !getLangOpts().CPlusPlus) { - Res = CreateBuiltinUnaryOp(Loc, UO_Deref, Res.get()); + Res = SemaRef.CreateBuiltinUnaryOp(Loc, UO_Deref, Res.get()); if (!Res.isUsable()) return ExprError(); } if (VK != VK_LValue && Res.get()->isGLValue()) { - Res = DefaultLvalueConversion(Res.get()); + Res = SemaRef.DefaultLvalueConversion(Res.get()); if (!Res.isUsable()) return ExprError(); } return Res; } -OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPPrivateClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { SmallVector Vars; SmallVector PrivateCopies; bool IsImplicitClause = @@ -18427,7 +18495,7 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef VarList, SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange); + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange); if (Res.second) { // It will be analyzed later. Vars.push_back(RefExpr); @@ -18443,7 +18511,8 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef VarList, // OpenMP [2.9.3.3, Restrictions, C/C++, p.3] // A variable that appears in a private clause must not have an incomplete // type or a reference type. 
- if (RequireCompleteType(ELoc, Type, diag::err_omp_private_incomplete_type)) + if (SemaRef.RequireCompleteType(ELoc, Type, + diag::err_omp_private_incomplete_type)) continue; Type = Type.getNonReferenceType(); @@ -18455,7 +18524,7 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef VarList, // OpenMP 3.1 [2.9.3.3, private clause, Restrictions] // A variable that appears in a private clause must not have a // const-qualified type unless it is of class type with a mutable member. - if (rejectConstNotMutableType(*this, D, Type, OMPC_private, ELoc)) + if (rejectConstNotMutableType(SemaRef, D, Type, OMPC_private, ELoc)) continue; // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced @@ -18469,7 +18538,7 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef VarList, if (DVar.CKind != OMPC_unknown && DVar.CKind != OMPC_private) { Diag(ELoc, diag::err_omp_wrong_dsa) << getOpenMPClauseName(DVar.CKind) << getOpenMPClauseName(OMPC_private); - reportOriginalDsa(*this, DSAStack, D, DVar); + reportOriginalDsa(SemaRef, DSAStack, D, DVar); continue; } @@ -18480,7 +18549,7 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef VarList, Diag(ELoc, diag::err_omp_variably_modified_type_not_supported) << getOpenMPClauseName(OMPC_private) << Type << getOpenMPDirectiveName(CurrDir); - bool IsDecl = !VD || VD->isThisDeclarationADefinition(Context) == + bool IsDecl = !VD || VD->isThisDeclarationADefinition(getASTContext()) == VarDecl::DeclarationOnly; Diag(D->getLocation(), IsDecl ? diag::note_previous_decl : diag::note_defined_here) @@ -18496,7 +18565,8 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef VarList, // A list item cannot appear in both a map clause and a data-sharing // attribute clause on the same construct unless the construct is a // combined construct. 
- if ((LangOpts.OpenMP <= 45 && isOpenMPTargetExecutionDirective(CurrDir)) || + if ((getLangOpts().OpenMP <= 45 && + isOpenMPTargetExecutionDirective(CurrDir)) || CurrDir == OMPD_target) { OpenMPClauseKind ConflictKind; if (DSAStack->checkMappableExprComponentListsForDecl( @@ -18510,7 +18580,7 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef VarList, << getOpenMPClauseName(OMPC_private) << getOpenMPClauseName(ConflictKind) << getOpenMPDirectiveName(CurrDir); - reportOriginalDsa(*this, DSAStack, D, DVar); + reportOriginalDsa(SemaRef, DSAStack, D, DVar); continue; } } @@ -18526,28 +18596,28 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef VarList, // proper diagnostics. Type = Type.getUnqualifiedType(); VarDecl *VDPrivate = - buildVarDecl(*this, ELoc, Type, D->getName(), + buildVarDecl(SemaRef, ELoc, Type, D->getName(), D->hasAttrs() ? &D->getAttrs() : nullptr, VD ? cast(SimpleRefExpr) : nullptr); - ActOnUninitializedDecl(VDPrivate); + SemaRef.ActOnUninitializedDecl(VDPrivate); if (VDPrivate->isInvalidDecl()) continue; DeclRefExpr *VDPrivateRefExpr = buildDeclRefExpr( - *this, VDPrivate, RefExpr->getType().getUnqualifiedType(), ELoc); + SemaRef, VDPrivate, RefExpr->getType().getUnqualifiedType(), ELoc); DeclRefExpr *Ref = nullptr; - if (!VD && !CurContext->isDependentContext()) { + if (!VD && !SemaRef.CurContext->isDependentContext()) { auto *FD = dyn_cast(D); VarDecl *VD = FD ? DSAStack->getImplicitFDCapExprDecl(FD) : nullptr; if (VD) - Ref = buildDeclRefExpr(*this, VD, VD->getType().getNonReferenceType(), + Ref = buildDeclRefExpr(SemaRef, VD, VD->getType().getNonReferenceType(), RefExpr->getExprLoc()); else - Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/false); + Ref = buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/false); } if (!IsImplicitClause) DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_private, Ref); - Vars.push_back((VD || CurContext->isDependentContext()) + Vars.push_back((VD || SemaRef.CurContext->isDependentContext()) ? 
RefExpr->IgnoreParens() : Ref); PrivateCopies.push_back(VDPrivateRefExpr); @@ -18556,14 +18626,14 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef VarList, if (Vars.empty()) return nullptr; - return OMPPrivateClause::Create(Context, StartLoc, LParenLoc, EndLoc, Vars, - PrivateCopies); + return OMPPrivateClause::Create(getASTContext(), StartLoc, LParenLoc, EndLoc, + Vars, PrivateCopies); } -OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPFirstprivateClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { SmallVector Vars; SmallVector PrivateCopies; SmallVector Inits; @@ -18577,7 +18647,7 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange); + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange); if (Res.second) { // It will be analyzed later. Vars.push_back(RefExpr); @@ -18595,8 +18665,8 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, // OpenMP [2.9.3.3, Restrictions, C/C++, p.3] // A variable that appears in a private clause must not have an incomplete // type or a reference type. - if (RequireCompleteType(ELoc, Type, - diag::err_omp_firstprivate_incomplete_type)) + if (SemaRef.RequireCompleteType(ELoc, Type, + diag::err_omp_firstprivate_incomplete_type)) continue; Type = Type.getNonReferenceType(); @@ -18604,7 +18674,8 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, // A variable of class type (or array thereof) that appears in a private // clause requires an accessible, unambiguous copy constructor for the // class type. 
- QualType ElemType = Context.getBaseElementType(Type).getNonReferenceType(); + QualType ElemType = + getASTContext().getBaseElementType(Type).getNonReferenceType(); // If an implicit firstprivate variable found it was checked already. DSAStackTy::DSAVarData TopDVar; @@ -18613,7 +18684,7 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, DSAStack->getTopDSA(D, /*FromParent=*/false); TopDVar = DVar; OpenMPDirectiveKind CurrDir = DSAStack->getCurrentDirective(); - bool IsConstant = ElemType.isConstant(Context); + bool IsConstant = ElemType.isConstant(getASTContext()); // OpenMP [2.4.13, Data-sharing Attribute Clauses] // A list item that specifies a given variable may not appear in more // than one clause on the same directive, except that a variable may be @@ -18628,7 +18699,7 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, Diag(ELoc, diag::err_omp_wrong_dsa) << getOpenMPClauseName(DVar.CKind) << getOpenMPClauseName(OMPC_firstprivate); - reportOriginalDsa(*this, DSAStack, D, DVar); + reportOriginalDsa(SemaRef, DSAStack, D, DVar); continue; } @@ -18648,7 +18719,7 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, Diag(ELoc, diag::err_omp_wrong_dsa) << getOpenMPClauseName(DVar.CKind) << getOpenMPClauseName(OMPC_firstprivate); - reportOriginalDsa(*this, DSAStack, D, DVar); + reportOriginalDsa(SemaRef, DSAStack, D, DVar); continue; } @@ -18679,7 +18750,7 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, Diag(ELoc, diag::err_omp_required_access) << getOpenMPClauseName(OMPC_firstprivate) << getOpenMPClauseName(OMPC_shared); - reportOriginalDsa(*this, DSAStack, D, DVar); + reportOriginalDsa(SemaRef, DSAStack, D, DVar); continue; } } @@ -18712,7 +18783,7 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, isOpenMPTeamsDirective(DVar.DKind))) { Diag(ELoc, diag::err_omp_parallel_reduction_in_task_firstprivate) << getOpenMPDirectiveName(DVar.DKind); - reportOriginalDsa(*this, DSAStack, D, 
DVar); + reportOriginalDsa(SemaRef, DSAStack, D, DVar); continue; } } @@ -18725,7 +18796,7 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, // A list item cannot appear in both a map clause and a data-sharing // attribute clause on the same construct unless the construct is a // combined construct. - if ((LangOpts.OpenMP <= 45 && + if ((getLangOpts().OpenMP <= 45 && isOpenMPTargetExecutionDirective(CurrDir)) || CurrDir == OMPD_target) { OpenMPClauseKind ConflictKind; @@ -18741,7 +18812,7 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, << getOpenMPClauseName(OMPC_firstprivate) << getOpenMPClauseName(ConflictKind) << getOpenMPDirectiveName(DSAStack->getCurrentDirective()); - reportOriginalDsa(*this, DSAStack, D, DVar); + reportOriginalDsa(SemaRef, DSAStack, D, DVar); continue; } } @@ -18753,7 +18824,7 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, Diag(ELoc, diag::err_omp_variably_modified_type_not_supported) << getOpenMPClauseName(OMPC_firstprivate) << Type << getOpenMPDirectiveName(DSAStack->getCurrentDirective()); - bool IsDecl = !VD || VD->isThisDeclarationADefinition(Context) == + bool IsDecl = !VD || VD->isThisDeclarationADefinition(getASTContext()) == VarDecl::DeclarationOnly; Diag(D->getLocation(), IsDecl ? diag::note_previous_decl : diag::note_defined_here) @@ -18763,7 +18834,7 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, Type = Type.getUnqualifiedType(); VarDecl *VDPrivate = - buildVarDecl(*this, ELoc, Type, D->getName(), + buildVarDecl(SemaRef, ELoc, Type, D->getName(), D->hasAttrs() ? &D->getAttrs() : nullptr, VD ? cast(SimpleRefExpr) : nullptr); // Generate helper private variable and initialize it with the value of the @@ -18776,32 +18847,32 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, // original array element in CodeGen. 
if (Type->isArrayType()) { VarDecl *VDInit = - buildVarDecl(*this, RefExpr->getExprLoc(), ElemType, D->getName()); - VDInitRefExpr = buildDeclRefExpr(*this, VDInit, ElemType, ELoc); - Expr *Init = DefaultLvalueConversion(VDInitRefExpr).get(); + buildVarDecl(SemaRef, RefExpr->getExprLoc(), ElemType, D->getName()); + VDInitRefExpr = buildDeclRefExpr(SemaRef, VDInit, ElemType, ELoc); + Expr *Init = SemaRef.DefaultLvalueConversion(VDInitRefExpr).get(); ElemType = ElemType.getUnqualifiedType(); - VarDecl *VDInitTemp = buildVarDecl(*this, RefExpr->getExprLoc(), ElemType, - ".firstprivate.temp"); + VarDecl *VDInitTemp = buildVarDecl(SemaRef, RefExpr->getExprLoc(), + ElemType, ".firstprivate.temp"); InitializedEntity Entity = InitializedEntity::InitializeVariable(VDInitTemp); InitializationKind Kind = InitializationKind::CreateCopy(ELoc, ELoc); - InitializationSequence InitSeq(*this, Entity, Kind, Init); - ExprResult Result = InitSeq.Perform(*this, Entity, Kind, Init); + InitializationSequence InitSeq(SemaRef, Entity, Kind, Init); + ExprResult Result = InitSeq.Perform(SemaRef, Entity, Kind, Init); if (Result.isInvalid()) VDPrivate->setInvalidDecl(); else VDPrivate->setInit(Result.getAs()); // Remove temp variable declaration. 
- Context.Deallocate(VDInitTemp); + getASTContext().Deallocate(VDInitTemp); } else { - VarDecl *VDInit = buildVarDecl(*this, RefExpr->getExprLoc(), Type, + VarDecl *VDInit = buildVarDecl(SemaRef, RefExpr->getExprLoc(), Type, ".firstprivate.temp"); - VDInitRefExpr = buildDeclRefExpr(*this, VDInit, RefExpr->getType(), + VDInitRefExpr = buildDeclRefExpr(SemaRef, VDInit, RefExpr->getType(), RefExpr->getExprLoc()); - AddInitializerToDecl(VDPrivate, - DefaultLvalueConversion(VDInitRefExpr).get(), - /*DirectInit=*/false); + SemaRef.AddInitializerToDecl( + VDPrivate, SemaRef.DefaultLvalueConversion(VDInitRefExpr).get(), + /*DirectInit=*/false); } if (VDPrivate->isInvalidDecl()) { if (IsImplicitClause) { @@ -18810,29 +18881,30 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, } continue; } - CurContext->addDecl(VDPrivate); + SemaRef.CurContext->addDecl(VDPrivate); DeclRefExpr *VDPrivateRefExpr = buildDeclRefExpr( - *this, VDPrivate, RefExpr->getType().getUnqualifiedType(), + SemaRef, VDPrivate, RefExpr->getType().getUnqualifiedType(), RefExpr->getExprLoc()); DeclRefExpr *Ref = nullptr; - if (!VD && !CurContext->isDependentContext()) { + if (!VD && !SemaRef.CurContext->isDependentContext()) { if (TopDVar.CKind == OMPC_lastprivate) { Ref = TopDVar.PrivateCopy; } else { auto *FD = dyn_cast(D); VarDecl *VD = FD ? 
DSAStack->getImplicitFDCapExprDecl(FD) : nullptr; if (VD) - Ref = buildDeclRefExpr(*this, VD, VD->getType().getNonReferenceType(), - RefExpr->getExprLoc()); + Ref = + buildDeclRefExpr(SemaRef, VD, VD->getType().getNonReferenceType(), + RefExpr->getExprLoc()); else - Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/true); + Ref = buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/true); if (VD || !isOpenMPCapturedDecl(D)) ExprCaptures.push_back(Ref->getDecl()); } } if (!IsImplicitClause) DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_firstprivate, Ref); - Vars.push_back((VD || CurContext->isDependentContext()) + Vars.push_back((VD || SemaRef.CurContext->isDependentContext()) ? RefExpr->IgnoreParens() : Ref); PrivateCopies.push_back(VDPrivateRefExpr); @@ -18842,12 +18914,12 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, if (Vars.empty()) return nullptr; - return OMPFirstprivateClause::Create(Context, StartLoc, LParenLoc, EndLoc, - Vars, PrivateCopies, Inits, - buildPreInits(Context, ExprCaptures)); + return OMPFirstprivateClause::Create( + getASTContext(), StartLoc, LParenLoc, EndLoc, Vars, PrivateCopies, Inits, + buildPreInits(getASTContext(), ExprCaptures)); } -OMPClause *Sema::ActOnOpenMPLastprivateClause( +OMPClause *SemaOpenMP::ActOnOpenMPLastprivateClause( ArrayRef VarList, OpenMPLastprivateModifier LPKind, SourceLocation LPKindLoc, SourceLocation ColonLoc, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { @@ -18871,7 +18943,7 @@ OMPClause *Sema::ActOnOpenMPLastprivateClause( SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange); + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange); if (Res.second) { // It will be analyzed later. 
Vars.push_back(RefExpr); @@ -18889,8 +18961,8 @@ OMPClause *Sema::ActOnOpenMPLastprivateClause( // OpenMP [2.14.3.5, Restrictions, C/C++, p.2] // A variable that appears in a lastprivate clause must not have an // incomplete type or a reference type. - if (RequireCompleteType(ELoc, Type, - diag::err_omp_lastprivate_incomplete_type)) + if (SemaRef.RequireCompleteType(ELoc, Type, + diag::err_omp_lastprivate_incomplete_type)) continue; Type = Type.getNonReferenceType(); @@ -18902,7 +18974,7 @@ OMPClause *Sema::ActOnOpenMPLastprivateClause( // OpenMP 3.1 [2.9.3.5, lastprivate clause, Restrictions] // A variable that appears in a lastprivate clause must not have a // const-qualified type unless it is of class type with a mutable member. - if (rejectConstNotMutableType(*this, D, Type, OMPC_lastprivate, ELoc)) + if (rejectConstNotMutableType(SemaRef, D, Type, OMPC_lastprivate, ELoc)) continue; // OpenMP 5.0 [2.19.4.5 lastprivate Clause, Restrictions] @@ -18910,7 +18982,7 @@ OMPClause *Sema::ActOnOpenMPLastprivateClause( // modifier must be a scalar variable. if (LPKind == OMPC_LASTPRIVATE_conditional && !Type->isScalarType()) { Diag(ELoc, diag::err_omp_lastprivate_conditional_non_scalar); - bool IsDecl = !VD || VD->isThisDeclarationADefinition(Context) == + bool IsDecl = !VD || VD->isThisDeclarationADefinition(getASTContext()) == VarDecl::DeclarationOnly; Diag(D->getLocation(), IsDecl ? 
diag::note_previous_decl : diag::note_defined_here) @@ -18935,7 +19007,7 @@ OMPClause *Sema::ActOnOpenMPLastprivateClause( Diag(ELoc, diag::err_omp_wrong_dsa) << getOpenMPClauseName(DVar.CKind) << getOpenMPClauseName(OMPC_lastprivate); - reportOriginalDsa(*this, DSAStack, D, DVar); + reportOriginalDsa(SemaRef, DSAStack, D, DVar); continue; } @@ -18954,7 +19026,7 @@ OMPClause *Sema::ActOnOpenMPLastprivateClause( Diag(ELoc, diag::err_omp_required_access) << getOpenMPClauseName(OMPC_lastprivate) << getOpenMPClauseName(OMPC_shared); - reportOriginalDsa(*this, DSAStack, D, DVar); + reportOriginalDsa(SemaRef, DSAStack, D, DVar); continue; } } @@ -18967,53 +19039,53 @@ OMPClause *Sema::ActOnOpenMPLastprivateClause( // A variable of class type (or array thereof) that appears in a // lastprivate clause requires an accessible, unambiguous copy assignment // operator for the class type. - Type = Context.getBaseElementType(Type).getNonReferenceType(); - VarDecl *SrcVD = buildVarDecl(*this, ERange.getBegin(), + Type = getASTContext().getBaseElementType(Type).getNonReferenceType(); + VarDecl *SrcVD = buildVarDecl(SemaRef, ERange.getBegin(), Type.getUnqualifiedType(), ".lastprivate.src", D->hasAttrs() ? &D->getAttrs() : nullptr); DeclRefExpr *PseudoSrcExpr = - buildDeclRefExpr(*this, SrcVD, Type.getUnqualifiedType(), ELoc); + buildDeclRefExpr(SemaRef, SrcVD, Type.getUnqualifiedType(), ELoc); VarDecl *DstVD = - buildVarDecl(*this, ERange.getBegin(), Type, ".lastprivate.dst", + buildVarDecl(SemaRef, ERange.getBegin(), Type, ".lastprivate.dst", D->hasAttrs() ? &D->getAttrs() : nullptr); - DeclRefExpr *PseudoDstExpr = buildDeclRefExpr(*this, DstVD, Type, ELoc); + DeclRefExpr *PseudoDstExpr = buildDeclRefExpr(SemaRef, DstVD, Type, ELoc); // For arrays generate assignment operation for single element and replace // it by the original array element in CodeGen. 
- ExprResult AssignmentOp = BuildBinOp(/*S=*/nullptr, ELoc, BO_Assign, - PseudoDstExpr, PseudoSrcExpr); + ExprResult AssignmentOp = SemaRef.BuildBinOp(/*S=*/nullptr, ELoc, BO_Assign, + PseudoDstExpr, PseudoSrcExpr); if (AssignmentOp.isInvalid()) continue; - AssignmentOp = - ActOnFinishFullExpr(AssignmentOp.get(), ELoc, /*DiscardedValue*/ false); + AssignmentOp = SemaRef.ActOnFinishFullExpr(AssignmentOp.get(), ELoc, + /*DiscardedValue*/ false); if (AssignmentOp.isInvalid()) continue; DeclRefExpr *Ref = nullptr; - if (!VD && !CurContext->isDependentContext()) { + if (!VD && !SemaRef.CurContext->isDependentContext()) { if (TopDVar.CKind == OMPC_firstprivate) { Ref = TopDVar.PrivateCopy; } else { - Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/false); + Ref = buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/false); if (!isOpenMPCapturedDecl(D)) ExprCaptures.push_back(Ref->getDecl()); } if ((TopDVar.CKind == OMPC_firstprivate && !TopDVar.PrivateCopy) || (!isOpenMPCapturedDecl(D) && Ref->getDecl()->hasAttr())) { - ExprResult RefRes = DefaultLvalueConversion(Ref); + ExprResult RefRes = SemaRef.DefaultLvalueConversion(Ref); if (!RefRes.isUsable()) continue; ExprResult PostUpdateRes = - BuildBinOp(DSAStack->getCurScope(), ELoc, BO_Assign, SimpleRefExpr, - RefRes.get()); + SemaRef.BuildBinOp(DSAStack->getCurScope(), ELoc, BO_Assign, + SimpleRefExpr, RefRes.get()); if (!PostUpdateRes.isUsable()) continue; ExprPostUpdates.push_back( - IgnoredValueConversions(PostUpdateRes.get()).get()); + SemaRef.IgnoredValueConversions(PostUpdateRes.get()).get()); } } DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_lastprivate, Ref); - Vars.push_back((VD || CurContext->isDependentContext()) + Vars.push_back((VD || SemaRef.CurContext->isDependentContext()) ? 
RefExpr->IgnoreParens() : Ref); SrcExprs.push_back(PseudoSrcExpr); @@ -19024,24 +19096,24 @@ OMPClause *Sema::ActOnOpenMPLastprivateClause( if (Vars.empty()) return nullptr; - return OMPLastprivateClause::Create(Context, StartLoc, LParenLoc, EndLoc, - Vars, SrcExprs, DstExprs, AssignmentOps, - LPKind, LPKindLoc, ColonLoc, - buildPreInits(Context, ExprCaptures), - buildPostUpdate(*this, ExprPostUpdates)); + return OMPLastprivateClause::Create( + getASTContext(), StartLoc, LParenLoc, EndLoc, Vars, SrcExprs, DstExprs, + AssignmentOps, LPKind, LPKindLoc, ColonLoc, + buildPreInits(getASTContext(), ExprCaptures), + buildPostUpdate(SemaRef, ExprPostUpdates)); } -OMPClause *Sema::ActOnOpenMPSharedClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPSharedClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { SmallVector Vars; for (Expr *RefExpr : VarList) { assert(RefExpr && "NULL expr in OpenMP lastprivate clause."); SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange); + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange); if (Res.second) { // It will be analyzed later. 
Vars.push_back(RefExpr); @@ -19063,15 +19135,16 @@ OMPClause *Sema::ActOnOpenMPSharedClause(ArrayRef VarList, DVar.RefExpr) { Diag(ELoc, diag::err_omp_wrong_dsa) << getOpenMPClauseName(DVar.CKind) << getOpenMPClauseName(OMPC_shared); - reportOriginalDsa(*this, DSAStack, D, DVar); + reportOriginalDsa(SemaRef, DSAStack, D, DVar); continue; } DeclRefExpr *Ref = nullptr; - if (!VD && isOpenMPCapturedDecl(D) && !CurContext->isDependentContext()) - Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/true); + if (!VD && isOpenMPCapturedDecl(D) && + !SemaRef.CurContext->isDependentContext()) + Ref = buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/true); DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_shared, Ref); - Vars.push_back((VD || !Ref || CurContext->isDependentContext()) + Vars.push_back((VD || !Ref || SemaRef.CurContext->isDependentContext()) ? RefExpr->IgnoreParens() : Ref); } @@ -19079,7 +19152,8 @@ OMPClause *Sema::ActOnOpenMPSharedClause(ArrayRef VarList, if (Vars.empty()) return nullptr; - return OMPSharedClause::Create(Context, StartLoc, LParenLoc, EndLoc, Vars); + return OMPSharedClause::Create(getASTContext(), StartLoc, LParenLoc, EndLoc, + Vars); } namespace { @@ -20200,7 +20274,7 @@ static bool actOnOMPReductionKindClause( } else { VarsExpr = Ref = buildCapture(S, D, SimpleRefExpr, /*WithInit=*/false); } - if (!S.isOpenMPCapturedDecl(D)) { + if (!S.OpenMP().isOpenMPCapturedDecl(D)) { RD.ExprCaptures.emplace_back(Ref->getDecl()); if (Ref->getDecl()->hasAttr()) { ExprResult RefRes = S.DefaultLvalueConversion(Ref); @@ -20250,7 +20324,7 @@ static bool actOnOMPReductionKindClause( return RD.Vars.empty(); } -OMPClause *Sema::ActOnOpenMPReductionClause( +OMPClause *SemaOpenMP::ActOnOpenMPReductionClause( ArrayRef VarList, OpenMPReductionClauseModifier Modifier, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc, @@ -20279,77 +20353,80 @@ OMPClause 
*Sema::ActOnOpenMPReductionClause( } ReductionData RD(VarList.size(), Modifier); - if (actOnOMPReductionKindClause(*this, DSAStack, OMPC_reduction, VarList, + if (actOnOMPReductionKindClause(SemaRef, DSAStack, OMPC_reduction, VarList, StartLoc, LParenLoc, ColonLoc, EndLoc, ReductionIdScopeSpec, ReductionId, UnresolvedReductions, RD)) return nullptr; return OMPReductionClause::Create( - Context, StartLoc, LParenLoc, ModifierLoc, ColonLoc, EndLoc, Modifier, - RD.Vars, ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId, + getASTContext(), StartLoc, LParenLoc, ModifierLoc, ColonLoc, EndLoc, + Modifier, RD.Vars, + ReductionIdScopeSpec.getWithLocInContext(getASTContext()), ReductionId, RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps, RD.InscanCopyOps, RD.InscanCopyArrayTemps, RD.InscanCopyArrayElems, - buildPreInits(Context, RD.ExprCaptures), - buildPostUpdate(*this, RD.ExprPostUpdates)); + buildPreInits(getASTContext(), RD.ExprCaptures), + buildPostUpdate(SemaRef, RD.ExprPostUpdates)); } -OMPClause *Sema::ActOnOpenMPTaskReductionClause( +OMPClause *SemaOpenMP::ActOnOpenMPTaskReductionClause( ArrayRef VarList, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc, CXXScopeSpec &ReductionIdScopeSpec, const DeclarationNameInfo &ReductionId, ArrayRef UnresolvedReductions) { ReductionData RD(VarList.size()); - if (actOnOMPReductionKindClause(*this, DSAStack, OMPC_task_reduction, VarList, - StartLoc, LParenLoc, ColonLoc, EndLoc, - ReductionIdScopeSpec, ReductionId, + if (actOnOMPReductionKindClause(SemaRef, DSAStack, OMPC_task_reduction, + VarList, StartLoc, LParenLoc, ColonLoc, + EndLoc, ReductionIdScopeSpec, ReductionId, UnresolvedReductions, RD)) return nullptr; return OMPTaskReductionClause::Create( - Context, StartLoc, LParenLoc, ColonLoc, EndLoc, RD.Vars, - ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId, + getASTContext(), StartLoc, LParenLoc, ColonLoc, EndLoc, RD.Vars, + 
ReductionIdScopeSpec.getWithLocInContext(getASTContext()), ReductionId, RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps, - buildPreInits(Context, RD.ExprCaptures), - buildPostUpdate(*this, RD.ExprPostUpdates)); + buildPreInits(getASTContext(), RD.ExprCaptures), + buildPostUpdate(SemaRef, RD.ExprPostUpdates)); } -OMPClause *Sema::ActOnOpenMPInReductionClause( +OMPClause *SemaOpenMP::ActOnOpenMPInReductionClause( ArrayRef VarList, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc, CXXScopeSpec &ReductionIdScopeSpec, const DeclarationNameInfo &ReductionId, ArrayRef UnresolvedReductions) { ReductionData RD(VarList.size()); - if (actOnOMPReductionKindClause(*this, DSAStack, OMPC_in_reduction, VarList, + if (actOnOMPReductionKindClause(SemaRef, DSAStack, OMPC_in_reduction, VarList, StartLoc, LParenLoc, ColonLoc, EndLoc, ReductionIdScopeSpec, ReductionId, UnresolvedReductions, RD)) return nullptr; return OMPInReductionClause::Create( - Context, StartLoc, LParenLoc, ColonLoc, EndLoc, RD.Vars, - ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId, + getASTContext(), StartLoc, LParenLoc, ColonLoc, EndLoc, RD.Vars, + ReductionIdScopeSpec.getWithLocInContext(getASTContext()), ReductionId, RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps, RD.TaskgroupDescriptors, - buildPreInits(Context, RD.ExprCaptures), - buildPostUpdate(*this, RD.ExprPostUpdates)); + buildPreInits(getASTContext(), RD.ExprCaptures), + buildPostUpdate(SemaRef, RD.ExprPostUpdates)); } -bool Sema::CheckOpenMPLinearModifier(OpenMPLinearClauseKind LinKind, - SourceLocation LinLoc) { - if ((!LangOpts.CPlusPlus && LinKind != OMPC_LINEAR_val) || +bool SemaOpenMP::CheckOpenMPLinearModifier(OpenMPLinearClauseKind LinKind, + SourceLocation LinLoc) { + if ((!getLangOpts().CPlusPlus && LinKind != OMPC_LINEAR_val) || LinKind == OMPC_LINEAR_unknown || LinKind == OMPC_LINEAR_step) { - Diag(LinLoc, diag::err_omp_wrong_linear_modifier) << LangOpts.CPlusPlus; + 
Diag(LinLoc, diag::err_omp_wrong_linear_modifier) + << getLangOpts().CPlusPlus; return true; } return false; } -bool Sema::CheckOpenMPLinearDecl(const ValueDecl *D, SourceLocation ELoc, - OpenMPLinearClauseKind LinKind, QualType Type, - bool IsDeclareSimd) { +bool SemaOpenMP::CheckOpenMPLinearDecl(const ValueDecl *D, SourceLocation ELoc, + OpenMPLinearClauseKind LinKind, + QualType Type, bool IsDeclareSimd) { const auto *VD = dyn_cast_or_null(D); // A variable must not have an incomplete type or a reference type. - if (RequireCompleteType(ELoc, Type, diag::err_omp_linear_incomplete_type)) + if (SemaRef.RequireCompleteType(ELoc, Type, + diag::err_omp_linear_incomplete_type)) return true; if ((LinKind == OMPC_LINEAR_uval || LinKind == OMPC_LINEAR_ref) && !Type->isReferenceType()) { @@ -20365,17 +20442,17 @@ bool Sema::CheckOpenMPLinearDecl(const ValueDecl *D, SourceLocation ELoc, // not apply to the firstprivate clause, nor to the linear clause on // declarative directives (like declare simd). if (!IsDeclareSimd && - rejectConstNotMutableType(*this, D, Type, OMPC_linear, ELoc)) + rejectConstNotMutableType(SemaRef, D, Type, OMPC_linear, ELoc)) return true; // A list item must be of integral or pointer type. Type = Type.getUnqualifiedType().getCanonicalType(); const auto *Ty = Type.getTypePtrOrNull(); if (!Ty || (LinKind != OMPC_LINEAR_ref && !Ty->isDependentType() && - !Ty->isIntegralType(Context) && !Ty->isPointerType())) { + !Ty->isIntegralType(getASTContext()) && !Ty->isPointerType())) { Diag(ELoc, diag::err_omp_linear_expected_int_or_ptr) << Type; if (D) { - bool IsDecl = !VD || VD->isThisDeclarationADefinition(Context) == + bool IsDecl = !VD || VD->isThisDeclarationADefinition(getASTContext()) == VarDecl::DeclarationOnly; Diag(D->getLocation(), IsDecl ? 
diag::note_previous_decl : diag::note_defined_here) @@ -20386,7 +20463,7 @@ bool Sema::CheckOpenMPLinearDecl(const ValueDecl *D, SourceLocation ELoc, return false; } -OMPClause *Sema::ActOnOpenMPLinearClause( +OMPClause *SemaOpenMP::ActOnOpenMPLinearClause( ArrayRef VarList, Expr *Step, SourceLocation StartLoc, SourceLocation LParenLoc, OpenMPLinearClauseKind LinKind, SourceLocation LinLoc, SourceLocation ColonLoc, @@ -20409,7 +20486,7 @@ OMPClause *Sema::ActOnOpenMPLinearClause( SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange); + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange); if (Res.second) { // It will be analyzed later. Vars.push_back(RefExpr); @@ -20431,7 +20508,7 @@ OMPClause *Sema::ActOnOpenMPLinearClause( if (DVar.RefExpr) { Diag(ELoc, diag::err_omp_wrong_dsa) << getOpenMPClauseName(DVar.CKind) << getOpenMPClauseName(OMPC_linear); - reportOriginalDsa(*this, DSAStack, D, DVar); + reportOriginalDsa(SemaRef, DSAStack, D, DVar); continue; } @@ -20441,29 +20518,29 @@ OMPClause *Sema::ActOnOpenMPLinearClause( // Build private copy of original var. VarDecl *Private = - buildVarDecl(*this, ELoc, Type, D->getName(), + buildVarDecl(SemaRef, ELoc, Type, D->getName(), D->hasAttrs() ? &D->getAttrs() : nullptr, VD ? cast(SimpleRefExpr) : nullptr); - DeclRefExpr *PrivateRef = buildDeclRefExpr(*this, Private, Type, ELoc); + DeclRefExpr *PrivateRef = buildDeclRefExpr(SemaRef, Private, Type, ELoc); // Build var to save initial value. 
- VarDecl *Init = buildVarDecl(*this, ELoc, Type, ".linear.start"); + VarDecl *Init = buildVarDecl(SemaRef, ELoc, Type, ".linear.start"); Expr *InitExpr; DeclRefExpr *Ref = nullptr; - if (!VD && !CurContext->isDependentContext()) { - Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/false); + if (!VD && !SemaRef.CurContext->isDependentContext()) { + Ref = buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/false); if (!isOpenMPCapturedDecl(D)) { ExprCaptures.push_back(Ref->getDecl()); if (Ref->getDecl()->hasAttr()) { - ExprResult RefRes = DefaultLvalueConversion(Ref); + ExprResult RefRes = SemaRef.DefaultLvalueConversion(Ref); if (!RefRes.isUsable()) continue; ExprResult PostUpdateRes = - BuildBinOp(DSAStack->getCurScope(), ELoc, BO_Assign, - SimpleRefExpr, RefRes.get()); + SemaRef.BuildBinOp(DSAStack->getCurScope(), ELoc, BO_Assign, + SimpleRefExpr, RefRes.get()); if (!PostUpdateRes.isUsable()) continue; ExprPostUpdates.push_back( - IgnoredValueConversions(PostUpdateRes.get()).get()); + SemaRef.IgnoredValueConversions(PostUpdateRes.get()).get()); } } } @@ -20471,12 +20548,13 @@ OMPClause *Sema::ActOnOpenMPLinearClause( InitExpr = VD ? VD->getInit() : SimpleRefExpr; else InitExpr = VD ? SimpleRefExpr : Ref; - AddInitializerToDecl(Init, DefaultLvalueConversion(InitExpr).get(), - /*DirectInit=*/false); - DeclRefExpr *InitRef = buildDeclRefExpr(*this, Init, Type, ELoc); + SemaRef.AddInitializerToDecl( + Init, SemaRef.DefaultLvalueConversion(InitExpr).get(), + /*DirectInit=*/false); + DeclRefExpr *InitRef = buildDeclRefExpr(SemaRef, Init, Type, ELoc); DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_linear, Ref); - Vars.push_back((VD || CurContext->isDependentContext()) + Vars.push_back((VD || SemaRef.CurContext->isDependentContext()) ? RefExpr->IgnoreParens() : Ref); Privates.push_back(PrivateRef); @@ -20499,17 +20577,18 @@ OMPClause *Sema::ActOnOpenMPLinearClause( // Build var to save the step value. 
VarDecl *SaveVar = - buildVarDecl(*this, StepLoc, StepExpr->getType(), ".linear.step"); + buildVarDecl(SemaRef, StepLoc, StepExpr->getType(), ".linear.step"); ExprResult SaveRef = - buildDeclRefExpr(*this, SaveVar, StepExpr->getType(), StepLoc); - ExprResult CalcStep = - BuildBinOp(CurScope, StepLoc, BO_Assign, SaveRef.get(), StepExpr); - CalcStep = ActOnFinishFullExpr(CalcStep.get(), /*DiscardedValue*/ false); + buildDeclRefExpr(SemaRef, SaveVar, StepExpr->getType(), StepLoc); + ExprResult CalcStep = SemaRef.BuildBinOp( + SemaRef.getCurScope(), StepLoc, BO_Assign, SaveRef.get(), StepExpr); + CalcStep = + SemaRef.ActOnFinishFullExpr(CalcStep.get(), /*DiscardedValue*/ false); // Warn about zero linear step (it would be probably better specified as // making corresponding variables 'const'). if (std::optional Result = - StepExpr->getIntegerConstantExpr(Context)) { + StepExpr->getIntegerConstantExpr(getASTContext())) { if (!Result->isNegative() && !Result->isStrictlyPositive()) Diag(StepLoc, diag::warn_omp_linear_step_zero) << Vars[0] << (Vars.size() > 1); @@ -20520,11 +20599,11 @@ OMPClause *Sema::ActOnOpenMPLinearClause( } } - return OMPLinearClause::Create(Context, StartLoc, LParenLoc, LinKind, LinLoc, - ColonLoc, StepModifierLoc, EndLoc, Vars, - Privates, Inits, StepExpr, CalcStepExpr, - buildPreInits(Context, ExprCaptures), - buildPostUpdate(*this, ExprPostUpdates)); + return OMPLinearClause::Create(getASTContext(), StartLoc, LParenLoc, LinKind, + LinLoc, ColonLoc, StepModifierLoc, EndLoc, + Vars, Privates, Inits, StepExpr, CalcStepExpr, + buildPreInits(getASTContext(), ExprCaptures), + buildPostUpdate(SemaRef, ExprPostUpdates)); } static bool FinishOpenMPLinearClause(OMPLinearClause &Clause, DeclRefExpr *IV, @@ -20630,7 +20709,7 @@ static bool FinishOpenMPLinearClause(OMPLinearClause &Clause, DeclRefExpr *IV, return HasErrors; } -OMPClause *Sema::ActOnOpenMPAlignedClause( +OMPClause *SemaOpenMP::ActOnOpenMPAlignedClause( ArrayRef VarList, Expr *Alignment, 
SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc) { SmallVector Vars; @@ -20639,7 +20718,7 @@ OMPClause *Sema::ActOnOpenMPAlignedClause( SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange); + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange); if (Res.second) { // It will be analyzed later. Vars.push_back(RefExpr); @@ -20659,7 +20738,7 @@ OMPClause *Sema::ActOnOpenMPAlignedClause( if (!Ty || (!Ty->isArrayType() && !Ty->isPointerType())) { Diag(ELoc, diag::err_omp_aligned_expected_array_or_ptr) << QType << getLangOpts().CPlusPlus << ERange; - bool IsDecl = !VD || VD->isThisDeclarationADefinition(Context) == + bool IsDecl = !VD || VD->isThisDeclarationADefinition(getASTContext()) == VarDecl::DeclarationOnly; Diag(D->getLocation(), IsDecl ? diag::note_previous_decl : diag::note_defined_here) @@ -20679,9 +20758,10 @@ OMPClause *Sema::ActOnOpenMPAlignedClause( DeclRefExpr *Ref = nullptr; if (!VD && isOpenMPCapturedDecl(D)) - Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/true); - Vars.push_back(DefaultFunctionArrayConversion( - (VD || !Ref) ? RefExpr->IgnoreParens() : Ref) + Ref = buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/true); + Vars.push_back(SemaRef + .DefaultFunctionArrayConversion( + (VD || !Ref) ? 
RefExpr->IgnoreParens() : Ref) .get()); } @@ -20700,14 +20780,14 @@ OMPClause *Sema::ActOnOpenMPAlignedClause( if (Vars.empty()) return nullptr; - return OMPAlignedClause::Create(Context, StartLoc, LParenLoc, ColonLoc, - EndLoc, Vars, Alignment); + return OMPAlignedClause::Create(getASTContext(), StartLoc, LParenLoc, + ColonLoc, EndLoc, Vars, Alignment); } -OMPClause *Sema::ActOnOpenMPCopyinClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPCopyinClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { SmallVector Vars; SmallVector SrcExprs; SmallVector DstExprs; @@ -20761,26 +20841,28 @@ OMPClause *Sema::ActOnOpenMPCopyinClause(ArrayRef VarList, // A variable of class type (or array thereof) that appears in a // copyin clause requires an accessible, unambiguous copy assignment // operator for the class type. - QualType ElemType = Context.getBaseElementType(Type).getNonReferenceType(); + QualType ElemType = + getASTContext().getBaseElementType(Type).getNonReferenceType(); VarDecl *SrcVD = - buildVarDecl(*this, DE->getBeginLoc(), ElemType.getUnqualifiedType(), + buildVarDecl(SemaRef, DE->getBeginLoc(), ElemType.getUnqualifiedType(), ".copyin.src", VD->hasAttrs() ? &VD->getAttrs() : nullptr); DeclRefExpr *PseudoSrcExpr = buildDeclRefExpr( - *this, SrcVD, ElemType.getUnqualifiedType(), DE->getExprLoc()); + SemaRef, SrcVD, ElemType.getUnqualifiedType(), DE->getExprLoc()); VarDecl *DstVD = - buildVarDecl(*this, DE->getBeginLoc(), ElemType, ".copyin.dst", + buildVarDecl(SemaRef, DE->getBeginLoc(), ElemType, ".copyin.dst", VD->hasAttrs() ? 
&VD->getAttrs() : nullptr); DeclRefExpr *PseudoDstExpr = - buildDeclRefExpr(*this, DstVD, ElemType, DE->getExprLoc()); + buildDeclRefExpr(SemaRef, DstVD, ElemType, DE->getExprLoc()); // For arrays generate assignment operation for single element and replace // it by the original array element in CodeGen. ExprResult AssignmentOp = - BuildBinOp(/*S=*/nullptr, DE->getExprLoc(), BO_Assign, PseudoDstExpr, - PseudoSrcExpr); + SemaRef.BuildBinOp(/*S=*/nullptr, DE->getExprLoc(), BO_Assign, + PseudoDstExpr, PseudoSrcExpr); if (AssignmentOp.isInvalid()) continue; - AssignmentOp = ActOnFinishFullExpr(AssignmentOp.get(), DE->getExprLoc(), - /*DiscardedValue*/ false); + AssignmentOp = + SemaRef.ActOnFinishFullExpr(AssignmentOp.get(), DE->getExprLoc(), + /*DiscardedValue*/ false); if (AssignmentOp.isInvalid()) continue; @@ -20794,14 +20876,14 @@ OMPClause *Sema::ActOnOpenMPCopyinClause(ArrayRef VarList, if (Vars.empty()) return nullptr; - return OMPCopyinClause::Create(Context, StartLoc, LParenLoc, EndLoc, Vars, - SrcExprs, DstExprs, AssignmentOps); + return OMPCopyinClause::Create(getASTContext(), StartLoc, LParenLoc, EndLoc, + Vars, SrcExprs, DstExprs, AssignmentOps); } -OMPClause *Sema::ActOnOpenMPCopyprivateClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPCopyprivateClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { SmallVector Vars; SmallVector SrcExprs; SmallVector DstExprs; @@ -20811,7 +20893,7 @@ OMPClause *Sema::ActOnOpenMPCopyprivateClause(ArrayRef VarList, SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange); + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange); if (Res.second) { // It will be analyzed later. 
Vars.push_back(RefExpr); @@ -20837,7 +20919,7 @@ OMPClause *Sema::ActOnOpenMPCopyprivateClause(ArrayRef VarList, Diag(ELoc, diag::err_omp_wrong_dsa) << getOpenMPClauseName(DVar.CKind) << getOpenMPClauseName(OMPC_copyprivate); - reportOriginalDsa(*this, DSAStack, D, DVar); + reportOriginalDsa(SemaRef, DSAStack, D, DVar); continue; } @@ -20850,7 +20932,7 @@ OMPClause *Sema::ActOnOpenMPCopyprivateClause(ArrayRef VarList, Diag(ELoc, diag::err_omp_required_access) << getOpenMPClauseName(OMPC_copyprivate) << "threadprivate or private in the enclosing context"; - reportOriginalDsa(*this, DSAStack, D, DVar); + reportOriginalDsa(SemaRef, DSAStack, D, DVar); continue; } } @@ -20861,7 +20943,7 @@ OMPClause *Sema::ActOnOpenMPCopyprivateClause(ArrayRef VarList, Diag(ELoc, diag::err_omp_variably_modified_type_not_supported) << getOpenMPClauseName(OMPC_copyprivate) << Type << getOpenMPDirectiveName(DSAStack->getCurrentDirective()); - bool IsDecl = !VD || VD->isThisDeclarationADefinition(Context) == + bool IsDecl = !VD || VD->isThisDeclarationADefinition(getASTContext()) == VarDecl::DeclarationOnly; Diag(D->getLocation(), IsDecl ? diag::note_previous_decl : diag::note_defined_here) @@ -20873,22 +20955,23 @@ OMPClause *Sema::ActOnOpenMPCopyprivateClause(ArrayRef VarList, // A variable of class type (or array thereof) that appears in a // copyin clause requires an accessible, unambiguous copy assignment // operator for the class type. - Type = Context.getBaseElementType(Type.getNonReferenceType()) + Type = getASTContext() + .getBaseElementType(Type.getNonReferenceType()) .getUnqualifiedType(); VarDecl *SrcVD = - buildVarDecl(*this, RefExpr->getBeginLoc(), Type, ".copyprivate.src", + buildVarDecl(SemaRef, RefExpr->getBeginLoc(), Type, ".copyprivate.src", D->hasAttrs() ? 
&D->getAttrs() : nullptr); - DeclRefExpr *PseudoSrcExpr = buildDeclRefExpr(*this, SrcVD, Type, ELoc); + DeclRefExpr *PseudoSrcExpr = buildDeclRefExpr(SemaRef, SrcVD, Type, ELoc); VarDecl *DstVD = - buildVarDecl(*this, RefExpr->getBeginLoc(), Type, ".copyprivate.dst", + buildVarDecl(SemaRef, RefExpr->getBeginLoc(), Type, ".copyprivate.dst", D->hasAttrs() ? &D->getAttrs() : nullptr); - DeclRefExpr *PseudoDstExpr = buildDeclRefExpr(*this, DstVD, Type, ELoc); - ExprResult AssignmentOp = BuildBinOp( + DeclRefExpr *PseudoDstExpr = buildDeclRefExpr(SemaRef, DstVD, Type, ELoc); + ExprResult AssignmentOp = SemaRef.BuildBinOp( DSAStack->getCurScope(), ELoc, BO_Assign, PseudoDstExpr, PseudoSrcExpr); if (AssignmentOp.isInvalid()) continue; - AssignmentOp = - ActOnFinishFullExpr(AssignmentOp.get(), ELoc, /*DiscardedValue*/ false); + AssignmentOp = SemaRef.ActOnFinishFullExpr(AssignmentOp.get(), ELoc, + /*DiscardedValue*/ false); if (AssignmentOp.isInvalid()) continue; @@ -20897,7 +20980,7 @@ OMPClause *Sema::ActOnOpenMPCopyprivateClause(ArrayRef VarList, assert(VD || isOpenMPCapturedDecl(D)); Vars.push_back( VD ? 
RefExpr->IgnoreParens() - : buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/false)); + : buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/false)); SrcExprs.push_back(PseudoSrcExpr); DstExprs.push_back(PseudoDstExpr); AssignmentOps.push_back(AssignmentOp.get()); @@ -20906,18 +20989,20 @@ OMPClause *Sema::ActOnOpenMPCopyprivateClause(ArrayRef VarList, if (Vars.empty()) return nullptr; - return OMPCopyprivateClause::Create(Context, StartLoc, LParenLoc, EndLoc, - Vars, SrcExprs, DstExprs, AssignmentOps); + return OMPCopyprivateClause::Create(getASTContext(), StartLoc, LParenLoc, + EndLoc, Vars, SrcExprs, DstExprs, + AssignmentOps); } -OMPClause *Sema::ActOnOpenMPFlushClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPFlushClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { if (VarList.empty()) return nullptr; - return OMPFlushClause::Create(Context, StartLoc, LParenLoc, EndLoc, VarList); + return OMPFlushClause::Create(getASTContext(), StartLoc, LParenLoc, EndLoc, + VarList); } /// Tries to find omp_depend_t. type. @@ -20937,22 +21022,23 @@ static bool findOMPDependT(Sema &S, SourceLocation Loc, DSAStackTy *Stack, return true; } -OMPClause *Sema::ActOnOpenMPDepobjClause(Expr *Depobj, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPDepobjClause(Expr *Depobj, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { if (!Depobj) return nullptr; - bool OMPDependTFound = findOMPDependT(*this, StartLoc, DSAStack); + bool OMPDependTFound = findOMPDependT(SemaRef, StartLoc, DSAStack); // OpenMP 5.0, 2.17.10.1 depobj Construct // depobj is an lvalue expression of type omp_depend_t. 
if (!Depobj->isTypeDependent() && !Depobj->isValueDependent() && !Depobj->isInstantiationDependent() && !Depobj->containsUnexpandedParameterPack() && - (OMPDependTFound && - !Context.typesAreCompatible(DSAStack->getOMPDependT(), Depobj->getType(), - /*CompareUnqualified=*/true))) { + (OMPDependTFound && !getASTContext().typesAreCompatible( + DSAStack->getOMPDependT(), Depobj->getType(), + /*CompareUnqualified=*/true))) { Diag(Depobj->getExprLoc(), diag::err_omp_expected_omp_depend_t_lvalue) << 0 << Depobj->getType() << Depobj->getSourceRange(); } @@ -20962,7 +21048,8 @@ OMPClause *Sema::ActOnOpenMPDepobjClause(Expr *Depobj, SourceLocation StartLoc, << 1 << Depobj->getSourceRange(); } - return OMPDepobjClause::Create(Context, StartLoc, LParenLoc, EndLoc, Depobj); + return OMPDepobjClause::Create(getASTContext(), StartLoc, LParenLoc, EndLoc, + Depobj); } namespace { @@ -21062,8 +21149,9 @@ ProcessOpenMPDoacrossClauseCommon(Sema &SemaRef, bool IsSource, continue; } if (RHS) { - ExprResult RHSRes = SemaRef.VerifyPositiveIntegerConstantInClause( - RHS, OMPC_depend, /*StrictlyPositive=*/false); + ExprResult RHSRes = + SemaRef.OpenMP().VerifyPositiveIntegerConstantInClause( + RHS, OMPC_depend, /*StrictlyPositive=*/false); if (RHSRes.isInvalid()) continue; } @@ -21094,11 +21182,10 @@ ProcessOpenMPDoacrossClauseCommon(Sema &SemaRef, bool IsSource, return {Vars, OpsOffs, TotalDepCount}; } -OMPClause * -Sema::ActOnOpenMPDependClause(const OMPDependClause::DependDataTy &Data, - Expr *DepModifier, ArrayRef VarList, - SourceLocation StartLoc, SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPDependClause( + const OMPDependClause::DependDataTy &Data, Expr *DepModifier, + ArrayRef VarList, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation EndLoc) { OpenMPDependClauseKind DepKind = Data.DepKind; SourceLocation DepLoc = Data.DepLoc; if (DSAStack->getCurrentDirective() == OMPD_ordered && @@ -21116,17 +21203,18 @@ 
Sema::ActOnOpenMPDependClause(const OMPDependClause::DependDataTy &Data, DSAStack->getCurrentDirective() == OMPD_depobj) && (DepKind == OMPC_DEPEND_unknown || DepKind == OMPC_DEPEND_source || DepKind == OMPC_DEPEND_sink || - ((LangOpts.OpenMP < 50 || + ((getLangOpts().OpenMP < 50 || DSAStack->getCurrentDirective() == OMPD_depobj) && DepKind == OMPC_DEPEND_depobj))) { SmallVector Except = {OMPC_DEPEND_source, OMPC_DEPEND_sink, OMPC_DEPEND_outallmemory, OMPC_DEPEND_inoutallmemory}; - if (LangOpts.OpenMP < 50 || DSAStack->getCurrentDirective() == OMPD_depobj) + if (getLangOpts().OpenMP < 50 || + DSAStack->getCurrentDirective() == OMPD_depobj) Except.push_back(OMPC_DEPEND_depobj); - if (LangOpts.OpenMP < 51) + if (getLangOpts().OpenMP < 51) Except.push_back(OMPC_DEPEND_inoutset); - std::string Expected = (LangOpts.OpenMP >= 50 && !DepModifier) + std::string Expected = (getLangOpts().OpenMP >= 50 && !DepModifier) ? "depend modifier(iterator) or " : ""; Diag(DepLoc, diag::err_omp_unexpected_clause_value) @@ -21152,7 +21240,7 @@ Sema::ActOnOpenMPDependClause(const OMPDependClause::DependDataTy &Data, if (DepKind == OMPC_DEPEND_sink || DepKind == OMPC_DEPEND_source) { DoacrossDataInfoTy VarOffset = ProcessOpenMPDoacrossClauseCommon( - *this, DepKind == OMPC_DEPEND_source, VarList, DSAStack, EndLoc); + SemaRef, DepKind == OMPC_DEPEND_source, VarList, DSAStack, EndLoc); Vars = VarOffset.Vars; OpsOffs = VarOffset.OpsOffs; TotalDepCount = VarOffset.TotalDepCount; @@ -21168,9 +21256,9 @@ Sema::ActOnOpenMPDependClause(const OMPDependClause::DependDataTy &Data, SourceLocation ELoc = RefExpr->getExprLoc(); Expr *SimpleExpr = RefExpr->IgnoreParenCasts(); if (DepKind != OMPC_DEPEND_sink && DepKind != OMPC_DEPEND_source) { - bool OMPDependTFound = LangOpts.OpenMP >= 50; + bool OMPDependTFound = getLangOpts().OpenMP >= 50; if (OMPDependTFound) - OMPDependTFound = findOMPDependT(*this, StartLoc, DSAStack, + OMPDependTFound = findOMPDependT(SemaRef, StartLoc, DSAStack, DepKind == 
OMPC_DEPEND_depobj); if (DepKind == OMPC_DEPEND_depobj) { // OpenMP 5.0, 2.17.11 depend Clause, Restrictions, C/C++ @@ -21180,8 +21268,8 @@ Sema::ActOnOpenMPDependClause(const OMPDependClause::DependDataTy &Data, !RefExpr->isInstantiationDependent() && !RefExpr->containsUnexpandedParameterPack() && (OMPDependTFound && - !Context.hasSameUnqualifiedType(DSAStack->getOMPDependT(), - RefExpr->getType()))) { + !getASTContext().hasSameUnqualifiedType( + DSAStack->getOMPDependT(), RefExpr->getType()))) { Diag(ELoc, diag::err_omp_expected_omp_depend_t_lvalue) << 0 << RefExpr->getType() << RefExpr->getSourceRange(); continue; @@ -21212,7 +21300,7 @@ Sema::ActOnOpenMPDependClause(const OMPDependClause::DependDataTy &Data, const Expr *Length = OASE->getLength(); Expr::EvalResult Result; if (Length && !Length->isValueDependent() && - Length->EvaluateAsInt(Result, Context) && + Length->EvaluateAsInt(Result, getASTContext()) && Result.Val.getInt().isZero()) { Diag(ELoc, diag::err_omp_depend_zero_length_array_section_not_allowed) @@ -21232,8 +21320,9 @@ Sema::ActOnOpenMPDependClause(const OMPDependClause::DependDataTy &Data, (OMPDependTFound && DSAStack->getOMPDependT().getTypePtr() == ExprTy.getTypePtr()))) { Diag(ELoc, diag::err_omp_expected_addressable_lvalue_or_array_item) - << (LangOpts.OpenMP >= 50 ? 1 : 0) - << (LangOpts.OpenMP >= 50 ? 1 : 0) << RefExpr->getSourceRange(); + << (getLangOpts().OpenMP >= 50 ? 1 : 0) + << (getLangOpts().OpenMP >= 50 ? 1 : 0) + << RefExpr->getSourceRange(); continue; } @@ -21245,22 +21334,24 @@ Sema::ActOnOpenMPDependClause(const OMPDependClause::DependDataTy &Data, ->isPointerType() && !ASE->getBase()->getType().getNonReferenceType()->isArrayType()) { Diag(ELoc, diag::err_omp_expected_addressable_lvalue_or_array_item) - << (LangOpts.OpenMP >= 50 ? 1 : 0) - << (LangOpts.OpenMP >= 50 ? 1 : 0) << RefExpr->getSourceRange(); + << (getLangOpts().OpenMP >= 50 ? 1 : 0) + << (getLangOpts().OpenMP >= 50 ? 
1 : 0) + << RefExpr->getSourceRange(); continue; } ExprResult Res; { - Sema::TentativeAnalysisScope Trap(*this); - Res = CreateBuiltinUnaryOp(ELoc, UO_AddrOf, - RefExpr->IgnoreParenImpCasts()); + Sema::TentativeAnalysisScope Trap(SemaRef); + Res = SemaRef.CreateBuiltinUnaryOp(ELoc, UO_AddrOf, + RefExpr->IgnoreParenImpCasts()); } if (!Res.isUsable() && !isa(SimpleExpr) && !isa(SimpleExpr)) { Diag(ELoc, diag::err_omp_expected_addressable_lvalue_or_array_item) - << (LangOpts.OpenMP >= 50 ? 1 : 0) - << (LangOpts.OpenMP >= 50 ? 1 : 0) << RefExpr->getSourceRange(); + << (getLangOpts().OpenMP >= 50 ? 1 : 0) + << (getLangOpts().OpenMP >= 50 ? 1 : 0) + << RefExpr->getSourceRange(); continue; } } @@ -21275,7 +21366,7 @@ Sema::ActOnOpenMPDependClause(const OMPDependClause::DependDataTy &Data, return nullptr; auto *C = OMPDependClause::Create( - Context, StartLoc, LParenLoc, EndLoc, + getASTContext(), StartLoc, LParenLoc, EndLoc, {DepKind, DepLoc, Data.ColonLoc, Data.OmpAllMemoryLoc}, DepModifier, Vars, TotalDepCount.getZExtValue()); if ((DepKind == OMPC_DEPEND_sink || DepKind == OMPC_DEPEND_source) && @@ -21284,12 +21375,11 @@ Sema::ActOnOpenMPDependClause(const OMPDependClause::DependDataTy &Data, return C; } -OMPClause *Sema::ActOnOpenMPDeviceClause(OpenMPDeviceClauseModifier Modifier, - Expr *Device, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation ModifierLoc, - SourceLocation EndLoc) { - assert((ModifierLoc.isInvalid() || LangOpts.OpenMP >= 50) && +OMPClause *SemaOpenMP::ActOnOpenMPDeviceClause( + OpenMPDeviceClauseModifier Modifier, Expr *Device, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation ModifierLoc, + SourceLocation EndLoc) { + assert((ModifierLoc.isInvalid() || getLangOpts().OpenMP >= 50) && "Unexpected device modifier in OpenMP < 50."); bool ErrorFound = false; @@ -21306,7 +21396,7 @@ OMPClause *Sema::ActOnOpenMPDeviceClause(OpenMPDeviceClauseModifier Modifier, // OpenMP [2.9.1, Restrictions] // The device expression 
must evaluate to a non-negative integer value. - ErrorFound = !isNonNegativeIntegerValue(ValExpr, *this, OMPC_device, + ErrorFound = !isNonNegativeIntegerValue(ValExpr, SemaRef, OMPC_device, /*StrictlyPositive=*/false) || ErrorFound; if (ErrorFound) @@ -21317,7 +21407,7 @@ OMPClause *Sema::ActOnOpenMPDeviceClause(OpenMPDeviceClauseModifier Modifier, // the reverse_offload clause must be specified. if (Modifier == OMPC_DEVICE_ancestor) { if (!DSAStack->hasRequiresDeclWithClause()) { - targetDiag( + SemaRef.targetDiag( StartLoc, diag::err_omp_device_ancestor_without_requires_reverse_offload); ErrorFound = true; @@ -21326,15 +21416,16 @@ OMPClause *Sema::ActOnOpenMPDeviceClause(OpenMPDeviceClauseModifier Modifier, OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); OpenMPDirectiveKind CaptureRegion = - getOpenMPCaptureRegionForClause(DKind, OMPC_device, LangOpts.OpenMP); - if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { - ValExpr = MakeFullExpr(ValExpr).get(); + getOpenMPCaptureRegionForClause(DKind, OMPC_device, getLangOpts().OpenMP); + if (CaptureRegion != OMPD_unknown && + !SemaRef.CurContext->isDependentContext()) { + ValExpr = SemaRef.MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; - ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); - HelperValStmt = buildPreInits(Context, Captures); + ValExpr = tryBuildCapture(SemaRef, ValExpr, Captures).get(); + HelperValStmt = buildPreInits(getASTContext(), Captures); } - return new (Context) + return new (getASTContext()) OMPDeviceClause(Modifier, ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, ModifierLoc, EndLoc); } @@ -22527,7 +22618,7 @@ static void checkMappableExpressionList( } } -OMPClause *Sema::ActOnOpenMPMapClause( +OMPClause *SemaOpenMP::ActOnOpenMPMapClause( Expr *IteratorModifier, ArrayRef MapTypeModifiers, ArrayRef MapTypeModifiersLoc, CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo &MapperId, @@ -22562,7 +22653,7 @@ OMPClause 
*Sema::ActOnOpenMPMapClause( } MappableVarListInfo MVLI(VarList); - checkMappableExpressionList(*this, DSAStack, OMPC_map, MVLI, Locs.StartLoc, + checkMappableExpressionList(SemaRef, DSAStack, OMPC_map, MVLI, Locs.StartLoc, MapperIdScopeSpec, MapperId, UnresolvedMappers, MapType, Modifiers, IsMapTypeImplicit, NoDiagnose); @@ -22570,17 +22661,17 @@ OMPClause *Sema::ActOnOpenMPMapClause( // We need to produce a map clause even if we don't have variables so that // other diagnostics related with non-existing map clauses are accurate. return OMPMapClause::Create( - Context, Locs, MVLI.ProcessedVarList, MVLI.VarBaseDeclarations, + getASTContext(), Locs, MVLI.ProcessedVarList, MVLI.VarBaseDeclarations, MVLI.VarComponents, MVLI.UDMapperList, IteratorModifier, Modifiers, - ModifiersLoc, MapperIdScopeSpec.getWithLocInContext(Context), MapperId, - MapType, IsMapTypeImplicit, MapLoc); + ModifiersLoc, MapperIdScopeSpec.getWithLocInContext(getASTContext()), + MapperId, MapType, IsMapTypeImplicit, MapLoc); } -QualType Sema::ActOnOpenMPDeclareReductionType(SourceLocation TyLoc, - TypeResult ParsedType) { +QualType SemaOpenMP::ActOnOpenMPDeclareReductionType(SourceLocation TyLoc, + TypeResult ParsedType) { assert(ParsedType.isUsable()); - QualType ReductionType = GetTypeFromParser(ParsedType.get()); + QualType ReductionType = SemaRef.GetTypeFromParser(ParsedType.get()); if (ReductionType.isNull()) return QualType(); @@ -22608,15 +22699,17 @@ QualType Sema::ActOnOpenMPDeclareReductionType(SourceLocation TyLoc, return ReductionType; } -Sema::DeclGroupPtrTy Sema::ActOnOpenMPDeclareReductionDirectiveStart( +SemaOpenMP::DeclGroupPtrTy +SemaOpenMP::ActOnOpenMPDeclareReductionDirectiveStart( Scope *S, DeclContext *DC, DeclarationName Name, ArrayRef> ReductionTypes, AccessSpecifier AS, Decl *PrevDeclInScope) { SmallVector Decls; Decls.reserve(ReductionTypes.size()); - LookupResult Lookup(*this, Name, SourceLocation(), LookupOMPReductionName, - forRedeclarationInCurContext()); + 
LookupResult Lookup(SemaRef, Name, SourceLocation(), + Sema::LookupOMPReductionName, + SemaRef.forRedeclarationInCurContext()); // [OpenMP 4.0], 2.15 declare reduction Directive, Restrictions // A reduction-identifier may not be re-declared in the current scope for the // same type or for a type that is compatible according to the base language @@ -22627,12 +22720,12 @@ Sema::DeclGroupPtrTy Sema::ActOnOpenMPDeclareReductionDirectiveStart( if (S != nullptr) { // Find previous declaration with the same name not referenced in other // declarations. - FunctionScopeInfo *ParentFn = getEnclosingFunction(); + FunctionScopeInfo *ParentFn = SemaRef.getEnclosingFunction(); InCompoundScope = (ParentFn != nullptr) && !ParentFn->CompoundScopes.empty(); - LookupName(Lookup, S); - FilterLookupForScope(Lookup, DC, S, /*ConsiderLinkage=*/false, - /*AllowInlineNamespace=*/false); + SemaRef.LookupName(Lookup, S); + SemaRef.FilterLookupForScope(Lookup, DC, S, /*ConsiderLinkage=*/false, + /*AllowInlineNamespace=*/false); llvm::DenseMap UsedAsPrevious; LookupResult::Filter Filter = Lookup.makeFilter(); while (Filter.hasNext()) { @@ -22675,8 +22768,8 @@ Sema::DeclGroupPtrTy Sema::ActOnOpenMPDeclareReductionDirectiveStart( Invalid = true; } PreviousRedeclTypes[TyData.first.getCanonicalType()] = TyData.second; - auto *DRD = OMPDeclareReductionDecl::Create(Context, DC, TyData.second, - Name, TyData.first, PrevDRD); + auto *DRD = OMPDeclareReductionDecl::Create( + getASTContext(), DC, TyData.second, Name, TyData.first, PrevDRD); DC->addDecl(DRD); DRD->setAccess(AS); Decls.push_back(DRD); @@ -22687,24 +22780,24 @@ Sema::DeclGroupPtrTy Sema::ActOnOpenMPDeclareReductionDirectiveStart( } return DeclGroupPtrTy::make( - DeclGroupRef::Create(Context, Decls.begin(), Decls.size())); + DeclGroupRef::Create(getASTContext(), Decls.begin(), Decls.size())); } -void Sema::ActOnOpenMPDeclareReductionCombinerStart(Scope *S, Decl *D) { +void SemaOpenMP::ActOnOpenMPDeclareReductionCombinerStart(Scope *S, Decl 
*D) { auto *DRD = cast(D); // Enter new function scope. - PushFunctionScope(); - setFunctionHasBranchProtectedScope(); - getCurFunction()->setHasOMPDeclareReductionCombiner(); + SemaRef.PushFunctionScope(); + SemaRef.setFunctionHasBranchProtectedScope(); + SemaRef.getCurFunction()->setHasOMPDeclareReductionCombiner(); if (S != nullptr) - PushDeclContext(S, DRD); + SemaRef.PushDeclContext(S, DRD); else - CurContext = DRD; + SemaRef.CurContext = DRD; - PushExpressionEvaluationContext( - ExpressionEvaluationContext::PotentiallyEvaluated); + SemaRef.PushExpressionEvaluationContext( + Sema::ExpressionEvaluationContext::PotentiallyEvaluated); QualType ReductionType = DRD->getType(); // Create 'T* omp_parm;T omp_in;'. All references to 'omp_in' will @@ -22714,7 +22807,7 @@ void Sema::ActOnOpenMPDeclareReductionCombinerStart(Scope *S, Decl *D) { // pointers. // Create 'T omp_in;' variable. VarDecl *OmpInParm = - buildVarDecl(*this, D->getLocation(), ReductionType, "omp_in"); + buildVarDecl(SemaRef, D->getLocation(), ReductionType, "omp_in"); // Create 'T* omp_parm;T omp_out;'. All references to 'omp_out' will // be replaced by '*omp_parm' during codegen. This required because 'omp_out' // uses semantics of argument handles by value, but it should be passed by @@ -22722,28 +22815,29 @@ void Sema::ActOnOpenMPDeclareReductionCombinerStart(Scope *S, Decl *D) { // pointers. // Create 'T omp_out;' variable. 
VarDecl *OmpOutParm = - buildVarDecl(*this, D->getLocation(), ReductionType, "omp_out"); + buildVarDecl(SemaRef, D->getLocation(), ReductionType, "omp_out"); if (S != nullptr) { - PushOnScopeChains(OmpInParm, S); - PushOnScopeChains(OmpOutParm, S); + SemaRef.PushOnScopeChains(OmpInParm, S); + SemaRef.PushOnScopeChains(OmpOutParm, S); } else { DRD->addDecl(OmpInParm); DRD->addDecl(OmpOutParm); } Expr *InE = - ::buildDeclRefExpr(*this, OmpInParm, ReductionType, D->getLocation()); + ::buildDeclRefExpr(SemaRef, OmpInParm, ReductionType, D->getLocation()); Expr *OutE = - ::buildDeclRefExpr(*this, OmpOutParm, ReductionType, D->getLocation()); + ::buildDeclRefExpr(SemaRef, OmpOutParm, ReductionType, D->getLocation()); DRD->setCombinerData(InE, OutE); } -void Sema::ActOnOpenMPDeclareReductionCombinerEnd(Decl *D, Expr *Combiner) { +void SemaOpenMP::ActOnOpenMPDeclareReductionCombinerEnd(Decl *D, + Expr *Combiner) { auto *DRD = cast(D); - DiscardCleanupsInEvaluationContext(); - PopExpressionEvaluationContext(); + SemaRef.DiscardCleanupsInEvaluationContext(); + SemaRef.PopExpressionEvaluationContext(); - PopDeclContext(); - PopFunctionScopeInfo(); + SemaRef.PopDeclContext(); + SemaRef.PopFunctionScopeInfo(); if (Combiner != nullptr) DRD->setCombiner(Combiner); @@ -22751,20 +22845,21 @@ void Sema::ActOnOpenMPDeclareReductionCombinerEnd(Decl *D, Expr *Combiner) { DRD->setInvalidDecl(); } -VarDecl *Sema::ActOnOpenMPDeclareReductionInitializerStart(Scope *S, Decl *D) { +VarDecl *SemaOpenMP::ActOnOpenMPDeclareReductionInitializerStart(Scope *S, + Decl *D) { auto *DRD = cast(D); // Enter new function scope. 
- PushFunctionScope(); - setFunctionHasBranchProtectedScope(); + SemaRef.PushFunctionScope(); + SemaRef.setFunctionHasBranchProtectedScope(); if (S != nullptr) - PushDeclContext(S, DRD); + SemaRef.PushDeclContext(S, DRD); else - CurContext = DRD; + SemaRef.CurContext = DRD; - PushExpressionEvaluationContext( - ExpressionEvaluationContext::PotentiallyEvaluated); + SemaRef.PushExpressionEvaluationContext( + Sema::ExpressionEvaluationContext::PotentiallyEvaluated); QualType ReductionType = DRD->getType(); // Create 'T* omp_parm;T omp_priv;'. All references to 'omp_priv' will @@ -22774,7 +22869,7 @@ VarDecl *Sema::ActOnOpenMPDeclareReductionInitializerStart(Scope *S, Decl *D) { // pointers. // Create 'T omp_priv;' variable. VarDecl *OmpPrivParm = - buildVarDecl(*this, D->getLocation(), ReductionType, "omp_priv"); + buildVarDecl(SemaRef, D->getLocation(), ReductionType, "omp_priv"); // Create 'T* omp_parm;T omp_orig;'. All references to 'omp_orig' will // be replaced by '*omp_parm' during codegen. This required because 'omp_orig' // uses semantics of argument handles by value, but it should be passed by @@ -22782,30 +22877,30 @@ VarDecl *Sema::ActOnOpenMPDeclareReductionInitializerStart(Scope *S, Decl *D) { // pointers. // Create 'T omp_orig;' variable. 
VarDecl *OmpOrigParm = - buildVarDecl(*this, D->getLocation(), ReductionType, "omp_orig"); + buildVarDecl(SemaRef, D->getLocation(), ReductionType, "omp_orig"); if (S != nullptr) { - PushOnScopeChains(OmpPrivParm, S); - PushOnScopeChains(OmpOrigParm, S); + SemaRef.PushOnScopeChains(OmpPrivParm, S); + SemaRef.PushOnScopeChains(OmpOrigParm, S); } else { DRD->addDecl(OmpPrivParm); DRD->addDecl(OmpOrigParm); } Expr *OrigE = - ::buildDeclRefExpr(*this, OmpOrigParm, ReductionType, D->getLocation()); + ::buildDeclRefExpr(SemaRef, OmpOrigParm, ReductionType, D->getLocation()); Expr *PrivE = - ::buildDeclRefExpr(*this, OmpPrivParm, ReductionType, D->getLocation()); + ::buildDeclRefExpr(SemaRef, OmpPrivParm, ReductionType, D->getLocation()); DRD->setInitializerData(OrigE, PrivE); return OmpPrivParm; } -void Sema::ActOnOpenMPDeclareReductionInitializerEnd(Decl *D, Expr *Initializer, - VarDecl *OmpPrivParm) { +void SemaOpenMP::ActOnOpenMPDeclareReductionInitializerEnd( + Decl *D, Expr *Initializer, VarDecl *OmpPrivParm) { auto *DRD = cast(D); - DiscardCleanupsInEvaluationContext(); - PopExpressionEvaluationContext(); + SemaRef.DiscardCleanupsInEvaluationContext(); + SemaRef.PopExpressionEvaluationContext(); - PopDeclContext(); - PopFunctionScopeInfo(); + SemaRef.PopDeclContext(); + SemaRef.PopFunctionScopeInfo(); if (Initializer != nullptr) { DRD->setInitializer(Initializer, OMPDeclareReductionInitKind::Call); @@ -22819,13 +22914,13 @@ void Sema::ActOnOpenMPDeclareReductionInitializerEnd(Decl *D, Expr *Initializer, } } -Sema::DeclGroupPtrTy Sema::ActOnOpenMPDeclareReductionDirectiveEnd( +SemaOpenMP::DeclGroupPtrTy SemaOpenMP::ActOnOpenMPDeclareReductionDirectiveEnd( Scope *S, DeclGroupPtrTy DeclReductions, bool IsValid) { for (Decl *D : DeclReductions.get()) { if (IsValid) { if (S) - PushOnScopeChains(cast(D), S, - /*AddToContext=*/false); + SemaRef.PushOnScopeChains(cast(D), S, + /*AddToContext=*/false); } else { D->setInvalidDecl(); } @@ -22833,25 +22928,26 @@ 
Sema::DeclGroupPtrTy Sema::ActOnOpenMPDeclareReductionDirectiveEnd( return DeclReductions; } -TypeResult Sema::ActOnOpenMPDeclareMapperVarDecl(Scope *S, Declarator &D) { - TypeSourceInfo *TInfo = GetTypeForDeclarator(D); +TypeResult SemaOpenMP::ActOnOpenMPDeclareMapperVarDecl(Scope *S, + Declarator &D) { + TypeSourceInfo *TInfo = SemaRef.GetTypeForDeclarator(D); QualType T = TInfo->getType(); if (D.isInvalidType()) return true; if (getLangOpts().CPlusPlus) { // Check that there are no default arguments (C++ only). - CheckExtraCXXDefaultArguments(D); + SemaRef.CheckExtraCXXDefaultArguments(D); } - return CreateParsedType(T, TInfo); + return SemaRef.CreateParsedType(T, TInfo); } -QualType Sema::ActOnOpenMPDeclareMapperType(SourceLocation TyLoc, - TypeResult ParsedType) { +QualType SemaOpenMP::ActOnOpenMPDeclareMapperType(SourceLocation TyLoc, + TypeResult ParsedType) { assert(ParsedType.isUsable() && "Expect usable parsed mapper type"); - QualType MapperType = GetTypeFromParser(ParsedType.get()); + QualType MapperType = SemaRef.GetTypeFromParser(ParsedType.get()); assert(!MapperType.isNull() && "Expect valid mapper type"); // [OpenMP 5.0], 2.19.7.3 declare mapper Directive, Restrictions @@ -22863,12 +22959,13 @@ QualType Sema::ActOnOpenMPDeclareMapperType(SourceLocation TyLoc, return MapperType; } -Sema::DeclGroupPtrTy Sema::ActOnOpenMPDeclareMapperDirective( +SemaOpenMP::DeclGroupPtrTy SemaOpenMP::ActOnOpenMPDeclareMapperDirective( Scope *S, DeclContext *DC, DeclarationName Name, QualType MapperType, SourceLocation StartLoc, DeclarationName VN, AccessSpecifier AS, Expr *MapperVarRef, ArrayRef Clauses, Decl *PrevDeclInScope) { - LookupResult Lookup(*this, Name, SourceLocation(), LookupOMPMapperName, - forRedeclarationInCurContext()); + LookupResult Lookup(SemaRef, Name, SourceLocation(), + Sema::LookupOMPMapperName, + SemaRef.forRedeclarationInCurContext()); // [OpenMP 5.0], 2.19.7.3 declare mapper Directive, Restrictions // A mapper-identifier may not be redeclared 
in the current scope for the // same type or for a type that is compatible according to the base language @@ -22879,12 +22976,12 @@ Sema::DeclGroupPtrTy Sema::ActOnOpenMPDeclareMapperDirective( if (S != nullptr) { // Find previous declaration with the same name not referenced in other // declarations. - FunctionScopeInfo *ParentFn = getEnclosingFunction(); + FunctionScopeInfo *ParentFn = SemaRef.getEnclosingFunction(); InCompoundScope = (ParentFn != nullptr) && !ParentFn->CompoundScopes.empty(); - LookupName(Lookup, S); - FilterLookupForScope(Lookup, DC, S, /*ConsiderLinkage=*/false, - /*AllowInlineNamespace=*/false); + SemaRef.LookupName(Lookup, S); + SemaRef.FilterLookupForScope(Lookup, DC, S, /*ConsiderLinkage=*/false, + /*AllowInlineNamespace=*/false); llvm::DenseMap UsedAsPrevious; LookupResult::Filter Filter = Lookup.makeFilter(); while (Filter.hasNext()) { @@ -22929,13 +23026,14 @@ Sema::DeclGroupPtrTy Sema::ActOnOpenMPDeclareMapperDirective( // mappers. SmallVector ClausesWithImplicit(Clauses.begin(), Clauses.end()); - if (LangOpts.OpenMP >= 50) - processImplicitMapsWithDefaultMappers(*this, DSAStack, ClausesWithImplicit); - auto *DMD = - OMPDeclareMapperDecl::Create(Context, DC, StartLoc, Name, MapperType, VN, - ClausesWithImplicit, PrevDMD); + if (getLangOpts().OpenMP >= 50) + processImplicitMapsWithDefaultMappers(SemaRef, DSAStack, + ClausesWithImplicit); + auto *DMD = OMPDeclareMapperDecl::Create(getASTContext(), DC, StartLoc, Name, + MapperType, VN, ClausesWithImplicit, + PrevDMD); if (S) - PushOnScopeChains(DMD, S); + SemaRef.PushOnScopeChains(DMD, S); else DC->addDecl(DMD); DMD->setAccess(AS); @@ -22951,105 +23049,106 @@ Sema::DeclGroupPtrTy Sema::ActOnOpenMPDeclareMapperDirective( return DeclGroupPtrTy::make(DeclGroupRef(DMD)); } -ExprResult -Sema::ActOnOpenMPDeclareMapperDirectiveVarDecl(Scope *S, QualType MapperType, - SourceLocation StartLoc, - DeclarationName VN) { +ExprResult SemaOpenMP::ActOnOpenMPDeclareMapperDirectiveVarDecl( + Scope *S, 
QualType MapperType, SourceLocation StartLoc, + DeclarationName VN) { TypeSourceInfo *TInfo = - Context.getTrivialTypeSourceInfo(MapperType, StartLoc); - auto *VD = VarDecl::Create(Context, Context.getTranslationUnitDecl(), - StartLoc, StartLoc, VN.getAsIdentifierInfo(), - MapperType, TInfo, SC_None); + getASTContext().getTrivialTypeSourceInfo(MapperType, StartLoc); + auto *VD = VarDecl::Create( + getASTContext(), getASTContext().getTranslationUnitDecl(), StartLoc, + StartLoc, VN.getAsIdentifierInfo(), MapperType, TInfo, SC_None); if (S) - PushOnScopeChains(VD, S, /*AddToContext=*/false); - Expr *E = buildDeclRefExpr(*this, VD, MapperType, StartLoc); + SemaRef.PushOnScopeChains(VD, S, /*AddToContext=*/false); + Expr *E = buildDeclRefExpr(SemaRef, VD, MapperType, StartLoc); DSAStack->addDeclareMapperVarRef(E); return E; } -void Sema::ActOnOpenMPIteratorVarDecl(VarDecl *VD) { +void SemaOpenMP::ActOnOpenMPIteratorVarDecl(VarDecl *VD) { if (DSAStack->getDeclareMapperVarRef()) DSAStack->addIteratorVarDecl(VD); } -bool Sema::isOpenMPDeclareMapperVarDeclAllowed(const VarDecl *VD) const { - assert(LangOpts.OpenMP && "Expected OpenMP mode."); +bool SemaOpenMP::isOpenMPDeclareMapperVarDeclAllowed(const VarDecl *VD) const { + assert(getLangOpts().OpenMP && "Expected OpenMP mode."); const Expr *Ref = DSAStack->getDeclareMapperVarRef(); if (const auto *DRE = cast_or_null(Ref)) { if (VD->getCanonicalDecl() == DRE->getDecl()->getCanonicalDecl()) return true; - if (VD->isUsableInConstantExpressions(Context)) + if (VD->isUsableInConstantExpressions(getASTContext())) return true; - if (LangOpts.OpenMP >= 52 && DSAStack->isIteratorVarDecl(VD)) + if (getLangOpts().OpenMP >= 52 && DSAStack->isIteratorVarDecl(VD)) return true; return false; } return true; } -const ValueDecl *Sema::getOpenMPDeclareMapperVarName() const { - assert(LangOpts.OpenMP && "Expected OpenMP mode."); +const ValueDecl *SemaOpenMP::getOpenMPDeclareMapperVarName() const { + assert(getLangOpts().OpenMP && "Expected 
OpenMP mode."); return cast(DSAStack->getDeclareMapperVarRef())->getDecl(); } -OMPClause *Sema::ActOnOpenMPNumTeamsClause(Expr *NumTeams, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPNumTeamsClause(Expr *NumTeams, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { Expr *ValExpr = NumTeams; Stmt *HelperValStmt = nullptr; // OpenMP [teams Constrcut, Restrictions] // The num_teams expression must evaluate to a positive integer value. - if (!isNonNegativeIntegerValue(ValExpr, *this, OMPC_num_teams, + if (!isNonNegativeIntegerValue(ValExpr, SemaRef, OMPC_num_teams, /*StrictlyPositive=*/true)) return nullptr; OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); - OpenMPDirectiveKind CaptureRegion = - getOpenMPCaptureRegionForClause(DKind, OMPC_num_teams, LangOpts.OpenMP); - if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { - ValExpr = MakeFullExpr(ValExpr).get(); + OpenMPDirectiveKind CaptureRegion = getOpenMPCaptureRegionForClause( + DKind, OMPC_num_teams, getLangOpts().OpenMP); + if (CaptureRegion != OMPD_unknown && + !SemaRef.CurContext->isDependentContext()) { + ValExpr = SemaRef.MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; - ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); - HelperValStmt = buildPreInits(Context, Captures); + ValExpr = tryBuildCapture(SemaRef, ValExpr, Captures).get(); + HelperValStmt = buildPreInits(getASTContext(), Captures); } - return new (Context) OMPNumTeamsClause(ValExpr, HelperValStmt, CaptureRegion, - StartLoc, LParenLoc, EndLoc); + return new (getASTContext()) OMPNumTeamsClause( + ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPThreadLimitClause(Expr *ThreadLimit, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPThreadLimitClause(Expr *ThreadLimit, + SourceLocation 
StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { Expr *ValExpr = ThreadLimit; Stmt *HelperValStmt = nullptr; // OpenMP [teams Constrcut, Restrictions] // The thread_limit expression must evaluate to a positive integer value. - if (!isNonNegativeIntegerValue(ValExpr, *this, OMPC_thread_limit, + if (!isNonNegativeIntegerValue(ValExpr, SemaRef, OMPC_thread_limit, /*StrictlyPositive=*/true)) return nullptr; OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); OpenMPDirectiveKind CaptureRegion = getOpenMPCaptureRegionForClause( - DKind, OMPC_thread_limit, LangOpts.OpenMP); - if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { - ValExpr = MakeFullExpr(ValExpr).get(); + DKind, OMPC_thread_limit, getLangOpts().OpenMP); + if (CaptureRegion != OMPD_unknown && + !SemaRef.CurContext->isDependentContext()) { + ValExpr = SemaRef.MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; - ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); - HelperValStmt = buildPreInits(Context, Captures); + ValExpr = tryBuildCapture(SemaRef, ValExpr, Captures).get(); + HelperValStmt = buildPreInits(getASTContext(), Captures); } - return new (Context) OMPThreadLimitClause( + return new (getASTContext()) OMPThreadLimitClause( ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPPriorityClause(Expr *Priority, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPPriorityClause(Expr *Priority, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { Expr *ValExpr = Priority; Stmt *HelperValStmt = nullptr; OpenMPDirectiveKind CaptureRegion = OMPD_unknown; @@ -23057,20 +23156,20 @@ OMPClause *Sema::ActOnOpenMPPriorityClause(Expr *Priority, // OpenMP [2.9.1, task Constrcut] // The priority-value is a non-negative numerical scalar expression. 
if (!isNonNegativeIntegerValue( - ValExpr, *this, OMPC_priority, + ValExpr, SemaRef, OMPC_priority, /*StrictlyPositive=*/false, /*BuildCapture=*/true, DSAStack->getCurrentDirective(), &CaptureRegion, &HelperValStmt)) return nullptr; - return new (Context) OMPPriorityClause(ValExpr, HelperValStmt, CaptureRegion, - StartLoc, LParenLoc, EndLoc); + return new (getASTContext()) OMPPriorityClause( + ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPGrainsizeClause( +OMPClause *SemaOpenMP::ActOnOpenMPGrainsizeClause( OpenMPGrainsizeClauseModifier Modifier, Expr *Grainsize, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation ModifierLoc, SourceLocation EndLoc) { - assert((ModifierLoc.isInvalid() || LangOpts.OpenMP >= 51) && + assert((ModifierLoc.isInvalid() || getLangOpts().OpenMP >= 51) && "Unexpected grainsize modifier in OpenMP < 51."); if (ModifierLoc.isValid() && Modifier == OMPC_GRAINSIZE_unknown) { @@ -23088,23 +23187,23 @@ OMPClause *Sema::ActOnOpenMPGrainsizeClause( // OpenMP [2.9.2, taskloop Constrcut] // The parameter of the grainsize clause must be a positive integer // expression. 
- if (!isNonNegativeIntegerValue(ValExpr, *this, OMPC_grainsize, + if (!isNonNegativeIntegerValue(ValExpr, SemaRef, OMPC_grainsize, /*StrictlyPositive=*/true, /*BuildCapture=*/true, DSAStack->getCurrentDirective(), &CaptureRegion, &HelperValStmt)) return nullptr; - return new (Context) + return new (getASTContext()) OMPGrainsizeClause(Modifier, ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, ModifierLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPNumTasksClause( +OMPClause *SemaOpenMP::ActOnOpenMPNumTasksClause( OpenMPNumTasksClauseModifier Modifier, Expr *NumTasks, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation ModifierLoc, SourceLocation EndLoc) { - assert((ModifierLoc.isInvalid() || LangOpts.OpenMP >= 51) && + assert((ModifierLoc.isInvalid() || getLangOpts().OpenMP >= 51) && "Unexpected num_tasks modifier in OpenMP < 51."); if (ModifierLoc.isValid() && Modifier == OMPC_NUMTASKS_unknown) { @@ -23123,19 +23222,20 @@ OMPClause *Sema::ActOnOpenMPNumTasksClause( // The parameter of the num_tasks clause must be a positive integer // expression. if (!isNonNegativeIntegerValue( - ValExpr, *this, OMPC_num_tasks, + ValExpr, SemaRef, OMPC_num_tasks, /*StrictlyPositive=*/true, /*BuildCapture=*/true, DSAStack->getCurrentDirective(), &CaptureRegion, &HelperValStmt)) return nullptr; - return new (Context) + return new (getASTContext()) OMPNumTasksClause(Modifier, ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, ModifierLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPHintClause(Expr *Hint, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPHintClause(Expr *Hint, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { // OpenMP [2.13.2, critical construct, Description] // ... where hint-expression is an integer constant expression that evaluates // to a valid lock hint. 
@@ -23143,7 +23243,7 @@ OMPClause *Sema::ActOnOpenMPHintClause(Expr *Hint, SourceLocation StartLoc, VerifyPositiveIntegerConstantInClause(Hint, OMPC_hint, false); if (HintExpr.isInvalid()) return nullptr; - return new (Context) + return new (getASTContext()) OMPHintClause(HintExpr.get(), StartLoc, LParenLoc, EndLoc); } @@ -23163,13 +23263,14 @@ static bool findOMPEventHandleT(Sema &S, SourceLocation Loc, return true; } -OMPClause *Sema::ActOnOpenMPDetachClause(Expr *Evt, SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPDetachClause(Expr *Evt, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { if (!Evt->isValueDependent() && !Evt->isTypeDependent() && !Evt->isInstantiationDependent() && !Evt->containsUnexpandedParameterPack()) { - if (!findOMPEventHandleT(*this, Evt->getExprLoc(), DSAStack)) + if (!findOMPEventHandleT(SemaRef, Evt->getExprLoc(), DSAStack)) return nullptr; // OpenMP 5.0, 2.10.1 task Construct. // event-handle is a variable of the omp_event_handle_t type. 
@@ -23185,9 +23286,9 @@ OMPClause *Sema::ActOnOpenMPDetachClause(Expr *Evt, SourceLocation StartLoc, << "omp_event_handle_t" << 0 << Evt->getSourceRange(); return nullptr; } - if (!Context.hasSameUnqualifiedType(DSAStack->getOMPEventHandleT(), - VD->getType()) || - VD->getType().isConstant(Context)) { + if (!getASTContext().hasSameUnqualifiedType(DSAStack->getOMPEventHandleT(), + VD->getType()) || + VD->getType().isConstant(getASTContext())) { Diag(Evt->getExprLoc(), diag::err_omp_var_expected) << "omp_event_handle_t" << 1 << VD->getType() << Evt->getSourceRange(); @@ -23202,15 +23303,16 @@ OMPClause *Sema::ActOnOpenMPDetachClause(Expr *Evt, SourceLocation StartLoc, Diag(Evt->getExprLoc(), diag::err_omp_wrong_dsa) << getOpenMPClauseName(DVar.CKind) << getOpenMPClauseName(OMPC_firstprivate); - reportOriginalDsa(*this, DSAStack, VD, DVar); + reportOriginalDsa(SemaRef, DSAStack, VD, DVar); return nullptr; } } - return new (Context) OMPDetachClause(Evt, StartLoc, LParenLoc, EndLoc); + return new (getASTContext()) + OMPDetachClause(Evt, StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPDistScheduleClause( +OMPClause *SemaOpenMP::ActOnOpenMPDistScheduleClause( OpenMPDistScheduleClauseKind Kind, Expr *ChunkSize, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation KindLoc, SourceLocation CommaLoc, SourceLocation EndLoc) { @@ -23241,7 +23343,7 @@ OMPClause *Sema::ActOnOpenMPDistScheduleClause( // chunk_size must be a loop invariant integer expression with a positive // value. 
if (std::optional Result = - ValExpr->getIntegerConstantExpr(Context)) { + ValExpr->getIntegerConstantExpr(getASTContext())) { if (Result->isSigned() && !Result->isStrictlyPositive()) { Diag(ChunkSizeLoc, diag::err_omp_negative_expression_in_clause) << "dist_schedule" << ChunkSize->getSourceRange(); @@ -23249,22 +23351,22 @@ OMPClause *Sema::ActOnOpenMPDistScheduleClause( } } else if (getOpenMPCaptureRegionForClause( DSAStack->getCurrentDirective(), OMPC_dist_schedule, - LangOpts.OpenMP) != OMPD_unknown && - !CurContext->isDependentContext()) { - ValExpr = MakeFullExpr(ValExpr).get(); + getLangOpts().OpenMP) != OMPD_unknown && + !SemaRef.CurContext->isDependentContext()) { + ValExpr = SemaRef.MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; - ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); - HelperValStmt = buildPreInits(Context, Captures); + ValExpr = tryBuildCapture(SemaRef, ValExpr, Captures).get(); + HelperValStmt = buildPreInits(getASTContext(), Captures); } } } - return new (Context) + return new (getASTContext()) OMPDistScheduleClause(StartLoc, LParenLoc, KindLoc, CommaLoc, EndLoc, Kind, ValExpr, HelperValStmt); } -OMPClause *Sema::ActOnOpenMPDefaultmapClause( +OMPClause *SemaOpenMP::ActOnOpenMPDefaultmapClause( OpenMPDefaultmapClauseModifier M, OpenMPDefaultmapClauseKind Kind, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation MLoc, SourceLocation KindLoc, SourceLocation EndLoc) { @@ -23291,10 +23393,10 @@ OMPClause *Sema::ActOnOpenMPDefaultmapClause( } else { bool isDefaultmapModifier = (M != OMPC_DEFAULTMAP_MODIFIER_unknown); bool isDefaultmapKind = (Kind != OMPC_DEFAULTMAP_unknown) || - (LangOpts.OpenMP >= 50 && KindLoc.isInvalid()); + (getLangOpts().OpenMP >= 50 && KindLoc.isInvalid()); if (!isDefaultmapKind || !isDefaultmapModifier) { StringRef KindValue = "'scalar', 'aggregate', 'pointer'"; - if (LangOpts.OpenMP == 50) { + if (getLangOpts().OpenMP == 50) { StringRef ModifierValue = "'alloc', 'from', 'to', 'tofrom', " 
"'firstprivate', 'none', 'default'"; if (!isDefaultmapKind && isDefaultmapModifier) { @@ -23346,13 +23448,13 @@ OMPClause *Sema::ActOnOpenMPDefaultmapClause( DSAStack->setDefaultDMAAttr(M, Kind, StartLoc); } - return new (Context) + return new (getASTContext()) OMPDefaultmapClause(StartLoc, LParenLoc, MLoc, KindLoc, EndLoc, Kind, M); } -bool Sema::ActOnStartOpenMPDeclareTargetContext( +bool SemaOpenMP::ActOnStartOpenMPDeclareTargetContext( DeclareTargetContextInfo &DTCI) { - DeclContext *CurLexicalContext = getCurLexicalContext(); + DeclContext *CurLexicalContext = SemaRef.getCurLexicalContext(); if (!CurLexicalContext->isFileContext() && !CurLexicalContext->isExternCContext() && !CurLexicalContext->isExternCXXContext() && @@ -23372,20 +23474,20 @@ bool Sema::ActOnStartOpenMPDeclareTargetContext( return true; } -const Sema::DeclareTargetContextInfo -Sema::ActOnOpenMPEndDeclareTargetDirective() { +const SemaOpenMP::DeclareTargetContextInfo +SemaOpenMP::ActOnOpenMPEndDeclareTargetDirective() { assert(!DeclareTargetNesting.empty() && "check isInOpenMPDeclareTargetContext() first!"); return DeclareTargetNesting.pop_back_val(); } -void Sema::ActOnFinishedOpenMPDeclareTargetContext( +void SemaOpenMP::ActOnFinishedOpenMPDeclareTargetContext( DeclareTargetContextInfo &DTCI) { for (auto &It : DTCI.ExplicitlyMapped) ActOnOpenMPDeclareTargetName(It.first, It.second.Loc, It.second.MT, DTCI); } -void Sema::DiagnoseUnterminatedOpenMPDeclareTarget() { +void SemaOpenMP::DiagnoseUnterminatedOpenMPDeclareTarget() { if (DeclareTargetNesting.empty()) return; DeclareTargetContextInfo &DTCI = DeclareTargetNesting.back(); @@ -23393,23 +23495,23 @@ void Sema::DiagnoseUnterminatedOpenMPDeclareTarget() { << getOpenMPDirectiveName(DTCI.Kind); } -NamedDecl *Sema::lookupOpenMPDeclareTargetName(Scope *CurScope, - CXXScopeSpec &ScopeSpec, - const DeclarationNameInfo &Id) { - LookupResult Lookup(*this, Id, LookupOrdinaryName); - LookupParsedName(Lookup, CurScope, &ScopeSpec, true); +NamedDecl 
*SemaOpenMP::lookupOpenMPDeclareTargetName( + Scope *CurScope, CXXScopeSpec &ScopeSpec, const DeclarationNameInfo &Id) { + LookupResult Lookup(SemaRef, Id, Sema::LookupOrdinaryName); + SemaRef.LookupParsedName(Lookup, CurScope, &ScopeSpec, true); if (Lookup.isAmbiguous()) return nullptr; Lookup.suppressDiagnostics(); if (!Lookup.isSingleResult()) { - VarOrFuncDeclFilterCCC CCC(*this); + VarOrFuncDeclFilterCCC CCC(SemaRef); if (TypoCorrection Corrected = - CorrectTypo(Id, LookupOrdinaryName, CurScope, nullptr, CCC, - CTK_ErrorRecovery)) { - diagnoseTypo(Corrected, PDiag(diag::err_undeclared_var_use_suggest) - << Id.getName()); + SemaRef.CorrectTypo(Id, Sema::LookupOrdinaryName, CurScope, nullptr, + CCC, Sema::CTK_ErrorRecovery)) { + SemaRef.diagnoseTypo(Corrected, + SemaRef.PDiag(diag::err_undeclared_var_use_suggest) + << Id.getName()); checkDeclIsAllowedInOpenMPTarget(nullptr, Corrected.getCorrectionDecl()); return nullptr; } @@ -23427,9 +23529,9 @@ NamedDecl *Sema::lookupOpenMPDeclareTargetName(Scope *CurScope, return ND; } -void Sema::ActOnOpenMPDeclareTargetName(NamedDecl *ND, SourceLocation Loc, - OMPDeclareTargetDeclAttr::MapTypeTy MT, - DeclareTargetContextInfo &DTCI) { +void SemaOpenMP::ActOnOpenMPDeclareTargetName( + NamedDecl *ND, SourceLocation Loc, OMPDeclareTargetDeclAttr::MapTypeTy MT, + DeclareTargetContextInfo &DTCI) { assert((isa(ND) || isa(ND) || isa(ND)) && "Expected variable, function or function template."); @@ -23445,7 +23547,7 @@ void Sema::ActOnOpenMPDeclareTargetName(NamedDecl *ND, SourceLocation Loc, } // Diagnose marking after use as it may lead to incorrect diagnosis and // codegen. 
- if (LangOpts.OpenMP >= 50 && + if (getLangOpts().OpenMP >= 50 && (ND->isUsed(/*CheckUsedAttr=*/false) || ND->isReferenced())) Diag(Loc, diag::warn_omp_declare_target_after_first_use); @@ -23484,14 +23586,14 @@ void Sema::ActOnOpenMPDeclareTargetName(NamedDecl *ND, SourceLocation Loc, IsIndirect = true; } auto *A = OMPDeclareTargetDeclAttr::CreateImplicit( - Context, MT, DTCI.DT, IndirectE, IsIndirect, Level, + getASTContext(), MT, DTCI.DT, IndirectE, IsIndirect, Level, SourceRange(Loc, Loc)); ND->addAttr(A); - if (ASTMutationListener *ML = Context.getASTMutationListener()) + if (ASTMutationListener *ML = getASTContext().getASTMutationListener()) ML->DeclarationMarkedOpenMPDeclareTarget(ND, A); checkDeclIsAllowedInOpenMPTarget(nullptr, ND, Loc); if (auto *VD = dyn_cast(ND); - LangOpts.OpenMP && VD && VD->hasAttr() && + getLangOpts().OpenMP && VD && VD->hasAttr() && VD->hasGlobalStorage()) ActOnOpenMPDeclareTargetInitializer(ND); } @@ -23535,8 +23637,8 @@ static bool checkValueDeclInTarget(SourceLocation SL, SourceRange SR, /*FullCheck=*/false); } -void Sema::checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D, - SourceLocation IdLoc) { +void SemaOpenMP::checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D, + SourceLocation IdLoc) { if (!D || D->isInvalidDecl()) return; SourceRange SR = E ? E->getSourceRange() : D->getSourceRange(); @@ -23550,7 +23652,7 @@ void Sema::checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D, // directive. if (DSAStack->isThreadPrivate(VD)) { Diag(SL, diag::err_omp_threadprivate_in_target); - reportOriginalDsa(*this, DSAStack, VD, DSAStack->getTopDSA(VD, false)); + reportOriginalDsa(SemaRef, DSAStack, VD, DSAStack->getTopDSA(VD, false)); return; } } @@ -23569,7 +23671,7 @@ void Sema::checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D, // Problem if any with var declared with incomplete type will be reported // as normal, so no need to check it here. 
if ((E || !VD->getType()->isIncompleteType()) && - !checkValueDeclInTarget(SL, SR, *this, DSAStack, VD)) + !checkValueDeclInTarget(SL, SR, SemaRef, DSAStack, VD)) return; if (!E && isInOpenMPDeclareTargetContext()) { // Checking declaration inside declare target region. @@ -23589,13 +23691,13 @@ void Sema::checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D, IsIndirect = true; } auto *A = OMPDeclareTargetDeclAttr::CreateImplicit( - Context, + getASTContext(), getLangOpts().OpenMP >= 52 ? OMPDeclareTargetDeclAttr::MT_Enter : OMPDeclareTargetDeclAttr::MT_To, DTCI.DT, IndirectE, IsIndirect, Level, SourceRange(DTCI.Loc, DTCI.Loc)); D->addAttr(A); - if (ASTMutationListener *ML = Context.getASTMutationListener()) + if (ASTMutationListener *ML = getASTContext().getASTMutationListener()) ML->DeclarationMarkedOpenMPDeclareTarget(D, A); } return; @@ -23603,7 +23705,7 @@ void Sema::checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D, } if (!E) return; - checkDeclInTargetContext(E->getExprLoc(), E->getSourceRange(), *this, D); + checkDeclInTargetContext(E->getExprLoc(), E->getSourceRange(), SemaRef, D); } /// This class visits every VarDecl that the initializer references and adds @@ -23649,13 +23751,13 @@ class GlobalDeclRefChecker final /// Adding OMPDeclareTargetDeclAttr to variables with static storage /// duration that are referenced in the initializer expression list of /// variables with static storage duration in declare target directive. 
-void Sema::ActOnOpenMPDeclareTargetInitializer(Decl *TargetDecl) { +void SemaOpenMP::ActOnOpenMPDeclareTargetInitializer(Decl *TargetDecl) { GlobalDeclRefChecker Checker; if (isa(TargetDecl)) Checker.declareTargetInitializer(TargetDecl); } -OMPClause *Sema::ActOnOpenMPToClause( +OMPClause *SemaOpenMP::ActOnOpenMPToClause( ArrayRef MotionModifiers, ArrayRef MotionModifiersLoc, CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo &MapperId, @@ -23681,18 +23783,18 @@ OMPClause *Sema::ActOnOpenMPToClause( } MappableVarListInfo MVLI(VarList); - checkMappableExpressionList(*this, DSAStack, OMPC_to, MVLI, Locs.StartLoc, + checkMappableExpressionList(SemaRef, DSAStack, OMPC_to, MVLI, Locs.StartLoc, MapperIdScopeSpec, MapperId, UnresolvedMappers); if (MVLI.ProcessedVarList.empty()) return nullptr; return OMPToClause::Create( - Context, Locs, MVLI.ProcessedVarList, MVLI.VarBaseDeclarations, + getASTContext(), Locs, MVLI.ProcessedVarList, MVLI.VarBaseDeclarations, MVLI.VarComponents, MVLI.UDMapperList, Modifiers, ModifiersLoc, - MapperIdScopeSpec.getWithLocInContext(Context), MapperId); + MapperIdScopeSpec.getWithLocInContext(getASTContext()), MapperId); } -OMPClause *Sema::ActOnOpenMPFromClause( +OMPClause *SemaOpenMP::ActOnOpenMPFromClause( ArrayRef MotionModifiers, ArrayRef MotionModifiersLoc, CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo &MapperId, @@ -23718,19 +23820,20 @@ OMPClause *Sema::ActOnOpenMPFromClause( } MappableVarListInfo MVLI(VarList); - checkMappableExpressionList(*this, DSAStack, OMPC_from, MVLI, Locs.StartLoc, + checkMappableExpressionList(SemaRef, DSAStack, OMPC_from, MVLI, Locs.StartLoc, MapperIdScopeSpec, MapperId, UnresolvedMappers); if (MVLI.ProcessedVarList.empty()) return nullptr; return OMPFromClause::Create( - Context, Locs, MVLI.ProcessedVarList, MVLI.VarBaseDeclarations, + getASTContext(), Locs, MVLI.ProcessedVarList, MVLI.VarBaseDeclarations, MVLI.VarComponents, MVLI.UDMapperList, Modifiers, ModifiersLoc, - 
MapperIdScopeSpec.getWithLocInContext(Context), MapperId); + MapperIdScopeSpec.getWithLocInContext(getASTContext()), MapperId); } -OMPClause *Sema::ActOnOpenMPUseDevicePtrClause(ArrayRef VarList, - const OMPVarListLocTy &Locs) { +OMPClause * +SemaOpenMP::ActOnOpenMPUseDevicePtrClause(ArrayRef VarList, + const OMPVarListLocTy &Locs) { MappableVarListInfo MVLI(VarList); SmallVector PrivateCopies; SmallVector Inits; @@ -23740,7 +23843,7 @@ OMPClause *Sema::ActOnOpenMPUseDevicePtrClause(ArrayRef VarList, SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange); + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange); if (Res.second) { // It will be analyzed later. MVLI.ProcessedVarList.push_back(RefExpr); @@ -23765,30 +23868,30 @@ OMPClause *Sema::ActOnOpenMPUseDevicePtrClause(ArrayRef VarList, // Build the private variable and the expression that refers to it. auto VDPrivate = - buildVarDecl(*this, ELoc, Type, D->getName(), + buildVarDecl(SemaRef, ELoc, Type, D->getName(), D->hasAttrs() ? &D->getAttrs() : nullptr, VD ? cast(SimpleRefExpr) : nullptr); if (VDPrivate->isInvalidDecl()) continue; - CurContext->addDecl(VDPrivate); + SemaRef.CurContext->addDecl(VDPrivate); DeclRefExpr *VDPrivateRefExpr = buildDeclRefExpr( - *this, VDPrivate, RefExpr->getType().getUnqualifiedType(), ELoc); + SemaRef, VDPrivate, RefExpr->getType().getUnqualifiedType(), ELoc); // Add temporary variable to initialize the private copy of the pointer. 
VarDecl *VDInit = - buildVarDecl(*this, RefExpr->getExprLoc(), Type, ".devptr.temp"); + buildVarDecl(SemaRef, RefExpr->getExprLoc(), Type, ".devptr.temp"); DeclRefExpr *VDInitRefExpr = buildDeclRefExpr( - *this, VDInit, RefExpr->getType(), RefExpr->getExprLoc()); - AddInitializerToDecl(VDPrivate, - DefaultLvalueConversion(VDInitRefExpr).get(), - /*DirectInit=*/false); + SemaRef, VDInit, RefExpr->getType(), RefExpr->getExprLoc()); + SemaRef.AddInitializerToDecl( + VDPrivate, SemaRef.DefaultLvalueConversion(VDInitRefExpr).get(), + /*DirectInit=*/false); // If required, build a capture to implement the privatization initialized // with the current list item value. DeclRefExpr *Ref = nullptr; if (!VD) - Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/true); + Ref = buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/true); MVLI.ProcessedVarList.push_back(VD ? RefExpr->IgnoreParens() : Ref); PrivateCopies.push_back(VDPrivateRefExpr); Inits.push_back(VDInitRefExpr); @@ -23810,12 +23913,13 @@ OMPClause *Sema::ActOnOpenMPUseDevicePtrClause(ArrayRef VarList, return nullptr; return OMPUseDevicePtrClause::Create( - Context, Locs, MVLI.ProcessedVarList, PrivateCopies, Inits, + getASTContext(), Locs, MVLI.ProcessedVarList, PrivateCopies, Inits, MVLI.VarBaseDeclarations, MVLI.VarComponents); } -OMPClause *Sema::ActOnOpenMPUseDeviceAddrClause(ArrayRef VarList, - const OMPVarListLocTy &Locs) { +OMPClause * +SemaOpenMP::ActOnOpenMPUseDeviceAddrClause(ArrayRef VarList, + const OMPVarListLocTy &Locs) { MappableVarListInfo MVLI(VarList); for (Expr *RefExpr : VarList) { @@ -23823,7 +23927,7 @@ OMPClause *Sema::ActOnOpenMPUseDeviceAddrClause(ArrayRef VarList, SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange, + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange, /*AllowArraySection=*/true); if (Res.second) { // It will be analyzed later. 
@@ -23838,7 +23942,7 @@ OMPClause *Sema::ActOnOpenMPUseDeviceAddrClause(ArrayRef VarList, // with the current list item value. DeclRefExpr *Ref = nullptr; if (!VD) - Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/true); + Ref = buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/true); MVLI.ProcessedVarList.push_back(VD ? RefExpr->IgnoreParens() : Ref); // We need to add a data sharing attribute for this variable to make sure it @@ -23853,7 +23957,8 @@ OMPClause *Sema::ActOnOpenMPUseDeviceAddrClause(ArrayRef VarList, Expr *Component = SimpleRefExpr; if (VD && (isa(RefExpr->IgnoreParenImpCasts()) || isa(RefExpr->IgnoreParenImpCasts()))) - Component = DefaultFunctionArrayLvalueConversion(SimpleRefExpr).get(); + Component = + SemaRef.DefaultFunctionArrayLvalueConversion(SimpleRefExpr).get(); MVLI.VarComponents.back().emplace_back(Component, D, /*IsNonContiguous=*/false); } @@ -23861,20 +23966,21 @@ OMPClause *Sema::ActOnOpenMPUseDeviceAddrClause(ArrayRef VarList, if (MVLI.ProcessedVarList.empty()) return nullptr; - return OMPUseDeviceAddrClause::Create(Context, Locs, MVLI.ProcessedVarList, - MVLI.VarBaseDeclarations, - MVLI.VarComponents); + return OMPUseDeviceAddrClause::Create( + getASTContext(), Locs, MVLI.ProcessedVarList, MVLI.VarBaseDeclarations, + MVLI.VarComponents); } -OMPClause *Sema::ActOnOpenMPIsDevicePtrClause(ArrayRef VarList, - const OMPVarListLocTy &Locs) { +OMPClause * +SemaOpenMP::ActOnOpenMPIsDevicePtrClause(ArrayRef VarList, + const OMPVarListLocTy &Locs) { MappableVarListInfo MVLI(VarList); for (Expr *RefExpr : VarList) { assert(RefExpr && "NULL expr in OpenMP is_device_ptr clause."); SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange); + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange); if (Res.second) { // It will be analyzed later. 
MVLI.ProcessedVarList.push_back(RefExpr); @@ -23900,7 +24006,7 @@ OMPClause *Sema::ActOnOpenMPIsDevicePtrClause(ArrayRef VarList, << getOpenMPClauseName(DVar.CKind) << getOpenMPClauseName(OMPC_is_device_ptr) << getOpenMPDirectiveName(DSAStack->getCurrentDirective()); - reportOriginalDsa(*this, DSAStack, D, DVar); + reportOriginalDsa(SemaRef, DSAStack, D, DVar); continue; } @@ -23944,20 +24050,21 @@ OMPClause *Sema::ActOnOpenMPIsDevicePtrClause(ArrayRef VarList, if (MVLI.ProcessedVarList.empty()) return nullptr; - return OMPIsDevicePtrClause::Create(Context, Locs, MVLI.ProcessedVarList, - MVLI.VarBaseDeclarations, - MVLI.VarComponents); + return OMPIsDevicePtrClause::Create( + getASTContext(), Locs, MVLI.ProcessedVarList, MVLI.VarBaseDeclarations, + MVLI.VarComponents); } -OMPClause *Sema::ActOnOpenMPHasDeviceAddrClause(ArrayRef VarList, - const OMPVarListLocTy &Locs) { +OMPClause * +SemaOpenMP::ActOnOpenMPHasDeviceAddrClause(ArrayRef VarList, + const OMPVarListLocTy &Locs) { MappableVarListInfo MVLI(VarList); for (Expr *RefExpr : VarList) { assert(RefExpr && "NULL expr in OpenMP has_device_addr clause."); SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange, + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange, /*AllowArraySection=*/true); if (Res.second) { // It will be analyzed later. 
@@ -23975,7 +24082,7 @@ OMPClause *Sema::ActOnOpenMPHasDeviceAddrClause(ArrayRef VarList, << getOpenMPClauseName(DVar.CKind) << getOpenMPClauseName(OMPC_has_device_addr) << getOpenMPDirectiveName(DSAStack->getCurrentDirective()); - reportOriginalDsa(*this, DSAStack, D, DVar); + reportOriginalDsa(SemaRef, DSAStack, D, DVar); continue; } @@ -24000,16 +24107,17 @@ OMPClause *Sema::ActOnOpenMPHasDeviceAddrClause(ArrayRef VarList, auto *VD = dyn_cast(D); if (VD && (isa(RefExpr->IgnoreParenImpCasts()) || isa(RefExpr->IgnoreParenImpCasts()))) - Component = DefaultFunctionArrayLvalueConversion(SimpleRefExpr).get(); + Component = + SemaRef.DefaultFunctionArrayLvalueConversion(SimpleRefExpr).get(); OMPClauseMappableExprCommon::MappableComponent MC( Component, D, /*IsNonContiguous=*/false); DSAStack->addMappableExpressionComponents( D, MC, /*WhereFoundClauseKind=*/OMPC_has_device_addr); // Record the expression we've just processed. - if (!VD && !CurContext->isDependentContext()) { + if (!VD && !SemaRef.CurContext->isDependentContext()) { DeclRefExpr *Ref = - buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/true); + buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/true); assert(Ref && "has_device_addr capture failed"); MVLI.ProcessedVarList.push_back(Ref); } else @@ -24030,27 +24138,27 @@ OMPClause *Sema::ActOnOpenMPHasDeviceAddrClause(ArrayRef VarList, if (MVLI.ProcessedVarList.empty()) return nullptr; - return OMPHasDeviceAddrClause::Create(Context, Locs, MVLI.ProcessedVarList, - MVLI.VarBaseDeclarations, - MVLI.VarComponents); + return OMPHasDeviceAddrClause::Create( + getASTContext(), Locs, MVLI.ProcessedVarList, MVLI.VarBaseDeclarations, + MVLI.VarComponents); } -OMPClause *Sema::ActOnOpenMPAllocateClause( +OMPClause *SemaOpenMP::ActOnOpenMPAllocateClause( Expr *Allocator, ArrayRef VarList, SourceLocation StartLoc, SourceLocation ColonLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { if (Allocator) { // OpenMP [2.11.4 allocate Clause, Description] // 
allocator is an expression of omp_allocator_handle_t type. - if (!findOMPAllocatorHandleT(*this, Allocator->getExprLoc(), DSAStack)) + if (!findOMPAllocatorHandleT(SemaRef, Allocator->getExprLoc(), DSAStack)) return nullptr; - ExprResult AllocatorRes = DefaultLvalueConversion(Allocator); + ExprResult AllocatorRes = SemaRef.DefaultLvalueConversion(Allocator); if (AllocatorRes.isInvalid()) return nullptr; - AllocatorRes = PerformImplicitConversion(AllocatorRes.get(), - DSAStack->getOMPAllocatorHandleT(), - Sema::AA_Initializing, - /*AllowExplicit=*/true); + AllocatorRes = SemaRef.PerformImplicitConversion( + AllocatorRes.get(), DSAStack->getOMPAllocatorHandleT(), + Sema::AA_Initializing, + /*AllowExplicit=*/true); if (AllocatorRes.isInvalid()) return nullptr; Allocator = AllocatorRes.get(); @@ -24060,9 +24168,9 @@ OMPClause *Sema::ActOnOpenMPAllocateClause( // target region must specify an allocator expression unless a requires // directive with the dynamic_allocators clause is present in the same // compilation unit. - if (LangOpts.OpenMPIsTargetDevice && + if (getLangOpts().OpenMPIsTargetDevice && !DSAStack->hasRequiresDeclWithClause()) - targetDiag(StartLoc, diag::err_expected_allocator_expression); + SemaRef.targetDiag(StartLoc, diag::err_expected_allocator_expression); } // Analyze and build list of variables. SmallVector Vars; @@ -24071,7 +24179,7 @@ OMPClause *Sema::ActOnOpenMPAllocateClause( SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange); + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange); if (Res.second) { // It will be analyzed later. 
Vars.push_back(RefExpr); @@ -24082,9 +24190,9 @@ OMPClause *Sema::ActOnOpenMPAllocateClause( auto *VD = dyn_cast(D); DeclRefExpr *Ref = nullptr; - if (!VD && !CurContext->isDependentContext()) - Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/false); - Vars.push_back((VD || CurContext->isDependentContext()) + if (!VD && !SemaRef.CurContext->isDependentContext()) + Ref = buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/false); + Vars.push_back((VD || SemaRef.CurContext->isDependentContext()) ? RefExpr->IgnoreParens() : Ref); } @@ -24094,21 +24202,21 @@ OMPClause *Sema::ActOnOpenMPAllocateClause( if (Allocator) DSAStack->addInnerAllocatorExpr(Allocator); - return OMPAllocateClause::Create(Context, StartLoc, LParenLoc, Allocator, - ColonLoc, EndLoc, Vars); + return OMPAllocateClause::Create(getASTContext(), StartLoc, LParenLoc, + Allocator, ColonLoc, EndLoc, Vars); } -OMPClause *Sema::ActOnOpenMPNontemporalClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPNontemporalClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { SmallVector Vars; for (Expr *RefExpr : VarList) { assert(RefExpr && "NULL expr in OpenMP nontemporal clause."); SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange); + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange); if (Res.second) // It will be analyzed later. 
Vars.push_back(RefExpr); @@ -24133,32 +24241,34 @@ OMPClause *Sema::ActOnOpenMPNontemporalClause(ArrayRef VarList, if (Vars.empty()) return nullptr; - return OMPNontemporalClause::Create(Context, StartLoc, LParenLoc, EndLoc, - Vars); + return OMPNontemporalClause::Create(getASTContext(), StartLoc, LParenLoc, + EndLoc, Vars); } -StmtResult Sema::ActOnOpenMPScopeDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPScopeDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { if (!AStmt) return StmtError(); - setFunctionHasBranchProtectedScope(); + SemaRef.setFunctionHasBranchProtectedScope(); - return OMPScopeDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt); + return OMPScopeDirective::Create(getASTContext(), StartLoc, EndLoc, Clauses, + AStmt); } -OMPClause *Sema::ActOnOpenMPInclusiveClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPInclusiveClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { SmallVector Vars; for (Expr *RefExpr : VarList) { assert(RefExpr && "NULL expr in OpenMP nontemporal clause."); SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange, + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange, /*AllowArraySection=*/true); if (Res.second) // It will be analyzed later. 
@@ -24185,20 +24295,21 @@ OMPClause *Sema::ActOnOpenMPInclusiveClause(ArrayRef VarList, if (Vars.empty()) return nullptr; - return OMPInclusiveClause::Create(Context, StartLoc, LParenLoc, EndLoc, Vars); + return OMPInclusiveClause::Create(getASTContext(), StartLoc, LParenLoc, + EndLoc, Vars); } -OMPClause *Sema::ActOnOpenMPExclusiveClause(ArrayRef VarList, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPExclusiveClause(ArrayRef VarList, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { SmallVector Vars; for (Expr *RefExpr : VarList) { assert(RefExpr && "NULL expr in OpenMP nontemporal clause."); SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange, + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange, /*AllowArraySection=*/true); if (Res.second) // It will be analyzed later. @@ -24228,7 +24339,8 @@ OMPClause *Sema::ActOnOpenMPExclusiveClause(ArrayRef VarList, if (Vars.empty()) return nullptr; - return OMPExclusiveClause::Create(Context, StartLoc, LParenLoc, EndLoc, Vars); + return OMPExclusiveClause::Create(getASTContext(), StartLoc, LParenLoc, + EndLoc, Vars); } /// Tries to find omp_alloctrait_t type. @@ -24246,19 +24358,20 @@ static bool findOMPAlloctraitT(Sema &S, SourceLocation Loc, DSAStackTy *Stack) { return true; } -OMPClause *Sema::ActOnOpenMPUsesAllocatorClause( +OMPClause *SemaOpenMP::ActOnOpenMPUsesAllocatorClause( SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc, ArrayRef Data) { + ASTContext &Context = getASTContext(); // OpenMP [2.12.5, target Construct] // allocator is an identifier of omp_allocator_handle_t type. 
- if (!findOMPAllocatorHandleT(*this, StartLoc, DSAStack)) + if (!findOMPAllocatorHandleT(SemaRef, StartLoc, DSAStack)) return nullptr; // OpenMP [2.12.5, target Construct] // allocator-traits-array is an identifier of const omp_alloctrait_t * type. if (llvm::any_of( Data, [](const UsesAllocatorsData &D) { return D.AllocatorTraits; }) && - !findOMPAlloctraitT(*this, StartLoc, DSAStack)) + !findOMPAlloctraitT(SemaRef, StartLoc, DSAStack)) return nullptr; llvm::SmallPtrSet, 4> PredefinedAllocators; for (int I = 0; I < OMPAllocateDeclAttr::OMPUserDefinedMemAlloc; ++I) { @@ -24266,8 +24379,8 @@ OMPClause *Sema::ActOnOpenMPUsesAllocatorClause( StringRef Allocator = OMPAllocateDeclAttr::ConvertAllocatorTypeTyToStr(AllocatorKind); DeclarationName AllocatorName = &Context.Idents.get(Allocator); - PredefinedAllocators.insert(LookupSingleName( - TUScope, AllocatorName, StartLoc, Sema::LookupAnyName)); + PredefinedAllocators.insert(SemaRef.LookupSingleName( + SemaRef.TUScope, AllocatorName, StartLoc, Sema::LookupAnyName)); } SmallVector NewData; @@ -24284,7 +24397,7 @@ OMPClause *Sema::ActOnOpenMPUsesAllocatorClause( bool IsPredefinedAllocator = false; if (DRE) { OMPAllocateDeclAttr::AllocatorTypeTy AllocatorTy = - getAllocatorKind(*this, DSAStack, AllocatorExpr); + getAllocatorKind(SemaRef, DSAStack, AllocatorExpr); IsPredefinedAllocator = AllocatorTy != OMPAllocateDeclAttr::AllocatorTypeTy::OMPUserDefinedMemAlloc; @@ -24329,7 +24442,7 @@ OMPClause *Sema::ActOnOpenMPUsesAllocatorClause( } // No allocator traits - just convert it to rvalue. 
if (!D.AllocatorTraits) - AllocatorExpr = DefaultLvalueConversion(AllocatorExpr).get(); + AllocatorExpr = SemaRef.DefaultLvalueConversion(AllocatorExpr).get(); DSAStack->addUsesAllocatorsDecl( DRE->getDecl(), IsPredefinedAllocator @@ -24376,11 +24489,11 @@ OMPClause *Sema::ActOnOpenMPUsesAllocatorClause( NewD.LParenLoc = D.LParenLoc; NewD.RParenLoc = D.RParenLoc; } - return OMPUsesAllocatorsClause::Create(Context, StartLoc, LParenLoc, EndLoc, - NewData); + return OMPUsesAllocatorsClause::Create(getASTContext(), StartLoc, LParenLoc, + EndLoc, NewData); } -OMPClause *Sema::ActOnOpenMPAffinityClause( +OMPClause *SemaOpenMP::ActOnOpenMPAffinityClause( SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc, Expr *Modifier, ArrayRef Locators) { SmallVector Vars; @@ -24403,8 +24516,8 @@ OMPClause *Sema::ActOnOpenMPAffinityClause( ExprResult Res; { - Sema::TentativeAnalysisScope Trap(*this); - Res = CreateBuiltinUnaryOp(ELoc, UO_AddrOf, SimpleExpr); + Sema::TentativeAnalysisScope Trap(SemaRef); + Res = SemaRef.CreateBuiltinUnaryOp(ELoc, UO_AddrOf, SimpleExpr); } if (!Res.isUsable() && !isa(SimpleExpr) && !isa(SimpleExpr)) { @@ -24415,15 +24528,15 @@ OMPClause *Sema::ActOnOpenMPAffinityClause( Vars.push_back(SimpleExpr); } - return OMPAffinityClause::Create(Context, StartLoc, LParenLoc, ColonLoc, - EndLoc, Modifier, Vars); + return OMPAffinityClause::Create(getASTContext(), StartLoc, LParenLoc, + ColonLoc, EndLoc, Modifier, Vars); } -OMPClause *Sema::ActOnOpenMPBindClause(OpenMPBindClauseKind Kind, - SourceLocation KindLoc, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPBindClause(OpenMPBindClauseKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { if (Kind == OMPC_BIND_unknown) { Diag(KindLoc, diag::err_omp_unexpected_clause_value) << getListOfPossibleValues(OMPC_bind, /*First=*/0, @@ -24432,39 
+24545,40 @@ OMPClause *Sema::ActOnOpenMPBindClause(OpenMPBindClauseKind Kind, return nullptr; } - return OMPBindClause::Create(Context, Kind, KindLoc, StartLoc, LParenLoc, - EndLoc); + return OMPBindClause::Create(getASTContext(), Kind, KindLoc, StartLoc, + LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPXDynCGroupMemClause(Expr *Size, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { +OMPClause *SemaOpenMP::ActOnOpenMPXDynCGroupMemClause(Expr *Size, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { Expr *ValExpr = Size; Stmt *HelperValStmt = nullptr; // OpenMP [2.5, Restrictions] // The ompx_dyn_cgroup_mem expression must evaluate to a positive integer // value. - if (!isNonNegativeIntegerValue(ValExpr, *this, OMPC_ompx_dyn_cgroup_mem, + if (!isNonNegativeIntegerValue(ValExpr, SemaRef, OMPC_ompx_dyn_cgroup_mem, /*StrictlyPositive=*/false)) return nullptr; OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); OpenMPDirectiveKind CaptureRegion = getOpenMPCaptureRegionForClause( - DKind, OMPC_ompx_dyn_cgroup_mem, LangOpts.OpenMP); - if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { - ValExpr = MakeFullExpr(ValExpr).get(); + DKind, OMPC_ompx_dyn_cgroup_mem, getLangOpts().OpenMP); + if (CaptureRegion != OMPD_unknown && + !SemaRef.CurContext->isDependentContext()) { + ValExpr = SemaRef.MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; - ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); - HelperValStmt = buildPreInits(Context, Captures); + ValExpr = tryBuildCapture(SemaRef, ValExpr, Captures).get(); + HelperValStmt = buildPreInits(getASTContext(), Captures); } - return new (Context) OMPXDynCGroupMemClause( + return new (getASTContext()) OMPXDynCGroupMemClause( ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPDoacrossClause( +OMPClause *SemaOpenMP::ActOnOpenMPDoacrossClause( OpenMPDoacrossClauseModifier 
DepType, SourceLocation DepLoc, SourceLocation ColonLoc, ArrayRef VarList, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { @@ -24483,7 +24597,7 @@ OMPClause *Sema::ActOnOpenMPDoacrossClause( DSAStackTy::OperatorOffsetTy OpsOffs; llvm::APSInt TotalDepCount(/*BitWidth=*/32); DoacrossDataInfoTy VarOffset = ProcessOpenMPDoacrossClauseCommon( - *this, + SemaRef, DepType == OMPC_DOACROSS_source || DepType == OMPC_DOACROSS_source_omp_cur_iteration || DepType == OMPC_DOACROSS_sink_omp_cur_iteration, @@ -24491,22 +24605,587 @@ OMPClause *Sema::ActOnOpenMPDoacrossClause( Vars = VarOffset.Vars; OpsOffs = VarOffset.OpsOffs; TotalDepCount = VarOffset.TotalDepCount; - auto *C = OMPDoacrossClause::Create(Context, StartLoc, LParenLoc, EndLoc, - DepType, DepLoc, ColonLoc, Vars, + auto *C = OMPDoacrossClause::Create(getASTContext(), StartLoc, LParenLoc, + EndLoc, DepType, DepLoc, ColonLoc, Vars, TotalDepCount.getZExtValue()); if (DSAStack->isParentOrderedRegion()) DSAStack->addDoacrossDependClause(C, OpsOffs); return C; } -OMPClause *Sema::ActOnOpenMPXAttributeClause(ArrayRef Attrs, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc) { - return new (Context) OMPXAttributeClause(Attrs, StartLoc, LParenLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPXAttributeClause(ArrayRef Attrs, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + return new (getASTContext()) + OMPXAttributeClause(Attrs, StartLoc, LParenLoc, EndLoc); } -OMPClause *Sema::ActOnOpenMPXBareClause(SourceLocation StartLoc, - SourceLocation EndLoc) { - return new (Context) OMPXBareClause(StartLoc, EndLoc); +OMPClause *SemaOpenMP::ActOnOpenMPXBareClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (getASTContext()) OMPXBareClause(StartLoc, EndLoc); +} + +ExprResult SemaOpenMP::ActOnOMPArraySectionExpr( + Expr *Base, SourceLocation LBLoc, Expr *LowerBound, + SourceLocation ColonLocFirst, SourceLocation ColonLocSecond, 
Expr *Length, + Expr *Stride, SourceLocation RBLoc) { + ASTContext &Context = getASTContext(); + if (Base->hasPlaceholderType() && + !Base->hasPlaceholderType(BuiltinType::OMPArraySection)) { + ExprResult Result = SemaRef.CheckPlaceholderExpr(Base); + if (Result.isInvalid()) + return ExprError(); + Base = Result.get(); + } + if (LowerBound && LowerBound->getType()->isNonOverloadPlaceholderType()) { + ExprResult Result = SemaRef.CheckPlaceholderExpr(LowerBound); + if (Result.isInvalid()) + return ExprError(); + Result = SemaRef.DefaultLvalueConversion(Result.get()); + if (Result.isInvalid()) + return ExprError(); + LowerBound = Result.get(); + } + if (Length && Length->getType()->isNonOverloadPlaceholderType()) { + ExprResult Result = SemaRef.CheckPlaceholderExpr(Length); + if (Result.isInvalid()) + return ExprError(); + Result = SemaRef.DefaultLvalueConversion(Result.get()); + if (Result.isInvalid()) + return ExprError(); + Length = Result.get(); + } + if (Stride && Stride->getType()->isNonOverloadPlaceholderType()) { + ExprResult Result = SemaRef.CheckPlaceholderExpr(Stride); + if (Result.isInvalid()) + return ExprError(); + Result = SemaRef.DefaultLvalueConversion(Result.get()); + if (Result.isInvalid()) + return ExprError(); + Stride = Result.get(); + } + + // Build an unanalyzed expression if either operand is type-dependent. + if (Base->isTypeDependent() || + (LowerBound && + (LowerBound->isTypeDependent() || LowerBound->isValueDependent())) || + (Length && (Length->isTypeDependent() || Length->isValueDependent())) || + (Stride && (Stride->isTypeDependent() || Stride->isValueDependent()))) { + return new (Context) OMPArraySectionExpr( + Base, LowerBound, Length, Stride, Context.DependentTy, VK_LValue, + OK_Ordinary, ColonLocFirst, ColonLocSecond, RBLoc); + } + + // Perform default conversions. 
+ QualType OriginalTy = OMPArraySectionExpr::getBaseOriginalType(Base); + QualType ResultTy; + if (OriginalTy->isAnyPointerType()) { + ResultTy = OriginalTy->getPointeeType(); + } else if (OriginalTy->isArrayType()) { + ResultTy = OriginalTy->getAsArrayTypeUnsafe()->getElementType(); + } else { + return ExprError( + Diag(Base->getExprLoc(), diag::err_omp_typecheck_section_value) + << Base->getSourceRange()); + } + // C99 6.5.2.1p1 + if (LowerBound) { + auto Res = PerformOpenMPImplicitIntegerConversion(LowerBound->getExprLoc(), + LowerBound); + if (Res.isInvalid()) + return ExprError(Diag(LowerBound->getExprLoc(), + diag::err_omp_typecheck_section_not_integer) + << 0 << LowerBound->getSourceRange()); + LowerBound = Res.get(); + + if (LowerBound->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || + LowerBound->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) + Diag(LowerBound->getExprLoc(), diag::warn_omp_section_is_char) + << 0 << LowerBound->getSourceRange(); + } + if (Length) { + auto Res = + PerformOpenMPImplicitIntegerConversion(Length->getExprLoc(), Length); + if (Res.isInvalid()) + return ExprError(Diag(Length->getExprLoc(), + diag::err_omp_typecheck_section_not_integer) + << 1 << Length->getSourceRange()); + Length = Res.get(); + + if (Length->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || + Length->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) + Diag(Length->getExprLoc(), diag::warn_omp_section_is_char) + << 1 << Length->getSourceRange(); + } + if (Stride) { + ExprResult Res = + PerformOpenMPImplicitIntegerConversion(Stride->getExprLoc(), Stride); + if (Res.isInvalid()) + return ExprError(Diag(Stride->getExprLoc(), + diag::err_omp_typecheck_section_not_integer) + << 1 << Stride->getSourceRange()); + Stride = Res.get(); + + if (Stride->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || + Stride->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) + Diag(Stride->getExprLoc(), diag::warn_omp_section_is_char) + << 1 << 
Stride->getSourceRange(); + } + + // C99 6.5.2.1p1: "shall have type "pointer to *object* type". Similarly, + // C++ [expr.sub]p1: The type "T" shall be a completely-defined object + // type. Note that functions are not objects, and that (in C99 parlance) + // incomplete types are not object types. + if (ResultTy->isFunctionType()) { + Diag(Base->getExprLoc(), diag::err_omp_section_function_type) + << ResultTy << Base->getSourceRange(); + return ExprError(); + } + + if (SemaRef.RequireCompleteType(Base->getExprLoc(), ResultTy, + diag::err_omp_section_incomplete_type, Base)) + return ExprError(); + + if (LowerBound && !OriginalTy->isAnyPointerType()) { + Expr::EvalResult Result; + if (LowerBound->EvaluateAsInt(Result, Context)) { + // OpenMP 5.0, [2.1.5 Array Sections] + // The array section must be a subset of the original array. + llvm::APSInt LowerBoundValue = Result.Val.getInt(); + if (LowerBoundValue.isNegative()) { + Diag(LowerBound->getExprLoc(), + diag::err_omp_section_not_subset_of_array) + << LowerBound->getSourceRange(); + return ExprError(); + } + } + } + + if (Length) { + Expr::EvalResult Result; + if (Length->EvaluateAsInt(Result, Context)) { + // OpenMP 5.0, [2.1.5 Array Sections] + // The length must evaluate to non-negative integers. + llvm::APSInt LengthValue = Result.Val.getInt(); + if (LengthValue.isNegative()) { + Diag(Length->getExprLoc(), diag::err_omp_section_length_negative) + << toString(LengthValue, /*Radix=*/10, /*Signed=*/true) + << Length->getSourceRange(); + return ExprError(); + } + } + } else if (ColonLocFirst.isValid() && + (OriginalTy.isNull() || (!OriginalTy->isConstantArrayType() && + !OriginalTy->isVariableArrayType()))) { + // OpenMP 5.0, [2.1.5 Array Sections] + // When the size of the array dimension is not known, the length must be + // specified explicitly. 
+ Diag(ColonLocFirst, diag::err_omp_section_length_undefined) + << (!OriginalTy.isNull() && OriginalTy->isArrayType()); + return ExprError(); + } + + if (Stride) { + Expr::EvalResult Result; + if (Stride->EvaluateAsInt(Result, Context)) { + // OpenMP 5.0, [2.1.5 Array Sections] + // The stride must evaluate to a positive integer. + llvm::APSInt StrideValue = Result.Val.getInt(); + if (!StrideValue.isStrictlyPositive()) { + Diag(Stride->getExprLoc(), diag::err_omp_section_stride_non_positive) + << toString(StrideValue, /*Radix=*/10, /*Signed=*/true) + << Stride->getSourceRange(); + return ExprError(); + } + } + } + + if (!Base->hasPlaceholderType(BuiltinType::OMPArraySection)) { + ExprResult Result = SemaRef.DefaultFunctionArrayLvalueConversion(Base); + if (Result.isInvalid()) + return ExprError(); + Base = Result.get(); + } + return new (Context) OMPArraySectionExpr( + Base, LowerBound, Length, Stride, Context.OMPArraySectionTy, VK_LValue, + OK_Ordinary, ColonLocFirst, ColonLocSecond, RBLoc); +} + +ExprResult SemaOpenMP::ActOnOMPArrayShapingExpr( + Expr *Base, SourceLocation LParenLoc, SourceLocation RParenLoc, + ArrayRef Dims, ArrayRef Brackets) { + ASTContext &Context = getASTContext(); + if (Base->hasPlaceholderType()) { + ExprResult Result = SemaRef.CheckPlaceholderExpr(Base); + if (Result.isInvalid()) + return ExprError(); + Result = SemaRef.DefaultLvalueConversion(Result.get()); + if (Result.isInvalid()) + return ExprError(); + Base = Result.get(); + } + QualType BaseTy = Base->getType(); + // Delay analysis of the types/expressions if instantiation/specialization is + // required. 
+ if (!BaseTy->isPointerType() && Base->isTypeDependent()) + return OMPArrayShapingExpr::Create(Context, Context.DependentTy, Base, + LParenLoc, RParenLoc, Dims, Brackets); + if (!BaseTy->isPointerType() || + (!Base->isTypeDependent() && + BaseTy->getPointeeType()->isIncompleteType())) + return ExprError(Diag(Base->getExprLoc(), + diag::err_omp_non_pointer_type_array_shaping_base) + << Base->getSourceRange()); + + SmallVector NewDims; + bool ErrorFound = false; + for (Expr *Dim : Dims) { + if (Dim->hasPlaceholderType()) { + ExprResult Result = SemaRef.CheckPlaceholderExpr(Dim); + if (Result.isInvalid()) { + ErrorFound = true; + continue; + } + Result = SemaRef.DefaultLvalueConversion(Result.get()); + if (Result.isInvalid()) { + ErrorFound = true; + continue; + } + Dim = Result.get(); + } + if (!Dim->isTypeDependent()) { + ExprResult Result = + PerformOpenMPImplicitIntegerConversion(Dim->getExprLoc(), Dim); + if (Result.isInvalid()) { + ErrorFound = true; + Diag(Dim->getExprLoc(), diag::err_omp_typecheck_shaping_not_integer) + << Dim->getSourceRange(); + continue; + } + Dim = Result.get(); + Expr::EvalResult EvResult; + if (!Dim->isValueDependent() && Dim->EvaluateAsInt(EvResult, Context)) { + // OpenMP 5.0, [2.1.4 Array Shaping] + // Each si is an integral type expression that must evaluate to a + // positive integer. 
+ llvm::APSInt Value = EvResult.Val.getInt(); + if (!Value.isStrictlyPositive()) { + Diag(Dim->getExprLoc(), diag::err_omp_shaping_dimension_not_positive) + << toString(Value, /*Radix=*/10, /*Signed=*/true) + << Dim->getSourceRange(); + ErrorFound = true; + continue; + } + } + } + NewDims.push_back(Dim); + } + if (ErrorFound) + return ExprError(); + return OMPArrayShapingExpr::Create(Context, Context.OMPArrayShapingTy, Base, + LParenLoc, RParenLoc, NewDims, Brackets); } + +ExprResult SemaOpenMP::ActOnOMPIteratorExpr(Scope *S, + SourceLocation IteratorKwLoc, + SourceLocation LLoc, + SourceLocation RLoc, + ArrayRef Data) { + ASTContext &Context = getASTContext(); + SmallVector ID; + bool IsCorrect = true; + for (const OMPIteratorData &D : Data) { + TypeSourceInfo *TInfo = nullptr; + SourceLocation StartLoc; + QualType DeclTy; + if (!D.Type.getAsOpaquePtr()) { + // OpenMP 5.0, 2.1.6 Iterators + // In an iterator-specifier, if the iterator-type is not specified then + // the type of that iterator is of int type. + DeclTy = Context.IntTy; + StartLoc = D.DeclIdentLoc; + } else { + DeclTy = Sema::GetTypeFromParser(D.Type, &TInfo); + StartLoc = TInfo->getTypeLoc().getBeginLoc(); + } + + bool IsDeclTyDependent = DeclTy->isDependentType() || + DeclTy->containsUnexpandedParameterPack() || + DeclTy->isInstantiationDependentType(); + if (!IsDeclTyDependent) { + if (!DeclTy->isIntegralType(Context) && !DeclTy->isAnyPointerType()) { + // OpenMP 5.0, 2.1.6 Iterators, Restrictions, C/C++ + // The iterator-type must be an integral or pointer type. + Diag(StartLoc, diag::err_omp_iterator_not_integral_or_pointer) + << DeclTy; + IsCorrect = false; + continue; + } + if (DeclTy.isConstant(Context)) { + // OpenMP 5.0, 2.1.6 Iterators, Restrictions, C/C++ + // The iterator-type must not be const qualified. + Diag(StartLoc, diag::err_omp_iterator_not_integral_or_pointer) + << DeclTy; + IsCorrect = false; + continue; + } + } + + // Iterator declaration. 
+ assert(D.DeclIdent && "Identifier expected."); + // Always try to create iterator declarator to avoid extra error messages + // about unknown declarations use. + auto *VD = + VarDecl::Create(Context, SemaRef.CurContext, StartLoc, D.DeclIdentLoc, + D.DeclIdent, DeclTy, TInfo, SC_None); + VD->setImplicit(); + if (S) { + // Check for conflicting previous declaration. + DeclarationNameInfo NameInfo(VD->getDeclName(), D.DeclIdentLoc); + LookupResult Previous(SemaRef, NameInfo, Sema::LookupOrdinaryName, + Sema::ForVisibleRedeclaration); + Previous.suppressDiagnostics(); + SemaRef.LookupName(Previous, S); + + SemaRef.FilterLookupForScope(Previous, SemaRef.CurContext, S, + /*ConsiderLinkage=*/false, + /*AllowInlineNamespace=*/false); + if (!Previous.empty()) { + NamedDecl *Old = Previous.getRepresentativeDecl(); + Diag(D.DeclIdentLoc, diag::err_redefinition) << VD->getDeclName(); + Diag(Old->getLocation(), diag::note_previous_definition); + } else { + SemaRef.PushOnScopeChains(VD, S); + } + } else { + SemaRef.CurContext->addDecl(VD); + } + + /// Act on the iterator variable declaration. 
+ ActOnOpenMPIteratorVarDecl(VD); + + Expr *Begin = D.Range.Begin; + if (!IsDeclTyDependent && Begin && !Begin->isTypeDependent()) { + ExprResult BeginRes = + SemaRef.PerformImplicitConversion(Begin, DeclTy, Sema::AA_Converting); + Begin = BeginRes.get(); + } + Expr *End = D.Range.End; + if (!IsDeclTyDependent && End && !End->isTypeDependent()) { + ExprResult EndRes = + SemaRef.PerformImplicitConversion(End, DeclTy, Sema::AA_Converting); + End = EndRes.get(); + } + Expr *Step = D.Range.Step; + if (!IsDeclTyDependent && Step && !Step->isTypeDependent()) { + if (!Step->getType()->isIntegralType(Context)) { + Diag(Step->getExprLoc(), diag::err_omp_iterator_step_not_integral) + << Step << Step->getSourceRange(); + IsCorrect = false; + continue; + } + std::optional Result = + Step->getIntegerConstantExpr(Context); + // OpenMP 5.0, 2.1.6 Iterators, Restrictions + // If the step expression of a range-specification equals zero, the + // behavior is unspecified. + if (Result && Result->isZero()) { + Diag(Step->getExprLoc(), diag::err_omp_iterator_step_constant_zero) + << Step << Step->getSourceRange(); + IsCorrect = false; + continue; + } + } + if (!Begin || !End || !IsCorrect) { + IsCorrect = false; + continue; + } + OMPIteratorExpr::IteratorDefinition &IDElem = ID.emplace_back(); + IDElem.IteratorDecl = VD; + IDElem.AssignmentLoc = D.AssignLoc; + IDElem.Range.Begin = Begin; + IDElem.Range.End = End; + IDElem.Range.Step = Step; + IDElem.ColonLoc = D.ColonLoc; + IDElem.SecondColonLoc = D.SecColonLoc; + } + if (!IsCorrect) { + // Invalidate all created iterator declarations if error is found. + for (const OMPIteratorExpr::IteratorDefinition &D : ID) { + if (Decl *ID = D.IteratorDecl) + ID->setInvalidDecl(); + } + return ExprError(); + } + SmallVector Helpers; + if (!SemaRef.CurContext->isDependentContext()) { + // Build number of ityeration for each iteration range. + // Ni = ((Stepi > 0) ? 
((Endi + Stepi -1 - Begini)/Stepi) : + // ((Begini-Stepi-1-Endi) / -Stepi); + for (OMPIteratorExpr::IteratorDefinition &D : ID) { + // (Endi - Begini) + ExprResult Res = SemaRef.CreateBuiltinBinOp(D.AssignmentLoc, BO_Sub, + D.Range.End, D.Range.Begin); + if (!Res.isUsable()) { + IsCorrect = false; + continue; + } + ExprResult St, St1; + if (D.Range.Step) { + St = D.Range.Step; + // (Endi - Begini) + Stepi + Res = SemaRef.CreateBuiltinBinOp(D.AssignmentLoc, BO_Add, Res.get(), + St.get()); + if (!Res.isUsable()) { + IsCorrect = false; + continue; + } + // (Endi - Begini) + Stepi - 1 + Res = SemaRef.CreateBuiltinBinOp( + D.AssignmentLoc, BO_Sub, Res.get(), + SemaRef.ActOnIntegerConstant(D.AssignmentLoc, 1).get()); + if (!Res.isUsable()) { + IsCorrect = false; + continue; + } + // ((Endi - Begini) + Stepi - 1) / Stepi + Res = SemaRef.CreateBuiltinBinOp(D.AssignmentLoc, BO_Div, Res.get(), + St.get()); + if (!Res.isUsable()) { + IsCorrect = false; + continue; + } + St1 = SemaRef.CreateBuiltinUnaryOp(D.AssignmentLoc, UO_Minus, + D.Range.Step); + // (Begini - Endi) + ExprResult Res1 = SemaRef.CreateBuiltinBinOp( + D.AssignmentLoc, BO_Sub, D.Range.Begin, D.Range.End); + if (!Res1.isUsable()) { + IsCorrect = false; + continue; + } + // (Begini - Endi) - Stepi + Res1 = SemaRef.CreateBuiltinBinOp(D.AssignmentLoc, BO_Add, Res1.get(), + St1.get()); + if (!Res1.isUsable()) { + IsCorrect = false; + continue; + } + // (Begini - Endi) - Stepi - 1 + Res1 = SemaRef.CreateBuiltinBinOp( + D.AssignmentLoc, BO_Sub, Res1.get(), + SemaRef.ActOnIntegerConstant(D.AssignmentLoc, 1).get()); + if (!Res1.isUsable()) { + IsCorrect = false; + continue; + } + // ((Begini - Endi) - Stepi - 1) / (-Stepi) + Res1 = SemaRef.CreateBuiltinBinOp(D.AssignmentLoc, BO_Div, Res1.get(), + St1.get()); + if (!Res1.isUsable()) { + IsCorrect = false; + continue; + } + // Stepi > 0. 
+ ExprResult CmpRes = SemaRef.CreateBuiltinBinOp( + D.AssignmentLoc, BO_GT, D.Range.Step, + SemaRef.ActOnIntegerConstant(D.AssignmentLoc, 0).get()); + if (!CmpRes.isUsable()) { + IsCorrect = false; + continue; + } + Res = SemaRef.ActOnConditionalOp(D.AssignmentLoc, D.AssignmentLoc, + CmpRes.get(), Res.get(), Res1.get()); + if (!Res.isUsable()) { + IsCorrect = false; + continue; + } + } + Res = SemaRef.ActOnFinishFullExpr(Res.get(), /*DiscardedValue=*/false); + if (!Res.isUsable()) { + IsCorrect = false; + continue; + } + + // Build counter update. + // Build counter. + auto *CounterVD = VarDecl::Create(Context, SemaRef.CurContext, + D.IteratorDecl->getBeginLoc(), + D.IteratorDecl->getBeginLoc(), nullptr, + Res.get()->getType(), nullptr, SC_None); + CounterVD->setImplicit(); + ExprResult RefRes = + SemaRef.BuildDeclRefExpr(CounterVD, CounterVD->getType(), VK_LValue, + D.IteratorDecl->getBeginLoc()); + // Build counter update. + // I = Begini + counter * Stepi; + ExprResult UpdateRes; + if (D.Range.Step) { + UpdateRes = SemaRef.CreateBuiltinBinOp( + D.AssignmentLoc, BO_Mul, + SemaRef.DefaultLvalueConversion(RefRes.get()).get(), St.get()); + } else { + UpdateRes = SemaRef.DefaultLvalueConversion(RefRes.get()); + } + if (!UpdateRes.isUsable()) { + IsCorrect = false; + continue; + } + UpdateRes = SemaRef.CreateBuiltinBinOp(D.AssignmentLoc, BO_Add, + D.Range.Begin, UpdateRes.get()); + if (!UpdateRes.isUsable()) { + IsCorrect = false; + continue; + } + ExprResult VDRes = + SemaRef.BuildDeclRefExpr(cast(D.IteratorDecl), + cast(D.IteratorDecl)->getType(), + VK_LValue, D.IteratorDecl->getBeginLoc()); + UpdateRes = SemaRef.CreateBuiltinBinOp(D.AssignmentLoc, BO_Assign, + VDRes.get(), UpdateRes.get()); + if (!UpdateRes.isUsable()) { + IsCorrect = false; + continue; + } + UpdateRes = + SemaRef.ActOnFinishFullExpr(UpdateRes.get(), /*DiscardedValue=*/true); + if (!UpdateRes.isUsable()) { + IsCorrect = false; + continue; + } + ExprResult CounterUpdateRes = 
SemaRef.CreateBuiltinUnaryOp( + D.AssignmentLoc, UO_PreInc, RefRes.get()); + if (!CounterUpdateRes.isUsable()) { + IsCorrect = false; + continue; + } + CounterUpdateRes = SemaRef.ActOnFinishFullExpr(CounterUpdateRes.get(), + /*DiscardedValue=*/true); + if (!CounterUpdateRes.isUsable()) { + IsCorrect = false; + continue; + } + OMPIteratorHelperData &HD = Helpers.emplace_back(); + HD.CounterVD = CounterVD; + HD.Upper = Res.get(); + HD.Update = UpdateRes.get(); + HD.CounterUpdate = CounterUpdateRes.get(); + } + } else { + Helpers.assign(ID.size(), {}); + } + if (!IsCorrect) { + // Invalidate all created iterator declarations if error is found. + for (const OMPIteratorExpr::IteratorDefinition &D : ID) { + if (Decl *ID = D.IteratorDecl) + ID->setInvalidDecl(); + } + return ExprError(); + } + return OMPIteratorExpr::Create(Context, Context.OMPIteratorTy, IteratorKwLoc, + LLoc, RLoc, ID, Helpers); +} + +SemaOpenMP::SemaOpenMP(Sema &S) + : SemaBase(S), VarDataSharingAttributesStack(nullptr) {} diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index d28c24cfdfd33c..a7b33f0db047eb 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -35,6 +35,7 @@ #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaCUDA.h" #include "clang/Sema/SemaInternal.h" +#include "clang/Sema/SemaOpenMP.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -3097,7 +3098,7 @@ StmtResult Sema::BuildCXXForRangeStmt( // In OpenMP loop region loop control variable must be private. Perform // analysis of first part (if any). 
if (getLangOpts().OpenMP >= 50 && BeginDeclStmt.isUsable()) - ActOnOpenMPLoopInitialization(ForLoc, BeginDeclStmt.get()); + OpenMP().ActOnOpenMPLoopInitialization(ForLoc, BeginDeclStmt.get()); return new (Context) CXXForRangeStmt( InitStmt, RangeDS, cast_or_null(BeginDeclStmt.get()), @@ -4822,7 +4823,8 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI, assert(Cap.isVariableCapture() && "unknown kind of capture"); if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) - S.setOpenMPCaptureKind(Field, Cap.getVariable(), RSI->OpenMPLevel); + S.OpenMP().setOpenMPCaptureKind(Field, Cap.getVariable(), + RSI->OpenMPLevel); Captures.push_back(CapturedStmt::Capture( Cap.getLocation(), diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index c45a8d1408fff3..6d359c5a9a024c 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -28,6 +28,7 @@ #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaCUDA.h" #include "clang/Sema/SemaInternal.h" +#include "clang/Sema/SemaOpenMP.h" #include "clang/Sema/Template.h" #include "clang/Sema/TemplateInstCallback.h" #include "llvm/Support/TimeProfiler.h" @@ -399,7 +400,7 @@ static void instantiateOMPDeclareSimdDeclAttr( ++SI; } LinModifiers.append(Attr.modifiers_begin(), Attr.modifiers_end()); - (void)S.ActOnOpenMPDeclareSimdDirective( + (void)S.OpenMP().ActOnOpenMPDeclareSimdDirective( S.ConvertDeclToDeclGroup(New), Attr.getBranchState(), Simdlen.get(), Uniforms, Aligneds, Alignments, Linears, LinModifiers, Steps, Attr.getRange()); @@ -476,9 +477,9 @@ static void instantiateOMPDeclareVariantAttr( // Check function/variant ref for `omp declare variant` but not for `omp // begin declare variant` (which use implicit attributes). 
std::optional> DeclVarData = - S.checkOpenMPDeclareVariantFunction(S.ConvertDeclToDeclGroup(New), E, TI, - Attr.appendArgs_size(), - Attr.getRange()); + S.OpenMP().checkOpenMPDeclareVariantFunction( + S.ConvertDeclToDeclGroup(New), E, TI, Attr.appendArgs_size(), + Attr.getRange()); if (!DeclVarData) return; @@ -539,7 +540,7 @@ static void instantiateOMPDeclareVariantAttr( AppendArgs.emplace_back(II.IsTarget, II.IsTargetSync); } - S.ActOnOpenMPDeclareVariantDirective( + S.OpenMP().ActOnOpenMPDeclareVariantDirective( FD, E, TI, NothingExprs, NeedDevicePtrExprs, AppendArgs, SourceLocation(), SourceLocation(), Attr.getRange()); } @@ -3587,7 +3588,7 @@ Decl *TemplateDeclInstantiator::VisitOMPThreadPrivateDecl( } OMPThreadPrivateDecl *TD = - SemaRef.CheckOMPThreadPrivateDecl(D->getLocation(), Vars); + SemaRef.OpenMP().CheckOMPThreadPrivateDecl(D->getLocation(), Vars); TD->setAccess(AS_public); Owner->addDecl(TD); @@ -3610,14 +3611,14 @@ Decl *TemplateDeclInstantiator::VisitOMPAllocateDecl(OMPAllocateDecl *D) { ExprResult NewE = SemaRef.SubstExpr(AC->getAllocator(), TemplateArgs); if (!NewE.isUsable()) continue; - IC = SemaRef.ActOnOpenMPAllocatorClause( + IC = SemaRef.OpenMP().ActOnOpenMPAllocatorClause( NewE.get(), AC->getBeginLoc(), AC->getLParenLoc(), AC->getEndLoc()); } else if (auto *AC = dyn_cast(C)) { ExprResult NewE = SemaRef.SubstExpr(AC->getAlignment(), TemplateArgs); if (!NewE.isUsable()) continue; - IC = SemaRef.ActOnOpenMPAlignClause(NewE.get(), AC->getBeginLoc(), - AC->getLParenLoc(), AC->getEndLoc()); + IC = SemaRef.OpenMP().ActOnOpenMPAlignClause( + NewE.get(), AC->getBeginLoc(), AC->getLParenLoc(), AC->getEndLoc()); // If align clause value ends up being invalid, this can end up null. 
if (!IC) continue; @@ -3625,7 +3626,7 @@ Decl *TemplateDeclInstantiator::VisitOMPAllocateDecl(OMPAllocateDecl *D) { Clauses.push_back(IC); } - Sema::DeclGroupPtrTy Res = SemaRef.ActOnOpenMPAllocateDirective( + Sema::DeclGroupPtrTy Res = SemaRef.OpenMP().ActOnOpenMPAllocateDirective( D->getLocation(), Vars, Clauses, Owner); if (Res.get().isNull()) return nullptr; @@ -3646,7 +3647,7 @@ Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl( D->getType()->containsUnexpandedParameterPack(); QualType SubstReductionType; if (RequiresInstantiation) { - SubstReductionType = SemaRef.ActOnOpenMPDeclareReductionType( + SubstReductionType = SemaRef.OpenMP().ActOnOpenMPDeclareReductionType( D->getLocation(), ParsedType::make(SemaRef.SubstType( D->getType(), TemplateArgs, D->getLocation(), DeclarationName()))); @@ -3667,7 +3668,7 @@ Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl( SemaRef.CurrentInstantiationScope->findInstantiationOf(PrevDeclInScope) ->get()); } - auto DRD = SemaRef.ActOnOpenMPDeclareReductionDirectiveStart( + auto DRD = SemaRef.OpenMP().ActOnOpenMPDeclareReductionDirectiveStart( /*S=*/nullptr, Owner, D->getDeclName(), ReductionTypes, D->getAccess(), PrevDeclInScope); auto *NewDRD = cast(DRD.get().getSingleDecl()); @@ -3676,7 +3677,7 @@ Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl( Expr *SubstInitializer = nullptr; // Combiners instantiation sequence. 
if (Combiner) { - SemaRef.ActOnOpenMPDeclareReductionCombinerStart( + SemaRef.OpenMP().ActOnOpenMPDeclareReductionCombinerStart( /*S=*/nullptr, NewDRD); SemaRef.CurrentInstantiationScope->InstantiatedLocal( cast(D->getCombinerIn())->getDecl(), @@ -3688,12 +3689,14 @@ Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl( Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, Qualifiers(), ThisContext); SubstCombiner = SemaRef.SubstExpr(Combiner, TemplateArgs).get(); - SemaRef.ActOnOpenMPDeclareReductionCombinerEnd(NewDRD, SubstCombiner); + SemaRef.OpenMP().ActOnOpenMPDeclareReductionCombinerEnd(NewDRD, + SubstCombiner); } // Initializers instantiation sequence. if (Init) { - VarDecl *OmpPrivParm = SemaRef.ActOnOpenMPDeclareReductionInitializerStart( - /*S=*/nullptr, NewDRD); + VarDecl *OmpPrivParm = + SemaRef.OpenMP().ActOnOpenMPDeclareReductionInitializerStart( + /*S=*/nullptr, NewDRD); SemaRef.CurrentInstantiationScope->InstantiatedLocal( cast(D->getInitOrig())->getDecl(), cast(NewDRD->getInitOrig())->getDecl()); @@ -3710,8 +3713,8 @@ Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl( SemaRef.InstantiateVariableInitializer(OmpPrivParm, OldPrivParm, TemplateArgs); } - SemaRef.ActOnOpenMPDeclareReductionInitializerEnd(NewDRD, SubstInitializer, - OmpPrivParm); + SemaRef.OpenMP().ActOnOpenMPDeclareReductionInitializerEnd( + NewDRD, SubstInitializer, OmpPrivParm); } IsCorrect = IsCorrect && SubstCombiner && (!Init || @@ -3720,7 +3723,7 @@ Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl( (D->getInitializerKind() != OMPDeclareReductionInitKind::Call && !SubstInitializer)); - (void)SemaRef.ActOnOpenMPDeclareReductionDirectiveEnd( + (void)SemaRef.OpenMP().ActOnOpenMPDeclareReductionDirectiveEnd( /*S=*/nullptr, DRD, IsCorrect && !D->isInvalidDecl()); return NewDRD; @@ -3736,7 +3739,7 @@ TemplateDeclInstantiator::VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D) { QualType SubstMapperTy; DeclarationName VN = D->getVarName(); if 
(RequiresInstantiation) { - SubstMapperTy = SemaRef.ActOnOpenMPDeclareMapperType( + SubstMapperTy = SemaRef.OpenMP().ActOnOpenMPDeclareMapperType( D->getLocation(), ParsedType::make(SemaRef.SubstType(D->getType(), TemplateArgs, D->getLocation(), VN))); @@ -3756,11 +3759,12 @@ TemplateDeclInstantiator::VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D) { SmallVector Clauses; // Instantiate the mapper variable. DeclarationNameInfo DirName; - SemaRef.StartOpenMPDSABlock(llvm::omp::OMPD_declare_mapper, DirName, - /*S=*/nullptr, - (*D->clauselist_begin())->getBeginLoc()); - ExprResult MapperVarRef = SemaRef.ActOnOpenMPDeclareMapperDirectiveVarDecl( - /*S=*/nullptr, SubstMapperTy, D->getLocation(), VN); + SemaRef.OpenMP().StartOpenMPDSABlock(llvm::omp::OMPD_declare_mapper, DirName, + /*S=*/nullptr, + (*D->clauselist_begin())->getBeginLoc()); + ExprResult MapperVarRef = + SemaRef.OpenMP().ActOnOpenMPDeclareMapperDirectiveVarDecl( + /*S=*/nullptr, SubstMapperTy, D->getLocation(), VN); SemaRef.CurrentInstantiationScope->InstantiatedLocal( cast(D->getMapperVarRef())->getDecl(), cast(MapperVarRef.get())->getDecl()); @@ -3790,17 +3794,17 @@ TemplateDeclInstantiator::VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D) { SemaRef.SubstDeclarationNameInfo(OldC->getMapperIdInfo(), TemplateArgs); OMPVarListLocTy Locs(OldC->getBeginLoc(), OldC->getLParenLoc(), OldC->getEndLoc()); - OMPClause *NewC = SemaRef.ActOnOpenMPMapClause( + OMPClause *NewC = SemaRef.OpenMP().ActOnOpenMPMapClause( OldC->getIteratorModifier(), OldC->getMapTypeModifiers(), OldC->getMapTypeModifiersLoc(), SS, NewNameInfo, OldC->getMapType(), OldC->isImplicitMapType(), OldC->getMapLoc(), OldC->getColonLoc(), NewVars, Locs); Clauses.push_back(NewC); } - SemaRef.EndOpenMPDSABlock(nullptr); + SemaRef.OpenMP().EndOpenMPDSABlock(nullptr); if (!IsCorrect) return nullptr; - Sema::DeclGroupPtrTy DG = SemaRef.ActOnOpenMPDeclareMapperDirective( + Sema::DeclGroupPtrTy DG = SemaRef.OpenMP().ActOnOpenMPDeclareMapperDirective( 
/*S=*/nullptr, Owner, D->getDeclName(), SubstMapperTy, D->getLocation(), VN, D->getAccess(), MapperVarRef.get(), Clauses, PrevDeclInScope); Decl *NewDMD = DG.get().getSingleDecl(); diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 404c4e8e31b558..1b31df8d97fba2 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -35,6 +35,7 @@ #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaCUDA.h" #include "clang/Sema/SemaInternal.h" +#include "clang/Sema/SemaOpenMP.h" #include "clang/Sema/Template.h" #include "clang/Sema/TemplateInstCallback.h" #include "llvm/ADT/ArrayRef.h" @@ -2640,7 +2641,7 @@ QualType Sema::BuildArrayType(QualType T, ArraySizeModifier ASM, } else if (isSFINAEContext()) { VLADiag = diag::err_vla_in_sfinae; VLAIsError = true; - } else if (getLangOpts().OpenMP && isInOpenMPTaskUntiedContext()) { + } else if (getLangOpts().OpenMP && OpenMP().isInOpenMPTaskUntiedContext()) { VLADiag = diag::err_openmp_vla_in_task_untied; VLAIsError = true; } else if (getLangOpts().CPlusPlus) { diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 8c96134af7c8f0..0c7fdb357235e1 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -40,6 +40,7 @@ #include "clang/Sema/SemaDiagnostic.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/SemaOpenACC.h" +#include "clang/Sema/SemaOpenMP.h" #include "clang/Sema/SemaSYCL.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/ErrorHandling.h" @@ -1656,7 +1657,7 @@ class TreeTransform { /// Ensures that the outermost loop in @p LoopStmt is wrapped by a /// OMPCanonicalLoop. StmtResult RebuildOMPCanonicalLoop(Stmt *LoopStmt) { - return getSema().ActOnOpenMPCanonicalLoop(LoopStmt); + return getSema().OpenMP().ActOnOpenMPCanonicalLoop(LoopStmt); } /// Build a new OpenMP executable directive. 
@@ -1669,7 +1670,7 @@ class TreeTransform { Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, OpenMPDirectiveKind PrevMappedDirective = OMPD_unknown) { - return getSema().ActOnOpenMPExecutableDirective( + return getSema().OpenMP().ActOnOpenMPExecutableDirective( Kind, DirName, CancelRegion, Clauses, AStmt, StartLoc, EndLoc, PrevMappedDirective); } @@ -1684,9 +1685,9 @@ class TreeTransform { SourceLocation NameModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPIfClause(NameModifier, Condition, StartLoc, - LParenLoc, NameModifierLoc, ColonLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPIfClause( + NameModifier, Condition, StartLoc, LParenLoc, NameModifierLoc, ColonLoc, + EndLoc); } /// Build a new OpenMP 'final' clause. @@ -1696,8 +1697,8 @@ class TreeTransform { OMPClause *RebuildOMPFinalClause(Expr *Condition, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPFinalClause(Condition, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPFinalClause(Condition, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'num_threads' clause. @@ -1708,8 +1709,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPNumThreadsClause(NumThreads, StartLoc, - LParenLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPNumThreadsClause(NumThreads, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'safelen' clause. @@ -1719,7 +1720,8 @@ class TreeTransform { OMPClause *RebuildOMPSafelenClause(Expr *Len, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPSafelenClause(Len, StartLoc, LParenLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPSafelenClause(Len, StartLoc, LParenLoc, + EndLoc); } /// Build a new OpenMP 'simdlen' clause. 
@@ -1729,28 +1731,30 @@ class TreeTransform { OMPClause *RebuildOMPSimdlenClause(Expr *Len, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPSimdlenClause(Len, StartLoc, LParenLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPSimdlenClause(Len, StartLoc, LParenLoc, + EndLoc); } OMPClause *RebuildOMPSizesClause(ArrayRef Sizes, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPSizesClause(Sizes, StartLoc, LParenLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPSizesClause(Sizes, StartLoc, LParenLoc, + EndLoc); } /// Build a new OpenMP 'full' clause. OMPClause *RebuildOMPFullClause(SourceLocation StartLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPFullClause(StartLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPFullClause(StartLoc, EndLoc); } /// Build a new OpenMP 'partial' clause. OMPClause *RebuildOMPPartialClause(Expr *Factor, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPPartialClause(Factor, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPPartialClause(Factor, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'allocator' clause. @@ -1760,7 +1764,8 @@ class TreeTransform { OMPClause *RebuildOMPAllocatorClause(Expr *A, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPAllocatorClause(A, StartLoc, LParenLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPAllocatorClause(A, StartLoc, LParenLoc, + EndLoc); } /// Build a new OpenMP 'collapse' clause. 
@@ -1770,8 +1775,8 @@ class TreeTransform { OMPClause *RebuildOMPCollapseClause(Expr *Num, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPCollapseClause(Num, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPCollapseClause(Num, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'default' clause. @@ -1782,8 +1787,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPDefaultClause(Kind, KindKwLoc, - StartLoc, LParenLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPDefaultClause( + Kind, KindKwLoc, StartLoc, LParenLoc, EndLoc); } /// Build a new OpenMP 'proc_bind' clause. @@ -1795,8 +1800,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPProcBindClause(Kind, KindKwLoc, - StartLoc, LParenLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPProcBindClause( + Kind, KindKwLoc, StartLoc, LParenLoc, EndLoc); } /// Build a new OpenMP 'schedule' clause. @@ -1808,7 +1813,7 @@ class TreeTransform { OpenMPScheduleClauseKind Kind, Expr *ChunkSize, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation M1Loc, SourceLocation M2Loc, SourceLocation KindLoc, SourceLocation CommaLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPScheduleClause( + return getSema().OpenMP().ActOnOpenMPScheduleClause( M1, M2, Kind, ChunkSize, StartLoc, LParenLoc, M1Loc, M2Loc, KindLoc, CommaLoc, EndLoc); } @@ -1820,7 +1825,8 @@ class TreeTransform { OMPClause *RebuildOMPOrderedClause(SourceLocation StartLoc, SourceLocation EndLoc, SourceLocation LParenLoc, Expr *Num) { - return getSema().ActOnOpenMPOrderedClause(StartLoc, EndLoc, LParenLoc, Num); + return getSema().OpenMP().ActOnOpenMPOrderedClause(StartLoc, EndLoc, + LParenLoc, Num); } /// Build a new OpenMP 'private' clause. 
@@ -1831,8 +1837,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPPrivateClause(VarList, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPPrivateClause(VarList, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'firstprivate' clause. @@ -1843,8 +1849,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPFirstprivateClause(VarList, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPFirstprivateClause(VarList, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'lastprivate' clause. @@ -1858,7 +1864,7 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPLastprivateClause( + return getSema().OpenMP().ActOnOpenMPLastprivateClause( VarList, LPKind, LPKindLoc, ColonLoc, StartLoc, LParenLoc, EndLoc); } @@ -1870,8 +1876,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPSharedClause(VarList, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPSharedClause(VarList, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'reduction' clause. 
@@ -1885,7 +1891,7 @@ class TreeTransform { SourceLocation EndLoc, CXXScopeSpec &ReductionIdScopeSpec, const DeclarationNameInfo &ReductionId, ArrayRef UnresolvedReductions) { - return getSema().ActOnOpenMPReductionClause( + return getSema().OpenMP().ActOnOpenMPReductionClause( VarList, Modifier, StartLoc, LParenLoc, ModifierLoc, ColonLoc, EndLoc, ReductionIdScopeSpec, ReductionId, UnresolvedReductions); } @@ -1900,7 +1906,7 @@ class TreeTransform { CXXScopeSpec &ReductionIdScopeSpec, const DeclarationNameInfo &ReductionId, ArrayRef UnresolvedReductions) { - return getSema().ActOnOpenMPTaskReductionClause( + return getSema().OpenMP().ActOnOpenMPTaskReductionClause( VarList, StartLoc, LParenLoc, ColonLoc, EndLoc, ReductionIdScopeSpec, ReductionId, UnresolvedReductions); } @@ -1916,7 +1922,7 @@ class TreeTransform { CXXScopeSpec &ReductionIdScopeSpec, const DeclarationNameInfo &ReductionId, ArrayRef UnresolvedReductions) { - return getSema().ActOnOpenMPInReductionClause( + return getSema().OpenMP().ActOnOpenMPInReductionClause( VarList, StartLoc, LParenLoc, ColonLoc, EndLoc, ReductionIdScopeSpec, ReductionId, UnresolvedReductions); } @@ -1930,9 +1936,9 @@ class TreeTransform { SourceLocation LParenLoc, OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation StepModifierLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPLinearClause(VarList, Step, StartLoc, LParenLoc, - Modifier, ModifierLoc, ColonLoc, - StepModifierLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPLinearClause( + VarList, Step, StartLoc, LParenLoc, Modifier, ModifierLoc, ColonLoc, + StepModifierLoc, EndLoc); } /// Build a new OpenMP 'aligned' clause. 
@@ -1944,8 +1950,8 @@ class TreeTransform { SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPAlignedClause(VarList, Alignment, StartLoc, - LParenLoc, ColonLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPAlignedClause( + VarList, Alignment, StartLoc, LParenLoc, ColonLoc, EndLoc); } /// Build a new OpenMP 'copyin' clause. @@ -1956,8 +1962,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPCopyinClause(VarList, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPCopyinClause(VarList, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'copyprivate' clause. @@ -1968,8 +1974,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPCopyprivateClause(VarList, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPCopyprivateClause(VarList, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'flush' pseudo clause. @@ -1980,8 +1986,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPFlushClause(VarList, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPFlushClause(VarList, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'depobj' pseudo clause. @@ -1991,8 +1997,8 @@ class TreeTransform { OMPClause *RebuildOMPDepobjClause(Expr *Depobj, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPDepobjClause(Depobj, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPDepobjClause(Depobj, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'depend' pseudo clause. 
@@ -2004,8 +2010,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPDependClause(Data, DepModifier, VarList, - StartLoc, LParenLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPDependClause( + Data, DepModifier, VarList, StartLoc, LParenLoc, EndLoc); } /// Build a new OpenMP 'device' clause. @@ -2017,8 +2023,8 @@ class TreeTransform { SourceLocation LParenLoc, SourceLocation ModifierLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPDeviceClause(Modifier, Device, StartLoc, - LParenLoc, ModifierLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPDeviceClause( + Modifier, Device, StartLoc, LParenLoc, ModifierLoc, EndLoc); } /// Build a new OpenMP 'map' clause. @@ -2032,7 +2038,7 @@ class TreeTransform { OpenMPMapClauseKind MapType, bool IsMapTypeImplicit, SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef VarList, const OMPVarListLocTy &Locs, ArrayRef UnresolvedMappers) { - return getSema().ActOnOpenMPMapClause( + return getSema().OpenMP().ActOnOpenMPMapClause( IteratorModifier, MapTypeModifiers, MapTypeModifiersLoc, MapperIdScopeSpec, MapperId, MapType, IsMapTypeImplicit, MapLoc, ColonLoc, VarList, Locs, @@ -2048,8 +2054,8 @@ class TreeTransform { SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPAllocateClause(Allocate, VarList, StartLoc, - LParenLoc, ColonLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPAllocateClause( + Allocate, VarList, StartLoc, LParenLoc, ColonLoc, EndLoc); } /// Build a new OpenMP 'num_teams' clause. 
@@ -2059,8 +2065,8 @@ class TreeTransform { OMPClause *RebuildOMPNumTeamsClause(Expr *NumTeams, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPNumTeamsClause(NumTeams, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPNumTeamsClause(NumTeams, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'thread_limit' clause. @@ -2071,8 +2077,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPThreadLimitClause(ThreadLimit, StartLoc, - LParenLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPThreadLimitClause( + ThreadLimit, StartLoc, LParenLoc, EndLoc); } /// Build a new OpenMP 'priority' clause. @@ -2082,8 +2088,8 @@ class TreeTransform { OMPClause *RebuildOMPPriorityClause(Expr *Priority, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPPriorityClause(Priority, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPPriorityClause(Priority, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'grainsize' clause. @@ -2095,8 +2101,8 @@ class TreeTransform { SourceLocation LParenLoc, SourceLocation ModifierLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPGrainsizeClause(Modifier, Device, StartLoc, - LParenLoc, ModifierLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPGrainsizeClause( + Modifier, Device, StartLoc, LParenLoc, ModifierLoc, EndLoc); } /// Build a new OpenMP 'num_tasks' clause. @@ -2108,8 +2114,8 @@ class TreeTransform { SourceLocation LParenLoc, SourceLocation ModifierLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPNumTasksClause(Modifier, NumTasks, StartLoc, - LParenLoc, ModifierLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPNumTasksClause( + Modifier, NumTasks, StartLoc, LParenLoc, ModifierLoc, EndLoc); } /// Build a new OpenMP 'hint' clause. 
@@ -2119,7 +2125,8 @@ class TreeTransform { OMPClause *RebuildOMPHintClause(Expr *Hint, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPHintClause(Hint, StartLoc, LParenLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPHintClause(Hint, StartLoc, LParenLoc, + EndLoc); } /// Build a new OpenMP 'detach' clause. @@ -2129,7 +2136,8 @@ class TreeTransform { OMPClause *RebuildOMPDetachClause(Expr *Evt, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPDetachClause(Evt, StartLoc, LParenLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPDetachClause(Evt, StartLoc, LParenLoc, + EndLoc); } /// Build a new OpenMP 'dist_schedule' clause. @@ -2141,7 +2149,7 @@ class TreeTransform { Expr *ChunkSize, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation KindLoc, SourceLocation CommaLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPDistScheduleClause( + return getSema().OpenMP().ActOnOpenMPDistScheduleClause( Kind, ChunkSize, StartLoc, LParenLoc, KindLoc, CommaLoc, EndLoc); } @@ -2156,9 +2164,9 @@ class TreeTransform { DeclarationNameInfo &MapperId, SourceLocation ColonLoc, ArrayRef VarList, const OMPVarListLocTy &Locs, ArrayRef UnresolvedMappers) { - return getSema().ActOnOpenMPToClause(MotionModifiers, MotionModifiersLoc, - MapperIdScopeSpec, MapperId, ColonLoc, - VarList, Locs, UnresolvedMappers); + return getSema().OpenMP().ActOnOpenMPToClause( + MotionModifiers, MotionModifiersLoc, MapperIdScopeSpec, MapperId, + ColonLoc, VarList, Locs, UnresolvedMappers); } /// Build a new OpenMP 'from' clause. 
@@ -2172,7 +2180,7 @@ class TreeTransform { DeclarationNameInfo &MapperId, SourceLocation ColonLoc, ArrayRef VarList, const OMPVarListLocTy &Locs, ArrayRef UnresolvedMappers) { - return getSema().ActOnOpenMPFromClause( + return getSema().OpenMP().ActOnOpenMPFromClause( MotionModifiers, MotionModifiersLoc, MapperIdScopeSpec, MapperId, ColonLoc, VarList, Locs, UnresolvedMappers); } @@ -2183,7 +2191,7 @@ class TreeTransform { /// Subclasses may override this routine to provide different behavior. OMPClause *RebuildOMPUseDevicePtrClause(ArrayRef VarList, const OMPVarListLocTy &Locs) { - return getSema().ActOnOpenMPUseDevicePtrClause(VarList, Locs); + return getSema().OpenMP().ActOnOpenMPUseDevicePtrClause(VarList, Locs); } /// Build a new OpenMP 'use_device_addr' clause. @@ -2192,7 +2200,7 @@ class TreeTransform { /// Subclasses may override this routine to provide different behavior. OMPClause *RebuildOMPUseDeviceAddrClause(ArrayRef VarList, const OMPVarListLocTy &Locs) { - return getSema().ActOnOpenMPUseDeviceAddrClause(VarList, Locs); + return getSema().OpenMP().ActOnOpenMPUseDeviceAddrClause(VarList, Locs); } /// Build a new OpenMP 'is_device_ptr' clause. @@ -2201,7 +2209,7 @@ class TreeTransform { /// Subclasses may override this routine to provide different behavior. OMPClause *RebuildOMPIsDevicePtrClause(ArrayRef VarList, const OMPVarListLocTy &Locs) { - return getSema().ActOnOpenMPIsDevicePtrClause(VarList, Locs); + return getSema().OpenMP().ActOnOpenMPIsDevicePtrClause(VarList, Locs); } /// Build a new OpenMP 'has_device_addr' clause. @@ -2210,7 +2218,7 @@ class TreeTransform { /// Subclasses may override this routine to provide different behavior. OMPClause *RebuildOMPHasDeviceAddrClause(ArrayRef VarList, const OMPVarListLocTy &Locs) { - return getSema().ActOnOpenMPHasDeviceAddrClause(VarList, Locs); + return getSema().OpenMP().ActOnOpenMPHasDeviceAddrClause(VarList, Locs); } /// Build a new OpenMP 'defaultmap' clause. 
@@ -2224,8 +2232,8 @@ class TreeTransform { SourceLocation MLoc, SourceLocation KindLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPDefaultmapClause(M, Kind, StartLoc, LParenLoc, - MLoc, KindLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPDefaultmapClause( + M, Kind, StartLoc, LParenLoc, MLoc, KindLoc, EndLoc); } /// Build a new OpenMP 'nontemporal' clause. @@ -2236,8 +2244,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPNontemporalClause(VarList, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPNontemporalClause(VarList, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'inclusive' clause. @@ -2248,8 +2256,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPInclusiveClause(VarList, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPInclusiveClause(VarList, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'exclusive' clause. @@ -2260,8 +2268,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPExclusiveClause(VarList, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPExclusiveClause(VarList, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'uses_allocators' clause. @@ -2269,10 +2277,10 @@ class TreeTransform { /// By default, performs semantic analysis to build the new OpenMP clause. /// Subclasses may override this routine to provide different behavior. 
OMPClause *RebuildOMPUsesAllocatorsClause( - ArrayRef Data, SourceLocation StartLoc, + ArrayRef Data, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPUsesAllocatorClause(StartLoc, LParenLoc, EndLoc, - Data); + return getSema().OpenMP().ActOnOpenMPUsesAllocatorClause( + StartLoc, LParenLoc, EndLoc, Data); } /// Build a new OpenMP 'affinity' clause. @@ -2284,8 +2292,8 @@ class TreeTransform { SourceLocation ColonLoc, SourceLocation EndLoc, Expr *Modifier, ArrayRef Locators) { - return getSema().ActOnOpenMPAffinityClause(StartLoc, LParenLoc, ColonLoc, - EndLoc, Modifier, Locators); + return getSema().OpenMP().ActOnOpenMPAffinityClause( + StartLoc, LParenLoc, ColonLoc, EndLoc, Modifier, Locators); } /// Build a new OpenMP 'order' clause. @@ -2296,8 +2304,8 @@ class TreeTransform { OpenMPOrderClauseKind Kind, SourceLocation KindKwLoc, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc, OpenMPOrderClauseModifier Modifier, SourceLocation ModifierKwLoc) { - return getSema().ActOnOpenMPOrderClause(Modifier, Kind, StartLoc, LParenLoc, - ModifierKwLoc, KindKwLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPOrderClause( + Modifier, Kind, StartLoc, LParenLoc, ModifierKwLoc, KindKwLoc, EndLoc); } /// Build a new OpenMP 'init' clause. @@ -2309,8 +2317,8 @@ class TreeTransform { SourceLocation LParenLoc, SourceLocation VarLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPInitClause(InteropVar, InteropInfo, StartLoc, - LParenLoc, VarLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPInitClause( + InteropVar, InteropInfo, StartLoc, LParenLoc, VarLoc, EndLoc); } /// Build a new OpenMP 'use' clause. 
@@ -2320,8 +2328,8 @@ class TreeTransform { OMPClause *RebuildOMPUseClause(Expr *InteropVar, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation VarLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPUseClause(InteropVar, StartLoc, LParenLoc, - VarLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPUseClause(InteropVar, StartLoc, + LParenLoc, VarLoc, EndLoc); } /// Build a new OpenMP 'destroy' clause. @@ -2332,8 +2340,8 @@ class TreeTransform { SourceLocation LParenLoc, SourceLocation VarLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPDestroyClause(InteropVar, StartLoc, LParenLoc, - VarLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPDestroyClause( + InteropVar, StartLoc, LParenLoc, VarLoc, EndLoc); } /// Build a new OpenMP 'novariants' clause. @@ -2344,8 +2352,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPNovariantsClause(Condition, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPNovariantsClause(Condition, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'nocontext' clause. @@ -2355,8 +2363,8 @@ class TreeTransform { OMPClause *RebuildOMPNocontextClause(Expr *Condition, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPNocontextClause(Condition, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPNocontextClause(Condition, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'filter' clause. @@ -2366,8 +2374,8 @@ class TreeTransform { OMPClause *RebuildOMPFilterClause(Expr *ThreadID, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPFilterClause(ThreadID, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPFilterClause(ThreadID, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'bind' clause. 
@@ -2379,8 +2387,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPBindClause(Kind, KindLoc, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPBindClause(Kind, KindLoc, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'ompx_dyn_cgroup_mem' clause. @@ -2390,8 +2398,8 @@ class TreeTransform { OMPClause *RebuildOMPXDynCGroupMemClause(Expr *Size, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPXDynCGroupMemClause(Size, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPXDynCGroupMemClause(Size, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'ompx_attribute' clause. @@ -2402,8 +2410,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPXAttributeClause(Attrs, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPXAttributeClause(Attrs, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'ompx_bare' clause. @@ -2412,7 +2420,7 @@ class TreeTransform { /// Subclasses may override this routine to provide different behavior. OMPClause *RebuildOMPXBareClause(SourceLocation StartLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPXBareClause(StartLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPXBareClause(StartLoc, EndLoc); } /// Build a new OpenMP 'align' clause. @@ -2422,7 +2430,8 @@ class TreeTransform { OMPClause *RebuildOMPAlignClause(Expr *A, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPAlignClause(A, StartLoc, LParenLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPAlignClause(A, StartLoc, LParenLoc, + EndLoc); } /// Build a new OpenMP 'at' clause. 
@@ -2433,8 +2442,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPAtClause(Kind, KwLoc, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPAtClause(Kind, KwLoc, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'severity' clause. @@ -2446,8 +2455,8 @@ class TreeTransform { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPSeverityClause(Kind, KwLoc, StartLoc, LParenLoc, - EndLoc); + return getSema().OpenMP().ActOnOpenMPSeverityClause(Kind, KwLoc, StartLoc, + LParenLoc, EndLoc); } /// Build a new OpenMP 'message' clause. @@ -2457,7 +2466,8 @@ class TreeTransform { OMPClause *RebuildOMPMessageClause(Expr *MS, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPMessageClause(MS, StartLoc, LParenLoc, EndLoc); + return getSema().OpenMP().ActOnOpenMPMessageClause(MS, StartLoc, LParenLoc, + EndLoc); } /// Build a new OpenMP 'doacross' clause. @@ -2469,7 +2479,7 @@ class TreeTransform { SourceLocation DepLoc, SourceLocation ColonLoc, ArrayRef VarList, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - return getSema().ActOnOpenMPDoacrossClause( + return getSema().OpenMP().ActOnOpenMPDoacrossClause( DepType, DepLoc, ColonLoc, VarList, StartLoc, LParenLoc, EndLoc); } @@ -2777,9 +2787,9 @@ class TreeTransform { SourceLocation ColonLocSecond, Expr *Length, Expr *Stride, SourceLocation RBracketLoc) { - return getSema().ActOnOMPArraySectionExpr(Base, LBracketLoc, LowerBound, - ColonLocFirst, ColonLocSecond, - Length, Stride, RBracketLoc); + return getSema().OpenMP().ActOnOMPArraySectionExpr( + Base, LBracketLoc, LowerBound, ColonLocFirst, ColonLocSecond, Length, + Stride, RBracketLoc); } /// Build a new array shaping expression. 
@@ -2790,19 +2800,20 @@ class TreeTransform { SourceLocation RParenLoc, ArrayRef Dims, ArrayRef BracketsRanges) { - return getSema().ActOnOMPArrayShapingExpr(Base, LParenLoc, RParenLoc, Dims, - BracketsRanges); + return getSema().OpenMP().ActOnOMPArrayShapingExpr( + Base, LParenLoc, RParenLoc, Dims, BracketsRanges); } /// Build a new iterator expression. /// /// By default, performs semantic analysis to build the new expression. /// Subclasses may override this routine to provide different behavior. - ExprResult RebuildOMPIteratorExpr( - SourceLocation IteratorKwLoc, SourceLocation LLoc, SourceLocation RLoc, - ArrayRef Data) { - return getSema().ActOnOMPIteratorExpr(/*Scope=*/nullptr, IteratorKwLoc, - LLoc, RLoc, Data); + ExprResult + RebuildOMPIteratorExpr(SourceLocation IteratorKwLoc, SourceLocation LLoc, + SourceLocation RLoc, + ArrayRef Data) { + return getSema().OpenMP().ActOnOMPIteratorExpr( + /*Scope=*/nullptr, IteratorKwLoc, LLoc, RLoc, Data); } /// Build a new call expression. @@ -8060,7 +8071,7 @@ template StmtResult TreeTransform::TransformForStmt(ForStmt *S) { if (getSema().getLangOpts().OpenMP) - getSema().startOpenMPLoop(); + getSema().OpenMP().startOpenMPLoop(); // Transform the initialization statement StmtResult Init = getDerived().TransformStmt(S->getInit()); @@ -8070,7 +8081,8 @@ TreeTransform::TransformForStmt(ForStmt *S) { // In OpenMP loop region loop control variable must be captured and be // private. Perform analysis of first part (if any). 
if (getSema().getLangOpts().OpenMP && Init.isUsable()) - getSema().ActOnOpenMPLoopInitialization(S->getForLoc(), Init.get()); + getSema().OpenMP().ActOnOpenMPLoopInitialization(S->getForLoc(), + Init.get()); // Transform the condition Sema::ConditionResult Cond = getDerived().TransformCondition( @@ -9029,9 +9041,9 @@ StmtResult TreeTransform::TransformOMPExecutableDirective( for (ArrayRef::iterator I = Clauses.begin(), E = Clauses.end(); I != E; ++I) { if (*I) { - getDerived().getSema().StartOpenMPClause((*I)->getClauseKind()); + getDerived().getSema().OpenMP().StartOpenMPClause((*I)->getClauseKind()); OMPClause *Clause = getDerived().TransformOMPClause(*I); - getDerived().getSema().EndOpenMPClause(); + getDerived().getSema().OpenMP().EndOpenMPClause(); if (Clause) TClauses.push_back(Clause); } else { @@ -9040,8 +9052,9 @@ StmtResult TreeTransform::TransformOMPExecutableDirective( } StmtResult AssociatedStmt; if (D->hasAssociatedStmt() && D->getAssociatedStmt()) { - getDerived().getSema().ActOnOpenMPRegionStart(D->getDirectiveKind(), - /*CurScope=*/nullptr); + getDerived().getSema().OpenMP().ActOnOpenMPRegionStart( + D->getDirectiveKind(), + /*CurScope=*/nullptr); StmtResult Body; { Sema::CompoundScopeRAII CompoundScope(getSema()); @@ -9059,7 +9072,7 @@ StmtResult TreeTransform::TransformOMPExecutableDirective( Body = getDerived().RebuildOMPCanonicalLoop(Body.get()); } AssociatedStmt = - getDerived().getSema().ActOnOpenMPRegionEnd(Body, TClauses); + getDerived().getSema().OpenMP().ActOnOpenMPRegionEnd(Body, TClauses); if (AssociatedStmt.isInvalid()) { return StmtError(); } @@ -9100,10 +9113,10 @@ template StmtResult TreeTransform::TransformOMPParallelDirective(OMPParallelDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_parallel, DirName, nullptr, D->getBeginLoc()); StmtResult Res = 
getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9111,10 +9124,10 @@ template StmtResult TreeTransform::TransformOMPSimdDirective(OMPSimdDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_simd, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_simd, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9122,10 +9135,10 @@ template StmtResult TreeTransform::TransformOMPTileDirective(OMPTileDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(D->getDirectiveKind(), DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9133,10 +9146,10 @@ template StmtResult TreeTransform::TransformOMPUnrollDirective(OMPUnrollDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(D->getDirectiveKind(), DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9144,10 +9157,10 @@ template StmtResult TreeTransform::TransformOMPForDirective(OMPForDirective *D) { DeclarationNameInfo DirName; - 
getDerived().getSema().StartOpenMPDSABlock(OMPD_for, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_for, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9155,10 +9168,10 @@ template StmtResult TreeTransform::TransformOMPForSimdDirective(OMPForSimdDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_for_simd, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_for_simd, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9166,10 +9179,10 @@ template StmtResult TreeTransform::TransformOMPSectionsDirective(OMPSectionsDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_sections, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_sections, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9177,10 +9190,10 @@ template StmtResult TreeTransform::TransformOMPSectionDirective(OMPSectionDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_section, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_section, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + 
getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9188,10 +9201,10 @@ template StmtResult TreeTransform::TransformOMPScopeDirective(OMPScopeDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_scope, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_scope, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9199,10 +9212,10 @@ template StmtResult TreeTransform::TransformOMPSingleDirective(OMPSingleDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_single, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_single, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9210,20 +9223,20 @@ template StmtResult TreeTransform::TransformOMPMasterDirective(OMPMasterDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_master, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_master, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } template StmtResult TreeTransform::TransformOMPCriticalDirective(OMPCriticalDirective *D) { - getDerived().getSema().StartOpenMPDSABlock( + getDerived().getSema().OpenMP().StartOpenMPDSABlock( OMPD_critical, D->getDirectiveName(), nullptr, D->getBeginLoc()); StmtResult Res = 
getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9231,10 +9244,10 @@ template StmtResult TreeTransform::TransformOMPParallelForDirective( OMPParallelForDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel_for, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_parallel_for, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9242,10 +9255,10 @@ template StmtResult TreeTransform::TransformOMPParallelForSimdDirective( OMPParallelForSimdDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel_for_simd, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_parallel_for_simd, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9253,10 +9266,10 @@ template StmtResult TreeTransform::TransformOMPParallelMasterDirective( OMPParallelMasterDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel_master, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_parallel_master, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9264,10 +9277,10 @@ template StmtResult 
TreeTransform::TransformOMPParallelMaskedDirective( OMPParallelMaskedDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel_masked, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_parallel_masked, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9275,10 +9288,10 @@ template StmtResult TreeTransform::TransformOMPParallelSectionsDirective( OMPParallelSectionsDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel_sections, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_parallel_sections, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9286,10 +9299,10 @@ template StmtResult TreeTransform::TransformOMPTaskDirective(OMPTaskDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_task, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_task, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9297,10 +9310,10 @@ template StmtResult TreeTransform::TransformOMPTaskyieldDirective( OMPTaskyieldDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_taskyield, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_taskyield, DirName, nullptr, 
D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9308,10 +9321,10 @@ template StmtResult TreeTransform::TransformOMPBarrierDirective(OMPBarrierDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_barrier, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_barrier, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9319,10 +9332,10 @@ template StmtResult TreeTransform::TransformOMPTaskwaitDirective(OMPTaskwaitDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_taskwait, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_taskwait, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9330,10 +9343,10 @@ template StmtResult TreeTransform::TransformOMPErrorDirective(OMPErrorDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_error, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_error, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9341,10 +9354,10 @@ template StmtResult TreeTransform::TransformOMPTaskgroupDirective( OMPTaskgroupDirective *D) { DeclarationNameInfo 
DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_taskgroup, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_taskgroup, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9352,10 +9365,10 @@ template StmtResult TreeTransform::TransformOMPFlushDirective(OMPFlushDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_flush, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_flush, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9363,10 +9376,10 @@ template StmtResult TreeTransform::TransformOMPDepobjDirective(OMPDepobjDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_depobj, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_depobj, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9374,10 +9387,10 @@ template StmtResult TreeTransform::TransformOMPScanDirective(OMPScanDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_scan, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_scan, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + 
getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9385,10 +9398,10 @@ template StmtResult TreeTransform::TransformOMPOrderedDirective(OMPOrderedDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_ordered, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_ordered, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9396,10 +9409,10 @@ template StmtResult TreeTransform::TransformOMPAtomicDirective(OMPAtomicDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_atomic, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_atomic, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9407,10 +9420,10 @@ template StmtResult TreeTransform::TransformOMPTargetDirective(OMPTargetDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_target, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_target, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9418,10 +9431,10 @@ template StmtResult TreeTransform::TransformOMPTargetDataDirective( OMPTargetDataDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_target_data, DirName, nullptr, - D->getBeginLoc()); + 
getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_target_data, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9429,10 +9442,10 @@ template StmtResult TreeTransform::TransformOMPTargetEnterDataDirective( OMPTargetEnterDataDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_target_enter_data, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_target_enter_data, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9440,10 +9453,10 @@ template StmtResult TreeTransform::TransformOMPTargetExitDataDirective( OMPTargetExitDataDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_target_exit_data, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_target_exit_data, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9451,10 +9464,10 @@ template StmtResult TreeTransform::TransformOMPTargetParallelDirective( OMPTargetParallelDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_target_parallel, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_target_parallel, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + 
getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9462,10 +9475,10 @@ template StmtResult TreeTransform::TransformOMPTargetParallelForDirective( OMPTargetParallelForDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_target_parallel_for, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_target_parallel_for, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9473,10 +9486,10 @@ template StmtResult TreeTransform::TransformOMPTargetUpdateDirective( OMPTargetUpdateDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_target_update, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_target_update, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9484,10 +9497,10 @@ template StmtResult TreeTransform::TransformOMPTeamsDirective(OMPTeamsDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_teams, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_teams, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9495,10 +9508,10 @@ template StmtResult TreeTransform::TransformOMPCancellationPointDirective( OMPCancellationPointDirective *D) { DeclarationNameInfo DirName; - 
getDerived().getSema().StartOpenMPDSABlock(OMPD_cancellation_point, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_cancellation_point, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9506,10 +9519,10 @@ template StmtResult TreeTransform::TransformOMPCancelDirective(OMPCancelDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_cancel, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_cancel, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9517,10 +9530,10 @@ template StmtResult TreeTransform::TransformOMPTaskLoopDirective(OMPTaskLoopDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_taskloop, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_taskloop, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9528,10 +9541,10 @@ template StmtResult TreeTransform::TransformOMPTaskLoopSimdDirective( OMPTaskLoopSimdDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_taskloop_simd, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_taskloop_simd, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - 
getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9539,10 +9552,10 @@ template StmtResult TreeTransform::TransformOMPMasterTaskLoopDirective( OMPMasterTaskLoopDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_master_taskloop, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_master_taskloop, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9550,10 +9563,10 @@ template StmtResult TreeTransform::TransformOMPMaskedTaskLoopDirective( OMPMaskedTaskLoopDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_masked_taskloop, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_masked_taskloop, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9561,10 +9574,10 @@ template StmtResult TreeTransform::TransformOMPMasterTaskLoopSimdDirective( OMPMasterTaskLoopSimdDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_master_taskloop_simd, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_master_taskloop_simd, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9572,10 +9585,10 @@ template StmtResult TreeTransform::TransformOMPMaskedTaskLoopSimdDirective( 
OMPMaskedTaskLoopSimdDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_masked_taskloop_simd, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_masked_taskloop_simd, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9583,10 +9596,10 @@ template StmtResult TreeTransform::TransformOMPParallelMasterTaskLoopDirective( OMPParallelMasterTaskLoopDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock( + getDerived().getSema().OpenMP().StartOpenMPDSABlock( OMPD_parallel_master_taskloop, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9594,10 +9607,10 @@ template StmtResult TreeTransform::TransformOMPParallelMaskedTaskLoopDirective( OMPParallelMaskedTaskLoopDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock( + getDerived().getSema().OpenMP().StartOpenMPDSABlock( OMPD_parallel_masked_taskloop, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9606,10 +9619,10 @@ StmtResult TreeTransform::TransformOMPParallelMasterTaskLoopSimdDirective( OMPParallelMasterTaskLoopSimdDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock( + getDerived().getSema().OpenMP().StartOpenMPDSABlock( OMPD_parallel_master_taskloop_simd, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - 
getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9618,10 +9631,10 @@ StmtResult TreeTransform::TransformOMPParallelMaskedTaskLoopSimdDirective( OMPParallelMaskedTaskLoopSimdDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock( + getDerived().getSema().OpenMP().StartOpenMPDSABlock( OMPD_parallel_masked_taskloop_simd, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9629,10 +9642,10 @@ template StmtResult TreeTransform::TransformOMPDistributeDirective( OMPDistributeDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_distribute, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_distribute, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9640,10 +9653,10 @@ template StmtResult TreeTransform::TransformOMPDistributeParallelForDirective( OMPDistributeParallelForDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock( + getDerived().getSema().OpenMP().StartOpenMPDSABlock( OMPD_distribute_parallel_for, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9652,10 +9665,10 @@ StmtResult TreeTransform::TransformOMPDistributeParallelForSimdDirective( OMPDistributeParallelForSimdDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock( + 
getDerived().getSema().OpenMP().StartOpenMPDSABlock( OMPD_distribute_parallel_for_simd, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9663,10 +9676,10 @@ template StmtResult TreeTransform::TransformOMPDistributeSimdDirective( OMPDistributeSimdDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_distribute_simd, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_distribute_simd, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9674,10 +9687,10 @@ template StmtResult TreeTransform::TransformOMPTargetParallelForSimdDirective( OMPTargetParallelForSimdDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock( + getDerived().getSema().OpenMP().StartOpenMPDSABlock( OMPD_target_parallel_for_simd, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9685,10 +9698,10 @@ template StmtResult TreeTransform::TransformOMPTargetSimdDirective( OMPTargetSimdDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_target_simd, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_target_simd, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return 
Res; } @@ -9696,10 +9709,10 @@ template StmtResult TreeTransform::TransformOMPTeamsDistributeDirective( OMPTeamsDistributeDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_teams_distribute, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_teams_distribute, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9707,10 +9720,10 @@ template StmtResult TreeTransform::TransformOMPTeamsDistributeSimdDirective( OMPTeamsDistributeSimdDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock( + getDerived().getSema().OpenMP().StartOpenMPDSABlock( OMPD_teams_distribute_simd, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9718,11 +9731,11 @@ template StmtResult TreeTransform::TransformOMPTeamsDistributeParallelForSimdDirective( OMPTeamsDistributeParallelForSimdDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock( + getDerived().getSema().OpenMP().StartOpenMPDSABlock( OMPD_teams_distribute_parallel_for_simd, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9730,10 +9743,10 @@ template StmtResult TreeTransform::TransformOMPTeamsDistributeParallelForDirective( OMPTeamsDistributeParallelForDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock( + getDerived().getSema().OpenMP().StartOpenMPDSABlock( 
OMPD_teams_distribute_parallel_for, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9741,10 +9754,10 @@ template StmtResult TreeTransform::TransformOMPTargetTeamsDirective( OMPTargetTeamsDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_target_teams, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_target_teams, DirName, nullptr, D->getBeginLoc()); auto Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9752,10 +9765,10 @@ template StmtResult TreeTransform::TransformOMPTargetTeamsDistributeDirective( OMPTargetTeamsDistributeDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock( + getDerived().getSema().OpenMP().StartOpenMPDSABlock( OMPD_target_teams_distribute, DirName, nullptr, D->getBeginLoc()); auto Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9764,11 +9777,11 @@ StmtResult TreeTransform::TransformOMPTargetTeamsDistributeParallelForDirective( OMPTargetTeamsDistributeParallelForDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock( + getDerived().getSema().OpenMP().StartOpenMPDSABlock( OMPD_target_teams_distribute_parallel_for, DirName, nullptr, D->getBeginLoc()); auto Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9777,11 +9790,11 @@ StmtResult TreeTransform:: 
TransformOMPTargetTeamsDistributeParallelForSimdDirective( OMPTargetTeamsDistributeParallelForSimdDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock( + getDerived().getSema().OpenMP().StartOpenMPDSABlock( OMPD_target_teams_distribute_parallel_for_simd, DirName, nullptr, D->getBeginLoc()); auto Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9790,10 +9803,10 @@ StmtResult TreeTransform::TransformOMPTargetTeamsDistributeSimdDirective( OMPTargetTeamsDistributeSimdDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock( + getDerived().getSema().OpenMP().StartOpenMPDSABlock( OMPD_target_teams_distribute_simd, DirName, nullptr, D->getBeginLoc()); auto Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9801,10 +9814,10 @@ template StmtResult TreeTransform::TransformOMPInteropDirective(OMPInteropDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_interop, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_interop, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9812,10 +9825,10 @@ template StmtResult TreeTransform::TransformOMPDispatchDirective(OMPDispatchDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_dispatch, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_dispatch, DirName, nullptr, D->getBeginLoc()); StmtResult Res = 
getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9823,10 +9836,10 @@ template StmtResult TreeTransform::TransformOMPMaskedDirective(OMPMaskedDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_masked, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_masked, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9834,10 +9847,10 @@ template StmtResult TreeTransform::TransformOMPGenericLoopDirective( OMPGenericLoopDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_loop, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_loop, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9845,10 +9858,10 @@ template StmtResult TreeTransform::TransformOMPTeamsGenericLoopDirective( OMPTeamsGenericLoopDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_teams_loop, DirName, nullptr, - D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_teams_loop, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9856,10 +9869,10 @@ template StmtResult TreeTransform::TransformOMPTargetTeamsGenericLoopDirective( OMPTargetTeamsGenericLoopDirective *D) { 
DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_target_teams_loop, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_target_teams_loop, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9867,10 +9880,10 @@ template StmtResult TreeTransform::TransformOMPParallelGenericLoopDirective( OMPParallelGenericLoopDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel_loop, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_parallel_loop, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -9879,10 +9892,10 @@ StmtResult TreeTransform::TransformOMPTargetParallelGenericLoopDirective( OMPTargetParallelGenericLoopDirective *D) { DeclarationNameInfo DirName; - getDerived().getSema().StartOpenMPDSABlock(OMPD_target_parallel_loop, DirName, - nullptr, D->getBeginLoc()); + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_target_parallel_loop, DirName, nullptr, D->getBeginLoc()); StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().EndOpenMPDSABlock(Res.get()); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); return Res; } @@ -10972,7 +10985,7 @@ TreeTransform::TransformOMPExclusiveClause(OMPExclusiveClause *C) { template OMPClause *TreeTransform::TransformOMPUsesAllocatorsClause( OMPUsesAllocatorsClause *C) { - SmallVector Data; + SmallVector Data; Data.reserve(C->getNumberOfAllocators()); for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 
OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); @@ -10985,7 +10998,7 @@ OMPClause *TreeTransform::TransformOMPUsesAllocatorsClause( if (AllocatorTraits.isInvalid()) continue; } - Sema::UsesAllocatorsData &NewD = Data.emplace_back(); + SemaOpenMP::UsesAllocatorsData &NewD = Data.emplace_back(); NewD.Allocator = Allocator.get(); NewD.AllocatorTraits = AllocatorTraits.get(); NewD.LParenLoc = D.LParenLoc; @@ -11075,6 +11088,77 @@ OMPClause *TreeTransform::TransformOMPXBareClause(OMPXBareClause *C) { //===----------------------------------------------------------------------===// // OpenACC transformation //===----------------------------------------------------------------------===// +namespace { +template +class OpenACCClauseTransform final + : public OpenACCClauseVisitor> { + TreeTransform &Self; + SemaOpenACC::OpenACCParsedClause &ParsedClause; + OpenACCClause *NewClause = nullptr; + +public: + OpenACCClauseTransform(TreeTransform &Self, + SemaOpenACC::OpenACCParsedClause &PC) + : Self(Self), ParsedClause(PC) {} + + OpenACCClause *CreatedClause() const { return NewClause; } + +#define VISIT_CLAUSE(CLAUSE_NAME) \ + void Visit##CLAUSE_NAME##Clause(const OpenACC##CLAUSE_NAME##Clause &Clause); +#include "clang/Basic/OpenACCClauses.def" +}; + +template +void OpenACCClauseTransform::VisitDefaultClause( + const OpenACCDefaultClause &C) { + ParsedClause.setDefaultDetails(C.getDefaultClauseKind()); + + NewClause = OpenACCDefaultClause::Create( + Self.getSema().getASTContext(), ParsedClause.getDefaultClauseKind(), + ParsedClause.getBeginLoc(), ParsedClause.getLParenLoc(), + ParsedClause.getEndLoc()); +} + +template +void OpenACCClauseTransform::VisitIfClause(const OpenACCIfClause &C) { + Expr *Cond = const_cast(C.getConditionExpr()); + assert(Cond && "If constructed with invalid Condition"); + Sema::ConditionResult Res = Self.TransformCondition( + Cond->getExprLoc(), /*Var=*/nullptr, Cond, Sema::ConditionKind::Boolean); + + if (Res.isInvalid() || !Res.get().second) 
+ return; + + ParsedClause.setConditionDetails(Res.get().second); + + NewClause = OpenACCIfClause::Create( + Self.getSema().getASTContext(), ParsedClause.getBeginLoc(), + ParsedClause.getLParenLoc(), ParsedClause.getConditionExpr(), + ParsedClause.getEndLoc()); +} + +template +void OpenACCClauseTransform::VisitSelfClause( + const OpenACCSelfClause &C) { + + if (C.hasConditionExpr()) { + Expr *Cond = const_cast(C.getConditionExpr()); + Sema::ConditionResult Res = + Self.TransformCondition(Cond->getExprLoc(), /*Var=*/nullptr, Cond, + Sema::ConditionKind::Boolean); + + if (Res.isInvalid() || !Res.get().second) + return; + + ParsedClause.setConditionDetails(Res.get().second); + } + + NewClause = OpenACCSelfClause::Create( + Self.getSema().getASTContext(), ParsedClause.getBeginLoc(), + ParsedClause.getLParenLoc(), ParsedClause.getConditionExpr(), + ParsedClause.getEndLoc()); +} +} // namespace template OpenACCClause *TreeTransform::TransformOpenACCClause( ArrayRef ExistingClauses, @@ -11087,33 +11171,10 @@ OpenACCClause *TreeTransform::TransformOpenACCClause( if (const auto *WithParms = dyn_cast(OldClause)) ParsedClause.setLParenLoc(WithParms->getLParenLoc()); - switch (OldClause->getClauseKind()) { - case OpenACCClauseKind::Default: - // There is nothing to do here as nothing dependent can appear in this - // clause. So just set the values so Sema can set the right value. 
- ParsedClause.setDefaultDetails( - cast(OldClause)->getDefaultClauseKind()); - break; - case OpenACCClauseKind::If: { - Expr *Cond = const_cast( - cast(OldClause)->getConditionExpr()); - assert(Cond && "If constructed with invalid Condition"); - Sema::ConditionResult Res = - TransformCondition(Cond->getExprLoc(), /*Var=*/nullptr, Cond, - Sema::ConditionKind::Boolean); - - if (Res.isInvalid() || !Res.get().second) - return nullptr; - - ParsedClause.setConditionDetails(Res.get().second); - break; - } - default: - assert(false && "Unhandled OpenACC clause in TreeTransform"); - return nullptr; - } + OpenACCClauseTransform Transform{*this, ParsedClause}; + Transform.Visit(OldClause); - return getSema().OpenACC().ActOnClause(ExistingClauses, ParsedClause); + return Transform.CreatedClause(); } template @@ -11667,7 +11728,7 @@ template ExprResult TreeTransform::TransformOMPIteratorExpr(OMPIteratorExpr *E) { unsigned NumIterators = E->numOfIterators(); - SmallVector Data(NumIterators); + SmallVector Data(NumIterators); bool ErrorFound = false; bool NeedToRebuild = getDerived().AlwaysRebuild(); @@ -11802,7 +11863,8 @@ TreeTransform::TransformMemberExpr(MemberExpr *E) { // Skip for member expression of (this->f), rebuilt thisi->f is needed // for Openmp where the field need to be privatizized in the case. if (!(isa(E->getBase()) && - getSema().isOpenMPRebuildMemberExpr(cast(Member)))) { + getSema().OpenMP().isOpenMPRebuildMemberExpr( + cast(Member)))) { // Mark it referenced in the new context regardless. // FIXME: this is a bit instantiation-specific. SemaRef.MarkMemberReferenced(E); @@ -12802,6 +12864,19 @@ TreeTransform::TransformCXXNewExpr(CXXNewExpr *E) { ArraySize = NewArraySize.get(); } + // Per C++0x [expr.new]p5, the type being constructed may be a + // typedef of an array type. 
+ QualType AllocType = AllocTypeInfo->getType(); + if (ArraySize) { + if (const ConstantArrayType *Array = + SemaRef.Context.getAsConstantArrayType(AllocType)) { + ArraySize = IntegerLiteral::Create(SemaRef.Context, Array->getSize(), + SemaRef.Context.getSizeType(), + E->getBeginLoc()); + AllocType = Array->getElementType(); + } + } + // Transform the placement arguments (if any). bool ArgumentChanged = false; SmallVector PlacementArgs; @@ -12863,7 +12938,6 @@ TreeTransform::TransformCXXNewExpr(CXXNewExpr *E) { return E; } - QualType AllocType = AllocTypeInfo->getType(); if (!ArraySize) { // If no array size was specified, but the new expression was // instantiated with an array type (e.g., "new T" where T is diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index f47d540ea4b86d..b28df03b4a95e9 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -31,7 +31,6 @@ #include "clang/AST/ExternalASTSource.h" #include "clang/AST/NestedNameSpecifier.h" #include "clang/AST/ODRDiagsEmitter.h" -#include "clang/AST/ODRHash.h" #include "clang/AST/OpenACCClause.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/RawCommentList.h" @@ -11781,6 +11780,12 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() { return OpenACCIfClause::Create(getContext(), BeginLoc, LParenLoc, CondExpr, EndLoc); } + case OpenACCClauseKind::Self: { + SourceLocation LParenLoc = readSourceLocation(); + Expr *CondExpr = readBool() ? 
readSubExpr() : nullptr; + return OpenACCSelfClause::Create(getContext(), BeginLoc, LParenLoc, + CondExpr, EndLoc); + } case OpenACCClauseKind::Finalize: case OpenACCClauseKind::IfPresent: case OpenACCClauseKind::Seq: @@ -11789,7 +11794,6 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() { case OpenACCClauseKind::Worker: case OpenACCClauseKind::Vector: case OpenACCClauseKind::NoHost: - case OpenACCClauseKind::Self: case OpenACCClauseKind::Copy: case OpenACCClauseKind::UseDevice: case OpenACCClauseKind::Attach: diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index ce6fa1feb1eeb3..b2a078b6d80f46 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7524,6 +7524,14 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { AddStmt(const_cast(IC->getConditionExpr())); return; } + case OpenACCClauseKind::Self: { + const auto *SC = cast(C); + writeSourceLocation(SC->getLParenLoc()); + writeBool(SC->hasConditionExpr()); + if (SC->hasConditionExpr()) + AddStmt(const_cast(SC->getConditionExpr())); + return; + } case OpenACCClauseKind::Finalize: case OpenACCClauseKind::IfPresent: case OpenACCClauseKind::Seq: @@ -7532,7 +7540,6 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { case OpenACCClauseKind::Worker: case OpenACCClauseKind::Vector: case OpenACCClauseKind::NoHost: - case OpenACCClauseKind::Self: case OpenACCClauseKind::Copy: case OpenACCClauseKind::UseDevice: case OpenACCClauseKind::Attach: diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index d0d49bcdf991a9..c6db107e0ca429 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -16,7 +16,6 @@ #include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclVisitor.h" #include "clang/AST/Expr.h" -#include "clang/AST/ODRHash.h" #include "clang/AST/OpenMPClause.h" #include 
"clang/AST/PrettyDeclStackTrace.h" #include "clang/Basic/SourceManager.h" diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index e3816181e2b2b9..a736a7b0ef726c 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -19,7 +19,6 @@ #include "clang/AST/ExprOpenMP.h" #include "clang/AST/StmtVisitor.h" #include "clang/Lex/Token.h" -#include "clang/Sema/DeclSpec.h" #include "clang/Serialization/ASTRecordWriter.h" #include "llvm/Bitstream/BitstreamWriter.h" using namespace clang; diff --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp index 2fece29f34487e..bed74399098d7f 100644 --- a/clang/lib/Serialization/GeneratePCH.cpp +++ b/clang/lib/Serialization/GeneratePCH.cpp @@ -17,7 +17,6 @@ #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/SemaConsumer.h" -#include "clang/Serialization/ASTReader.h" #include "clang/Serialization/ASTWriter.h" #include "llvm/Bitstream/BitstreamWriter.h" diff --git a/clang/lib/Serialization/GlobalModuleIndex.cpp b/clang/lib/Serialization/GlobalModuleIndex.cpp index 8ff10f6a8621e8..f09ceb8d316206 100644 --- a/clang/lib/Serialization/GlobalModuleIndex.cpp +++ b/clang/lib/Serialization/GlobalModuleIndex.cpp @@ -13,7 +13,6 @@ #include "clang/Serialization/GlobalModuleIndex.h" #include "ASTReaderInternals.h" #include "clang/Basic/FileManager.h" -#include "clang/Lex/HeaderSearch.h" #include "clang/Serialization/ASTBitCodes.h" #include "clang/Serialization/ModuleFile.h" #include "clang/Serialization/PCHContainerOperations.h" diff --git a/clang/lib/Serialization/ModuleFileExtension.cpp b/clang/lib/Serialization/ModuleFileExtension.cpp index 95fff41e0d7a85..729529b5fca18c 100644 --- a/clang/lib/Serialization/ModuleFileExtension.cpp +++ b/clang/lib/Serialization/ModuleFileExtension.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// #include "clang/Serialization/ModuleFileExtension.h" -#include "llvm/ADT/Hashing.h" + using namespace clang; char ModuleFileExtension::ID = 0; diff --git a/clang/lib/Serialization/PCHContainerOperations.cpp b/clang/lib/Serialization/PCHContainerOperations.cpp index 56ca3394385b4f..4aedb7debcff28 100644 --- a/clang/lib/Serialization/PCHContainerOperations.cpp +++ b/clang/lib/Serialization/PCHContainerOperations.cpp @@ -12,8 +12,6 @@ #include "clang/Serialization/PCHContainerOperations.h" #include "clang/AST/ASTConsumer.h" -#include "clang/Lex/ModuleLoader.h" -#include "llvm/Bitstream/BitstreamReader.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/clang/lib/StaticAnalyzer/Checkers/cert/InvalidPtrChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/cert/InvalidPtrChecker.cpp index e5dd907c660d8e..fefe846b6911f7 100644 --- a/clang/lib/StaticAnalyzer/Checkers/cert/InvalidPtrChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/cert/InvalidPtrChecker.cpp @@ -48,14 +48,19 @@ class InvalidPtrChecker bool InvalidatingGetEnv = false; // GetEnv can be treated invalidating and non-invalidating as well. 
- const CallDescription GetEnvCall{{"getenv"}, 1}; + const CallDescription GetEnvCall{CDM::CLibrary, {"getenv"}, 1}; const CallDescriptionMap EnvpInvalidatingFunctions = { - {{{"setenv"}, 3}, &InvalidPtrChecker::EnvpInvalidatingCall}, - {{{"unsetenv"}, 1}, &InvalidPtrChecker::EnvpInvalidatingCall}, - {{{"putenv"}, 1}, &InvalidPtrChecker::EnvpInvalidatingCall}, - {{{"_putenv_s"}, 2}, &InvalidPtrChecker::EnvpInvalidatingCall}, - {{{"_wputenv_s"}, 2}, &InvalidPtrChecker::EnvpInvalidatingCall}, + {{CDM::CLibrary, {"setenv"}, 3}, + &InvalidPtrChecker::EnvpInvalidatingCall}, + {{CDM::CLibrary, {"unsetenv"}, 1}, + &InvalidPtrChecker::EnvpInvalidatingCall}, + {{CDM::CLibrary, {"putenv"}, 1}, + &InvalidPtrChecker::EnvpInvalidatingCall}, + {{CDM::CLibrary, {"_putenv_s"}, 2}, + &InvalidPtrChecker::EnvpInvalidatingCall}, + {{CDM::CLibrary, {"_wputenv_s"}, 2}, + &InvalidPtrChecker::EnvpInvalidatingCall}, }; void postPreviousReturnInvalidatingCall(const CallEvent &Call, @@ -63,13 +68,13 @@ class InvalidPtrChecker // SEI CERT ENV34-C const CallDescriptionMap PreviousCallInvalidatingFunctions = { - {{{"setlocale"}, 2}, + {{CDM::CLibrary, {"setlocale"}, 2}, &InvalidPtrChecker::postPreviousReturnInvalidatingCall}, - {{{"strerror"}, 1}, + {{CDM::CLibrary, {"strerror"}, 1}, &InvalidPtrChecker::postPreviousReturnInvalidatingCall}, - {{{"localeconv"}, 0}, + {{CDM::CLibrary, {"localeconv"}, 0}, &InvalidPtrChecker::postPreviousReturnInvalidatingCall}, - {{{"asctime"}, 1}, + {{CDM::CLibrary, {"asctime"}, 1}, &InvalidPtrChecker::postPreviousReturnInvalidatingCall}, }; @@ -205,8 +210,12 @@ void InvalidPtrChecker::postPreviousReturnInvalidatingCall( CE, LCtx, CE->getType(), C.blockCount()); State = State->BindExpr(CE, LCtx, RetVal); + const auto *SymRegOfRetVal = + dyn_cast_or_null(RetVal.getAsRegion()); + if (!SymRegOfRetVal) + return; + // Remember to this region. 
- const auto *SymRegOfRetVal = cast(RetVal.getAsRegion()); const MemRegion *MR = SymRegOfRetVal->getBaseRegion(); State = State->set(FD, MR); diff --git a/clang/test/AST/Interp/vectors.cpp b/clang/test/AST/Interp/vectors.cpp index 6c5d916f51f563..5c4694f122d812 100644 --- a/clang/test/AST/Interp/vectors.cpp +++ b/clang/test/AST/Interp/vectors.cpp @@ -1,10 +1,23 @@ // RUN: %clang_cc1 -fexperimental-new-constant-interpreter -verify=expected,both %s // RUN: %clang_cc1 -verify=ref,both %s -// ref-no-diagnostics - typedef int __attribute__((vector_size(16))) VI4; constexpr VI4 A = {1,2,3,4}; +static_assert(A[0] == 1, ""); // ref-error {{not an integral constant expression}} +static_assert(A[1] == 2, ""); // ref-error {{not an integral constant expression}} +static_assert(A[2] == 3, ""); // ref-error {{not an integral constant expression}} +static_assert(A[3] == 4, ""); // ref-error {{not an integral constant expression}} + +/// VectorSplat casts +typedef __attribute__(( ext_vector_type(4) )) float float4; +constexpr float4 vec4_0 = (float4)0.5f; +static_assert(vec4_0[0] == 0.5, ""); // ref-error {{not an integral constant expression}} +static_assert(vec4_0[1] == 0.5, ""); // ref-error {{not an integral constant expression}} +static_assert(vec4_0[2] == 0.5, ""); // ref-error {{not an integral constant expression}} +static_assert(vec4_0[3] == 0.5, ""); // ref-error {{not an integral constant expression}} +constexpr int vec4_0_discarded = ((float4)12.0f, 0); + + /// From constant-expression-cxx11.cpp namespace Vector { diff --git a/clang/test/Analysis/invalid-ptr-checker.cpp b/clang/test/Analysis/invalid-ptr-checker.cpp new file mode 100644 index 00000000000000..58bb45e0fb8421 --- /dev/null +++ b/clang/test/Analysis/invalid-ptr-checker.cpp @@ -0,0 +1,10 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core,security.cert.env.InvalidPtr -verify %s + +// expected-no-diagnostics + +namespace other { +int strerror(int errnum); // custom strerror +void 
no_crash_on_custom_strerror() { + (void)strerror(0); // no-crash +} +} // namespace other diff --git a/clang/test/CXX/drs/dr0xx.cpp b/clang/test/CXX/drs/dr0xx.cpp index a304862885c640..6c600bbc7c3f6e 100644 --- a/clang/test/CXX/drs/dr0xx.cpp +++ b/clang/test/CXX/drs/dr0xx.cpp @@ -5,6 +5,11 @@ // RUN: %clang_cc1 -std=c++20 %s -verify=expected,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors -triple %itanium_abi_triple // RUN: %clang_cc1 -std=c++23 %s -verify=expected,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors -triple %itanium_abi_triple +#if __cplusplus == 199711L +#define static_assert(...) __extension__ _Static_assert(__VA_ARGS__) +// cxx98-error@-1 {{variadic macros are a C99 feature}} +#endif + namespace cwg1 { // cwg1: no namespace X { extern "C" void cwg1_f(int a = 1); } namespace Y { extern "C" void cwg1_f(int a = 1); } @@ -897,7 +902,7 @@ namespace cwg54 { // cwg54: 2.8 namespace cwg55 { // cwg55: yes enum E { e = 5 }; - int test[(e + 1 == 6) ? 
1 : -1]; + static_assert(e + 1 == 6, ""); } namespace cwg56 { // cwg56: yes @@ -1163,10 +1168,9 @@ namespace cwg75 { // cwg75: yes namespace cwg76 { // cwg76: yes const volatile int n = 1; - int arr[n]; // #cwg76-vla - // expected-error@#cwg76-vla {{variable length arrays in C++ are a Clang extension}} - // expected-note@#cwg76-vla {{read of volatile-qualified type 'const volatile int' is not allowed in a constant expression}} - // expected-error@#cwg76-vla {{variable length array declaration not allowed at file scope}} + static_assert(n, ""); + // expected-error@-1 {{static assertion expression is not an integral constant expression}} + // expected-note@-2 {{read of volatile-qualified type 'const volatile int' is not allowed in a constant expression}} } namespace cwg77 { // cwg77: yes diff --git a/clang/test/CXX/drs/dr16xx.cpp b/clang/test/CXX/drs/dr16xx.cpp index 6d7bb7619f8b8b..cf6b45ceabf2cc 100644 --- a/clang/test/CXX/drs/dr16xx.cpp +++ b/clang/test/CXX/drs/dr16xx.cpp @@ -153,10 +153,9 @@ namespace cwg1645 { // cwg1645: 3.9 namespace cwg1652 { // cwg1652: 3.6 int a, b; - int arr[&a + 1 == &b ? 
1 : 2]; - // expected-error@-1 {{variable length arrays in C++ are a Clang extension}} + static_assert(&a + 1 == &b, ""); + // expected-error@-1 {{static assertion expression is not an integral constant expression}} // expected-note@-2 {{comparison against pointer '&a + 1' that points past the end of a complete object has unspecified value}} - // expected-error@-3 {{variable length array declaration not allowed at file scope}} } namespace cwg1653 { // cwg1653: 4 c++17 diff --git a/clang/test/CXX/drs/dr1xx.cpp b/clang/test/CXX/drs/dr1xx.cpp index 5b497dda047d6a..a8f9b705a98660 100644 --- a/clang/test/CXX/drs/dr1xx.cpp +++ b/clang/test/CXX/drs/dr1xx.cpp @@ -5,6 +5,17 @@ // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors // RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors +#if __cplusplus == 199711L +#define static_assert(...) __extension__ _Static_assert(__VA_ARGS__) +// cxx98-error@-1 {{variadic macros are a C99 feature}} +#endif + +#if __cplusplus == 199711L +#define __enable_constant_folding(x) (__builtin_constant_p(x) ? (x) : (x)) +#else +#define __enable_constant_folding +#endif + namespace cwg100 { // cwg100: yes template struct A {}; // #cwg100-A template struct B {}; // #cwg100-B @@ -736,8 +747,8 @@ namespace cwg147 { // cwg147: yes namespace cwg148 { // cwg148: yes struct A { int A::*p; }; - int check1[__is_pod(int(A::*)) ? 1 : -1]; - int check2[__is_pod(A) ? 1 : -1]; + static_assert(__is_pod(int(A::*)), ""); + static_assert(__is_pod(A), ""); } // cwg149: na @@ -745,13 +756,7 @@ namespace cwg148 { // cwg148: yes namespace cwg151 { // cwg151: 3.1 struct X {}; typedef int X::*p; -#if __cplusplus < 201103L -#define fold(x) (__builtin_constant_p(0) ? (x) : (x)) -#else -#define fold -#endif - int check[fold(p() == 0) ? 
1 : -1]; -#undef fold + static_assert(__enable_constant_folding(p() == 0), ""); } namespace cwg152 { // cwg152: yes @@ -956,42 +961,42 @@ namespace cwg171 { namespace cwg172 { // cwg172: yes enum { zero }; - int check1[-1 < zero ? 1 : -1]; + static_assert(-1 < zero, ""); enum { x = -1, y = (unsigned int)-1 }; - int check2[sizeof(x) > sizeof(int) ? 1 : -1]; + static_assert(sizeof(x) > sizeof(int), ""); enum { a = (unsigned int)-1 / 2 }; - int check3a[sizeof(a) == sizeof(int) ? 1 : -1]; - int check3b[-a < 0 ? 1 : -1]; + static_assert(sizeof(a) == sizeof(int), ""); + static_assert(-a < 0, ""); enum { b = (unsigned int)-1 / 2 + 1 }; - int check4a[sizeof(b) == sizeof(unsigned int) ? 1 : -1]; - int check4b[-b > 0 ? 1 : -1]; + static_assert(sizeof(b) == sizeof(unsigned int), ""); + static_assert(-b > 0, ""); enum { c = (unsigned long)-1 / 2 }; - int check5a[sizeof(c) == sizeof(long) ? 1 : -1]; - int check5b[-c < 0 ? 1 : -1]; + static_assert(sizeof(c) == sizeof(long), ""); + static_assert(-c < 0, ""); enum { d = (unsigned long)-1 / 2 + 1 }; - int check6a[sizeof(d) == sizeof(unsigned long) ? 1 : -1]; - int check6b[-d > 0 ? 1 : -1]; + static_assert(sizeof(d) == sizeof(unsigned long), ""); + static_assert(-d > 0, ""); enum { e = (unsigned long long)-1 / 2 }; // cxx98-error@-1 {{'long long' is a C++11 extension}} - int check7a[sizeof(e) == sizeof(long) ? 1 : -1]; - int check7b[-e < 0 ? 1 : -1]; + static_assert(sizeof(e) == sizeof(long), ""); + static_assert(-e < 0, ""); enum { f = (unsigned long long)-1 / 2 + 1 }; // cxx98-error@-1 {{'long long' is a C++11 extension}} - int check8a[sizeof(f) == sizeof(unsigned long) ? 1 : -1]; - int check8b[-f > 0 ? 1 : -1]; + static_assert(sizeof(f) == sizeof(unsigned long), ""); + static_assert(-f > 0, ""); } namespace cwg173 { // cwg173: yes - int check[('0' + 1 == '1' && '0' + 2 == '2' && '0' + 3 == '3' && - '0' + 4 == '4' && '0' + 5 == '5' && '0' + 6 == '6' && - '0' + 7 == '7' && '0' + 8 == '8' && '0' + 9 == '9') ? 
1 : -1]; + static_assert('0' + 1 == '1' && '0' + 2 == '2' && '0' + 3 == '3' && + '0' + 4 == '4' && '0' + 5 == '5' && '0' + 6 == '6' && + '0' + 7 == '7' && '0' + 8 == '8' && '0' + 9 == '9', ""); } // cwg174: sup 1012 @@ -1070,7 +1075,7 @@ namespace cwg177 { // cwg177: yes } namespace cwg178 { // cwg178: yes - int check[int() == 0 ? 1 : -1]; + static_assert(int() == 0, ""); #if __cplusplus >= 201103L static_assert(int{} == 0, ""); struct S { int a, b; }; @@ -1180,7 +1185,7 @@ namespace cwg187 { // cwg187: sup 481 namespace cwg188 { // cwg188: yes char c[10]; - int check[sizeof(0, c) == 10 ? 1 : -1]; + static_assert(sizeof(0, c) == 10, ""); } // cwg190 FIXME: add codegen test for tbaa diff --git a/clang/test/CXX/drs/dr2xx.cpp b/clang/test/CXX/drs/dr2xx.cpp index e655e7226d51d6..5d3e8ce4bea3bc 100644 --- a/clang/test/CXX/drs/dr2xx.cpp +++ b/clang/test/CXX/drs/dr2xx.cpp @@ -10,10 +10,15 @@ typedef __SIZE_TYPE__ size_t; // cxx98-error@-1 0-1 {{'long long' is a C++11 extension}} -#if __cplusplus < 201103L -#define fold(x) (__builtin_constant_p(x) ? (x) : (x)) +#if __cplusplus == 199711L +#define static_assert(...) __extension__ _Static_assert(__VA_ARGS__) +// cxx98-error@-1 {{variadic macros are a C99 feature}} +#endif + +#if __cplusplus == 199711L +#define __enable_constant_folding(x) (__builtin_constant_p(x) ? (x) : (x)) #else -#define fold +#define __enable_constant_folding #endif namespace cwg200 { // cwg200: dup 214 @@ -31,7 +36,7 @@ namespace cwg200 { // cwg200: dup 214 namespace cwg202 { // cwg202: 3.1 template T f(); template struct X { - int arr[fold(g == &f) ? 1 : -1]; + static_assert(__enable_constant_folding(g == &f), ""); }; template struct X; } @@ -1024,7 +1029,7 @@ namespace cwg275 { // cwg275: no namespace cwg277 { // cwg277: 3.1 typedef int *intp; int *p = intp(); - int a[fold(intp() ? 
-1 : 1)]; + static_assert(__enable_constant_folding(!intp()), ""); } namespace cwg280 { // cwg280: 2.9 diff --git a/clang/test/CXX/drs/dr3xx.cpp b/clang/test/CXX/drs/dr3xx.cpp index 6d1c6958ac8eb6..3e9228fe21fb64 100644 --- a/clang/test/CXX/drs/dr3xx.cpp +++ b/clang/test/CXX/drs/dr3xx.cpp @@ -5,6 +5,17 @@ // RUN: %clang_cc1 -std=c++11 -verify=expected,cxx98-14,cxx98-17,cxx98-20,cxx11-14,since-cxx11 -triple %itanium_abi_triple %s -fexceptions -fcxx-exceptions -pedantic-errors // RUN: %clang_cc1 -std=c++98 -verify=expected,cxx98-14,cxx98-17,cxx98-20,cxx98 -triple %itanium_abi_triple %s -fexceptions -fcxx-exceptions -pedantic-errors +#if __cplusplus == 199711L +#define static_assert(...) __extension__ _Static_assert(__VA_ARGS__) +// cxx98-error@-1 {{variadic macros are a C99 feature}} +#endif + +#if __cplusplus == 199711L +#define __enable_constant_folding(x) (__builtin_constant_p(x) ? (x) : (x)) +#else +#define __enable_constant_folding +#endif + namespace cwg300 { // cwg300: yes template void f(R (&)(A)) {} int g(int); @@ -396,7 +407,7 @@ namespace cwg324 { // cwg324: 3.6 namespace cwg326 { // cwg326: 3.1 struct S {}; - int test[__is_trivially_constructible(S, const S&) ? 1 : -1]; + static_assert(__is_trivially_constructible(S, const S&), ""); } namespace cwg327 { // cwg327: dup 538 @@ -653,7 +664,7 @@ namespace cwg339 { // cwg339: 2.8 template A make_A(); - int a[conv_int::value ? 1 : -1]; + static_assert(conv_int::value, ""); bool b = conv_int2(A<1>()); A<1> c = make_A(); } @@ -1099,21 +1110,14 @@ namespace cwg364 { // cwg364: yes #endif namespace cwg367 { // cwg367: yes - // FIXME: These diagnostics are terrible. Don't diagnose an ill-formed global - // array as being a VLA! - int a[true ? throw 0 : 4]; - // expected-error@-1 {{variable length arrays in C++ are a Clang extension}} - // expected-error@-2 {{variable length array declaration not allowed at file scope}} - int b[true ? 
4 : throw 0]; - // cxx98-error@-1 {{variable length arrays in C++ are a Clang extension}} - // cxx98-error@-2 {{variable length array folded to constant array as an extension}} - int c[true ? *new int : 4]; - // expected-error@-1 {{variable length arrays in C++ are a Clang extension}} + static_assert(__enable_constant_folding(true ? throw 0 : 4), ""); + // expected-error@-1 {{expression is not an integral constant expression}} + static_assert(__enable_constant_folding(true ? 4 : throw 0), ""); + static_assert(__enable_constant_folding(true ? *new int : 4), ""); + // expected-error@-1 {{expression is not an integral constant expression}} // expected-note@-2 {{read of uninitialized object is not allowed in a constant expression}} - // expected-error@-3 {{variable length array declaration not allowed at file scope}} - int d[true ? 4 : *new int]; - // cxx98-error@-1 {{variable length arrays in C++ are a Clang extension}} - // cxx98-error@-2 {{variable length array folded to constant array as an extension}} + static_assert(__enable_constant_folding(true ? 4 : *new int), ""); + } namespace cwg368 { // cwg368: 3.6 @@ -1325,7 +1329,7 @@ namespace cwg383 { // cwg383: yes struct B { ~B(); }; union C { C &operator=(const C&); }; union D { ~D(); }; - int check[(__is_pod(A) || __is_pod(B) || __is_pod(C) || __is_pod(D)) ? 
-1 : 1]; + static_assert(!__is_pod(A) && !__is_pod(B) && !__is_pod(C) && !__is_pod(D), ""); } namespace cwg384 { // cwg384: yes diff --git a/clang/test/CXX/drs/dr4xx.cpp b/clang/test/CXX/drs/dr4xx.cpp index 611b791470785d..07162cc28f6b60 100644 --- a/clang/test/CXX/drs/dr4xx.cpp +++ b/clang/test/CXX/drs/dr4xx.cpp @@ -6,6 +6,11 @@ // RUN: env ASAN_OPTIONS=detect_stack_use_after_return=0 %clang_cc1 -std=c++23 %s -verify=expected,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors // RUN: env ASAN_OPTIONS=detect_stack_use_after_return=0 %clang_cc1 -std=c++2c %s -verify=expected,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +#if __cplusplus == 199711L +#define static_assert(...) __extension__ _Static_assert(__VA_ARGS__) +// cxx98-error@-1 {{variadic macros are a C99 feature}} +#endif + // FIXME: __SIZE_TYPE__ expands to 'long long' on some targets. __extension__ typedef __SIZE_TYPE__ size_t; @@ -217,7 +222,7 @@ namespace cwg407 { // cwg407: 3.8 } namespace cwg408 { // cwg408: 3.4 - template void g() { int arr[N != 1 ? 1 : -1]; } + template void g() { static_assert(N != 1, ""); } template<> void g<2>() { } template struct S { @@ -239,7 +244,7 @@ namespace cwg408 { // cwg408: 3.4 }; template int R::arr[1]; template void R::f() { - int arr[sizeof(arr) != sizeof(int) ? 
1 : -1]; + static_assert(sizeof(arr) != sizeof(int), ""); } template<> int R::arr[2]; template void R::f(); @@ -842,11 +847,10 @@ namespace cwg451 { // cwg451: yes // expected-warning@-1 {{division by zero is undefined}} const int b = 1 / 0; // #cwg451-b // expected-warning@-1 {{division by zero is undefined}} - int arr[b]; // #cwg451-arr - // expected-error@-1 {{variable length arrays in C++ are a Clang extension}} + static_assert(b, ""); + // expected-error@-1 {{expression is not an integral constant expression}} // expected-note@-2 {{initializer of 'b' is not a constant expression}} // expected-note@#cwg451-b {{declared here}} - // expected-error@#cwg451-arr {{variable length array declaration not allowed at file scope}} } namespace cwg452 { // cwg452: yes @@ -876,11 +880,10 @@ namespace cwg456 { // cwg456: yes namespace cwg457 { // cwg457: yes const int a = 1; const volatile int b = 1; - int ax[a]; - int bx[b]; - // expected-error@-1 {{variable length arrays in C++ are a Clang extension}} + static_assert(a, ""); + static_assert(b, ""); + // expected-error@-1 {{expression is not an integral constant expression}} // expected-note@-2 {{read of volatile-qualified type 'const volatile int' is not allowed in a constant expression}} - // expected-error@-3 {{variable length array declaration not allowed at file scope}} enum E { ea = a, @@ -1276,20 +1279,18 @@ namespace cwg482 { // cwg482: 3.5 namespace cwg483 { // cwg483: yes namespace climits { - int check1[__SCHAR_MAX__ >= 127 ? 1 : -1]; - int check2[__SHRT_MAX__ >= 32767 ? 1 : -1]; - int check3[__INT_MAX__ >= 32767 ? 1 : -1]; - int check4[__LONG_MAX__ >= 2147483647 ? 1 : -1]; - int check5[__LONG_LONG_MAX__ >= 9223372036854775807 ? 
1 : -1]; - // cxx98-error@-1 {{'long long' is a C++11 extension}} - // cxx98-error@-2 0-1{{'long long' is a C++11 extension}} + static_assert(__SCHAR_MAX__ >= 127, ""); + static_assert(__SHRT_MAX__ >= 32767, ""); + static_assert(__INT_MAX__ >= 32767, ""); + static_assert(__LONG_MAX__ >= 2147483647, ""); + static_assert(__LONG_LONG_MAX__ >= 9223372036854775807, ""); } namespace cstdint { - int check1[__PTRDIFF_WIDTH__ >= 16 ? 1 : -1]; - int check2[__SIG_ATOMIC_WIDTH__ >= 8 ? 1 : -1]; - int check3[__SIZE_WIDTH__ >= 16 ? 1 : -1]; - int check4[__WCHAR_WIDTH__ >= 8 ? 1 : -1]; - int check5[__WINT_WIDTH__ >= 16 ? 1 : -1]; + static_assert(__PTRDIFF_WIDTH__ >= 16, ""); + static_assert(__SIG_ATOMIC_WIDTH__ >= 8, ""); + static_assert(__SIZE_WIDTH__ >= 16, ""); + static_assert(__WCHAR_WIDTH__ >= 8, ""); + static_assert(__WINT_WIDTH__ >= 16, ""); } } @@ -1366,11 +1367,10 @@ namespace cwg486 { // cwg486: yes namespace cwg487 { // cwg487: yes enum E { e }; int operator+(int, E); // #cwg487-operator-plus - int i[4 + e]; // #cwg487-i - // expected-error@-1 {{variable length arrays in C++ are a Clang extension}} + static_assert(4 + e, ""); + // expected-error@-1 {{expression is not an integral constant expression}} // since-cxx11-note@-2 {{non-constexpr function 'operator+' cannot be used in a constant expression}} // since-cxx11-note@#cwg487-operator-plus {{declared here}} - // expected-error@#cwg487-i {{variable length array declaration not allowed at file scope}} } namespace cwg488 { // cwg488: yes c++11 @@ -1485,13 +1485,13 @@ namespace cwg495 { // cwg495: 3.5 namespace cwg496 { // cwg496: sup 2094 struct A { int n; }; struct B { volatile int n; }; - int check1[ __is_trivially_copyable(const int) ? 1 : -1]; + static_assert(__is_trivially_copyable(const int), ""); // This checks the cwg2094 behavior, not cwg496 - int check2[ __is_trivially_copyable(volatile int) ? 1 : -1]; - int check3[ __is_trivially_constructible(A, const A&) ? 
1 : -1]; - int check4[ __is_trivially_constructible(B, const B&) ? 1 : -1]; - int check5[ __is_trivially_assignable(A, const A&) ? 1 : -1]; - int check6[ __is_trivially_assignable(B, const B&) ? 1 : -1]; + static_assert(__is_trivially_copyable(volatile int), ""); + static_assert(__is_trivially_constructible(A, const A&), ""); + static_assert(__is_trivially_constructible(B, const B&), ""); + static_assert(__is_trivially_assignable(A, const A&), ""); + static_assert(__is_trivially_assignable(B, const B&), ""); } namespace cwg497 { // cwg497: sup 253 diff --git a/clang/test/CXX/drs/dr5xx.cpp b/clang/test/CXX/drs/dr5xx.cpp index 0fe64102d70b00..9d890f981348a7 100644 --- a/clang/test/CXX/drs/dr5xx.cpp +++ b/clang/test/CXX/drs/dr5xx.cpp @@ -5,6 +5,11 @@ // RUN: %clang_cc1 -std=c++20 %s -verify=expected,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors // RUN: %clang_cc1 -std=c++23 %s -verify=expected,since-cxx23,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors +#if __cplusplus == 199711L +#define static_assert(...) __extension__ _Static_assert(__VA_ARGS__) +// cxx98-error@-1 {{variadic macros are a C99 feature}} +#endif + // FIXME: This is included to avoid a diagnostic with no source location // pointing at the implicit operator new. We can't match such a diagnostic // with -verify. @@ -819,7 +824,7 @@ namespace cwg565 { // cwg565: yes namespace cwg566 { // cwg566: yes #if __cplusplus >= 201103L - int check[int(-3.99) == -3 ? 1 : -1]; + static_assert(int(-3.99) == -3, ""); #endif } @@ -834,7 +839,7 @@ namespace cwg568 { // cwg568: 3.0 c++11 public: int n; }; - int check_trivial[__is_trivial(trivial) ? 1 : -1]; + static_assert(__is_trivial(trivial), ""); struct std_layout { std_layout(); @@ -843,7 +848,7 @@ namespace cwg568 { // cwg568: 3.0 c++11 private: int n; }; - int check_std_layout[__is_standard_layout(std_layout) ? 
1 : -1]; + static_assert(__is_standard_layout(std_layout), ""); struct aggregate { int x; @@ -885,7 +890,7 @@ namespace cwg570 { // cwg570: dup 633 namespace cwg572 { // cwg572: yes enum E { a = 1, b = 2 }; - int check[a + b == 3 ? 1 : -1]; + static_assert(a + b == 3, ""); } namespace cwg573 { // cwg573: no diff --git a/clang/test/CXX/drs/dr6xx.cpp b/clang/test/CXX/drs/dr6xx.cpp index 9d3613ae8589ea..069102d9c59750 100644 --- a/clang/test/CXX/drs/dr6xx.cpp +++ b/clang/test/CXX/drs/dr6xx.cpp @@ -144,7 +144,7 @@ namespace cwg608 { // cwg608: yes struct D : B, C {}; } -int cwg610[-0u == 0u ? 1 : -1]; // cwg610: yes +static_assert(-0u == 0u, ""); // cwg610: yes namespace cwg611 { // cwg611: yes int k; @@ -190,8 +190,8 @@ namespace cwg613 { // cwg613: yes c++11 } } -int cwg614_a[(-1) / 2 == 0 ? 1 : -1]; // cwg614: yes -int cwg614_b[(-1) % 2 == -1 ? 1 : -1]; +static_assert((-1) / 2 == 0, ""); // cwg614: yes +static_assert((-1) % 2 == -1, ""); namespace cwg615 { // cwg615: yes int f(); diff --git a/clang/test/ClangScanDeps/error.cpp b/clang/test/ClangScanDeps/error.cpp index 0095a6c900c3b3..593dbf35edca52 100644 --- a/clang/test/ClangScanDeps/error.cpp +++ b/clang/test/ClangScanDeps/error.cpp @@ -1,23 +1,10 @@ // RUN: rm -rf %t // RUN: split-file %s %t -//--- missing_tu.json.in -[{ - "directory": "DIR", - "command": "clang -fsyntax-only DIR/missing_tu.c", - "file": "DIR/missing_tu.c" -}] -//--- missing_header.json.in -[{ - "directory": "DIR", - "command": "clang -fsyntax-only DIR/missing_header.c", - "file": "DIR/missing_header.c" -}] //--- missing_header.c #include "missing.h" -// RUN: sed -e "s|DIR|%/t|g" %t/missing_tu.json.in > %t/missing_tu.json -// RUN: not clang-scan-deps -compilation-database %t/missing_tu.json 2>%t/missing_tu.errs +// RUN: not clang-scan-deps -- %clang -c %t/missing_tu.c 2>%t/missing_tu.errs // RUN: echo EOF >> %t/missing_tu.errs // RUN: cat %t/missing_tu.errs | sed 's:\\\\\?:/:g' | FileCheck %s --check-prefix=CHECK-MISSING-TU -DPREFIX=%/t // 
CHECK-MISSING-TU: Error while scanning dependencies for [[PREFIX]]/missing_tu.c @@ -26,8 +13,7 @@ // CHECK-MISSING-TU-NEXT: error: // CHECK-MISSING-TU-NEXT: EOF -// RUN: sed -e "s|DIR|%/t|g" %t/missing_header.json.in > %t/missing_header.json -// RUN: not clang-scan-deps -compilation-database %t/missing_header.json 2>%t/missing_header.errs +// RUN: not clang-scan-deps -- %clang -c %t/missing_header.c 2>%t/missing_header.errs // RUN: echo EOF >> %t/missing_header.errs // RUN: cat %t/missing_header.errs | sed 's:\\\\\?:/:g' | FileCheck %s --check-prefix=CHECK-MISSING-HEADER -DPREFIX=%/t // CHECK-MISSING-HEADER: Error while scanning dependencies for [[PREFIX]]/missing_header.c diff --git a/clang/test/ClangScanDeps/module-format.c b/clang/test/ClangScanDeps/module-format.c index 001a011ae0b597..0a6abec80dd909 100644 --- a/clang/test/ClangScanDeps/module-format.c +++ b/clang/test/ClangScanDeps/module-format.c @@ -16,7 +16,7 @@ // RUN: rm -f %t/cdb_pch.json // RUN: sed "s|DIR|%/t|g" %S/Inputs/modules-pch/cdb_pch.json > %t/cdb_pch.json // RUN: clang-scan-deps -compilation-database %t/cdb_pch.json -format experimental-full \ -// RUN: -module-files-dir %t/build > %t/result_pch.json +// RUN: -module-files-dir %t/build -o %t/result_pch.json // Explicitly build the PCH: // diff --git a/clang/test/CodeGen/alias.cpp b/clang/test/CodeGen/alias.cpp index 17c1e1ae32f035..a468c31d369ed0 100644 --- a/clang/test/CodeGen/alias.cpp +++ b/clang/test/CodeGen/alias.cpp @@ -1,27 +1,42 @@ -// RUN: %clang_cc1 -triple x86_64-linux -verify -emit-llvm-only %s -// RUN: not %clang_cc1 -triple x86_64-linux -emit-llvm-only -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux -verify -emit-llvm-only -DERR %s +// RUN: not %clang_cc1 -triple x86_64-linux -emit-llvm-only -fdiagnostics-parseable-fixits -DERR %s 2>&1 | FileCheck %s --check-prefix=FIXIT +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm %s -o - | FileCheck %s +#ifdef ERR void *f1_ifunc(void) { 
return nullptr; } void f1(void) __attribute__((alias("f1_ifunc"))); // expected-error@-1 {{alias must point to a defined variable or function}} // expected-note@-2 {{must refer to its mangled name}} // expected-note@-3 {{function by that name is mangled as}} -// CHECK: fix-it:"{{.*}}":{[[@LINE-4]]:30-[[@LINE-4]]:47}:"alias(\"_Z8f1_ifuncv\")" +// FIXIT: fix-it:"{{.*}}":{[[@LINE-4]]:30-[[@LINE-4]]:47}:"alias(\"_Z8f1_ifuncv\")" void *f6_resolver_resolver(void) { return 0; } void *f6_resolver(void) __attribute__((alias("f6_resolver_resolver"))); // expected-error@-1 {{alias must point to a defined variable or function}} // expected-note@-2 {{must refer to its mangled name}} // expected-note@-3 {{function by that name is mangled as}} -// CHECK: fix-it:"{{.*}}":{[[@LINE-4]]:40-[[@LINE-4]]:69}:"alias(\"_Z20f6_resolver_resolverv\")" +// FIXIT: fix-it:"{{.*}}":{[[@LINE-4]]:40-[[@LINE-4]]:69}:"alias(\"_Z20f6_resolver_resolverv\")" void f6(void) __attribute__((alias("f6_resolver"))); // expected-error@-1 {{alias must point to a defined variable or function}} // expected-note@-2 {{must refer to its mangled name}} // expected-note@-3 {{function by that name is mangled as}} -// CHECK: fix-it:"{{.*}}":{[[@LINE-4]]:30-[[@LINE-4]]:50}:"alias(\"_Z11f6_resolverv\")" +// FIXIT: fix-it:"{{.*}}":{[[@LINE-4]]:30-[[@LINE-4]]:50}:"alias(\"_Z11f6_resolverv\")" __attribute__((unused, alias("resolver"), deprecated("hahahaha, isn't C great?"))) void func(); // expected-error@-2 {{alias must point to a defined variable or function}} // expected-note@-3 {{must refer to its mangled name}} +#endif +// CHECK: @_ZN4libc4log2Ed ={{.*}} alias double (double), ptr @log2 +// CHECK: define{{.*}} @log2( +namespace libc { double log2(double x); } +extern "C" double log2(double); +namespace std { using ::log2; } +using std::log2; + +namespace libc { +decltype(libc::log2) __log2_impl__ __asm__("log2"); +decltype(libc::log2) log2 [[gnu::alias("log2")]]; +double __log2_impl__(double x) { return x; } +} diff 
--git a/clang/test/CodeGen/builtin-allow-runtime-check.cpp b/clang/test/CodeGen/builtin-allow-runtime-check.cpp new file mode 100644 index 00000000000000..db3f59a9d48a1d --- /dev/null +++ b/clang/test/CodeGen/builtin-allow-runtime-check.cpp @@ -0,0 +1,29 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -cc1 -triple x86_64-pc-linux-gnu -emit-llvm -o - %s | FileCheck %s + +static_assert(__has_builtin(__builtin_allow_runtime_check), ""); + +// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z4testv( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.allow.runtime.check(metadata !"mycheck") +// CHECK-NEXT: ret i1 [[TMP0]] +// +bool test() { + return __builtin_allow_runtime_check("mycheck"); +} + +// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z10test_twicev( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.allow.runtime.check(metadata !"mycheck") +// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.allow.runtime.check(metadata !"mycheck") +// CHECK-NEXT: [[CONV1:%.*]] = zext i1 [[TMP1]] to i32 +// CHECK-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[CONV1]] +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[OR]], 0 +// CHECK-NEXT: ret i1 [[TOBOOL]] +// +bool test_twice() { + return __builtin_allow_runtime_check("mycheck") | __builtin_allow_runtime_check("mycheck"); +} diff --git a/clang/test/CodeGenCXX/control-flow-in-stmt-expr.cpp b/clang/test/CodeGenCXX/control-flow-in-stmt-expr.cpp deleted file mode 100644 index 0a51b0e4121c33..00000000000000 --- a/clang/test/CodeGenCXX/control-flow-in-stmt-expr.cpp +++ /dev/null @@ -1,409 +0,0 @@ -// RUN: %clang_cc1 --std=c++20 -fexceptions -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefixes=EH %s -// RUN: %clang_cc1 --std=c++20 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck 
-check-prefixes=NOEH,CHECK %s - -struct Printy { - Printy(const char *name) : name(name) {} - ~Printy() {} - const char *name; -}; - -int foo() { return 2; } - -struct Printies { - Printy a; - Printy b; - Printy c; -}; - -void ParenInit() { - // CHECK-LABEL: define dso_local void @_Z9ParenInitv() - // CHECK: [[CLEANUP_DEST:%.+]] = alloca i32, align 4 - Printies ps(Printy("a"), - // CHECK: call void @_ZN6PrintyC1EPKc - ({ - if (foo()) return; - // CHECK: if.then: - // CHECK-NEXT: store i32 1, ptr [[CLEANUP_DEST]], align 4 - // CHECK-NEXT: br label %cleanup - Printy("b"); - // CHECK: if.end: - // CHECK-NEXT: call void @_ZN6PrintyC1EPKc - }), - ({ - if (foo()) return; - // CHECK: if.then{{.*}}: - // CHECK-NEXT: store i32 1, ptr [[CLEANUP_DEST]], align 4 - // CHECK-NEXT: call void @_ZN6PrintyD1Ev - // CHECK-NEXT: br label %cleanup - Printy("c"); - // CHECK: if.end{{.*}}: - // CHECK-NEXT: call void @_ZN6PrintyC1EPKc - // CHECK-NEXT: call void @_ZN8PrintiesD1Ev - // CHECK-NEXT: br label %return - })); - // CHECK: cleanup: - // CHECK-NEXT: call void @_ZN6PrintyD1Ev - // CHECK-NEXT: br label %return -} - -void break_in_stmt_expr() { - // Verify that the "break" in "if.then".calls dtor before jumping to "for.end". 
- - // CHECK-LABEL: define dso_local void @_Z18break_in_stmt_exprv() - Printies p{Printy("a"), - // CHECK: call void @_ZN6PrintyC1EPKc - ({ - for (;;) { - Printies ps{ - Printy("b"), - // CHECK: for.cond: - // CHECK: call void @_ZN6PrintyC1EPKc - ({ - if (foo()) { - break; - // CHECK: if.then: - // CHECK-NEXT: call void @_ZN6PrintyD1Ev - // CHECK-NEXT: br label %for.end - } - Printy("c"); - // CHECK: if.end: - // CHECK-NEXT: call void @_ZN6PrintyC1EPKc - }), - Printy("d")}; - // CHECK: call void @_ZN6PrintyC1EPKc - // CHECK-NEXT: call void @_ZN8PrintiesD1Ev - // CHECK-NEXT: br label %for.cond - } - Printy("e"); - // CHECK: for.end: - // CHECK-NEXT: call void @_ZN6PrintyC1EPKc - }), - Printy("f")}; - // CHECK: call void @_ZN6PrintyC1EPKc - // CHECK-NEXT: call void @_ZN8PrintiesD1Ev -} - -void goto_in_stmt_expr() { - // Verify that: - // - correct branch fixups for deactivated normal cleanups are generated correctly. - - // CHECK-LABEL: define dso_local void @_Z17goto_in_stmt_exprv() - // CHECK: [[CLEANUP_DEST_SLOT:%cleanup.dest.slot.*]] = alloca i32, align 4 - { - Printies p1{Printy("a"), // CHECK: call void @_ZN6PrintyC1EPKc - ({ - { - Printies p2{Printy("b"), - // CHECK: call void @_ZN6PrintyC1EPKc - ({ - if (foo() == 1) { - goto in; - // CHECK: if.then: - // CHECK-NEXT: store i32 2, ptr [[CLEANUP_DEST_SLOT]], align 4 - // CHECK-NEXT: br label %[[CLEANUP1:.+]] - } - if (foo() == 2) { - goto out; - // CHECK: if.then{{.*}}: - // CHECK-NEXT: store i32 3, ptr [[CLEANUP_DEST_SLOT]], align 4 - // CHECK-NEXT: br label %[[CLEANUP1]] - } - Printy("c"); - // CHECK: if.end{{.*}}: - // CHECK-NEXT: call void @_ZN6PrintyC1EPKc - }), - Printy("d")}; - // CHECK: call void @_ZN6PrintyC1EPKc - // CHECK-NEXT: call void @_ZN8PrintiesD1Ev - // CHECK-NEXT: br label %in - - } - in: - Printy("e"); - // CHECK: in: ; preds = %if.end{{.*}}, %[[CLEANUP1]] - // CHECK-NEXT: call void @_ZN6PrintyC1EPKc - }), - Printy("f")}; - // CHECK: call void @_ZN6PrintyC1EPKc - // CHECK-NEXT: call void 
@_ZN8PrintiesD1Ev - // CHECK-NEXT: br label %out - } -out: - return; - // CHECK: out: - // CHECK-NEXT: ret void - - // CHECK: [[CLEANUP1]]: ; preds = %if.then{{.*}}, %if.then - // CHECK-NEXT: call void @_ZN6PrintyD1Ev - // CHECK-NEXT: %cleanup.dest = load i32, ptr [[CLEANUP_DEST_SLOT]], align 4 - // CHECK-NEXT: switch i32 %cleanup.dest, label %[[CLEANUP2:.+]] [ - // CHECK-NEXT: i32 2, label %in - // CHECK-NEXT: ] - - // CHECK: [[CLEANUP2]]: ; preds = %[[CLEANUP1]] - // CHECK-NEXT: call void @_ZN6PrintyD1Ev - // CHECK-NEXT: %cleanup.dest{{.*}} = load i32, ptr [[CLEANUP_DEST_SLOT]], align 4 - // CHECK-NEXT: switch i32 %cleanup.dest{{.*}}, label %unreachable [ - // CHECK-NEXT: i32 3, label %out - // CHECK-NEXT: ] -} - -void ArrayInit() { - // Printy arr[4] = {ctorA, ctorB, stmt-exprC, stmt-exprD}; - // Verify that: - // - We do the necessary stores for array cleanups (endOfInit and last constructed element). - // - We update the array init element correctly for ctorA, ctorB and stmt-exprC. - // - stmt-exprC and stmt-exprD share the array body dtor code (see %cleanup). 
- - // CHECK-LABEL: define dso_local void @_Z9ArrayInitv() - // CHECK: %arrayinit.endOfInit = alloca ptr, align 8 - // CHECK: %cleanup.dest.slot = alloca i32, align 4 - // CHECK: %arrayinit.begin = getelementptr inbounds [4 x %struct.Printy], ptr %arr, i64 0, i64 0 - // CHECK: store ptr %arrayinit.begin, ptr %arrayinit.endOfInit, align 8 - Printy arr[4] = { - Printy("a"), - // CHECK: call void @_ZN6PrintyC1EPKc(ptr noundef nonnull align 8 dereferenceable(8) %arrayinit.begin, ptr noundef @.str) - // CHECK: [[ARRAYINIT_ELEMENT1:%.+]] = getelementptr inbounds %struct.Printy, ptr %arrayinit.begin, i64 1 - // CHECK: store ptr [[ARRAYINIT_ELEMENT1]], ptr %arrayinit.endOfInit, align 8 - Printy("b"), - // CHECK: call void @_ZN6PrintyC1EPKc(ptr noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT1]], ptr noundef @.str.1) - // CHECK: [[ARRAYINIT_ELEMENT2:%.+]] = getelementptr inbounds %struct.Printy, ptr [[ARRAYINIT_ELEMENT1]], i64 1 - // CHECK: store ptr [[ARRAYINIT_ELEMENT2]], ptr %arrayinit.endOfInit, align 8 - ({ - // CHECK: br i1 {{.*}}, label %if.then, label %if.end - if (foo()) { - return; - // CHECK: if.then: - // CHECK-NEXT: store i32 1, ptr %cleanup.dest.slot, align 4 - // CHECK-NEXT: br label %cleanup - } - // CHECK: if.end: - Printy("c"); - // CHECK-NEXT: call void @_ZN6PrintyC1EPKc - // CHECK-NEXT: %arrayinit.element2 = getelementptr inbounds %struct.Printy, ptr %arrayinit.element1, i64 1 - // CHECK-NEXT: store ptr %arrayinit.element2, ptr %arrayinit.endOfInit, align 8 - }), - ({ - // CHECK: br i1 {{%.+}} label %[[IF_THEN2:.+]], label %[[IF_END2:.+]] - if (foo()) { - return; - // CHECK: [[IF_THEN2]]: - // CHECK-NEXT: store i32 1, ptr %cleanup.dest.slot, align 4 - // CHECK-NEXT: br label %cleanup - } - // CHECK: [[IF_END2]]: - Printy("d"); - // CHECK-NEXT: call void @_ZN6PrintyC1EPKc - // CHECK-NEXT: %array.begin = getelementptr inbounds [4 x %struct.Printy], ptr %arr, i32 0, i32 0 - // CHECK-NEXT: %0 = getelementptr inbounds %struct.Printy, ptr 
%array.begin, i64 4 - // CHECK-NEXT: br label %[[ARRAY_DESTROY_BODY1:.+]] - }), - }; - - // CHECK: [[ARRAY_DESTROY_BODY1]]: - // CHECK-NEXT: %arraydestroy.elementPast{{.*}} = phi ptr [ %0, %[[IF_END2]] ], [ %arraydestroy.element{{.*}}, %[[ARRAY_DESTROY_BODY1]] ] - // CHECK-NEXT: %arraydestroy.element{{.*}} = getelementptr inbounds %struct.Printy, ptr %arraydestroy.elementPast{{.*}}, i64 -1 - // CHECK-NEXT: call void @_ZN6PrintyD1Ev - // CHECK-NEXT: %arraydestroy.done{{.*}} = icmp eq ptr %arraydestroy.element{{.*}}, %array.begin - // CHECK-NEXT: br i1 %arraydestroy.done{{.*}}, label %[[ARRAY_DESTROY_DONE1:.+]], label %[[ARRAY_DESTROY_BODY1]] - - // CHECK: [[ARRAY_DESTROY_DONE1]]: - // CHECK-NEXT: ret void - - // CHECK: cleanup: - // CHECK-NEXT: %1 = load ptr, ptr %arrayinit.endOfInit, align 8 - // CHECK-NEXT: %arraydestroy.isempty = icmp eq ptr %arrayinit.begin, %1 - // CHECK-NEXT: br i1 %arraydestroy.isempty, label %[[ARRAY_DESTROY_DONE2:.+]], label %[[ARRAY_DESTROY_BODY2:.+]] - - // CHECK: [[ARRAY_DESTROY_BODY2]]: - // CHECK-NEXT: %arraydestroy.elementPast = phi ptr [ %1, %cleanup ], [ %arraydestroy.element, %[[ARRAY_DESTROY_BODY2]] ] - // CHECK-NEXT: %arraydestroy.element = getelementptr inbounds %struct.Printy, ptr %arraydestroy.elementPast, i64 -1 - // CHECK-NEXT: call void @_ZN6PrintyD1Ev(ptr noundef nonnull align 8 dereferenceable(8) %arraydestroy.element) - // CHECK-NEXT: %arraydestroy.done = icmp eq ptr %arraydestroy.element, %arrayinit.begin - // CHECK-NEXT: br i1 %arraydestroy.done, label %[[ARRAY_DESTROY_DONE2]], label %[[ARRAY_DESTROY_BODY2]] - - // CHECK: [[ARRAY_DESTROY_DONE2]]: - // CHECK-NEXT: br label %[[ARRAY_DESTROY_DONE1]] -} - -void ArraySubobjects() { - struct S { - Printy arr1[2]; - Printy arr2[2]; - Printy p; - }; - // CHECK-LABEL: define dso_local void @_Z15ArraySubobjectsv() - // CHECK: %arrayinit.endOfInit = alloca ptr, align 8 - S s{{Printy("a"), Printy("b")}, - // CHECK: call void @_ZN6PrintyC1EPKc - // CHECK: call void 
@_ZN6PrintyC1EPKc - {Printy("a"), - // CHECK: [[ARRAYINIT_BEGIN:%.+]] = getelementptr inbounds [2 x %struct.Printy] - // CHECK: store ptr [[ARRAYINIT_BEGIN]], ptr %arrayinit.endOfInit, align 8 - // CHECK: call void @_ZN6PrintyC1EPKc - // CHECK: [[ARRAYINIT_ELEMENT:%.+]] = getelementptr inbounds %struct.Printy - // CHECK: store ptr [[ARRAYINIT_ELEMENT]], ptr %arrayinit.endOfInit, align 8 - ({ - if (foo()) { - return; - // CHECK: if.then: - // CHECK-NEXT: [[V0:%.+]] = load ptr, ptr %arrayinit.endOfInit, align 8 - // CHECK-NEXT: %arraydestroy.isempty = icmp eq ptr [[ARRAYINIT_BEGIN]], [[V0]] - // CHECK-NEXT: br i1 %arraydestroy.isempty, label %[[ARRAY_DESTROY_DONE:.+]], label %[[ARRAY_DESTROY_BODY:.+]] - } - Printy("b"); - }) - }, - Printy("c") - // CHECK: if.end: - // CHECK-NEXT: call void @_ZN6PrintyC1EPKc - // CHECK: call void @_ZN6PrintyC1EPKc - // CHECK-NEXT: call void @_ZZ15ArraySubobjectsvEN1SD1Ev - // CHECK-NEXT: br label %return - }; - // CHECK: return: - // CHECK-NEXT: ret void - - // CHECK: [[ARRAY_DESTROY_BODY]]: - // CHECK-NEXT: %arraydestroy.elementPast = phi ptr [ %0, %if.then ], [ %arraydestroy.element, %[[ARRAY_DESTROY_BODY]] ] - // CHECK-NEXT: %arraydestroy.element = getelementptr inbounds %struct.Printy, ptr %arraydestroy.elementPast, i64 -1 - // CHECK-NEXT: call void @_ZN6PrintyD1Ev(ptr noundef nonnull align 8 dereferenceable(8) %arraydestroy.element) - // CHECK-NEXT: %arraydestroy.done = icmp eq ptr %arraydestroy.element, [[ARRAYINIT_BEGIN]] - // CHECK-NEXT: br i1 %arraydestroy.done, label %[[ARRAY_DESTROY_DONE]], label %[[ARRAY_DESTROY_BODY]] - - // CHECK: [[ARRAY_DESTROY_DONE]] - // CHECK-NEXT: [[ARRAY_BEGIN:%.+]] = getelementptr inbounds [2 x %struct.Printy], ptr %arr1, i32 0, i32 0 - // CHECK-NEXT: [[V1:%.+]] = getelementptr inbounds %struct.Printy, ptr [[ARRAY_BEGIN]], i64 2 - // CHECK-NEXT: br label %[[ARRAY_DESTROY_BODY2:.+]] - - // CHECK: [[ARRAY_DESTROY_BODY2]]: - // CHECK-NEXT: %arraydestroy.elementPast5 = phi ptr [ %1, 
%[[ARRAY_DESTROY_DONE]] ], [ %arraydestroy.element6, %[[ARRAY_DESTROY_BODY2]] ] - // CHECK-NEXT: %arraydestroy.element6 = getelementptr inbounds %struct.Printy, ptr %arraydestroy.elementPast5, i64 -1 - // CHECK-NEXT: call void @_ZN6PrintyD1Ev(ptr noundef nonnull align 8 dereferenceable(8) %arraydestroy.element6) - // CHECK-NEXT: %arraydestroy.done7 = icmp eq ptr %arraydestroy.element6, [[ARRAY_BEGIN]] - // CHECK-NEXT: br i1 %arraydestroy.done7, label %[[ARRAY_DESTROY_DONE2:.+]], label %[[ARRAY_DESTROY_BODY2]] - - - // CHECK: [[ARRAY_DESTROY_DONE2]]: - // CHECK-NEXT: br label %return -} - -void LambdaInit() { - // CHECK-LABEL: define dso_local void @_Z10LambdaInitv() - auto S = [a = Printy("a"), b = ({ - if (foo()) { - return; - // CHECK: if.then: - // CHECK-NEXT: call void @_ZN6PrintyD1Ev - // CHECK-NEXT: br label %return - } - Printy("b"); - })]() { return a; }; -} - -void LifetimeExtended() { - // CHECK-LABEL: define dso_local void @_Z16LifetimeExtendedv - struct PrintyRefBind { - const Printy &a; - const Printy &b; - }; - PrintyRefBind ps = {Printy("a"), ({ - if (foo()) { - return; - // CHECK: if.then: - // CHECK-NEXT: call void @_ZN6PrintyD1Ev - // CHECK-NEXT: br label %return - } - Printy("b"); - })}; -} - -void NewArrayInit() { - // CHECK-LABEL: define dso_local void @_Z12NewArrayInitv() - // CHECK: %array.init.end = alloca ptr, align 8 - // CHECK: store ptr %0, ptr %array.init.end, align 8 - Printy *array = new Printy[3]{ - "a", - // CHECK: call void @_ZN6PrintyC1EPKc - // CHECK: store ptr %array.exp.next, ptr %array.init.end, align 8 - "b", - // CHECK: call void @_ZN6PrintyC1EPKc - // CHECK: store ptr %array.exp.next1, ptr %array.init.end, align 8 - ({ - if (foo()) { - return; - // CHECK: if.then: - // CHECK: br i1 %arraydestroy.isempty, label %arraydestroy.done{{.*}}, label %arraydestroy.body - } - "b"; - // CHECK: if.end: - // CHECK: call void @_ZN6PrintyC1EPKc - })}; - // CHECK: arraydestroy.body: - // CHECK-NEXT: %arraydestroy.elementPast = phi ptr [ 
%{{.*}}, %if.then ], [ %arraydestroy.element, %arraydestroy.body ] - // CHECK-NEXT: %arraydestroy.element = getelementptr inbounds %struct.Printy, ptr %arraydestroy.elementPast, i64 -1 - // CHECK-NEXT: call void @_ZN6PrintyD1Ev(ptr noundef nonnull align 8 dereferenceable(8) %arraydestroy.element) - // CHECK-NEXT: %arraydestroy.done = icmp eq ptr %arraydestroy.element, %0 - // CHECK-NEXT: br i1 %arraydestroy.done, label %arraydestroy.done{{.*}}, label %arraydestroy.body - - // CHECK: arraydestroy.done{{.*}}: ; preds = %arraydestroy.body, %if.then - // CHECK-NEXT: br label %return -} - -void DestroyInConditionalCleanup() { - // EH-LABEL: DestroyInConditionalCleanupv() - // NOEH-LABEL: DestroyInConditionalCleanupv() - struct A { - A() {} - ~A() {} - }; - - struct Value { - Value(A) {} - ~Value() {} - }; - - struct V2 { - Value K; - Value V; - }; - // Verify we use conditional cleanups. - (void)(foo() ? V2{A(), A()} : V2{A(), A()}); - // NOEH: cond.true: - // NOEH: call void @_ZZ27DestroyInConditionalCleanupvEN1AC1Ev - // NOEH: store ptr %{{.*}}, ptr %cond-cleanup.save - - // EH: cond.true: - // EH: invoke void @_ZZ27DestroyInConditionalCleanupvEN1AC1Ev - // EH: store ptr %{{.*}}, ptr %cond-cleanup.save -} - -void ArrayInitWithContinue() { - // CHECK-LABEL: @_Z21ArrayInitWithContinuev - // Verify that we start to emit the array destructor. 
- // CHECK: %arrayinit.endOfInit = alloca ptr, align 8 - for (int i = 0; i < 1; ++i) { - Printy arr[2] = {"a", ({ - if (foo()) { - continue; - } - "b"; - })}; - } -} - -struct [[clang::trivial_abi]] HasTrivialABI { - HasTrivialABI(); - ~HasTrivialABI(); -}; -void AcceptTrivialABI(HasTrivialABI, int); -void TrivialABI() { - // CHECK-LABEL: define dso_local void @_Z10TrivialABIv() - AcceptTrivialABI(HasTrivialABI(), ({ - if (foo()) return; - // CHECK: if.then: - // CHECK-NEXT: call void @_ZN13HasTrivialABID1Ev - // CHECK-NEXT: br label %return - 0; - })); -} diff --git a/clang/test/CodeGenCoroutines/coro-suspend-cleanups.cpp b/clang/test/CodeGenCoroutines/coro-suspend-cleanups.cpp deleted file mode 100644 index 06cc2069dbe9ae..00000000000000 --- a/clang/test/CodeGenCoroutines/coro-suspend-cleanups.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// RUN: %clang_cc1 --std=c++20 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s - -#include "Inputs/coroutine.h" - -struct Printy { - Printy(const char *name) : name(name) {} - ~Printy() {} - const char *name; -}; - -struct coroutine { - struct promise_type; - std::coroutine_handle handle; - ~coroutine() { - if (handle) handle.destroy(); - } -}; - -struct coroutine::promise_type { - coroutine get_return_object() { - return {std::coroutine_handle::from_promise(*this)}; - } - std::suspend_never initial_suspend() noexcept { return {}; } - std::suspend_always final_suspend() noexcept { return {}; } - void return_void() {} - void unhandled_exception() {} -}; - -struct Awaiter : std::suspend_always { - Printy await_resume() { return {"awaited"}; } -}; - -int foo() { return 2; } - -coroutine ArrayInitCoro() { - // Verify that: - // - We do the necessary stores for array cleanups. - // - Array cleanups are called by await.cleanup. - // - We activate the cleanup after the first element and deactivate it in await.ready (see cleanup.isactive). 
- - // CHECK-LABEL: define dso_local void @_Z13ArrayInitCorov - // CHECK: %arrayinit.endOfInit = alloca ptr, align 8 - // CHECK: %cleanup.isactive = alloca i1, align 1 - Printy arr[2] = { - Printy("a"), - // CHECK: %arrayinit.begin = getelementptr inbounds [2 x %struct.Printy], ptr %arr.reload.addr, i64 0, i64 0 - // CHECK-NEXT: %arrayinit.begin.spill.addr = getelementptr inbounds %_Z13ArrayInitCorov.Frame, ptr %0, i32 0, i32 10 - // CHECK-NEXT: store ptr %arrayinit.begin, ptr %arrayinit.begin.spill.addr, align 8 - // CHECK-NEXT: store i1 true, ptr %cleanup.isactive.reload.addr, align 1 - // CHECK-NEXT: store ptr %arrayinit.begin, ptr %arrayinit.endOfInit.reload.addr, align 8 - // CHECK-NEXT: call void @_ZN6PrintyC1EPKc(ptr noundef nonnull align 8 dereferenceable(8) %arrayinit.begin, ptr noundef @.str) - // CHECK-NEXT: %arrayinit.element = getelementptr inbounds %struct.Printy, ptr %arrayinit.begin, i64 1 - // CHECK-NEXT: %arrayinit.element.spill.addr = getelementptr inbounds %_Z13ArrayInitCorov.Frame, ptr %0, i32 0, i32 11 - // CHECK-NEXT: store ptr %arrayinit.element, ptr %arrayinit.element.spill.addr, align 8 - // CHECK-NEXT: store ptr %arrayinit.element, ptr %arrayinit.endOfInit.reload.addr, align 8 - co_await Awaiter{} - // CHECK-NEXT: @_ZNSt14suspend_always11await_readyEv - // CHECK-NEXT: br i1 %{{.+}}, label %await.ready, label %CoroSave30 - }; - // CHECK: await.cleanup: ; preds = %AfterCoroSuspend{{.*}} - // CHECK-NEXT: br label %cleanup{{.*}}.from.await.cleanup - - // CHECK: cleanup{{.*}}.from.await.cleanup: ; preds = %await.cleanup - // CHECK: br label %cleanup{{.*}} - - // CHECK: await.ready: - // CHECK-NEXT: %arrayinit.element.reload.addr = getelementptr inbounds %_Z13ArrayInitCorov.Frame, ptr %0, i32 0, i32 11 - // CHECK-NEXT: %arrayinit.element.reload = load ptr, ptr %arrayinit.element.reload.addr, align 8 - // CHECK-NEXT: call void @_ZN7Awaiter12await_resumeEv - // CHECK-NEXT: store i1 false, ptr %cleanup.isactive.reload.addr, align 1 - // 
CHECK-NEXT: br label %cleanup{{.*}}.from.await.ready - - // CHECK: cleanup{{.*}}: ; preds = %cleanup{{.*}}.from.await.ready, %cleanup{{.*}}.from.await.cleanup - // CHECK: %cleanup.is_active = load i1, ptr %cleanup.isactive.reload.addr, align 1 - // CHECK-NEXT: br i1 %cleanup.is_active, label %cleanup.action, label %cleanup.done - - // CHECK: cleanup.action: - // CHECK: %arraydestroy.isempty = icmp eq ptr %arrayinit.begin.reload{{.*}}, %{{.*}} - // CHECK-NEXT: br i1 %arraydestroy.isempty, label %arraydestroy.done{{.*}}, label %arraydestroy.body.from.cleanup.action - // Ignore rest of the array cleanup. -} - -coroutine ArrayInitWithCoReturn() { - // CHECK-LABEL: define dso_local void @_Z21ArrayInitWithCoReturnv - // Verify that we start to emit the array destructor. - // CHECK: %arrayinit.endOfInit = alloca ptr, align 8 - Printy arr[2] = {"a", ({ - if (foo()) { - co_return; - } - "b"; - })}; -} diff --git a/clang/test/Driver/riscv-features.c b/clang/test/Driver/riscv-features.c index ce4947d2bc47b4..5e1db5ba1ed3e9 100644 --- a/clang/test/Driver/riscv-features.c +++ b/clang/test/Driver/riscv-features.c @@ -38,8 +38,8 @@ // RUN: %clang --target=riscv32-unknown-elf -### %s -mno-strict-align 2>&1 | FileCheck %s -check-prefix=FAST-UNALIGNED-ACCESS // RUN: %clang --target=riscv32-unknown-elf -### %s -mstrict-align 2>&1 | FileCheck %s -check-prefix=NO-FAST-UNALIGNED-ACCESS -// FAST-UNALIGNED-ACCESS: "-target-feature" "+fast-unaligned-access" -// NO-FAST-UNALIGNED-ACCESS: "-target-feature" "-fast-unaligned-access" +// FAST-UNALIGNED-ACCESS: "-target-feature" "+unaligned-scalar-mem" "-target-feature" "+unaligned-vector-mem" +// NO-FAST-UNALIGNED-ACCESS: "-target-feature" "-unaligned-scalar-mem" "-target-feature" "-unaligned-vector-mem" // RUN: %clang --target=riscv32-unknown-elf -### %s 2>&1 | FileCheck %s -check-prefix=NOUWTABLE // RUN: %clang --target=riscv32-unknown-elf -fasynchronous-unwind-tables -### %s 2>&1 | FileCheck %s -check-prefix=UWTABLE diff --git 
a/clang/test/Driver/windows-seh-async-verify.cpp b/clang/test/Driver/windows-seh-async-verify.cpp index 5fda6a77dba049..ace93cf44a31d2 100644 --- a/clang/test/Driver/windows-seh-async-verify.cpp +++ b/clang/test/Driver/windows-seh-async-verify.cpp @@ -1,7 +1,7 @@ -// RUN: %clang --target=x86_64-pc-windows -fasync-exceptions -fsyntax-only %s -### 2>&1 | FileCheck %s -// RUN: %clang_cl --target=x86_64-pc-windows /EHa -fsyntax-only %s -### 2>&1 | FileCheck %s -// RUN: %clang --target=x86_64-pc-windows-gnu -fasync-exceptions -fsyntax-only %s -### 2>&1 | FileCheck %s --check-prefixes=GNU-ALL,GNU -// RUN: %clang_cl --target=x86_64-pc-windows-gnu /EHa -fsyntax-only %s -### 2>&1 | FileCheck %s --check-prefixes=GNU-ALL,CL-GNU +// RUN: %clang --target=x86_64-pc-windows -fasync-exceptions -fsyntax-only -### %s 2>&1 | FileCheck %s +// RUN: %clang_cl --target=x86_64-pc-windows /EHa -fsyntax-only -### -- %s 2>&1 | FileCheck %s +// RUN: %clang --target=x86_64-pc-windows-gnu -fasync-exceptions -fsyntax-only -### %s 2>&1 | FileCheck %s --check-prefixes=GNU-ALL,GNU +// RUN: %clang_cl --target=x86_64-pc-windows-gnu /EHa -fsyntax-only -### -- %s 2>&1 | FileCheck %s --check-prefixes=GNU-ALL,CL-GNU // CHECK-NOT: warning // GNU: warning: argument unused during compilation: '-fasync-exceptions' [-Wunused-command-line-argument] diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip index 2e5f521a5feaed..1271868a53b866 100644 --- a/clang/test/Headers/__clang_hip_math.hip +++ b/clang/test/Headers/__clang_hip_math.hip @@ -1685,7 +1685,7 @@ extern "C" __device__ double test_j1(double x) { // DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] // DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] // DEFAULT-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 -// DEFAULT-NEXT: [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float +// 
DEFAULT-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float // DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]] // DEFAULT-NEXT: [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]] // DEFAULT-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] @@ -1718,7 +1718,7 @@ extern "C" __device__ double test_j1(double x) { // FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] // FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] // FINITEONLY-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 -// FINITEONLY-NEXT: [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float +// FINITEONLY-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float // FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float [[CONV_I]], [[Y]] // FINITEONLY-NEXT: [[MUL8_I:%.*]] = fmul nnan ninf contract float [[__X1_0_I3]], [[DIV_I]] // FINITEONLY-NEXT: [[SUB_I]] = fsub nnan ninf contract float [[MUL8_I]], [[__X0_0_I2]] @@ -1751,7 +1751,7 @@ extern "C" __device__ double test_j1(double x) { // APPROX-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] // APPROX-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] // APPROX-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 -// APPROX-NEXT: [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float +// APPROX-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]] // APPROX-NEXT: [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]] // APPROX-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] @@ -1788,7 +1788,7 @@ extern "C" __device__ float test_jnf(int x, float y) { // DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] // DEFAULT-NEXT: 
[[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] // DEFAULT-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 -// DEFAULT-NEXT: [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to double +// DEFAULT-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double // DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]] // DEFAULT-NEXT: [[MUL8_I:%.*]] = fmul contract double [[__X1_0_I3]], [[DIV_I]] // DEFAULT-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]] @@ -1821,7 +1821,7 @@ extern "C" __device__ float test_jnf(int x, float y) { // FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] // FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] // FINITEONLY-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 -// FINITEONLY-NEXT: [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to double +// FINITEONLY-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double // FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract double [[CONV_I]], [[Y]] // FINITEONLY-NEXT: [[MUL8_I:%.*]] = fmul nnan ninf contract double [[__X1_0_I3]], [[DIV_I]] // FINITEONLY-NEXT: [[SUB_I]] = fsub nnan ninf contract double [[MUL8_I]], [[__X0_0_I2]] @@ -1854,7 +1854,7 @@ extern "C" __device__ float test_jnf(int x, float y) { // APPROX-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] // APPROX-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] // APPROX-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 -// APPROX-NEXT: [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to double +// APPROX-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]] // APPROX-NEXT: [[MUL8_I:%.*]] = fmul contract double [[__X1_0_I3]], [[DIV_I]] // APPROX-NEXT: 
[[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]] @@ -4222,7 +4222,7 @@ extern "C" __device__ double test_y1(double x) { // DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] // DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] // DEFAULT-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 -// DEFAULT-NEXT: [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float +// DEFAULT-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float // DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]] // DEFAULT-NEXT: [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]] // DEFAULT-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] @@ -4255,7 +4255,7 @@ extern "C" __device__ double test_y1(double x) { // FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] // FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] // FINITEONLY-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 -// FINITEONLY-NEXT: [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float +// FINITEONLY-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float // FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract float [[CONV_I]], [[Y]] // FINITEONLY-NEXT: [[MUL8_I:%.*]] = fmul nnan ninf contract float [[__X1_0_I3]], [[DIV_I]] // FINITEONLY-NEXT: [[SUB_I]] = fsub nnan ninf contract float [[MUL8_I]], [[__X0_0_I2]] @@ -4288,7 +4288,7 @@ extern "C" __device__ double test_y1(double x) { // APPROX-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] // APPROX-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] // APPROX-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 -// APPROX-NEXT: [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float +// 
APPROX-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]] // APPROX-NEXT: [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]] // APPROX-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] @@ -4325,7 +4325,7 @@ extern "C" __device__ float test_ynf(int x, float y) { // DEFAULT-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] // DEFAULT-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] // DEFAULT-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 -// DEFAULT-NEXT: [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to double +// DEFAULT-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double // DEFAULT-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]] // DEFAULT-NEXT: [[MUL8_I:%.*]] = fmul contract double [[__X1_0_I3]], [[DIV_I]] // DEFAULT-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]] @@ -4358,7 +4358,7 @@ extern "C" __device__ float test_ynf(int x, float y) { // FINITEONLY-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] // FINITEONLY-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] // FINITEONLY-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 -// FINITEONLY-NEXT: [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to double +// FINITEONLY-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double // FINITEONLY-NEXT: [[DIV_I:%.*]] = fdiv nnan ninf contract double [[CONV_I]], [[Y]] // FINITEONLY-NEXT: [[MUL8_I:%.*]] = fmul nnan ninf contract double [[__X1_0_I3]], [[DIV_I]] // FINITEONLY-NEXT: [[SUB_I]] = fsub nnan ninf contract double [[MUL8_I]], [[__X0_0_I2]] @@ -4391,7 +4391,7 @@ extern "C" __device__ float test_ynf(int x, float y) { // APPROX-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], 
[[IF_END4_I]] ] // APPROX-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] // APPROX-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 -// APPROX-NEXT: [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to double +// APPROX-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]] // APPROX-NEXT: [[MUL8_I:%.*]] = fmul contract double [[__X1_0_I3]], [[DIV_I]] // APPROX-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]] diff --git a/clang/test/ParserOpenACC/parse-clauses.c b/clang/test/ParserOpenACC/parse-clauses.c index 2369df58308a72..4462f0df540f2d 100644 --- a/clang/test/ParserOpenACC/parse-clauses.c +++ b/clang/test/ParserOpenACC/parse-clauses.c @@ -376,16 +376,13 @@ void SelfClause() { #pragma acc serial self(i > j, seq for(;;){} - // expected-warning@+2{{left operand of comma operator has no effect}} - // expected-warning@+1{{OpenACC clause 'self' not yet implemented, clause ignored}} + // expected-warning@+1{{left operand of comma operator has no effect}} #pragma acc serial self(i, j) for(;;){} - // expected-warning@+1{{OpenACC clause 'self' not yet implemented, clause ignored}} #pragma acc serial self(i > j) for(;;){} - // expected-warning@+2{{OpenACC clause 'self' not yet implemented, clause ignored}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial self(1+5>3), seq for(;;){} diff --git a/clang/test/Sema/builtin-allow-runtime-check.c b/clang/test/Sema/builtin-allow-runtime-check.c new file mode 100644 index 00000000000000..b6568610000755 --- /dev/null +++ b/clang/test/Sema/builtin-allow-runtime-check.c @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -fsyntax-only -triple x86_64-pc-linux-gnu -verify %s +// RUN: %clang_cc1 -fsyntax-only -triple aarch64-linux-gnu -verify %s + +extern const char *str; + +int main(void) { + int r = 0; + + r |= __builtin_allow_runtime_check(); // 
expected-error {{too few arguments to function call}} + + r |= __builtin_allow_runtime_check(str); // expected-error {{expression is not a string literal}} + + r |= __builtin_allow_runtime_check(5); // expected-error {{incompatible integer to pointer conversion}} expected-error {{expression is not a string literal}} + + r |= __builtin_allow_runtime_check("a", "b"); // expected-error {{too many arguments to function call}} + + r |= __builtin_allow_runtime_check(""); + + r |= __builtin_allow_runtime_check("check"); + + str = __builtin_allow_runtime_check("check2"); // expected-error {{incompatible integer to pointer conversion}} + + return r; +} diff --git a/clang/test/Sema/recover-expr-gh88008-nocrash.c b/clang/test/Sema/recover-expr-gh88008-nocrash.c new file mode 100644 index 00000000000000..5500b33dd0e85d --- /dev/null +++ b/clang/test/Sema/recover-expr-gh88008-nocrash.c @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 %s -verify -fsyntax-only -std=c90 + +struct S { + int v; +}; + +struct T; // expected-note {{forward declaration of 'struct T'}} + +void gh88008_nocrash(struct T *t) { + struct S s = { .v = t->y }; // expected-error {{incomplete definition of type 'struct T'}} +} diff --git a/clang/test/SemaCXX/PR41441.cpp b/clang/test/SemaCXX/PR41441.cpp new file mode 100644 index 00000000000000..d0f2917e52f211 --- /dev/null +++ b/clang/test/SemaCXX/PR41441.cpp @@ -0,0 +1,23 @@ +// RUN: %clang --target=x86_64-pc-linux -S -fno-discard-value-names -emit-llvm -o - %s | FileCheck %s + +namespace std { + using size_t = decltype(sizeof(int)); +}; +void* operator new[](std::size_t, void*) noexcept; + +// CHECK: call void @llvm.memset.p0.i64(ptr align 1 %x, i8 0, i64 8, i1 false) +// CHECK: call void @llvm.memset.p0.i64(ptr align 16 %x, i8 0, i64 32, i1 false) +template +void f() +{ + typedef TYPE TArray[8]; + + TArray x; + new(&x) TArray(); +} + +int main() +{ + f(); + f(); +} diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-suggestions-crashes.cpp 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-suggestions-crashes.cpp new file mode 100644 index 00000000000000..bf4faec184ee17 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-suggestions-crashes.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage \ +// RUN: -fsafe-buffer-usage-suggestions \ +// RUN: %s -verify %s + +char * unsafe_pointer; // expected-warning{{'unsafe_pointer' is an unsafe pointer used for buffer access}} + +void test(char * param) { +} + +void dre_parenthesized() { + test(&(unsafe_pointer)[1]); // no-crash // expected-note{{used in buffer access here}} +} diff --git a/clang/test/SemaOpenACC/compute-construct-clause-ast.cpp b/clang/test/SemaOpenACC/compute-construct-clause-ast.cpp index 018f0b68c78109..6d2efcf81eb6e4 100644 --- a/clang/test/SemaOpenACC/compute-construct-clause-ast.cpp +++ b/clang/test/SemaOpenACC/compute-construct-clause-ast.cpp @@ -110,6 +110,50 @@ void TemplFunc() { // CHECK-NEXT: CXXBoolLiteralExpr // CHECK-NEXT: NullStmt +#pragma acc serial self + while(true); + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial + // CHECK-NEXT: self clause + // CHECK-NEXT: WhileStmt + // CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt + +#pragma acc kernels self(T::SomeFloat) + while(true); + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}kernels + // CHECK-NEXT: self clause + // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '' lvalue + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'T' + // CHECK-NEXT: WhileStmt + // CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt + +#pragma acc parallel self(T::SomeFloat) if (T::SomeFloat) + while(true); + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel + // CHECK-NEXT: self clause + // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '' lvalue + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'T' + // CHECK-NEXT: if clause + // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '' lvalue + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'T' + // CHECK-NEXT: WhileStmt + // 
CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt + +#pragma acc serial if(T::SomeFloat) self(T::SomeFloat) + while(true); + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial + // CHECK-NEXT: if clause + // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '' lvalue + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'T' + // CHECK-NEXT: self clause + // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '' lvalue + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'T' + // CHECK-NEXT: WhileStmt + // CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt + // Match the instantiation: // CHECK: FunctionDecl{{.*}}TemplFunc{{.*}}implicit_instantiation // CHECK-NEXT: TemplateArgument type 'InstTy' @@ -171,6 +215,53 @@ void TemplFunc() { // CHECK-NEXT: WhileStmt // CHECK-NEXT: CXXBoolLiteralExpr // CHECK-NEXT: NullStmt + + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial + // CHECK-NEXT: self clause + // CHECK-NEXT: WhileStmt + // CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt + + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}kernels + // CHECK-NEXT: self clause + // CHECK-NEXT: ImplicitCastExpr{{.*}}'bool' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'float' + // CHECK-NEXT: DeclRefExpr{{.*}} 'const float' lvalue Var{{.*}} 'SomeFloat' 'const float' + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'InstTy' + // CHECK-NEXT: WhileStmt + // CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt + + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel + // CHECK-NEXT: self clause + // CHECK-NEXT: ImplicitCastExpr{{.*}}'bool' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'float' + // CHECK-NEXT: DeclRefExpr{{.*}} 'const float' lvalue Var{{.*}} 'SomeFloat' 'const float' + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'InstTy' + // CHECK-NEXT: if clause + // CHECK-NEXT: ImplicitCastExpr{{.*}}'bool' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'float' + // CHECK-NEXT: DeclRefExpr{{.*}} 'const float' lvalue Var{{.*}} 'SomeFloat' 'const float' + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'InstTy' + // 
CHECK-NEXT: WhileStmt + // CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt + + // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial + // CHECK-NEXT: if clause + // CHECK-NEXT: ImplicitCastExpr{{.*}}'bool' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'float' + // CHECK-NEXT: DeclRefExpr{{.*}} 'const float' lvalue Var{{.*}} 'SomeFloat' 'const float' + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'InstTy' + // CHECK-NEXT: self clause + // CHECK-NEXT: ImplicitCastExpr{{.*}}'bool' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'float' + // CHECK-NEXT: DeclRefExpr{{.*}} 'const float' lvalue Var{{.*}} 'SomeFloat' 'const float' + // CHECK-NEXT: NestedNameSpecifier TypeSpec 'InstTy' + // CHECK-NEXT: WhileStmt + // CHECK-NEXT: CXXBoolLiteralExpr + // CHECK-NEXT: NullStmt + } struct BoolConversion{ operator bool() const;}; diff --git a/clang/test/SemaOpenACC/compute-construct-self-clause.c b/clang/test/SemaOpenACC/compute-construct-self-clause.c new file mode 100644 index 00000000000000..fbed2953419a2e --- /dev/null +++ b/clang/test/SemaOpenACC/compute-construct-self-clause.c @@ -0,0 +1,82 @@ +// RUN: %clang_cc1 %s -fopenacc -verify + +void BoolExpr(int *I, float *F) { + typedef struct {} SomeStruct; + struct C{}; + // expected-error@+1{{expected expression}} +#pragma acc parallel self (struct C f()) + while(0); + + // expected-error@+1{{unexpected type name 'SomeStruct': expected expression}} +#pragma acc serial self (SomeStruct) + while(0); + + // expected-error@+1{{unexpected type name 'SomeStruct': expected expression}} +#pragma acc serial self (SomeStruct()) + while(0); + + SomeStruct S; + // expected-error@+1{{statement requires expression of scalar type ('SomeStruct' invalid)}} +#pragma acc serial self (S) + while(0); + +#pragma acc parallel self (I) + while(0); + +#pragma acc serial self (F) + while(0); + +#pragma acc kernels self (*I < *F) + while(0); +} + +void WarnMaybeNotUsed(int val1, int val2) { + + // expected-warning@+2{{OpenACC construct 'self' has no effect when an 
'if' clause evaluates to true}} + // expected-note@+1{{previous clause is here}} +#pragma acc parallel self if(val1) + while(0); + + // expected-warning@+2{{OpenACC construct 'self' has no effect when an 'if' clause evaluates to true}} + // expected-note@+1{{previous clause is here}} +#pragma acc parallel self(val1) if(val1) + while(0); + + // expected-warning@+2{{OpenACC construct 'self' has no effect when an 'if' clause evaluates to true}} + // expected-note@+1{{previous clause is here}} +#pragma acc parallel if(val1) self + while(0); + + // expected-warning@+2{{OpenACC construct 'self' has no effect when an 'if' clause evaluates to true}} + // expected-note@+1{{previous clause is here}} +#pragma acc parallel if(val1) self(val2) + while(0); + + // The below don't warn because one side or the other has an error, thus is + // not added to the AST. + + // expected-error@+1{{use of undeclared identifier 'invalid'}} +#pragma acc parallel self if(invalid) + while(0); + + // expected-error@+1{{use of undeclared identifier 'invalid'}} +#pragma acc parallel self(invalid) if(val1) + while(0); + + // expected-error@+2{{expected expression}} + // expected-error@+1{{use of undeclared identifier 'invalid'}} +#pragma acc parallel self() if(invalid) + while(0); + + // expected-error@+1{{use of undeclared identifier 'invalid'}} +#pragma acc parallel if(invalid) self + while(0); + + // expected-error@+1{{use of undeclared identifier 'invalid'}} +#pragma acc parallel if(val2) self(invalid) + while(0); + + // expected-error@+1{{use of undeclared identifier 'invalid'}} +#pragma acc parallel if(invalid) self(val1) + while(0); +} diff --git a/clang/test/SemaOpenACC/compute-construct-self-clause.cpp b/clang/test/SemaOpenACC/compute-construct-self-clause.cpp new file mode 100644 index 00000000000000..60edbdc2b1191b --- /dev/null +++ b/clang/test/SemaOpenACC/compute-construct-self-clause.cpp @@ -0,0 +1,99 @@ +// RUN: %clang_cc1 %s -fopenacc -verify + +struct NoBoolConversion{}; +struct 
BoolConversion{ + operator bool(); +}; + +template +void BoolExpr() { + // expected-error@+1{{value of type 'NoBoolConversion' is not contextually convertible to 'bool'}} +#pragma acc parallel self (NoBoolConversion{}) + while(0); + // expected-error@+2{{no member named 'NotValid' in 'NoBoolConversion'}} + // expected-note@#INST{{in instantiation of function template specialization}} +#pragma acc parallel self (T::NotValid) + while(0); + +#pragma acc parallel self (BoolConversion{}) + while(0); + + // expected-error@+1{{value of type 'NoBoolConversion' is not contextually convertible to 'bool'}} +#pragma acc parallel self (T{}) + while(0); + +#pragma acc parallel self (U{}) + while(0); +} + +struct HasBool { + static constexpr bool B = true; +}; + +template +void WarnMaybeNotUsed() { + // expected-warning@+2{{OpenACC construct 'self' has no effect when an 'if' clause evaluates to true}} + // expected-note@+1{{previous clause is here}} +#pragma acc parallel self if(T::B) + while(0); + + // expected-warning@+2{{OpenACC construct 'self' has no effect when an 'if' clause evaluates to true}} + // expected-note@+1{{previous clause is here}} +#pragma acc parallel self(T::B) if(T::B) + while(0); + + // expected-warning@+2{{OpenACC construct 'self' has no effect when an 'if' clause evaluates to true}} + // expected-note@+1{{previous clause is here}} +#pragma acc parallel if(T::B) self + while(0); + + // expected-warning@+2{{OpenACC construct 'self' has no effect when an 'if' clause evaluates to true}} + // expected-note@+1{{previous clause is here}} +#pragma acc parallel if(T::B) self(T::B) + while(0); + + // We still warn in the cases of dependent failures, since the diagnostic + // happens immediately rather than during instantiation. 
+ + // expected-error@+4{{no member named 'Invalid' in 'HasBool'}} + // expected-note@#NOT_USED_INST{{in instantiation of function template specialization 'WarnMaybeNotUsed' requested here}} + // expected-warning@+2{{OpenACC construct 'self' has no effect when an 'if' clause evaluates to true}} + // expected-note@+1{{previous clause is here}} +#pragma acc parallel self if(T::Invalid) + while(0); + + // expected-error@+3{{no member named 'Invalid' in 'HasBool'}} + // expected-warning@+2{{OpenACC construct 'self' has no effect when an 'if' clause evaluates to true}} + // expected-note@+1{{previous clause is here}} +#pragma acc parallel self(T::Invalid) if(T::B) + while(0); + + // expected-error@+3{{no member named 'Invalid' in 'HasBool'}} + // expected-warning@+2{{OpenACC construct 'self' has no effect when an 'if' clause evaluates to true}} + // expected-note@+1{{previous clause is here}} +#pragma acc parallel self(T::B) if(T::Invalid) + while(0); + + // expected-error@+3{{no member named 'Invalid' in 'HasBool'}} + // expected-warning@+2{{OpenACC construct 'self' has no effect when an 'if' clause evaluates to true}} + // expected-note@+1{{previous clause is here}} +#pragma acc parallel if(T::Invalid) self + while(0); + + // expected-error@+3{{no member named 'Invalid' in 'HasBool'}} + // expected-warning@+2{{OpenACC construct 'self' has no effect when an 'if' clause evaluates to true}} + // expected-note@+1{{previous clause is here}} +#pragma acc parallel if(T::Invalid) self(T::B) + while(0); + + // expected-error@+3{{no member named 'Invalid' in 'HasBool'}} + // expected-warning@+2{{OpenACC construct 'self' has no effect when an 'if' clause evaluates to true}} + // expected-note@+1{{previous clause is here}} +#pragma acc parallel if(T::B) self(T::Invalid) + while(0); +} + +void Instantiate() { + BoolExpr(); // #INST + WarnMaybeNotUsed(); // #NOT_USED_INST +} diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp 
index eaa76dd43e41dd..f42af7e330e17a 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -72,6 +72,7 @@ enum ResourceDirRecipeKind { RDRK_InvokeCompiler, }; +static std::string OutputFileName = "-"; static ScanningMode ScanMode = ScanningMode::DependencyDirectivesScan; static ScanningOutputFormat Format = ScanningOutputFormat::Make; static ScanningOptimizations OptimizeArgs; @@ -98,8 +99,8 @@ static bool RoundTripArgs = DoRoundTripDefault; static void ParseArgs(int argc, char **argv) { ScanDepsOptTable Tbl; llvm::StringRef ToolName = argv[0]; - llvm::BumpPtrAllocator A; - llvm::StringSaver Saver{A}; + llvm::BumpPtrAllocator Alloc; + llvm::StringSaver Saver{Alloc}; llvm::opt::InputArgList Args = Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) { llvm::errs() << Msg << '\n'; @@ -175,6 +176,9 @@ static void ParseArgs(int argc, char **argv) { if (const llvm::opt::Arg *A = Args.getLastArg(OPT_module_files_dir_EQ)) ModuleFilesDir = A->getValue(); + if (const llvm::opt::Arg *A = Args.getLastArg(OPT_o)) + OutputFileName = A->getValue(); + EagerLoadModules = Args.hasArg(OPT_eager_load_pcm); if (const llvm::opt::Arg *A = Args.getLastArg(OPT_j)) { @@ -186,14 +190,8 @@ static void ParseArgs(int argc, char **argv) { } } - if (const llvm::opt::Arg *A = Args.getLastArg(OPT_compilation_database_EQ)) { + if (const llvm::opt::Arg *A = Args.getLastArg(OPT_compilation_database_EQ)) CompilationDB = A->getValue(); - } else if (Format != ScanningOutputFormat::P1689) { - llvm::errs() << ToolName - << ": for the --compiilation-database option: must be " - "specified at least once!"; - std::exit(1); - } if (const llvm::opt::Arg *A = Args.getLastArg(OPT_module_name_EQ)) ModuleName = A->getValue(); @@ -225,9 +223,8 @@ static void ParseArgs(int argc, char **argv) { RoundTripArgs = Args.hasArg(OPT_round_trip_args); - if (auto *A = Args.getLastArgNoClaim(OPT_DASH_DASH)) - CommandLine.insert(CommandLine.end(), 
A->getValues().begin(), - A->getValues().end()); + if (const llvm::opt::Arg *A = Args.getLastArgNoClaim(OPT_DASH_DASH)) + CommandLine.assign(A->getValues().begin(), A->getValues().end()); } class SharedStream { @@ -426,6 +423,11 @@ class FullDeps { } void printFullOutput(raw_ostream &OS) { + // Skip sorting modules and constructing the JSON object if the output + // cannot be observed anyway. This makes timings less noisy. + if (&OS == &llvm::nulls()) + return; + // Sort the modules by name to get a deterministic order. std::vector ModuleIDs; for (auto &&M : Modules) @@ -694,38 +696,28 @@ static std::string getModuleCachePath(ArrayRef Args) { return std::string(Path); } -// getCompilationDataBase - If -compilation-database is set, load the -// compilation database from the specified file. Otherwise if the we're -// generating P1689 format, trying to generate the compilation database -// form specified command line after the positional parameter "--". +/// Attempts to construct the compilation database from '-compilation-database' +/// or from the arguments following the positional '--'. static std::unique_ptr -getCompilationDataBase(int argc, char **argv, std::string &ErrorMessage) { +getCompilationDatabase(int argc, char **argv, std::string &ErrorMessage) { ParseArgs(argc, argv); + if (!(CommandLine.empty() ^ CompilationDB.empty())) { + llvm::errs() << "The compilation command line must be provided either via " + "'-compilation-database' or after '--'."; + return nullptr; + } + if (!CompilationDB.empty()) return tooling::JSONCompilationDatabase::loadFromFile( CompilationDB, ErrorMessage, tooling::JSONCommandLineSyntax::AutoDetect); - if (Format != ScanningOutputFormat::P1689) { - llvm::errs() << "the --compilation-database option: must be specified at " - "least once!"; - return nullptr; - } - - // Trying to get the input file, the output file and the command line options - // from the positional parameter "--". 
- char **DoubleDash = std::find(argv, argv + argc, StringRef("--")); - if (DoubleDash == argv + argc) { - llvm::errs() << "The command line arguments is required after '--' in " - "P1689 per file mode."; - return nullptr; - } - llvm::IntrusiveRefCntPtr Diags = CompilerInstance::createDiagnostics(new DiagnosticOptions); driver::Driver TheDriver(CommandLine[0], llvm::sys::getDefaultTargetTriple(), *Diags); + TheDriver.setCheckInputsExist(false); std::unique_ptr C( TheDriver.BuildCompilation(CommandLine)); if (!C || C->getJobs().empty()) @@ -740,7 +732,8 @@ getCompilationDataBase(int argc, char **argv, std::string &ErrorMessage) { FrontendOptions &FEOpts = CI->getFrontendOpts(); if (FEOpts.Inputs.size() != 1) { - llvm::errs() << "Only one input file is allowed in P1689 per file mode."; + llvm::errs() + << "Exactly one input file is required in the per-file mode ('--').\n"; return nullptr; } @@ -749,8 +742,9 @@ getCompilationDataBase(int argc, char **argv, std::string &ErrorMessage) { auto LastCmd = C->getJobs().end(); LastCmd--; if (LastCmd->getOutputFilenames().size() != 1) { - llvm::errs() << "The command line should provide exactly one output file " - "in P1689 per file mode.\n"; + llvm::errs() + << "Exactly one output file is required in the per-file mode ('--').\n"; + return nullptr; } StringRef OutputFile = LastCmd->getOutputFilenames().front(); @@ -790,7 +784,7 @@ getCompilationDataBase(int argc, char **argv, std::string &ErrorMessage) { int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) { std::string ErrorMessage; std::unique_ptr Compilations = - getCompilationDataBase(argc, argv, ErrorMessage); + getCompilationDatabase(argc, argv, ErrorMessage); if (!Compilations) { llvm::errs() << ErrorMessage << "\n"; return 1; @@ -864,8 +858,25 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) { }); SharedStream Errs(llvm::errs()); - // Print out the dependency results to STDOUT by default. 
- SharedStream DependencyOS(llvm::outs()); + + std::optional FileOS; + llvm::raw_ostream &ThreadUnsafeDependencyOS = [&]() -> llvm::raw_ostream & { + if (OutputFileName == "-") + return llvm::outs(); + + if (OutputFileName == "/dev/null") + return llvm::nulls(); + + std::error_code EC; + FileOS.emplace(OutputFileName, EC); + if (EC) { + llvm::errs() << "Failed to open output file '" << OutputFileName + << "': " << llvm::errorCodeToError(EC) << '\n'; + std::exit(1); + } + return *FileOS; + }(); + SharedStream DependencyOS(ThreadUnsafeDependencyOS); std::vector Inputs = AdjustingCompilations->getAllCompileCommands(); @@ -1006,9 +1017,9 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) { HadErrors = true; if (Format == ScanningOutputFormat::Full) - FD->printFullOutput(llvm::outs()); + FD->printFullOutput(ThreadUnsafeDependencyOS); else if (Format == ScanningOutputFormat::P1689) - PD.printDependencies(llvm::outs()); + PD.printDependencies(ThreadUnsafeDependencyOS); return HadErrors; } diff --git a/clang/tools/clang-scan-deps/Opts.td b/clang/tools/clang-scan-deps/Opts.td index 5cd5d1a9fb37bc..4837ce6f070d73 100644 --- a/clang/tools/clang-scan-deps/Opts.td +++ b/clang/tools/clang-scan-deps/Opts.td @@ -11,6 +11,8 @@ multiclass Eq { def help : Flag<["--"], "help">, HelpText<"Display this help">; def version : Flag<["--"], "version">, HelpText<"Display the version">; +def o : Arg<"o", "Destination of the primary output">; + defm mode : Eq<"mode", "The preprocessing mode used to compute the dependencies">; defm format : Eq<"format", "The output format for the dependencies">; @@ -37,4 +39,4 @@ def verbose : F<"v", "Use verbose output">; def round_trip_args : F<"round-trip-args", "verify that command-line arguments are canonical by parsing and re-serializing">; -def DASH_DASH : Option<["--"], "", KIND_REMAINING_ARGS>; \ No newline at end of file +def DASH_DASH : Option<["--"], "", KIND_REMAINING_ARGS>; diff --git a/clang/tools/libclang/CIndex.cpp 
b/clang/tools/libclang/CIndex.cpp index f304786ff9dffd..2ef599d2cd26fa 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2791,6 +2791,10 @@ void OpenACCClauseEnqueue::VisitDefaultClause(const OpenACCDefaultClause &C) {} void OpenACCClauseEnqueue::VisitIfClause(const OpenACCIfClause &C) { Visitor.AddStmt(C.getConditionExpr()); } +void OpenACCClauseEnqueue::VisitSelfClause(const OpenACCSelfClause &C) { + if (C.hasConditionExpr()) + Visitor.AddStmt(C.getConditionExpr()); +} } // namespace void EnqueueVisitor::EnqueueChildren(const OpenACCClause *C) { diff --git a/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp b/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp index f58ce4aebcbfc8..9c1dc1a76db63d 100644 --- a/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp +++ b/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp @@ -977,6 +977,36 @@ TEST(ExprMutationAnalyzerTest, FollowFuncArgModified) { "void f() { int x; g(x); }"); Results = match(withEnclosingCompound(declRefTo("x")), AST->getASTContext()); EXPECT_THAT(mutatedBy(Results, AST.get()), ElementsAre("g(x)")); + + AST = buildASTFromCode( + StdRemoveReference + StdForward + + "template void f1(T &&a);" + "template void f2(T &&a);" + "template void f1(T &&a) { f2(std::forward(a)); }" + "template void f2(T &&a) { f1(std::forward(a)); }" + "void f() { int x; f1(x); }"); + Results = match(withEnclosingCompound(declRefTo("x")), AST->getASTContext()); + EXPECT_FALSE(isMutated(Results, AST.get())); + + AST = buildASTFromCode( + StdRemoveReference + StdForward + + "template void f1(T &&a);" + "template void f2(T &&a);" + "template void f1(T &&a) { f2(std::forward(a)); }" + "template void f2(T &&a) { f1(std::forward(a)); a++; }" + "void f() { int x; f1(x); }"); + Results = match(withEnclosingCompound(declRefTo("x")), AST->getASTContext()); + EXPECT_THAT(mutatedBy(Results, AST.get()), ElementsAre("f1(x)")); + + AST = buildASTFromCode( + StdRemoveReference + StdForward + + 
"template void f1(T &&a);" + "template void f2(T &&a);" + "template void f1(T &&a) { f2(std::forward(a)); a++; }" + "template void f2(T &&a) { f1(std::forward(a)); }" + "void f() { int x; f1(x); }"); + Results = match(withEnclosingCompound(declRefTo("x")), AST->getASTContext()); + EXPECT_THAT(mutatedBy(Results, AST.get()), ElementsAre("f1(x)")); } TEST(ExprMutationAnalyzerTest, FollowFuncArgNotModified) { diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index d8bcc3da4b8b1c..d7a51b009712f6 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -3182,6 +3182,32 @@ TEST(TransferTest, ResultObjectLocationForStdInitializerListExpr) { }); } +TEST(TransferTest, ResultObjectLocationForStmtExpr) { + std::string Code = R"( + struct S {}; + void target() { + S s = ({ S(); }); + // [[p]] + } + )"; + using ast_matchers::cxxConstructExpr; + using ast_matchers::match; + using ast_matchers::selectFirst; + using ast_matchers::traverse; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto *Construct = selectFirst( + "construct", match(cxxConstructExpr().bind("construct"), ASTCtx)); + + EXPECT_EQ(&Env.getResultObjectLocation(*Construct), + &getLocForDecl(ASTCtx, Env, "s")); + }); +} + TEST(TransferTest, ResultObjectLocationPropagatesThroughConditionalOperator) { std::string Code = R"( struct A { diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index 04e1acc2705004..56f1fdf9ef574f 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -2266,7 +2266,7 @@ static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) { InIfdef = false; } if (!InIfdef && IsA64) { - OS << "#ifdef __aarch64__\n"; + OS << "#if defined(__aarch64__) || 
defined(__arm64ec__)\n"; InIfdef = true; } @@ -2299,7 +2299,7 @@ static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) { InIfdef = false; } if (!InIfdef && IsA64) { - OS << "#ifdef __aarch64__\n"; + OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; InIfdef = true; } @@ -2381,7 +2381,7 @@ void NeonEmitter::run(raw_ostream &OS) { OS << "#include \n"; // For now, signedness of polynomial types depends on target - OS << "#ifdef __aarch64__\n"; + OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; OS << "typedef uint8_t poly8_t;\n"; OS << "typedef uint16_t poly16_t;\n"; OS << "typedef uint64_t poly64_t;\n"; @@ -2582,7 +2582,7 @@ void NeonEmitter::runVectorTypes(raw_ostream &OS) { OS << "typedef float float32_t;\n"; OS << "typedef __fp16 float16_t;\n"; - OS << "#ifdef __aarch64__\n"; + OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; OS << "typedef double float64_t;\n"; OS << "#endif\n\n"; diff --git a/flang/cmake/modules/AddFlangOffloadRuntime.cmake b/flang/cmake/modules/AddFlangOffloadRuntime.cmake index 6fb6213e90fc49..e34d3851187acf 100644 --- a/flang/cmake/modules/AddFlangOffloadRuntime.cmake +++ b/flang/cmake/modules/AddFlangOffloadRuntime.cmake @@ -10,7 +10,7 @@ set(FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD "off" CACHE STRING set(FLANG_OMP_DEVICE_ARCHITECTURES "all" CACHE STRING "List of OpenMP device architectures to be used to compile the Fortran runtime (e.g. 'gfx1103;sm_90')") -macro(enable_cuda_compilation files) +macro(enable_cuda_compilation name files) if (FLANG_EXPERIMENTAL_CUDA_RUNTIME) if (BUILD_SHARED_LIBS) message(FATAL_ERROR @@ -52,6 +52,10 @@ macro(enable_cuda_compilation files) include_directories(AFTER ${FLANG_LIBCUDACXX_PATH}/include) add_compile_definitions(RT_USE_LIBCUDACXX=1) endif() + + # Add an OBJECT library consisting of CUDA PTX. 
+ llvm_add_library(${name}PTX OBJECT PARTIAL_SOURCES_INTENDED ${files}) + set_property(TARGET obj.${name}PTX PROPERTY CUDA_PTX_COMPILATION ON) endif() endmacro() diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index c181c7ed62dff3..580e840587abb2 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -3222,4 +3222,29 @@ def fir_CUDAAllocateOp : fir_Op<"cuda_allocate", [AttrSizedOperandSegments, let hasVerifier = 1; } +def fir_CUDADeallocateOp : fir_Op<"cuda_deallocate", + [MemoryEffects<[MemFree]>]> { + let summary = "Perform the device deallocation of data of an allocatable"; + + let description = [{ + The fir.cuda_deallocate operation performs the deallocation on the device + of the data of an allocatable. + }]; + + let arguments = (ins Arg:$box, + Arg, "", [MemWrite]>:$errmsg, + fir_CUDADataAttributeAttr:$cuda_attr, + UnitAttr:$hasStat); + + let results = (outs AnyIntegerType:$stat); + + let assemblyFormat = [{ + $box `:` qualified(type($box)) + ( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )? 
+ attr-dict `->` type($stat) + }]; + + let hasVerifier = 1; +} + #endif diff --git a/flang/lib/Decimal/CMakeLists.txt b/flang/lib/Decimal/CMakeLists.txt index 3d562b8e3ce1e5..880b190f1c5815 100644 --- a/flang/lib/Decimal/CMakeLists.txt +++ b/flang/lib/Decimal/CMakeLists.txt @@ -55,7 +55,7 @@ set(sources ) include(AddFlangOffloadRuntime) -enable_cuda_compilation("${sources}") +enable_cuda_compilation(FortranDecimal "${sources}") enable_omp_offload_compilation("${sources}") add_flang_library(FortranDecimal INSTALL_WITH_TOOLCHAIN ${sources}) diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 88710880174d21..be27256d911b31 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -4012,6 +4012,19 @@ mlir::LogicalResult fir::CUDAAllocateOp::verify() { return mlir::success(); } +mlir::LogicalResult fir::CUDADeallocateOp::verify() { + if (!fir::unwrapRefType(getBox().getType()).isa()) + return emitOpError( + "expect box to be a reference to class or box type value"); + if (getErrmsg() && + !fir::unwrapRefType(getErrmsg().getType()).isa()) + return emitOpError( + "expect errmsg to be a reference to/or a box type value"); + if (getErrmsg() && !getHasStat()) + return emitOpError("expect stat attribute when errmsg is provided"); + return mlir::success(); +} + //===----------------------------------------------------------------------===// // FIROpsDialect //===----------------------------------------------------------------------===// diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt index 2a65a22ab674c4..bdd0e07bbfd4d1 100644 --- a/flang/runtime/CMakeLists.txt +++ b/flang/runtime/CMakeLists.txt @@ -224,7 +224,7 @@ set(supported_files utf.cpp ) -enable_cuda_compilation("${supported_files}") +enable_cuda_compilation(FortranRuntime "${supported_files}") enable_omp_offload_compilation("${supported_files}") if (NOT TARGET FortranFloat128Math) diff --git 
a/flang/test/Driver/msvc-dependent-lib-flags.f90 b/flang/test/Driver/msvc-dependent-lib-flags.f90 index 643dbe9e949cbb..6cfc969e92b20a 100644 --- a/flang/test/Driver/msvc-dependent-lib-flags.f90 +++ b/flang/test/Driver/msvc-dependent-lib-flags.f90 @@ -1,7 +1,7 @@ -! RUN: %flang -### --target=aarch64-windows-msvc %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC -! RUN: %flang -### --target=aarch64-windows-msvc -fms-runtime-lib=static_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DEBUG -! RUN: %flang -### --target=aarch64-windows-msvc -fms-runtime-lib=dll %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL -! RUN: %flang -### --target=aarch64-windows-msvc -fms-runtime-lib=dll_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL-DEBUG +! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC +! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=static_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DEBUG +! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=dll %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL +! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=dll_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL-DEBUG ! MSVC: -fc1 ! 
MSVC-SAME: --dependent-lib=clang_rt.builtins.lib diff --git a/flang/test/Fir/cuf-invalid.fir b/flang/test/Fir/cuf-invalid.fir index 9c5ffe7176a3bd..5d3aa55cf346a4 100644 --- a/flang/test/Fir/cuf-invalid.fir +++ b/flang/test/Fir/cuf-invalid.fir @@ -48,3 +48,40 @@ func.func @_QPsub1() { %13 = fir.cuda_allocate %11 : !fir.ref> errmsg(%1 : !fir.ref) {cuda_attr = #fir.cuda, hasStat} -> i32 return } + +// ----- + +func.func @_QPsub1() { + %1 = fir.alloca i32 + // expected-error@+1{{'fir.cuda_deallocate' op expect box to be a reference to class or box type value}} + %2 = fir.cuda_deallocate %1 : !fir.ref {cuda_attr = #fir.cuda} -> i32 + return +} + +// ----- + +func.func @_QPsub1() { + %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub1Ea"} + %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %1 = fir.alloca i32 + %11 = fir.convert %4#1 : (!fir.ref>>>) -> !fir.ref> + // expected-error@+1{{'fir.cuda_deallocate' op expect errmsg to be a reference to/or a box type value}} + %13 = fir.cuda_deallocate %11 : !fir.ref> errmsg(%1 : !fir.ref) {cuda_attr = #fir.cuda, hasStat} -> i32 + return +} + +// ----- + +func.func @_QPsub1() { + %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub1Ea"} + %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %c100 = arith.constant 100 : index + %7 = fir.alloca !fir.char<1,100> {bindc_name = "msg", uniq_name = "_QFsub1Emsg"} + %8:2 = hlfir.declare %7 typeparams %c100 {uniq_name = "_QFsub1Emsg"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) + %9 = fir.embox %8#1 : (!fir.ref>) -> !fir.box> + %11 = fir.convert %4#1 : (!fir.ref>>>) -> !fir.ref> + %16 = fir.convert %9 : (!fir.box>) -> !fir.box + // expected-error@+1{{'fir.cuda_deallocate' op expect stat attribute when errmsg is provided}} + %13 = fir.cuda_deallocate %11 : 
!fir.ref> errmsg(%16 : !fir.box) {cuda_attr = #fir.cuda} -> i32 + return +} diff --git a/flang/test/Fir/cuf.mlir b/flang/test/Fir/cuf.mlir index 67eff31b35b2b8..71f0652067facf 100644 --- a/flang/test/Fir/cuf.mlir +++ b/flang/test/Fir/cuf.mlir @@ -7,10 +7,12 @@ func.func @_QPsub1() { %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) %11 = fir.convert %4#1 : (!fir.ref>>>) -> !fir.ref> %13 = fir.cuda_allocate %11 : !fir.ref> {cuda_attr = #fir.cuda} -> i32 + %14 = fir.cuda_deallocate %11 : !fir.ref> {cuda_attr = #fir.cuda} -> i32 return } // CHECK: fir.cuda_allocate %{{.*}} : !fir.ref> {cuda_attr = #fir.cuda} -> i32 +// CHECK: fir.cuda_deallocate %{{.*}} : !fir.ref> {cuda_attr = #fir.cuda} -> i32 // ----- @@ -66,5 +68,9 @@ func.func @_QPsub1() { %11 = fir.convert %4#1 : (!fir.ref>>>) -> !fir.ref> %16 = fir.convert %9 : (!fir.box>) -> !fir.box %13 = fir.cuda_allocate %11 : !fir.ref> errmsg(%16 : !fir.box) {cuda_attr = #fir.cuda, hasStat} -> i32 + %14 = fir.cuda_deallocate %11 : !fir.ref> errmsg(%16 : !fir.box) {cuda_attr = #fir.cuda, hasStat} -> i32 return } + +// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref> errmsg(%{{.*}} : !fir.box) {cuda_attr = #fir.cuda, hasStat} -> i32 +// CHECK: fir.cuda_deallocate %{{.*}} : !fir.ref> errmsg(%{{.*}} : !fir.box) {cuda_attr = #fir.cuda, hasStat} -> i32 diff --git a/flang/test/Semantics/OpenMP/clause-validity01.f90 b/flang/test/Semantics/OpenMP/clause-validity01.f90 index 74f154bb0ad67d..21b99cb82549ad 100644 --- a/flang/test/Semantics/OpenMP/clause-validity01.f90 +++ b/flang/test/Semantics/OpenMP/clause-validity01.f90 @@ -342,8 +342,8 @@ a = 1.0 !ERROR: COPYPRIVATE clause is not allowed on the END WORKSHARE directive !$omp end workshare nowait copyprivate(a) + !ERROR: NOWAIT clause is not allowed on the OMP WORKSHARE directive, use it on OMP END WORKSHARE directive !$omp workshare nowait - !ERROR: NOWAIT clause is not allowed on the 
WORKSHARE directive, use it on OMP END WORKSHARE directive !$omp end workshare !$omp end parallel diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt index ecb952b60cc061..f53766777e7530 100644 --- a/libc/hdr/types/CMakeLists.txt +++ b/libc/hdr/types/CMakeLists.txt @@ -28,7 +28,7 @@ add_proxy_header_library( fenv_t.h FULL_BUILD_DEPENDS libc.include.llvm-libc-types.fenv_t - libc.incude.fenv + libc.include.fenv ) add_proxy_header_library( @@ -37,5 +37,5 @@ add_proxy_header_library( fexcept_t.h FULL_BUILD_DEPENDS libc.include.llvm-libc-types.fexcept_t - libc.incude.fenv + libc.include.fenv ) diff --git a/libc/src/fenv/CMakeLists.txt b/libc/src/fenv/CMakeLists.txt index a28a7ca4c2d821..17e99474120627 100644 --- a/libc/src/fenv/CMakeLists.txt +++ b/libc/src/fenv/CMakeLists.txt @@ -17,7 +17,6 @@ add_entrypoint_object( HDRS fesetround.h DEPENDS - libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -30,7 +29,6 @@ add_entrypoint_object( HDRS feclearexcept.h DEPENDS - libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -43,7 +41,6 @@ add_entrypoint_object( HDRS feraiseexcept.h DEPENDS - libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -56,7 +53,6 @@ add_entrypoint_object( HDRS fetestexcept.h DEPENDS - libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -69,7 +65,6 @@ add_entrypoint_object( HDRS fegetenv.h DEPENDS - libc.hdr.fenv_macros libc.hdr.types.fenv_t libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS @@ -83,7 +78,6 @@ add_entrypoint_object( HDRS fesetenv.h DEPENDS - libc.hdr.fenv_macros libc.hdr.types.fenv_t libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS @@ -111,7 +105,6 @@ add_entrypoint_object( HDRS fesetexcept.h DEPENDS - libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -166,7 +159,6 @@ add_entrypoint_object( HDRS feenableexcept.h DEPENDS - libc.hdr.fenv_macros 
libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -179,7 +171,6 @@ add_entrypoint_object( HDRS fedisableexcept.h DEPENDS - libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -192,7 +183,6 @@ add_entrypoint_object( HDRS fegetexcept.h DEPENDS - libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 diff --git a/libc/src/fenv/fegetexceptflag.h b/libc/src/fenv/fegetexceptflag.h index ad72161e536f83..fcb9598658d43b 100644 --- a/libc/src/fenv/fegetexceptflag.h +++ b/libc/src/fenv/fegetexceptflag.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_FENV_FEGETEXCEPTFLAG_H #define LLVM_LIBC_SRC_FENV_FEGETEXCEPTFLAG_H -#include +#include "hdr/types/fexcept_t.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/fenv/fesetexceptflag.h b/libc/src/fenv/fesetexceptflag.h index 15e62eda1b840c..a018358dc9dfcc 100644 --- a/libc/src/fenv/fesetexceptflag.h +++ b/libc/src/fenv/fesetexceptflag.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_FENV_FESETEXCEPTFLAG_H #define LLVM_LIBC_SRC_FENV_FESETEXCEPTFLAG_H -#include +#include "hdr/types/fexcept_t.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/fenv/feupdateenv.cpp b/libc/src/fenv/feupdateenv.cpp index 7e81b9476da91b..06648635381555 100644 --- a/libc/src/fenv/feupdateenv.cpp +++ b/libc/src/fenv/feupdateenv.cpp @@ -10,7 +10,7 @@ #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/common.h" -#include +#include "hdr/types/fenv_t.h" namespace LIBC_NAMESPACE { diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 7528228b3b7f9b..f605c3bbbe9dce 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -50,7 +50,7 @@ if( LIBCLC_STANDALONE_BUILD OR CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DI endif() # Import required tools as targets - foreach( tool clang llvm-as llvm-link opt ) + foreach( tool IN ITEMS clang llvm-as llvm-link opt ) find_program( LLVM_TOOL_${tool} ${tool} PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH ) add_executable( libclc::${tool} IMPORTED 
GLOBAL ) set_target_properties( libclc::${tool} PROPERTIES IMPORTED_LOCATION ${LLVM_TOOL_${tool}} ) @@ -68,7 +68,7 @@ else() message(FATAL_ERROR "Clang is not enabled, but is required to build libclc in-tree") endif() - foreach( tool clang llvm-as llvm-link opt ) + foreach( tool IN ITEMS clang llvm-as llvm-link opt ) add_executable(libclc::${tool} ALIAS ${tool}) endforeach() endif() @@ -181,7 +181,7 @@ install( FILES ${CMAKE_CURRENT_BINARY_DIR}/libclc.pc DESTINATION "${CMAKE_INSTAL install( DIRECTORY generic/include/clc DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" ) if( ENABLE_RUNTIME_SUBNORMAL ) - foreach( file subnormal_use_default subnormal_disable ) + foreach( file IN ITEMS subnormal_use_default subnormal_disable ) link_bc( TARGET ${file} INPUTS ${PROJECT_SOURCE_DIR}/generic/lib/${file}.ll @@ -326,7 +326,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) # Generated files are given just as file names, which we must make # absolute to the binary directory. set( input_file ${CMAKE_CURRENT_BINARY_DIR}/${file} ) - set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${file}.o" ) + set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${file}.bc" ) else() # Other files are originally relative to each SOURCE file, which are # then make relative to the libclc root directory. 
We must normalize @@ -336,7 +336,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) get_filename_component( abs_path ${file} ABSOLUTE BASE_DIR ${PROJECT_SOURCE_DIR} ) file( RELATIVE_PATH root_rel_path ${PROJECT_SOURCE_DIR} ${abs_path} ) set( input_file ${PROJECT_SOURCE_DIR}/${file} ) - set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${root_rel_path}.o" ) + set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${root_rel_path}.bc" ) endif() get_filename_component( file_dir ${file} DIRECTORY ) @@ -364,7 +364,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) set( spv_suffix ${arch_suffix}.spv ) add_custom_command( OUTPUT ${spv_suffix} COMMAND ${LLVM_SPIRV} ${spvflags} -o ${spv_suffix} ${builtins_link_lib} - DEPENDS ${builtins_link_lib_tgt} + DEPENDS ${builtins_link_lib} ) add_custom_target( "prepare-${spv_suffix}" ALL DEPENDS "${spv_suffix}" ) install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${spv_suffix} @@ -376,7 +376,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc COMMAND libclc::opt ${opt_flags} -o ${builtins_opt_lib_tgt}.bc ${builtins_link_lib} - DEPENDS libclc::opt ${builtins_link_lib_tgt} + DEPENDS libclc::opt ${builtins_link_lib} ) add_custom_target( ${builtins_opt_lib_tgt} ALL DEPENDS ${builtins_opt_lib_tgt}.bc @@ -385,12 +385,13 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) PROPERTIES TARGET_FILE ${builtins_opt_lib_tgt}.bc ) + set( builtins_opt_lib $ ) + # Add prepare target set( obj_suffix ${arch_suffix}.bc ) add_custom_command( OUTPUT ${obj_suffix} - COMMAND prepare_builtins -o ${obj_suffix} - $ - DEPENDS ${builtins_opt_lib_tgt} prepare_builtins ) + COMMAND prepare_builtins -o ${obj_suffix} ${builtins_opt_lib} + DEPENDS ${builtins_opt_lib} prepare_builtins ) add_custom_target( prepare-${obj_suffix} ALL DEPENDS ${obj_suffix} ) # nvptx-- targets don't include workitem builtins diff --git a/libcxx/.clang-format b/libcxx/.clang-format index 39ae1322ffa8a6..c37ab817bca906 100644 --- a/libcxx/.clang-format +++ b/libcxx/.clang-format @@ -24,6 
+24,7 @@ AttributeMacros: [ '_LIBCPP_CONSTEXPR_SINCE_CXX23', '_LIBCPP_CONSTEXPR', '_LIBCPP_CONSTINIT', + '_LIBCPP_DEPRECATED_ATOMIC_SYNC', '_LIBCPP_DEPRECATED_IN_CXX11', '_LIBCPP_DEPRECATED_IN_CXX14', '_LIBCPP_DEPRECATED_IN_CXX17', diff --git a/libcxx/docs/BuildingLibcxx.rst b/libcxx/docs/BuildingLibcxx.rst index 28145ed1049e0f..a0a0cdb4339749 100644 --- a/libcxx/docs/BuildingLibcxx.rst +++ b/libcxx/docs/BuildingLibcxx.rst @@ -206,6 +206,12 @@ libc++ specific options Toggle the installation of the libc++ headers. +.. option:: LIBCXX_INSTALL_MODULES:BOOL + + **Default**: ``OFF`` + + Toggle the installation of the experimental libc++ module sources. + .. option:: LIBCXX_ENABLE_SHARED:BOOL **Default**: ``ON`` diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst index e5db17daa48233..53cc7a77d1af48 100644 --- a/libcxx/docs/ReleaseNotes/19.rst +++ b/libcxx/docs/ReleaseNotes/19.rst @@ -75,6 +75,10 @@ Improvements and New Features Deprecations and Removals ------------------------- +- The C++20 synchronization library (````, ````, ``atomic::wait``, etc.) has been deprecated + in language modes prior to C++20. If you are using these features prior to C++20, please update to ``-std=c++20``. + In LLVM 20, the C++20 synchronization library will be removed entirely in language modes prior to C++20. + - TODO: The ``LIBCXX_ENABLE_ASSERTIONS`` CMake variable that was used to enable the safe mode has been deprecated and setting it triggers an error; use the ``LIBCXX_HARDENING_MODE`` CMake variable with the value ``extensive`` instead. Similarly, the ``_LIBCPP_ENABLE_ASSERTIONS`` macro has been deprecated (setting it to ``1`` still enables the extensive mode in @@ -93,7 +97,7 @@ Deprecations and Removals - The ``_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS`` and ``_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_VOID_SPECIALIZATION`` macros have been removed in LLVM 19. 
-- TODO: The ``_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES`` and ``_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES`` macros have +- The ``_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES`` and ``_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES`` macros have been removed in LLVM 19. C++17 and C++20 removed features can still be re-enabled individually. - The ``_LIBCPP_INLINE_VISIBILITY`` and ``_VSTD`` macros have been removed in LLVM 19. diff --git a/libcxx/docs/UsingLibcxx.rst b/libcxx/docs/UsingLibcxx.rst index c0e85ad4d5e247..8f945656de1ca6 100644 --- a/libcxx/docs/UsingLibcxx.rst +++ b/libcxx/docs/UsingLibcxx.rst @@ -208,12 +208,6 @@ safety annotations. C++17 Specific Configuration Macros ----------------------------------- -**_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES**: - This macro is used to re-enable all the features removed in C++17. The effect - is equivalent to manually defining each macro listed below. - This macro is deprecated and will be removed in LLVM-19. Use the - individual macros listed below. - **_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR**: This macro is used to re-enable `auto_ptr`. @@ -238,12 +232,6 @@ C++20 Specific Configuration Macros This macro is used to re-enable the function ``std::shared_ptr<...>::unique()``. -**_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES**: - This macro is used to re-enable all the features removed in C++20. The effect - is equivalent to manually defining each macro listed below. - This macro is deprecated and will be removed in LLVM-19. Use the - individual macros listed below. 
- **_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS**: This macro is used to re-enable the `argument_type`, `result_type`, `first_argument_type`, and `second_argument_type` members of class diff --git a/libcxx/include/__atomic/atomic.h b/libcxx/include/__atomic/atomic.h index 3dfb6937d0325e..bd3f659c22df01 100644 --- a/libcxx/include/__atomic/atomic.h +++ b/libcxx/include/__atomic/atomic.h @@ -462,22 +462,26 @@ atomic_wait_explicit(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __ // atomic_notify_one template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_one(volatile atomic<_Tp>* __o) _NOEXCEPT { +_LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void +atomic_notify_one(volatile atomic<_Tp>* __o) _NOEXCEPT { __o->notify_one(); } template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_one(atomic<_Tp>* __o) _NOEXCEPT { +_LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void +atomic_notify_one(atomic<_Tp>* __o) _NOEXCEPT { __o->notify_one(); } // atomic_notify_all template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_all(volatile atomic<_Tp>* __o) _NOEXCEPT { +_LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void +atomic_notify_all(volatile atomic<_Tp>* __o) _NOEXCEPT { __o->notify_all(); } template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_all(atomic<_Tp>* __o) _NOEXCEPT { +_LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void +atomic_notify_all(atomic<_Tp>* __o) _NOEXCEPT { __o->notify_all(); } diff --git a/libcxx/include/__atomic/atomic_flag.h b/libcxx/include/__atomic/atomic_flag.h index 084366237c16eb..3ec3366ecaaf98 100644 --- a/libcxx/include/__atomic/atomic_flag.h +++ b/libcxx/include/__atomic/atomic_flag.h @@ -49,22 +49,26 @@ struct atomic_flag { __cxx_atomic_store(&__a_, _LIBCPP_ATOMIC_FLAG_TYPE(false), __m); } - _LIBCPP_AVAILABILITY_SYNC 
_LIBCPP_HIDE_FROM_ABI void wait(bool __v, memory_order __m = memory_order_seq_cst) const - volatile _NOEXCEPT { + _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void + wait(bool __v, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT { std::__atomic_wait(*this, _LIBCPP_ATOMIC_FLAG_TYPE(__v), __m); } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void + _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(bool __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT { std::__atomic_wait(*this, _LIBCPP_ATOMIC_FLAG_TYPE(__v), __m); } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT { + _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT { + std::__atomic_notify_one(*this); + } + _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { std::__atomic_notify_one(*this); } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { std::__atomic_notify_one(*this); } _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() volatile _NOEXCEPT { std::__atomic_notify_all(*this); } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { std::__atomic_notify_all(*this); } + _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { + std::__atomic_notify_all(*this); + } #if _LIBCPP_STD_VER >= 20 _LIBCPP_HIDE_FROM_ABI constexpr atomic_flag() _NOEXCEPT : __a_(false) {} @@ -141,41 +145,43 @@ inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_clear_explicit(atomic_flag* __o, m __o->clear(__m); } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_wait(const volatile atomic_flag* __o, bool __v) _NOEXCEPT { __o->wait(__v); } -inline 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_wait(const atomic_flag* __o, bool __v) _NOEXCEPT { __o->wait(__v); } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_wait_explicit(const volatile atomic_flag* __o, bool __v, memory_order __m) _NOEXCEPT { __o->wait(__v, __m); } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_wait_explicit(const atomic_flag* __o, bool __v, memory_order __m) _NOEXCEPT { __o->wait(__v, __m); } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_one(volatile atomic_flag* __o) _NOEXCEPT { __o->notify_one(); } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_one(atomic_flag* __o) _NOEXCEPT { +inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +atomic_flag_notify_one(atomic_flag* __o) _NOEXCEPT { __o->notify_one(); } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_all(volatile atomic_flag* __o) _NOEXCEPT { __o->notify_all(); } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_all(atomic_flag* __o) _NOEXCEPT { +inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +atomic_flag_notify_all(atomic_flag* __o) _NOEXCEPT { __o->notify_all(); } diff --git a/libcxx/include/__availability b/libcxx/include/__availability index bb3ed0a8da521b..aa761eb5bfe5e3 100644 --- a/libcxx/include/__availability +++ b/libcxx/include/__availability @@ 
-160,6 +160,15 @@ # define _LIBCPP_AVAILABILITY_HAS_TZDB 1 # define _LIBCPP_AVAILABILITY_TZDB +// These macros determine whether we assume that std::bad_function_call and +// std::bad_expected_access provide a key function in the dylib. This allows +// centralizing their vtable and typeinfo instead of having all TUs provide +// a weak definition that then gets deduplicated. +# define _LIBCPP_AVAILABILITY_HAS_BAD_FUNCTION_CALL_KEY_FUNCTION 1 +# define _LIBCPP_AVAILABILITY_BAD_FUNCTION_CALL_KEY_FUNCTION +# define _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION 1 +# define _LIBCPP_AVAILABILITY_BAD_EXPECTED_ACCESS_KEY_FUNCTION + #elif defined(__APPLE__) # define _LIBCPP_AVAILABILITY_HAS_BAD_OPTIONAL_ACCESS \ @@ -290,6 +299,13 @@ # else # define _LIBCPP_AVAILABILITY_HAS_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1 1 # endif + +# define _LIBCPP_AVAILABILITY_HAS_BAD_FUNCTION_CALL_KEY_FUNCTION 0 +# define _LIBCPP_AVAILABILITY_BAD_FUNCTION_CALL_KEY_FUNCTION __attribute__((unavailable)) + +# define _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION 0 +# define _LIBCPP_AVAILABILITY_BAD_EXPECTED_ACCESS_KEY_FUNCTION __attribute__((unavailable)) + #else // ...New vendors can add availability markup here... diff --git a/libcxx/include/__chrono/formatter.h b/libcxx/include/__chrono/formatter.h index 4ad59382a4148a..d932a99f4b9983 100644 --- a/libcxx/include/__chrono/formatter.h +++ b/libcxx/include/__chrono/formatter.h @@ -10,6 +10,7 @@ #ifndef _LIBCPP___CHRONO_FORMATTER_H #define _LIBCPP___CHRONO_FORMATTER_H +#include <__algorithm/ranges_copy.h> #include <__chrono/calendar.h> #include <__chrono/concepts.h> #include <__chrono/convert_to_tm.h> @@ -79,7 +80,7 @@ namespace __formatter { // small). Therefore a duration uses its own conversion. 
template _LIBCPP_HIDE_FROM_ABI void -__format_sub_seconds(const chrono::duration<_Rep, _Period>& __value, basic_stringstream<_CharT>& __sstr) { +__format_sub_seconds(basic_stringstream<_CharT>& __sstr, const chrono::duration<_Rep, _Period>& __value) { __sstr << std::use_facet>(__sstr.getloc()).decimal_point(); using __duration = chrono::duration<_Rep, _Period>; @@ -110,13 +111,13 @@ __format_sub_seconds(const chrono::duration<_Rep, _Period>& __value, basic_strin } template -_LIBCPP_HIDE_FROM_ABI void __format_sub_seconds(const _Tp& __value, basic_stringstream<_CharT>& __sstr) { - __formatter::__format_sub_seconds(__value.time_since_epoch(), __sstr); +_LIBCPP_HIDE_FROM_ABI void __format_sub_seconds(basic_stringstream<_CharT>& __sstr, const _Tp& __value) { + __formatter::__format_sub_seconds(__sstr, __value.time_since_epoch()); } template _LIBCPP_HIDE_FROM_ABI void -__format_sub_seconds(const chrono::hh_mm_ss<_Duration>& __value, basic_stringstream<_CharT>& __sstr) { +__format_sub_seconds(basic_stringstream<_CharT>& __sstr, const chrono::hh_mm_ss<_Duration>& __value) { __sstr << std::use_facet>(__sstr.getloc()).decimal_point(); if constexpr (chrono::treat_as_floating_point_v) std::format_to(std::ostreambuf_iterator<_CharT>{__sstr}, @@ -143,7 +144,7 @@ consteval bool __use_fraction() { } template -_LIBCPP_HIDE_FROM_ABI void __format_year(int __year, basic_stringstream<_CharT>& __sstr) { +_LIBCPP_HIDE_FROM_ABI void __format_year(basic_stringstream<_CharT>& __sstr, int __year) { if (__year < 0) { __sstr << _CharT('-'); __year = -__year; @@ -159,7 +160,7 @@ _LIBCPP_HIDE_FROM_ABI void __format_year(int __year, basic_stringstream<_CharT>& } template -_LIBCPP_HIDE_FROM_ABI void __format_century(int __year, basic_stringstream<_CharT>& __sstr) { +_LIBCPP_HIDE_FROM_ABI void __format_century(basic_stringstream<_CharT>& __sstr, int __year) { // TODO FMT Write an issue // [tab:time.format.spec] // %C The year divided by 100 using floored division. 
If the result is a @@ -170,10 +171,45 @@ _LIBCPP_HIDE_FROM_ABI void __format_century(int __year, basic_stringstream<_Char __sstr << std::format(_LIBCPP_STATICALLY_WIDEN(_CharT, "{:02}"), __century); } +// Implements the %z format specifier according to [tab:time.format.spec], where +// '__modifier' signals %Oz or %Ez were used. (Both modifiers behave the same, +// so there is no need to distinguish between them.) +template +_LIBCPP_HIDE_FROM_ABI void +__format_zone_offset(basic_stringstream<_CharT>& __sstr, chrono::seconds __offset, bool __modifier) { + if (__offset < 0s) { + __sstr << _CharT('-'); + __offset = -__offset; + } else { + __sstr << _CharT('+'); + } + + chrono::hh_mm_ss __hms{__offset}; + std::ostreambuf_iterator<_CharT> __out_it{__sstr}; + if (__modifier) + std::format_to(__out_it, _LIBCPP_STATICALLY_WIDEN(_CharT, "{:%H:%M}"), __hms); + else + std::format_to(__out_it, _LIBCPP_STATICALLY_WIDEN(_CharT, "{:%H%M}"), __hms); +} + +// Helper to store the time zone information needed for formatting. +struct _LIBCPP_HIDE_FROM_ABI __time_zone { + // Typically these abbreviations are short and fit in the string's internal + // buffer. 
+ string __abbrev; + chrono::seconds __offset; +}; + +template +_LIBCPP_HIDE_FROM_ABI __time_zone __convert_to_time_zone([[maybe_unused]] const _Tp& __value) { + return {"UTC", chrono::seconds{0}}; +} + template _LIBCPP_HIDE_FROM_ABI void __format_chrono_using_chrono_specs( - const _Tp& __value, basic_stringstream<_CharT>& __sstr, basic_string_view<_CharT> __chrono_specs) { + basic_stringstream<_CharT>& __sstr, const _Tp& __value, basic_string_view<_CharT> __chrono_specs) { tm __t = std::__convert_to_tm(__value); + __time_zone __z = __formatter::__convert_to_time_zone(__value); const auto& __facet = std::use_facet>(__sstr.getloc()); for (auto __it = __chrono_specs.begin(); __it != __chrono_specs.end(); ++__it) { if (*__it == _CharT('%')) { @@ -196,7 +232,7 @@ _LIBCPP_HIDE_FROM_ABI void __format_chrono_using_chrono_specs( // strftime's output is only defined in the range [00, 99]. int __year = __t.tm_year + 1900; if (__year < 1000 || __year > 9999) - __formatter::__format_century(__year, __sstr); + __formatter::__format_century(__sstr, __year); else __facet.put( {__sstr}, __sstr, _CharT(' '), std::addressof(__t), std::to_address(__s), std::to_address(__it + 1)); @@ -242,7 +278,7 @@ _LIBCPP_HIDE_FROM_ABI void __format_chrono_using_chrono_specs( __facet.put( {__sstr}, __sstr, _CharT(' '), std::addressof(__t), std::to_address(__s), std::to_address(__it + 1)); if constexpr (__use_fraction<_Tp>()) - __formatter::__format_sub_seconds(__value, __sstr); + __formatter::__format_sub_seconds(__sstr, __value); break; // Unlike time_put and strftime the formatting library requires %Y @@ -283,22 +319,26 @@ _LIBCPP_HIDE_FROM_ABI void __format_chrono_using_chrono_specs( // Depending on the platform's libc the range of supported years is // limited. Intead of of testing all conditions use the internal // implementation unconditionally. 
- __formatter::__format_year(__t.tm_year + 1900, __sstr); + __formatter::__format_year(__sstr, __t.tm_year + 1900); break; case _CharT('F'): { int __year = __t.tm_year + 1900; if (__year < 1000) { - __formatter::__format_year(__year, __sstr); + __formatter::__format_year(__sstr, __year); __sstr << std::format(_LIBCPP_STATICALLY_WIDEN(_CharT, "-{:02}-{:02}"), __t.tm_mon + 1, __t.tm_mday); } else __facet.put( {__sstr}, __sstr, _CharT(' '), std::addressof(__t), std::to_address(__s), std::to_address(__it + 1)); } break; + case _CharT('z'): + __formatter::__format_zone_offset(__sstr, __z.__offset, false); + break; + case _CharT('Z'): - // TODO FMT Add proper timezone support. - __sstr << _LIBCPP_STATICALLY_WIDEN(_CharT, "UTC"); + // __abbrev is always a char so the copy may convert. + ranges::copy(__z.__abbrev, std::ostreambuf_iterator<_CharT>{__sstr}); break; case _CharT('O'): @@ -310,13 +350,19 @@ _LIBCPP_HIDE_FROM_ABI void __format_chrono_using_chrono_specs( ++__it; __facet.put( {__sstr}, __sstr, _CharT(' '), std::addressof(__t), std::to_address(__s), std::to_address(__it + 1)); - __formatter::__format_sub_seconds(__value, __sstr); + __formatter::__format_sub_seconds(__sstr, __value); break; } } + + // Oz produces the same output as Ez below. [[fallthrough]]; case _CharT('E'): ++__it; + if (*__it == 'z') { + __formatter::__format_zone_offset(__sstr, __z.__offset, true); + break; + } [[fallthrough]]; default: __facet.put( @@ -512,7 +558,7 @@ __format_chrono(const _Tp& __value, if constexpr (chrono::__is_duration<_Tp>::value) { if (__value < __value.zero()) __sstr << _CharT('-'); - __formatter::__format_chrono_using_chrono_specs(chrono::abs(__value), __sstr, __chrono_specs); + __formatter::__format_chrono_using_chrono_specs(__sstr, chrono::abs(__value), __chrono_specs); // TODO FMT When keeping the precision it will truncate the string. // Note that the behaviour what the precision does isn't specified. 
__specs.__precision_ = -1; @@ -556,7 +602,7 @@ __format_chrono(const _Tp& __value, __sstr << _CharT('-'); } - __formatter::__format_chrono_using_chrono_specs(__value, __sstr, __chrono_specs); + __formatter::__format_chrono_using_chrono_specs(__sstr, __value, __chrono_specs); } } diff --git a/libcxx/include/__config b/libcxx/include/__config index 82782b31c557b1..4ccef2ca0d73b4 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -16,17 +16,6 @@ # pragma GCC system_header #endif -#if defined(_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES) && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) -# pragma clang deprecated( \ - _LIBCPP_ENABLE_CXX17_REMOVED_FEATURES, \ - "_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES is deprecated in LLVM 18 and will be removed in LLVM 19") -#endif -#if defined(_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES) && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) -# pragma clang deprecated( \ - _LIBCPP_ENABLE_CXX20_REMOVED_FEATURES, \ - "_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES is deprecated in LLVM 18 and will be removed in LLVM 19") -#endif - #if defined(__apple_build_version__) // Given AppleClang XX.Y.Z, _LIBCPP_APPLE_CLANG_VER is XXYZ (e.g. AppleClang 14.0.3 => 1403) # define _LIBCPP_COMPILER_CLANG_BASED @@ -120,14 +109,11 @@ # define _LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB # define _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB # define _LIBCPP_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE -// Define a key function for `bad_function_call` in the library, to centralize -// its vtable and typeinfo to libc++ rather than having all other libraries -// using that class define their own copies. -# define _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION -// Override the default return value of exception::what() for -// bad_function_call::what() with a string that is specific to -// bad_function_call (see http://wg21.link/LWG2233). This is an ABI break -// because it changes the vtable layout of bad_function_call. 
+// Override the default return value of exception::what() for bad_function_call::what() +// with a string that is specific to bad_function_call (see http://wg21.link/LWG2233). +// This is an ABI break on platforms that sign and authenticate vtable function pointers +// because it changes the mangling of the virtual function located in the vtable, which +// changes how it gets signed. # define _LIBCPP_ABI_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE // Enable optimized version of __do_get_(un)signed which avoids redundant copies. # define _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET @@ -197,19 +183,6 @@ # if defined(__FreeBSD__) && __FreeBSD__ < 14 # define _LIBCPP_DEPRECATED_ABI_DISABLE_PAIR_TRIVIAL_COPY_CTOR # endif -// For XCOFF linkers, we have problems if we see a weak hidden version of a symbol -// in user code (like you get with -fvisibility-inlines-hidden) and then a strong def -// in the library, so we need to always rely on the library version. -# if defined(_AIX) -# define _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION -# endif -# endif - -# if defined(_LIBCPP_BUILDING_LIBRARY) || _LIBCPP_ABI_VERSION >= 2 -// Define a key function for `bad_function_call` in the library, to centralize -// its vtable and typeinfo to libc++ rather than having all other libraries -// using that class define their own copies. -# define _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION # endif // We had some bugs where we use [[no_unique_address]] together with construct_at, @@ -972,6 +945,14 @@ typedef __char32_t char32_t; # define _LIBCPP_DEPRECATED_(m) # endif +# if _LIBCPP_STD_VER < 20 +# define _LIBCPP_DEPRECATED_ATOMIC_SYNC \ + _LIBCPP_DEPRECATED_("The C++20 synchronization library has been deprecated prior to C++20. 
Please update to " \ + "using -std=c++20 if you need to use these facilities.") +# else +# define _LIBCPP_DEPRECATED_ATOMIC_SYNC /* nothing */ +# endif + # if !defined(_LIBCPP_CXX03_LANG) # define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED # else @@ -1238,21 +1219,6 @@ typedef __char32_t char32_t; # define _LIBCPP_IF_WIDE_CHARACTERS(...) __VA_ARGS__ # endif -# if defined(_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES) -# define _LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR -# define _LIBCPP_ENABLE_CXX17_REMOVED_BINDERS -# define _LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE -# define _LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS -# define _LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION -# endif // _LIBCPP_ENABLE_CXX17_REMOVED_FEATURES - -# if defined(_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES) -# define _LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS -# define _LIBCPP_ENABLE_CXX20_REMOVED_NEGATORS -# define _LIBCPP_ENABLE_CXX20_REMOVED_RAW_STORAGE_ITERATOR -# define _LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS -# endif // _LIBCPP_ENABLE_CXX20_REMOVED_FEATURES - // clang-format off # define _LIBCPP_PUSH_MACROS _Pragma("push_macro(\"min\")") _Pragma("push_macro(\"max\")") _Pragma("push_macro(\"refresh\")") _Pragma("push_macro(\"move\")") _Pragma("push_macro(\"erase\")") # define _LIBCPP_POP_MACROS _Pragma("pop_macro(\"min\")") _Pragma("pop_macro(\"max\")") _Pragma("pop_macro(\"refresh\")") _Pragma("pop_macro(\"move\")") _Pragma("pop_macro(\"erase\")") diff --git a/libcxx/include/__expected/bad_expected_access.h b/libcxx/include/__expected/bad_expected_access.h index 9d490307b68081..ef29fa50883136 100644 --- a/libcxx/include/__expected/bad_expected_access.h +++ b/libcxx/include/__expected/bad_expected_access.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___EXPECTED_BAD_EXPECTED_ACCESS_H #define _LIBCPP___EXPECTED_BAD_EXPECTED_ACCESS_H +#include <__availability> #include <__config> #include <__exception/exception.h> #include <__utility/move.h> @@ -28,9 +29,11 @@ template class bad_expected_access; 
_LIBCPP_DIAGNOSTIC_PUSH +# if !_LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wweak-vtables") +# endif template <> -class bad_expected_access : public exception { +class _LIBCPP_EXPORTED_FROM_ABI bad_expected_access : public exception { protected: _LIBCPP_HIDE_FROM_ABI bad_expected_access() noexcept = default; _LIBCPP_HIDE_FROM_ABI bad_expected_access(const bad_expected_access&) noexcept = default; @@ -40,11 +43,11 @@ class bad_expected_access : public exception { _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~bad_expected_access() override = default; public: - // The way this has been designed (by using a class template below) means that we'll already - // have a profusion of these vtables in TUs, and the dynamic linker will already have a bunch - // of work to do. So it is not worth hiding the specialization in the dylib, given that - // it adds deployment target restrictions. +# if _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION + const char* what() const noexcept override; +# else _LIBCPP_HIDE_FROM_ABI_VIRTUAL const char* what() const noexcept override { return "bad access to std::expected"; } +# endif }; _LIBCPP_DIAGNOSTIC_POP diff --git a/libcxx/include/__functional/function.h b/libcxx/include/__functional/function.h index 1faa9e92ebd63e..36057706933d43 100644 --- a/libcxx/include/__functional/function.h +++ b/libcxx/include/__functional/function.h @@ -11,6 +11,7 @@ #define _LIBCPP___FUNCTIONAL_FUNCTION_H #include <__assert> +#include <__availability> #include <__config> #include <__exception/exception.h> #include <__functional/binary_function.h> @@ -55,7 +56,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD // bad_function_call _LIBCPP_DIAGNOSTIC_PUSH +# if !_LIBCPP_AVAILABILITY_HAS_BAD_FUNCTION_CALL_KEY_FUNCTION _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wweak-vtables") +# endif class _LIBCPP_EXPORTED_FROM_ABI bad_function_call : public exception { public: _LIBCPP_HIDE_FROM_ABI bad_function_call() _NOEXCEPT = default; @@ -64,7 +67,7 @@ class 
_LIBCPP_EXPORTED_FROM_ABI bad_function_call : public exception { // Note that when a key function is not used, every translation unit that uses // bad_function_call will end up containing a weak definition of the vtable and // typeinfo. -# ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION +# if _LIBCPP_AVAILABILITY_HAS_BAD_FUNCTION_CALL_KEY_FUNCTION ~bad_function_call() _NOEXCEPT override; # else _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~bad_function_call() _NOEXCEPT override {} diff --git a/libcxx/include/__fwd/ios.h b/libcxx/include/__fwd/ios.h index 82c865d58cc751..48350709d4ce25 100644 --- a/libcxx/include/__fwd/ios.h +++ b/libcxx/include/__fwd/ios.h @@ -18,6 +18,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD +class _LIBCPP_EXPORTED_FROM_ABI ios_base; + template > class _LIBCPP_TEMPLATE_VIS basic_ios; diff --git a/libcxx/include/barrier b/libcxx/include/barrier index c5fd84b91925b1..d776078267625a 100644 --- a/libcxx/include/barrier +++ b/libcxx/include/barrier @@ -257,7 +257,7 @@ public: # endif // !_LIBCPP_HAS_NO_TREE_BARRIER template -class barrier { +class _LIBCPP_DEPRECATED_ATOMIC_SYNC barrier { __barrier_base<_CompletionF> __b_; public: diff --git a/libcxx/include/iosfwd b/libcxx/include/iosfwd index 9af5e05031850d..2481667dd972cf 100644 --- a/libcxx/include/iosfwd +++ b/libcxx/include/iosfwd @@ -25,7 +25,6 @@ template<> struct char_traits; template class allocator; -class ios_base; template > class basic_ios; template > class basic_streambuf; @@ -124,8 +123,6 @@ using wosyncstream = basic_osyncstream; // C++20 _LIBCPP_BEGIN_NAMESPACE_STD -class _LIBCPP_EXPORTED_FROM_ABI ios_base; - template > class _LIBCPP_TEMPLATE_VIS istreambuf_iterator; template > diff --git a/libcxx/include/latch b/libcxx/include/latch index 3cc72583811434..1937617f7dcc61 100644 --- a/libcxx/include/latch +++ b/libcxx/include/latch @@ -66,7 +66,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -class latch { +class _LIBCPP_DEPRECATED_ATOMIC_SYNC latch { __atomic_base __a_; public: diff --git 
a/libcxx/include/semaphore b/libcxx/include/semaphore index 1375ec3f7c04b1..cb2f42c106ca85 100644 --- a/libcxx/include/semaphore +++ b/libcxx/include/semaphore @@ -127,7 +127,7 @@ private: }; template -class counting_semaphore { +class _LIBCPP_DEPRECATED_ATOMIC_SYNC counting_semaphore { __atomic_semaphore_base __semaphore_; public: @@ -172,7 +172,9 @@ public: } }; -using binary_semaphore = counting_semaphore<1>; +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +using binary_semaphore _LIBCPP_DEPRECATED_ATOMIC_SYNC = counting_semaphore<1>; +_LIBCPP_SUPPRESS_DEPRECATED_POP _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist index 46353986f5d7d7..64cf368e6e6849 100644 --- a/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -575,6 +575,7 @@ {'is_defined': True, 'name': '__ZNKSt3__118__time_get_storageIcE15__do_date_orderEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNKSt3__118__time_get_storageIwE15__do_date_orderEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNKSt3__119__shared_weak_count13__get_deleterERKSt9type_info', 'type': 'FUNC'} +{'is_defined': True, 'name': '__ZNKSt3__119bad_expected_accessIvE4whatEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNKSt3__120__codecvt_utf8_utf16IDiE10do_unshiftER11__mbstate_tPcS4_RS4_', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNKSt3__120__codecvt_utf8_utf16IDiE11do_encodingEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNKSt3__120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'type': 'FUNC'} @@ -2073,6 +2074,7 @@ {'is_defined': True, 'name': '__ZTINSt3__117moneypunct_bynameIwLb1EEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTINSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} 
{'is_defined': True, 'name': '__ZTINSt3__119__shared_weak_countE', 'size': 0, 'type': 'OBJECT'} +{'is_defined': True, 'name': '__ZTINSt3__119bad_expected_accessIvEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTINSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTINSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTINSt3__120__codecvt_utf8_utf16IDiEE', 'size': 0, 'type': 'OBJECT'} @@ -2264,6 +2266,7 @@ {'is_defined': True, 'name': '__ZTSNSt3__117moneypunct_bynameIwLb0EEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTSNSt3__117moneypunct_bynameIwLb1EEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTSNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} +{'is_defined': True, 'name': '__ZTSNSt3__119bad_expected_accessIvEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTSNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTSNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTSNSt3__13pmr15memory_resourceE', 'size': 0, 'type': 'OBJECT'} @@ -2482,6 +2485,7 @@ {'is_defined': True, 'name': '__ZTVNSt3__117moneypunct_bynameIwLb1EEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTVNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTVNSt3__119__shared_weak_countE', 'size': 0, 'type': 'OBJECT'} +{'is_defined': True, 'name': '__ZTVNSt3__119bad_expected_accessIvEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTVNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} 
{'is_defined': True, 'name': '__ZTVNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTVNSt3__120__codecvt_utf8_utf16IDiEE', 'size': 0, 'type': 'OBJECT'} diff --git a/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist index fec3a4505a0c6d..8751dffe230259 100644 --- a/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -209,6 +209,7 @@ {'is_defined': True, 'name': '_ZNKSt6__ndk118__time_get_storageIcE15__do_date_orderEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt6__ndk118__time_get_storageIwE15__do_date_orderEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt6__ndk119__shared_weak_count13__get_deleterERKSt9type_info', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNKSt6__ndk119bad_expected_accessIvE4whatEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt6__ndk120__codecvt_utf8_utf16IDiE10do_unshiftER9mbstate_tPcS4_RS4_', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt6__ndk120__codecvt_utf8_utf16IDiE11do_encodingEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt6__ndk120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'type': 'FUNC'} @@ -1722,6 +1723,7 @@ {'is_defined': True, 'name': '_ZTINSt6__ndk118__time_get_storageIwEE', 'size': 12, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt6__ndk118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 12, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt6__ndk119__shared_weak_countE', 'size': 24, 'type': 'OBJECT'} +{'is_defined': True, 'name': '_ZTINSt6__ndk119bad_expected_accessIvEE', 'size': 12, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt6__ndk119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 12, 'type': 'OBJECT'} 
{'is_defined': True, 'name': '_ZTINSt6__ndk119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 12, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt6__ndk120__codecvt_utf8_utf16IDiEE', 'size': 12, 'type': 'OBJECT'} @@ -1958,6 +1960,7 @@ {'is_defined': True, 'name': '_ZTSNSt6__ndk118__time_get_storageIwEE', 'size': 35, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt6__ndk118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 72, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt6__ndk119__shared_weak_countE', 'size': 33, 'type': 'OBJECT'} +{'is_defined': True, 'name': '_ZTSNSt6__ndk119bad_expected_accessIvEE', 'size': 36, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt6__ndk119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 73, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt6__ndk119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 73, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt6__ndk120__codecvt_utf8_utf16IDiEE', 'size': 38, 'type': 'OBJECT'} @@ -2188,6 +2191,7 @@ {'is_defined': True, 'name': '_ZTVNSt6__ndk117moneypunct_bynameIwLb1EEE', 'size': 56, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt6__ndk118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 60, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt6__ndk119__shared_weak_countE', 'size': 28, 'type': 'OBJECT'} +{'is_defined': True, 'name': '_ZTVNSt6__ndk119bad_expected_accessIvEE', 'size': 20, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt6__ndk119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 40, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt6__ndk119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 40, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt6__ndk120__codecvt_utf8_utf16IDiEE', 'size': 48, 'type': 'OBJECT'} diff --git 
a/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist index e52cf98dd4c4f1..7e223e66528847 100644 --- a/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -99,6 +99,7 @@ {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIcE15__do_date_orderEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIwE15__do_date_orderEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__119__shared_weak_count13__get_deleterERKSt9type_info', 'storage_mapping_class': 'DS', 'type': 'FUNC'} +{'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__119bad_expected_accessIvE4whatEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE10do_unshiftERPcS2_S2_S3_', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE11do_encodingEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} @@ -910,6 +911,7 @@ {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__117__widen_from_utf8ILm32EEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__117bad_function_callE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__119__shared_weak_countE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} +{'import_export': 'EXP', 'is_defined': True, 'name': 
'_ZTINSt3__119bad_expected_accessIvEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IDiEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IDsEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IwEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} @@ -969,6 +971,7 @@ {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__117__widen_from_utf8ILm32EEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__117bad_function_callE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__119__shared_weak_countE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'} +{'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__119bad_expected_accessIvEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IDiEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IDsEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IwEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'} @@ -1031,6 +1034,7 @@ {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__117__widen_from_utf8ILm32EEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__117bad_function_callE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__119__shared_weak_countE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} +{'import_export': 
'EXP', 'is_defined': True, 'name': '_ZTVNSt3__119bad_expected_accessIvEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IDiEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IDsEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IwEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} diff --git a/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist index 52a04706ddf20b..407d0456757af2 100644 --- a/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -99,6 +99,7 @@ {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIcE15__do_date_orderEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIwE15__do_date_orderEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__119__shared_weak_count13__get_deleterERKSt9type_info', 'storage_mapping_class': 'DS', 'type': 'FUNC'} +{'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__119bad_expected_accessIvE4whatEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE10do_unshiftERPcS2_S2_S3_', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE11do_encodingEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': 
'_ZNKSt3__120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} @@ -910,6 +911,7 @@ {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__117__widen_from_utf8ILm32EEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__117bad_function_callE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__119__shared_weak_countE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} +{'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__119bad_expected_accessIvEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IDiEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IDsEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IwEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} @@ -969,6 +971,7 @@ {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__117__widen_from_utf8ILm32EEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__117bad_function_callE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__119__shared_weak_countE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'} +{'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__119bad_expected_accessIvEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IDiEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IDsEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'} 
{'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IwEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'} @@ -1031,6 +1034,7 @@ {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__117__widen_from_utf8ILm32EEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__117bad_function_callE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__119__shared_weak_countE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} +{'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__119bad_expected_accessIvEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IDiEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IDsEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IwEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'} diff --git a/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist index c169b4a9925219..d578b41383c0e3 100644 --- a/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -575,6 +575,7 @@ {'is_defined': True, 'name': '__ZNKSt3__118__time_get_storageIcE15__do_date_orderEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNKSt3__118__time_get_storageIwE15__do_date_orderEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNKSt3__119__shared_weak_count13__get_deleterERKSt9type_info', 'type': 'FUNC'} +{'is_defined': True, 'name': '__ZNKSt3__119bad_expected_accessIvE4whatEv', 'type': 'FUNC'} {'is_defined': True, 
'name': '__ZNKSt3__120__codecvt_utf8_utf16IDiE10do_unshiftER11__mbstate_tPcS4_RS4_', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNKSt3__120__codecvt_utf8_utf16IDiE11do_encodingEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNKSt3__120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'type': 'FUNC'} @@ -2087,6 +2088,7 @@ {'is_defined': True, 'name': '__ZTINSt3__118__time_get_storageIwEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTINSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTINSt3__119__shared_weak_countE', 'size': 0, 'type': 'OBJECT'} +{'is_defined': True, 'name': '__ZTINSt3__119bad_expected_accessIvEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTINSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTINSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTINSt3__120__codecvt_utf8_utf16IDiEE', 'size': 0, 'type': 'OBJECT'} @@ -2291,6 +2293,7 @@ {'is_defined': True, 'name': '__ZTSNSt3__117moneypunct_bynameIwLb0EEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTSNSt3__117moneypunct_bynameIwLb1EEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTSNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} +{'is_defined': True, 'name': '__ZTSNSt3__119bad_expected_accessIvEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTSNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTSNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTSNSt3__13pmr15memory_resourceE', 'size': 0, 'type': 'OBJECT'} @@ -2516,6 +2519,7 @@ {'is_defined': True, 
'name': '__ZTVNSt3__117moneypunct_bynameIwLb1EEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTVNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTVNSt3__119__shared_weak_countE', 'size': 0, 'type': 'OBJECT'} +{'is_defined': True, 'name': '__ZTVNSt3__119bad_expected_accessIvEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTVNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTVNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'} {'is_defined': True, 'name': '__ZTVNSt3__120__codecvt_utf8_utf16IDiEE', 'size': 0, 'type': 'OBJECT'} diff --git a/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist index efa2189e9c9287..fc0f4fcf415e63 100644 --- a/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -209,6 +209,7 @@ {'is_defined': True, 'name': '_ZNKSt6__ndk118__time_get_storageIcE15__do_date_orderEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt6__ndk118__time_get_storageIwE15__do_date_orderEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt6__ndk119__shared_weak_count13__get_deleterERKSt9type_info', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNKSt6__ndk119bad_expected_accessIvE4whatEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt6__ndk120__codecvt_utf8_utf16IDiE10do_unshiftER9mbstate_tPcS4_RS4_', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt6__ndk120__codecvt_utf8_utf16IDiE11do_encodingEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt6__ndk120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'type': 'FUNC'} @@ -1722,6 +1723,7 @@ {'is_defined': True, 
'name': '_ZTINSt6__ndk118__time_get_storageIwEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt6__ndk118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt6__ndk119__shared_weak_countE', 'size': 40, 'type': 'OBJECT'} +{'is_defined': True, 'name': '_ZTINSt6__ndk119bad_expected_accessIvEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt6__ndk119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt6__ndk119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt6__ndk120__codecvt_utf8_utf16IDiEE', 'size': 24, 'type': 'OBJECT'} @@ -1955,6 +1957,7 @@ {'is_defined': True, 'name': '_ZTSNSt6__ndk118__time_get_storageIwEE', 'size': 35, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt6__ndk118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 72, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt6__ndk119__shared_weak_countE', 'size': 33, 'type': 'OBJECT'} +{'is_defined': True, 'name': '_ZTSNSt6__ndk119bad_expected_accessIvEE', 'size': 36, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt6__ndk119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 73, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt6__ndk119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 73, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt6__ndk120__codecvt_utf8_utf16IDiEE', 'size': 38, 'type': 'OBJECT'} @@ -2182,6 +2185,7 @@ {'is_defined': True, 'name': '_ZTVNSt6__ndk117moneypunct_bynameIwLb1EEE', 'size': 112, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt6__ndk118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 120, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt6__ndk119__shared_weak_countE', 'size': 56, 
'type': 'OBJECT'} +{'is_defined': True, 'name': '_ZTVNSt6__ndk119bad_expected_accessIvEE', 'size': 40, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt6__ndk119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 80, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt6__ndk119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 80, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt6__ndk120__codecvt_utf8_utf16IDiEE', 'size': 96, 'type': 'OBJECT'} diff --git a/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist index ebda5b0dfba57d..4022339562b3ad 100644 --- a/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -266,6 +266,7 @@ {'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIcE15__do_date_orderEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIwE15__do_date_orderEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt3__119__shared_weak_count13__get_deleterERKSt9type_info', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNKSt3__119bad_expected_accessIvE4whatEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE10do_unshiftER11__mbstate_tPcS4_RS4_', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE11do_encodingEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'type': 'FUNC'} @@ -1695,6 +1696,7 @@ {'is_defined': True, 'name': '_ZTINSt3__118__time_get_storageIwEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt3__119__shared_weak_countE', 'size': 40, 'type': 
'OBJECT'} +{'is_defined': True, 'name': '_ZTINSt3__119bad_expected_accessIvEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IDiEE', 'size': 24, 'type': 'OBJECT'} @@ -1829,6 +1831,7 @@ {'is_defined': True, 'name': '_ZTSNSt3__118__time_get_storageIwEE', 'size': 32, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 69, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt3__119__shared_weak_countE', 'size': 30, 'type': 'OBJECT'} +{'is_defined': True, 'name': '_ZTSNSt3__119bad_expected_accessIvEE', 'size': 33, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 70, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 70, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IDiEE', 'size': 35, 'type': 'OBJECT'} @@ -1962,6 +1965,7 @@ {'is_defined': True, 'name': '_ZTVNSt3__117moneypunct_bynameIwLb1EEE', 'size': 112, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 120, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt3__119__shared_weak_countE', 'size': 56, 'type': 'OBJECT'} +{'is_defined': True, 'name': '_ZTVNSt3__119bad_expected_accessIvEE', 'size': 40, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 80, 'type': 'OBJECT'} {'is_defined': True, 'name': 
'_ZTVNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 80, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IDiEE', 'size': 96, 'type': 'OBJECT'} diff --git a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist index 6432ad3be35859..574c4504c59b8e 100644 --- a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -264,6 +264,7 @@ {'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIcE15__do_date_orderEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIwE15__do_date_orderEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt3__119__shared_weak_count13__get_deleterERKSt9type_info', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNKSt3__119bad_expected_accessIvE4whatEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE10do_unshiftER11__mbstate_tPcS4_RS4_', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE11do_encodingEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'type': 'FUNC'} @@ -1696,6 +1697,7 @@ {'is_defined': True, 'name': '_ZTINSt3__118__time_get_storageIwEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt3__119__shared_weak_countE', 'size': 40, 'type': 'OBJECT'} +{'is_defined': True, 'name': '_ZTINSt3__119bad_expected_accessIvEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': 
'_ZTINSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IDiEE', 'size': 24, 'type': 'OBJECT'} @@ -1830,6 +1832,7 @@ {'is_defined': True, 'name': '_ZTSNSt3__118__time_get_storageIwEE', 'size': 32, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 69, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt3__119__shared_weak_countE', 'size': 30, 'type': 'OBJECT'} +{'is_defined': True, 'name': '_ZTSNSt3__119bad_expected_accessIvEE', 'size': 33, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 70, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 70, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IDiEE', 'size': 35, 'type': 'OBJECT'} @@ -1963,6 +1966,7 @@ {'is_defined': True, 'name': '_ZTVNSt3__117moneypunct_bynameIwLb1EEE', 'size': 112, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 120, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt3__119__shared_weak_countE', 'size': 56, 'type': 'OBJECT'} +{'is_defined': True, 'name': '_ZTVNSt3__119bad_expected_accessIvEE', 'size': 40, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 80, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 80, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IDiEE', 'size': 96, 'type': 'OBJECT'} diff --git a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist 
b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist index 1fe84e17b3f7f0..665546699e8ded 100644 --- a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist @@ -235,6 +235,7 @@ {'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIcE15__do_date_orderEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIwE15__do_date_orderEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt3__119__shared_weak_count13__get_deleterERKSt9type_info', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNKSt3__119bad_expected_accessIvE4whatEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE10do_unshiftER11__mbstate_tPcS4_RS4_', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE11do_encodingEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'type': 'FUNC'} @@ -1667,6 +1668,7 @@ {'is_defined': True, 'name': '_ZTINSt3__118__time_get_storageIwEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt3__119__shared_weak_countE', 'size': 40, 'type': 'OBJECT'} +{'is_defined': True, 'name': '_ZTINSt3__119bad_expected_accessIvEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IDiEE', 'size': 24, 'type': 'OBJECT'} @@ -1801,6 +1803,7 @@ {'is_defined': True, 'name': '_ZTSNSt3__118__time_get_storageIwEE', 'size': 32, 'type': 
'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 69, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt3__119__shared_weak_countE', 'size': 30, 'type': 'OBJECT'} +{'is_defined': True, 'name': '_ZTSNSt3__119bad_expected_accessIvEE', 'size': 33, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 70, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 70, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IDiEE', 'size': 35, 'type': 'OBJECT'} @@ -1934,6 +1937,7 @@ {'is_defined': True, 'name': '_ZTVNSt3__117moneypunct_bynameIwLb1EEE', 'size': 112, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 120, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt3__119__shared_weak_countE', 'size': 56, 'type': 'OBJECT'} +{'is_defined': True, 'name': '_ZTVNSt3__119bad_expected_accessIvEE', 'size': 40, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 80, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 80, 'type': 'OBJECT'} {'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IDiEE', 'size': 96, 'type': 'OBJECT'} diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index 208500ec14fcdc..a4a3fee8645710 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -10,6 +10,7 @@ set(LIBCXX_SOURCES chrono.cpp error_category.cpp exception.cpp + expected.cpp filesystem/filesystem_clock.cpp filesystem/filesystem_error.cpp filesystem/path_parser.h diff --git a/libcxx/src/expected.cpp b/libcxx/src/expected.cpp new file mode 100644 index 
00000000000000..f30efb5164796b --- /dev/null +++ b/libcxx/src/expected.cpp @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +_LIBCPP_BEGIN_NAMESPACE_STD +const char* bad_expected_access::what() const noexcept { return "bad access to std::expected"; } +_LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/src/functional.cpp b/libcxx/src/functional.cpp index 570bb78e150b7d..ef53e3e84da0e0 100644 --- a/libcxx/src/functional.cpp +++ b/libcxx/src/functional.cpp @@ -10,9 +10,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION bad_function_call::~bad_function_call() noexcept {} -#endif #ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE const char* bad_function_call::what() const noexcept { return "std::bad_function_call"; } diff --git a/libcxx/test/libcxx/containers/sequences/vector/asan.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/asan.pass.cpp index 588ce2a3d17edc..614323b1ffd7be 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/asan.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/asan.pass.cpp @@ -29,8 +29,7 @@ void do_exit() { int main(int, char**) { -#if TEST_STD_VER >= 11 && TEST_CLANG_VER >= 1600 - // TODO(LLVM-18): Remove the special-casing +#if TEST_STD_VER >= 11 { typedef int T; typedef cpp17_input_iterator MyInputIter; @@ -52,7 +51,7 @@ int main(int, char**) assert(v[1] == 'b'); assert(is_contiguous_container_asan_correct(v)); } -#endif +#endif // TEST_STD_VER >= 11 { typedef cpp17_input_iterator MyInputIter; // Sould not trigger ASan. 
diff --git a/libcxx/test/libcxx/depr/enable_removed_cpp17_features.compile.pass.cpp b/libcxx/test/libcxx/depr/enable_removed_cpp17_features.compile.pass.cpp deleted file mode 100644 index 1b7acad3cfa464..00000000000000 --- a/libcxx/test/libcxx/depr/enable_removed_cpp17_features.compile.pass.cpp +++ /dev/null @@ -1,36 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// Test that defining _LIBCPP_ENABLE_CXX17_REMOVED_FEATURES correctly defines -// _LIBCPP_ENABLE_CXX17_REMOVED_FOO for each individual component macro. - -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES -Wno-deprecated-pragma - -#include <__config> - -#include "test_macros.h" - -#ifndef _LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR -# error _LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR must be defined -#endif - -#ifndef _LIBCPP_ENABLE_CXX17_REMOVED_BINDERS -# error _LIBCPP_ENABLE_CXX17_REMOVED_BINDERS must be defined -#endif - -#ifndef _LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE -# error _LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE must be defined -#endif - -#ifndef _LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS -#error _LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS must be defined -#endif - -#ifndef _LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR -#error _LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR must be defined -#endif diff --git a/libcxx/test/libcxx/depr/enable_removed_cpp17_features.deprecated.verify.cpp b/libcxx/test/libcxx/depr/enable_removed_cpp17_features.deprecated.verify.cpp deleted file mode 100644 index 059c1b3ead4f15..00000000000000 --- a/libcxx/test/libcxx/depr/enable_removed_cpp17_features.deprecated.verify.cpp +++ /dev/null @@ -1,20 +0,0 @@ 
-//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// <__config> - -// Ensure that defining _LIBCPP_ENABLE_CXX17_REMOVED_FEATURES yields a -// deprecation warning. We intend to issue a deprecation warning in LLVM 18 -// and remove the macro entirely in LLVM 19. As such, this test will be quite -// short lived. - -// UNSUPPORTED: clang-modules-build - -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES - -#include <__config> // expected-warning@* 1+ {{macro '_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES' has been marked as deprecated}} diff --git a/libcxx/test/libcxx/depr/enable_removed_cpp20_features.deprecated.verify.cpp b/libcxx/test/libcxx/depr/enable_removed_cpp20_features.deprecated.verify.cpp deleted file mode 100644 index 163ff7d8fbda03..00000000000000 --- a/libcxx/test/libcxx/depr/enable_removed_cpp20_features.deprecated.verify.cpp +++ /dev/null @@ -1,20 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// <__config> - -// Ensure that defining _LIBCPP_ENABLE_CXX20_REMOVED_FEATURES yields a -// deprecation warning. We intend to issue a deprecation warning in LLVM 18 -// and remove the macro entirely in LLVM 19. As such, this test will be quite -// short lived. 
- -// UNSUPPORTED: clang-modules-build - -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES - -#include // expected-warning@* 1+ {{macro '_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES' has been marked as deprecated}} diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp index 2b9f34b731f876..0ec530c922e707 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp @@ -7,9 +7,12 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// XFAIL: c++03 +// UNSUPPORTED: c++03 // XFAIL: !has-1024-bit-atomics +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_one.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_one.pass.cpp index dfa781c5660090..c21b67d479ae24 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_one.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_one.pass.cpp @@ -7,9 +7,12 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// XFAIL: c++03 +// UNSUPPORTED: c++03 // XFAIL: !has-1024-bit-atomics +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // XFAIL: 
availability-synchronization_library-missing // diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp index 38142b336e72ca..af99113f13499d 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp @@ -7,9 +7,12 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// XFAIL: c++03 +// UNSUPPORTED: c++03 // XFAIL: !has-1024-bit-atomics +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait_explicit.pass.cpp index 2db95a0b67a7f0..bb8c64593b54b5 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait_explicit.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait_explicit.pass.cpp @@ -7,9 +7,12 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: no-threads -// XFAIL: c++03 +// UNSUPPORTED: c++03 // XFAIL: !has-1024-bit-atomics +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp b/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp index 18cdc6d654ac2b..d9d9c1dba6bbb8 100644 
--- a/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp b/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp index 3fc48261de1b12..aff7b26e16f70a 100644 --- a/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp b/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp index 2aee8624ae3d52..8c45ba9278f289 100644 --- a/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/thread/thread.barrier/completion.pass.cpp b/libcxx/test/std/thread/thread.barrier/completion.pass.cpp index 7354dbe6ffe8ae..633a0c8bf23664 100644 --- a/libcxx/test/std/thread/thread.barrier/completion.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/completion.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the 
synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/thread/thread.barrier/ctor.compile.pass.cpp b/libcxx/test/std/thread/thread.barrier/ctor.compile.pass.cpp index d47127a18613b7..fe7068d2a574ca 100644 --- a/libcxx/test/std/thread/thread.barrier/ctor.compile.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/ctor.compile.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // // explicit barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF()); diff --git a/libcxx/test/std/thread/thread.barrier/max.pass.cpp b/libcxx/test/std/thread/thread.barrier/max.pass.cpp index ec03c5c87a09c1..b09a02e1bdef4c 100644 --- a/libcxx/test/std/thread/thread.barrier/max.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/max.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // #include diff --git a/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp b/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp index ddc06d2038cc82..8ca4f37b73b950 100644 --- a/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/thread/thread.latch/count_down.pass.cpp 
b/libcxx/test/std/thread/thread.latch/count_down.pass.cpp index 1503c09509a6c8..eb524abd24b98a 100644 --- a/libcxx/test/std/thread/thread.latch/count_down.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/count_down.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/thread/thread.latch/ctor.pass.cpp b/libcxx/test/std/thread/thread.latch/ctor.pass.cpp index 1983f6409cb5a5..bca4561bd2f742 100644 --- a/libcxx/test/std/thread/thread.latch/ctor.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/ctor.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // // inline constexpr explicit latch(ptrdiff_t __expected); diff --git a/libcxx/test/std/thread/thread.latch/max.pass.cpp b/libcxx/test/std/thread/thread.latch/max.pass.cpp index 8b9176c8cac570..bcf353ed9712ee 100644 --- a/libcxx/test/std/thread/thread.latch/max.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/max.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // #include diff --git a/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp b/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp index 70ef2cdf712544..8f354463a8697d 100644 --- a/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// 
ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp b/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp index 3f6e3107e8bce0..22eed736c6b753 100644 --- a/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp b/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp index 111a650b5ea39c..c01c78506587cd 100644 --- a/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp b/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp index 28ccc0124d489e..dcc298ce11ce88 100644 --- a/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // // constexpr explicit counting_semaphore(ptrdiff_t desired); diff --git a/libcxx/test/std/thread/thread.semaphore/max.pass.cpp b/libcxx/test/std/thread/thread.semaphore/max.pass.cpp index 
ca7ad0c92e60e4..6f3ed5e345e0b5 100644 --- a/libcxx/test/std/thread/thread.semaphore/max.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/max.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // #include diff --git a/libcxx/test/std/thread/thread.semaphore/release.pass.cpp b/libcxx/test/std/thread/thread.semaphore/release.pass.cpp index bf3dd7f7d814fe..3c4d179e504332 100644 --- a/libcxx/test/std/thread/thread.semaphore/release.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/release.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp b/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp index 9fa01fc0359044..77f15ece221d43 100644 --- a/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp b/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp index 0d0f7792592fbe..ec159daf87a3fb 100644 --- a/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11 +// Until we drop support for the synchronization library in C++11/14/17 +// 
ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // XFAIL: availability-synchronization_library-missing // diff --git a/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp index b07282593d759c..f57841cca86293 100644 --- a/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp @@ -904,12 +904,6 @@ static void test_valid_values_date_time() { template static void test_valid_values_time_zone() { -// The Apple CI gives %z='-0700' %Ez='-0700' %Oz='-0700' %Z='UTC' -// -0700 looks like the local time where the CI happens to reside, therefore -// omit this test on Apple. -// The Windows CI gives %z='-0000', but on local machines set to a different -// timezone, it gives e.g. %z='+0200'. -#if !defined(__APPLE__) && !defined(_WIN32) using namespace std::literals::chrono_literals; constexpr std::basic_string_view fmt = SV("{:%%z='%z'%t%%Ez='%Ez'%t%%Oz='%Oz'%t%%Z='%Z'%n}"); @@ -918,48 +912,23 @@ static void test_valid_values_time_zone() { const std::locale loc(LOCALE_ja_JP_UTF_8); std::locale::global(std::locale(LOCALE_fr_FR_UTF_8)); -# if defined(_AIX) // Non localized output using C-locale - check(SV("%z='UTC'\t%Ez='UTC'\t%Oz='UTC'\t%Z='UTC'\n"), + check(SV("%z='+0000'\t%Ez='+00:00'\t%Oz='+00:00'\t%Z='UTC'\n"), fmt, file_seconds(0s)); // 00:00:00 UTC Thursday, 1 January 1970 // Use the global locale (fr_FR) - check(SV("%z='UTC'\t%Ez='UTC'\t%Oz='UTC'\t%Z='UTC'\n"), + check(SV("%z='+0000'\t%Ez='+00:00'\t%Oz='+00:00'\t%Z='UTC'\n"), lfmt, file_seconds(0s)); // 00:00:00 UTC Thursday, 1 January 1970 - // Use supplied locale (ja_JP). This locale has a different alternate.a + // Use supplied locale (ja_JP). 
check(loc, - SV("%z='UTC'\t%Ez='UTC'\t%Oz='UTC'\t%Z='UTC'\n"), - lfmt, - file_seconds(0s)); // 00:00:00 UTC Thursday, 1 January 1970 -# else // defined(_AIX) - // Non localized output using C-locale - check(SV("%z='+0000'\t%Ez='+0000'\t%Oz='+0000'\t%Z='UTC'\n"), - fmt, - file_seconds(0s)); // 00:00:00 UTC Thursday, 1 January 1970 - - // Use the global locale (fr_FR) - check(SV("%z='+0000'\t%Ez='+0000'\t%Oz='+0000'\t%Z='UTC'\n"), + SV("%z='+0000'\t%Ez='+00:00'\t%Oz='+00:00'\t%Z='UTC'\n"), lfmt, file_seconds(0s)); // 00:00:00 UTC Thursday, 1 January 1970 - // Use supplied locale (ja_JP). This locale has a different alternate.a -# if defined(__FreeBSD__) - check(loc, - SV("%z='+0000'\t%Ez='+0000'\t%Oz='+0000'\t%Z='UTC'\n"), - lfmt, - file_seconds(0s)); // 00:00:00 UTC Thursday, 1 January 1970 -# else - check(loc, - SV("%z='+0000'\t%Ez='+0000'\t%Oz='+〇'\t%Z='UTC'\n"), - lfmt, - file_seconds(0s)); // 00:00:00 UTC Thursday, 1 January 1970 -# endif -# endif // defined(_AIX) std::locale::global(std::locale::classic()); -#endif // !defined(__APPLE__) && !defined(_WIN32) } template diff --git a/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp index 2fed270cbade72..3a7d6f9a6b01fc 100644 --- a/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp @@ -900,12 +900,6 @@ static void test_valid_values_date_time() { template static void test_valid_values_time_zone() { -// The Apple CI gives %z='-0700' %Ez='-0700' %Oz='-0700' %Z='UTC' -// -0700 looks like the local time where the CI happens to reside, therefore -// omit this test on Apple. -// The Windows CI gives %z='-0000', but on local machines set to a different -// timezone, it gives e.g. %z='+0200'. 
-#if !defined(__APPLE__) && !defined(_WIN32) using namespace std::literals::chrono_literals; constexpr std::basic_string_view fmt = SV("{:%%z='%z'%t%%Ez='%Ez'%t%%Oz='%Oz'%t%%Z='%Z'%n}"); @@ -914,48 +908,23 @@ static void test_valid_values_time_zone() { const std::locale loc(LOCALE_ja_JP_UTF_8); std::locale::global(std::locale(LOCALE_fr_FR_UTF_8)); -# if defined(_AIX) // Non localized output using C-locale - check(SV("%z='UTC'\t%Ez='UTC'\t%Oz='UTC'\t%Z='UTC'\n"), + check(SV("%z='+0000'\t%Ez='+00:00'\t%Oz='+00:00'\t%Z='UTC'\n"), fmt, std::chrono::sys_seconds(0s)); // 00:00:00 UTC Thursday, 1 January 1970 // Use the global locale (fr_FR) - check(SV("%z='UTC'\t%Ez='UTC'\t%Oz='UTC'\t%Z='UTC'\n"), + check(SV("%z='+0000'\t%Ez='+00:00'\t%Oz='+00:00'\t%Z='UTC'\n"), lfmt, std::chrono::sys_seconds(0s)); // 00:00:00 UTC Thursday, 1 January 1970 - // Use supplied locale (ja_JP). This locale has a different alternate.a + // Use supplied locale (ja_JP). check(loc, - SV("%z='UTC'\t%Ez='UTC'\t%Oz='UTC'\t%Z='UTC'\n"), - lfmt, - std::chrono::sys_seconds(0s)); // 00:00:00 UTC Thursday, 1 January 1970 -# else // defined(_AIX) - // Non localized output using C-locale - check(SV("%z='+0000'\t%Ez='+0000'\t%Oz='+0000'\t%Z='UTC'\n"), - fmt, - std::chrono::sys_seconds(0s)); // 00:00:00 UTC Thursday, 1 January 1970 - - // Use the global locale (fr_FR) - check(SV("%z='+0000'\t%Ez='+0000'\t%Oz='+0000'\t%Z='UTC'\n"), + SV("%z='+0000'\t%Ez='+00:00'\t%Oz='+00:00'\t%Z='UTC'\n"), lfmt, std::chrono::sys_seconds(0s)); // 00:00:00 UTC Thursday, 1 January 1970 - // Use supplied locale (ja_JP). 
This locale has a different alternate.a -# if defined(__FreeBSD__) - check(loc, - SV("%z='+0000'\t%Ez='+0000'\t%Oz='+0000'\t%Z='UTC'\n"), - lfmt, - std::chrono::sys_seconds(0s)); // 00:00:00 UTC Thursday, 1 January 1970 -# else - check(loc, - SV("%z='+0000'\t%Ez='+0000'\t%Oz='+〇'\t%Z='UTC'\n"), - lfmt, - std::chrono::sys_seconds(0s)); // 00:00:00 UTC Thursday, 1 January 1970 -# endif -# endif // defined(_AIX) std::locale::global(std::locale::classic()); -#endif // !defined(__APPLE__) && !defined(_WIN32) } template diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.pass.cpp index a751a2fb6347b5..d27cf0bd89062e 100644 --- a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.pass.cpp @@ -6,7 +6,10 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03, c++11, c++14, c++17 +// TODO TZDB review the test based on review comments in +// https://github.com/llvm/llvm-project/pull/85619 + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23, c++26 // UNSUPPORTED: no-filesystem, no-localization, no-tzdb // XFAIL: libcpp-has-no-incomplete-tzdb diff --git a/libcxx/utils/libcxx/test/modules.py b/libcxx/utils/libcxx/test/modules.py index 3f3c7999a1a21d..aab7651c7bb039 100644 --- a/libcxx/utils/libcxx/test/modules.py +++ b/libcxx/utils/libcxx/test/modules.py @@ -26,8 +26,6 @@ # The operators are added for private types like __iom_t10. SkipDeclarations["iomanip"] = ["std::operator<<", "std::operator>>"] -SkipDeclarations["iosfwd"] = ["std::ios_base", "std::vector"] - # This header also provides declarations in the namespace that might be # an error. 
SkipDeclarations["filesystem"] = [ @@ -54,8 +52,6 @@ "std::operator==", ] -# Declared in the forward header since std::string uses std::allocator -SkipDeclarations["string"] = ["std::allocator"] # TODO MODULES remove zombie names # https://libcxx.llvm.org/Status/Cxx20.html#note-p0619 SkipDeclarations["memory"] = [ @@ -63,9 +59,6 @@ "std::get_temporary_buffer", ] -# TODO MODULES this should be part of ios instead -SkipDeclarations["streambuf"] = ["std::basic_ios"] - # include/__type_traits/is_swappable.h SkipDeclarations["type_traits"] = [ "std::swap", diff --git a/lldb/test/API/functionalities/asan/TestMemoryHistory.py b/lldb/test/API/functionalities/asan/TestMemoryHistory.py index ee7939203ead18..41ab25823f5cc6 100644 --- a/lldb/test/API/functionalities/asan/TestMemoryHistory.py +++ b/lldb/test/API/functionalities/asan/TestMemoryHistory.py @@ -8,8 +8,7 @@ from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbplatform from lldbsuite.test import lldbutil - -from functionalities.libsanitizers.util import no_libsanitizers +from lldbsuite.test_event.build_exception import BuildError class AsanTestCase(TestBase): @skipIfFreeBSD # llvm.org/pr21136 runtimes not yet available by default @@ -21,7 +20,10 @@ def test(self): @skipIf(oslist=no_match(["macosx"])) def test_libsanitizers_asan(self): - self.build(make_targets=["libsanitizers"]) + try: + self.build(make_targets=["libsanitizers"]) + except BuildError as e: + self.skipTest("failed to build with libsanitizers") self.libsanitizer_tests() def setUp(self): @@ -36,9 +38,6 @@ def setUp(self): def libsanitizer_tests(self): target = self.createTestTarget() - if no_libsanitizers(self): - self.skipTest("libsanitizers not found") - self.runCmd( "env SanitizersAddress=1 MallocSanitizerZone=1 MallocSecureAllocator=0" ) diff --git a/lldb/test/API/functionalities/asan/TestReportData.py b/lldb/test/API/functionalities/asan/TestReportData.py index de0c1206a57ad6..5e4c179e2a4819 100644 --- 
a/lldb/test/API/functionalities/asan/TestReportData.py +++ b/lldb/test/API/functionalities/asan/TestReportData.py @@ -8,8 +8,7 @@ from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil - -from functionalities.libsanitizers.util import no_libsanitizers +from lldbsuite.test_event.build_exception import BuildError class AsanTestReportDataCase(TestBase): @skipIfFreeBSD # llvm.org/pr21136 runtimes not yet available by default @@ -22,7 +21,10 @@ def test(self): @skipIf(oslist=no_match(["macosx"])) def test_libsanitizers_asan(self): - self.build(make_targets=["libsanitizers"]) + try: + self.build(make_targets=["libsanitizers"]) + except BuildError as e: + self.skipTest("failed to build with libsanitizers") self.asan_tests(libsanitizers=True) def setUp(self): @@ -38,9 +40,6 @@ def setUp(self): def asan_tests(self, libsanitizers=False): target = self.createTestTarget() - if libsanitizers and no_libsanitizers(self): - self.skipTest("libsanitizers not found") - if libsanitizers: self.runCmd( "env SanitizersAddress=1 MallocSanitizerZone=1 MallocSecureAllocator=0" diff --git a/lldb/test/API/functionalities/libsanitizers/util.py b/lldb/test/API/functionalities/libsanitizers/util.py deleted file mode 100644 index ad68541aba8d05..00000000000000 --- a/lldb/test/API/functionalities/libsanitizers/util.py +++ /dev/null @@ -1,3 +0,0 @@ -def no_libsanitizers(testbase): - testbase.runCmd("image list libsystem_sanitizers.dylib", check=False) - return not "libsystem_sanitizers.dylib" in testbase.res.GetOutput() diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 9db0894162afca..e1c41b3b55ccfb 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -124,7 +124,7 @@ bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI); /// specified, perform context-sensitive analysis and return true if the /// pointer couldn't 
possibly be null at the specified instruction. /// Supports values with integer or pointer type and vectors of integers. -bool isKnownNonZero(const Value *V, unsigned Depth, const SimplifyQuery &Q); +bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth = 0); /// Return true if the two given values are negation. /// Currently can recoginze Value pair: diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index cfe1b11ade5a4e..8e6bef69218c2b 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -24,6 +24,7 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/FMF.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" @@ -311,6 +312,32 @@ class BinaryOperator : public Instruction { return BO; } + static BinaryOperator *CreateWithFMF(BinaryOps Opc, Value *V1, Value *V2, + FastMathFlags FMF, + const Twine &Name = "", + Instruction *InsertBefore = nullptr) { + BinaryOperator *BO = Create(Opc, V1, V2, Name, InsertBefore); + BO->setFastMathFlags(FMF); + return BO; + } + + static BinaryOperator *CreateFAddFMF(Value *V1, Value *V2, FastMathFlags FMF, + const Twine &Name = "") { + return CreateWithFMF(Instruction::FAdd, V1, V2, FMF, Name); + } + static BinaryOperator *CreateFSubFMF(Value *V1, Value *V2, FastMathFlags FMF, + const Twine &Name = "") { + return CreateWithFMF(Instruction::FSub, V1, V2, FMF, Name); + } + static BinaryOperator *CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, + const Twine &Name = "") { + return CreateWithFMF(Instruction::FMul, V1, V2, FMF, Name); + } + static BinaryOperator *CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, + const Twine &Name = "") { + return CreateWithFMF(Instruction::FDiv, V1, V2, FMF, Name); + } + static BinaryOperator *CreateFAddFMF(Value *V1, Value *V2, Instruction *FMFSource, const Twine &Name = "") { diff --git 
a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index e46570af3873f9..f662febb9216bb 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -508,9 +508,9 @@ class InstrProfLookupTrait { using namespace support; offset_type KeyLen = - endian::readNext(D); + endian::readNext(D); offset_type DataLen = - endian::readNext(D); + endian::readNext(D); return std::make_pair(KeyLen, DataLen); } diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 0431c182276ec6..7f3956bd739390 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -16,6 +16,8 @@ namespace llvm { namespace memprof { +struct MemProfRecord; + // The versions of the indexed MemProf format enum IndexedVersion : uint64_t { // Version 0: This version didn't have a version field. @@ -66,7 +68,7 @@ struct PortableMemInfoBlock { switch (Id) { #define MIBEntryDef(NameTag, Name, Type) \ case Meta::Name: { \ - Name = endian::readNext(Ptr); \ + Name = endian::readNext(Ptr); \ } break; #include "llvm/ProfileData/MIBEntryDef.inc" #undef MIBEntryDef @@ -221,13 +223,12 @@ struct Frame { using namespace support; const uint64_t F = - endian::readNext(Ptr); + endian::readNext(Ptr); const uint32_t L = - endian::readNext(Ptr); + endian::readNext(Ptr); const uint32_t C = - endian::readNext(Ptr); - const bool I = - endian::readNext(Ptr); + endian::readNext(Ptr); + const bool I = endian::readNext(Ptr); return Frame(/*Function=*/F, /*LineOffset=*/L, /*Column=*/C, /*IsInlineFrame=*/I); } @@ -369,14 +370,9 @@ struct IndexedMemProfRecord { size_t serializedSize(IndexedVersion Version) const; bool operator==(const IndexedMemProfRecord &Other) const { - if (Other.AllocSites.size() != AllocSites.size()) + if (Other.AllocSites != AllocSites) return false; - for (size_t I = 0; I < AllocSites.size(); I++) { - if (AllocSites[I] != 
Other.AllocSites[I]) - return false; - } - if (Other.CallSiteIds != CallSiteIds) return false; return true; @@ -392,6 +388,12 @@ struct IndexedMemProfRecord { const unsigned char *Buffer, IndexedVersion Version); + // Convert IndexedMemProfRecord to MemProfRecord. Callback is used to + // translate CallStackId to call stacks with frames inline. + MemProfRecord toMemProfRecord( + std::function(const CallStackId)> Callback) + const; + // Returns the GUID for the function name after canonicalization. For // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are // mapped to functions using this GUID. @@ -474,16 +476,15 @@ class RecordLookupTrait { using namespace support; offset_type KeyLen = - endian::readNext(D); + endian::readNext(D); offset_type DataLen = - endian::readNext(D); + endian::readNext(D); return std::make_pair(KeyLen, DataLen); } uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { using namespace support; - return endian::readNext(D); + return endian::readNext(D); } data_type ReadData(uint64_t K, const unsigned char *D, @@ -615,16 +616,15 @@ class FrameLookupTrait { using namespace support; offset_type KeyLen = - endian::readNext(D); + endian::readNext(D); offset_type DataLen = - endian::readNext(D); + endian::readNext(D); return std::make_pair(KeyLen, DataLen); } uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { using namespace support; - return endian::readNext(D); + return endian::readNext(D); } data_type ReadData(uint64_t K, const unsigned char *D, diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h index 89f49a20a6089f..7fa8af184dc93b 100644 --- a/llvm/include/llvm/ProfileData/MemProfReader.h +++ b/llvm/include/llvm/ProfileData/MemProfReader.h @@ -70,8 +70,20 @@ class MemProfReader { Callback = std::bind(&MemProfReader::idToFrame, this, std::placeholders::_1); + auto CallStackCallback = [&](CallStackId CSId) { + llvm::SmallVector CallStack; + auto 
Iter = CSIdToCallStack.find(CSId); + assert(Iter != CSIdToCallStack.end()); + for (FrameId Id : Iter->second) + CallStack.push_back(Callback(Id)); + return CallStack; + }; + const IndexedMemProfRecord &IndexedRecord = Iter->second; - GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, Callback)}; + GuidRecord = { + Iter->first, + IndexedRecord.toMemProfRecord(CallStackCallback), + }; Iter++; return Error::success(); } @@ -84,8 +96,15 @@ class MemProfReader { // Initialize the MemProfReader with the frame mappings and profile contents. MemProfReader( llvm::DenseMap FrameIdMap, + llvm::MapVector ProfData); + + // Initialize the MemProfReader with the frame mappings, call stack mappings, + // and profile contents. + MemProfReader( + llvm::DenseMap FrameIdMap, + llvm::DenseMap> CSIdMap, llvm::MapVector ProfData) - : IdToFrame(std::move(FrameIdMap)), + : IdToFrame(std::move(FrameIdMap)), CSIdToCallStack(std::move(CSIdMap)), FunctionProfileData(std::move(ProfData)) {} protected: @@ -97,6 +116,8 @@ class MemProfReader { } // A mapping from FrameId (a hash of the contents) to the frame. llvm::DenseMap IdToFrame; + // A mapping from CallStackId to the call stack. + llvm::DenseMap> CSIdToCallStack; // A mapping from function GUID, hash of the canonical function symbol to the // memprof profile data for that function, i.e allocation and callsite info. llvm::MapVector FunctionProfileData; diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 345e09dce0b2b1..187ace3a0cbedf 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -372,6 +372,15 @@ RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID); /// Returns the comparison predicate used when expanding a min/max reduction. CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK); +/// See RecurrenceDescriptor::isAnyOfPattern for a description of the pattern we +/// are trying to match. 
In this pattern, we are only ever selecting between two +/// values: 1) an initial start value \p StartVal of the reduction PHI, and 2) a +/// loop invariant value. If any of lane value in \p Left, \p Right is not equal +/// to \p StartVal, select the loop invariant value. This is done by selecting +/// \p Right iff \p Left is equal to \p StartVal. +Value *createAnyOfOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK, + Value *Left, Value *Right); + /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. /// The Builder's fast-math-flags must be set to propagate the expected values. Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index b082dfe8fbd217..16ee2ca49d0ece 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1283,8 +1283,7 @@ AliasResult BasicAAResult::aliasGEP( // VarIndex = Scale*V. const VariableGEPIndex &Var = DecompGEP1.VarIndices[0]; if (Var.Val.TruncBits == 0 && - isKnownNonZero(Var.Val.V, /*Depth=*/0, - SimplifyQuery(DL, DT, &AC, Var.CxtI))) { + isKnownNonZero(Var.Val.V, SimplifyQuery(DL, DT, &AC, Var.CxtI))) { // Check if abs(V*Scale) >= abs(Scale) holds in the presence of // potentially wrapping math. 
auto MultiplyByScaleNoWrap = [](const VariableGEPIndex &Var) { diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 4e6e666922671d..8955de6375dec4 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -1586,10 +1586,10 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, if (match(UnsignedICmp, m_c_ICmp(UnsignedPred, m_Specific(Y), m_Specific(A)))) { if (UnsignedPred == ICmpInst::ICMP_UGE && IsAnd && - EqPred == ICmpInst::ICMP_NE && isKnownNonZero(B, /*Depth=*/0, Q)) + EqPred == ICmpInst::ICMP_NE && isKnownNonZero(B, Q)) return UnsignedICmp; if (UnsignedPred == ICmpInst::ICMP_ULT && !IsAnd && - EqPred == ICmpInst::ICMP_EQ && isKnownNonZero(B, /*Depth=*/0, Q)) + EqPred == ICmpInst::ICMP_EQ && isKnownNonZero(B, Q)) return UnsignedICmp; } } @@ -1607,13 +1607,13 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, // X > Y && Y == 0 --> Y == 0 iff X != 0 // X > Y || Y == 0 --> X > Y iff X != 0 if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ && - isKnownNonZero(X, /*Depth=*/0, Q)) + isKnownNonZero(X, Q)) return IsAnd ? ZeroICmp : UnsignedICmp; // X <= Y && Y != 0 --> X <= Y iff X != 0 // X <= Y || Y != 0 --> Y != 0 iff X != 0 if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE && - isKnownNonZero(X, /*Depth=*/0, Q)) + isKnownNonZero(X, Q)) return IsAnd ? UnsignedICmp : ZeroICmp; // The transforms below here are expected to be handled more generally with @@ -2817,10 +2817,9 @@ static Constant *computePointerICmp(CmpInst::Predicate Pred, Value *LHS, // the other operand can not be based on the alloc - if it were, then // the cmp itself would be a capture. 
Value *MI = nullptr; - if (isAllocLikeFn(LHS, TLI) && llvm::isKnownNonZero(RHS, /*Depth=*/0, Q)) + if (isAllocLikeFn(LHS, TLI) && llvm::isKnownNonZero(RHS, Q)) MI = LHS; - else if (isAllocLikeFn(RHS, TLI) && - llvm::isKnownNonZero(LHS, /*Depth=*/0, Q)) + else if (isAllocLikeFn(RHS, TLI) && llvm::isKnownNonZero(LHS, Q)) MI = RHS; if (MI) { // FIXME: This is incorrect, see PR54002. While we can assume that the @@ -2976,12 +2975,12 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, return getTrue(ITy); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: - if (isKnownNonZero(LHS, /*Depth=*/0, Q)) + if (isKnownNonZero(LHS, Q)) return getFalse(ITy); break; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: - if (isKnownNonZero(LHS, /*Depth=*/0, Q)) + if (isKnownNonZero(LHS, Q)) return getTrue(ITy); break; case ICmpInst::ICMP_SLT: { @@ -2996,7 +2995,7 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, KnownBits LHSKnown = computeKnownBits(LHS, /* Depth */ 0, Q); if (LHSKnown.isNegative()) return getTrue(ITy); - if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, /*Depth=*/0, Q)) + if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, Q)) return getFalse(ITy); break; } @@ -3012,7 +3011,7 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, KnownBits LHSKnown = computeKnownBits(LHS, /* Depth */ 0, Q); if (LHSKnown.isNegative()) return getFalse(ITy); - if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, /*Depth=*/0, Q)) + if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, Q)) return getTrue(ITy); break; } @@ -3165,7 +3164,7 @@ static Value *simplifyICmpWithBinOpOnLHS(CmpInst::Predicate Pred, const APInt *C; if ((match(LBO, m_LShr(m_Specific(RHS), m_APInt(C))) && *C != 0) || (match(LBO, m_UDiv(m_Specific(RHS), m_APInt(C))) && *C != 1)) { - if (isKnownNonZero(RHS, /*Depth=*/0, Q)) { + if (isKnownNonZero(RHS, Q)) { switch (Pred) { default: break; @@ -3398,7 +3397,7 @@ static Value 
*simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, bool NUW = Q.IIQ.hasNoUnsignedWrap(LBO) && Q.IIQ.hasNoUnsignedWrap(RBO); bool NSW = Q.IIQ.hasNoSignedWrap(LBO) && Q.IIQ.hasNoSignedWrap(RBO); if (!NUW || (ICmpInst::isSigned(Pred) && !NSW) || - !isKnownNonZero(LBO->getOperand(0), /*Depth=*/0, Q)) + !isKnownNonZero(LBO->getOperand(0), Q)) break; if (Value *V = simplifyICmpInst(Pred, LBO->getOperand(1), RBO->getOperand(1), Q, MaxRecurse - 1)) diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index 3223b0564e6c9d..6cded828c25f4a 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -645,7 +645,7 @@ LazyValueInfoImpl::solveBlockValueImpl(Value *Val, BasicBlock *BB) { // instruction is placed, even if it could legally be hoisted much higher. // That is unfortunate. PointerType *PT = dyn_cast(BBI->getType()); - if (PT && isKnownNonZero(BBI, /*Depth=*/0, DL)) + if (PT && isKnownNonZero(BBI, DL)) return ValueLatticeElement::getNot(ConstantPointerNull::get(PT)); if (BBI->getType()->isIntegerTy()) { @@ -1863,8 +1863,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, Module *M = CxtI->getModule(); const DataLayout &DL = M->getDataLayout(); if (V->getType()->isPointerTy() && C->isNullValue() && - isKnownNonZero(V->stripPointerCastsSameRepresentation(), /*Depth=*/0, - DL)) { + isKnownNonZero(V->stripPointerCastsSameRepresentation(), DL)) { if (Pred == ICmpInst::ICMP_EQ) return LazyValueInfo::False; else if (Pred == ICmpInst::ICMP_NE) diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index b5403408cf2ab3..ac508e19c9e014 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -100,7 +100,7 @@ static bool isDereferenceableAndAlignedPointer( if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size) && !CheckForFreed) if (!CheckForNonNull || - isKnownNonZero(V, /*Depth=*/0, SimplifyQuery(DL, DT, AC, CtxI))) { + isKnownNonZero(V, 
SimplifyQuery(DL, DT, AC, CtxI))) { // As we recursed through GEPs to get here, we've incrementally checked // that each step advanced by a multiple of the alignment. If our base is // properly aligned, then the original offset accessed must also be. @@ -134,7 +134,7 @@ static bool isDereferenceableAndAlignedPointer( if (getObjectSize(V, ObjSize, DL, TLI, Opts)) { APInt KnownDerefBytes(Size.getBitWidth(), ObjSize); if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size) && - isKnownNonZero(V, /*Depth=*/0, SimplifyQuery(DL, DT, AC, CtxI)) && + isKnownNonZero(V, SimplifyQuery(DL, DT, AC, CtxI)) && !V->canBeFreed()) { // As we recursed through GEPs to get here, we've incrementally // checked that each step advanced by a multiple of the alignment. If diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 1c98b0295e5253..95440dda3b4c0e 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -6900,7 +6900,7 @@ const ConstantRange &ScalarEvolution::getRangeRef( uint64_t Rem = MaxVal.urem(Align); MaxVal -= APInt(BitWidth, Rem); APInt MinVal = APInt::getZero(BitWidth); - if (llvm::isKnownNonZero(V, /*Depth=*/0, DL)) + if (llvm::isKnownNonZero(V, DL)) MinVal = Align; ConservativeResult = ConservativeResult.intersectWith( ConstantRange::getNonEmpty(MinVal, MaxVal + 1), RangeType); diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index b3abf016cfb93d..ab2f43e1033fa1 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -272,7 +272,7 @@ bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, } static bool isKnownNonZero(const Value *V, const APInt &DemandedElts, - unsigned Depth, const SimplifyQuery &Q); + const SimplifyQuery &Q, unsigned Depth); bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth) { @@ -288,7 +288,7 @@ bool llvm::isKnownPositive(const Value *V, 
const SimplifyQuery &SQ, // this updated. KnownBits Known = computeKnownBits(V, Depth, SQ); return Known.isNonNegative() && - (Known.isNonZero() || isKnownNonZero(V, Depth, SQ)); + (Known.isNonZero() || isKnownNonZero(V, SQ, Depth)); } bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ, @@ -868,7 +868,7 @@ static void computeKnownBitsFromShiftOperator( bool ShAmtNonZero = Known.isNonZero() || (Known.getMaxValue().ult(Known.getBitWidth()) && - isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q)); + isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth + 1)); Known = KF(Known2, Known, ShAmtNonZero); } @@ -2124,7 +2124,7 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, case Instruction::Mul: return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) && isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) && - (OrZero || isKnownNonZero(I, Depth, Q)); + (OrZero || isKnownNonZero(I, Q, Depth)); case Instruction::And: // A power of two and'd with anything is a power of two or zero. if (OrZero && @@ -2134,7 +2134,7 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, // X & (-X) is always a power of two or zero. if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) || match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0))))) - return OrZero || isKnownNonZero(I->getOperand(0), Depth, Q); + return OrZero || isKnownNonZero(I->getOperand(0), Q, Depth); return false; case Instruction::Add: { // Adding a power-of-two or zero to the same power-of-two or zero yields @@ -2249,7 +2249,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, // If the base pointer is non-null, we cannot walk to a null address with an // inbounds GEP in address space zero. - if (isKnownNonZero(GEP->getPointerOperand(), Depth, Q)) + if (isKnownNonZero(GEP->getPointerOperand(), Q, Depth)) return true; // Walk the GEP operands and see if any operand introduces a non-zero offset. 
@@ -2288,7 +2288,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, if (Depth++ >= MaxAnalysisRecursionDepth) continue; - if (isKnownNonZero(GTI.getOperand(), Depth, Q)) + if (isKnownNonZero(GTI.getOperand(), Q, Depth)) return true; } @@ -2441,8 +2441,8 @@ static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth, const SimplifyQuery &Q, unsigned BitWidth, Value *X, Value *Y, bool NSW, bool NUW) { if (NUW) - return isKnownNonZero(Y, DemandedElts, Depth, Q) || - isKnownNonZero(X, DemandedElts, Depth, Q); + return isKnownNonZero(Y, DemandedElts, Q, Depth) || + isKnownNonZero(X, DemandedElts, Q, Depth); KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q); KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q); @@ -2450,8 +2450,8 @@ static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth, // If X and Y are both non-negative (as signed values) then their sum is not // zero unless both X and Y are zero. if (XKnown.isNonNegative() && YKnown.isNonNegative()) - if (isKnownNonZero(Y, DemandedElts, Depth, Q) || - isKnownNonZero(X, DemandedElts, Depth, Q)) + if (isKnownNonZero(Y, DemandedElts, Q, Depth) || + isKnownNonZero(X, DemandedElts, Q, Depth)) return true; // If X and Y are both negative (as signed values) then their sum is not @@ -2485,7 +2485,7 @@ static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth, Value *Y) { // TODO: Move this case into isKnownNonEqual(). if (auto *C = dyn_cast(X)) - if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Depth, Q)) + if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Q, Depth)) return true; return ::isKnownNonEqual(X, Y, Depth, Q); @@ -2497,18 +2497,18 @@ static bool isNonZeroMul(const APInt &DemandedElts, unsigned Depth, // If X and Y are non-zero then so is X * Y as long as the multiplication // does not overflow. 
if (NSW || NUW) - return isKnownNonZero(X, DemandedElts, Depth, Q) && - isKnownNonZero(Y, DemandedElts, Depth, Q); + return isKnownNonZero(X, DemandedElts, Q, Depth) && + isKnownNonZero(Y, DemandedElts, Q, Depth); // If either X or Y is odd, then if the other is non-zero the result can't // be zero. KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q); if (XKnown.One[0]) - return isKnownNonZero(Y, DemandedElts, Depth, Q); + return isKnownNonZero(Y, DemandedElts, Q, Depth); KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q); if (YKnown.One[0]) - return XKnown.isNonZero() || isKnownNonZero(X, DemandedElts, Depth, Q); + return XKnown.isNonZero() || isKnownNonZero(X, DemandedElts, Q, Depth); // If there exists any subset of X (sX) and subset of Y (sY) s.t sX * sY is // non-zero, then X * Y is non-zero. We can find sX and sY by just taking @@ -2564,7 +2564,7 @@ static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts, // non-zero then at least one non-zero bit must remain. 
if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift) .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) && - isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q)) + isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth)) return true; return false; @@ -2613,7 +2613,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I, Type *FromTy = I->getOperand(0)->getType(); if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) && (BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0) - return isKnownNonZero(I->getOperand(0), Depth, Q); + return isKnownNonZero(I->getOperand(0), Q, Depth); } break; case Instruction::IntToPtr: // Note that we have to take special care to avoid looking through @@ -2622,7 +2622,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I, if (!isa(I->getType()) && Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <= Q.DL.getTypeSizeInBits(I->getType()).getFixedValue()) - return isKnownNonZero(I->getOperand(0), Depth, Q); + return isKnownNonZero(I->getOperand(0), Q, Depth); break; case Instruction::PtrToInt: // Similar to int2ptr above, we can look through ptr2int here if the cast @@ -2630,25 +2630,25 @@ static bool isKnownNonZeroFromOperator(const Operator *I, if (!isa(I->getType()) && Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <= Q.DL.getTypeSizeInBits(I->getType()).getFixedValue()) - return isKnownNonZero(I->getOperand(0), Depth, Q); + return isKnownNonZero(I->getOperand(0), Q, Depth); break; case Instruction::Sub: return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, I->getOperand(0), I->getOperand(1)); case Instruction::Or: // X | Y != 0 if X != 0 or Y != 0. 
- return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q) || - isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q); + return isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth) || + isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth); case Instruction::SExt: case Instruction::ZExt: // ext X != 0 if X != 0. - return isKnownNonZero(I->getOperand(0), Depth, Q); + return isKnownNonZero(I->getOperand(0), Q, Depth); case Instruction::Shl: { // shl nsw/nuw can't remove any non-zero bits. const OverflowingBinaryOperator *BO = cast(I); if (Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO)) - return isKnownNonZero(I->getOperand(0), Depth, Q); + return isKnownNonZero(I->getOperand(0), Q, Depth); // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined // if the lowest bit is shifted off the end. @@ -2664,7 +2664,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I, // shr exact can only shift out zero bits. const PossiblyExactOperator *BO = cast(I); if (BO->isExact()) - return isKnownNonZero(I->getOperand(0), Depth, Q); + return isKnownNonZero(I->getOperand(0), Q, Depth); // shr X, Y != 0 if X is negative. Note that the value of the shift is not // defined if the sign bit is shifted off the end. @@ -2680,7 +2680,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I, // X / Y // div exact can only produce a zero if the dividend is zero. if (cast(I)->isExact()) - return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q); + return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth); std::optional XUgeY; KnownBits XKnown = @@ -2730,7 +2730,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I, Value *Op; Op = IsTrueArm ? I->getOperand(1) : I->getOperand(2); // Op is trivially non-zero. - if (isKnownNonZero(Op, DemandedElts, Depth, Q)) + if (isKnownNonZero(Op, DemandedElts, Q, Depth)) return true; // The condition of the select dominates the true/false arm. 
Check if the @@ -2780,7 +2780,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I, } } // Finally recurse on the edge and check it directly. - return isKnownNonZero(U.get(), DemandedElts, NewDepth, RecQ); + return isKnownNonZero(U.get(), DemandedElts, RecQ, NewDepth); }); } case Instruction::InsertElement: { @@ -2802,9 +2802,9 @@ static bool isKnownNonZeroFromOperator(const Operator *I, // Result is zero if Elt is non-zero and rest of the demanded elts in Vec // are non-zero. - return (SkipElt || isKnownNonZero(Elt, Depth, Q)) && + return (SkipElt || isKnownNonZero(Elt, Q, Depth)) && (DemandedVecElts.isZero() || - isKnownNonZero(Vec, DemandedVecElts, Depth, Q)); + isKnownNonZero(Vec, DemandedVecElts, Q, Depth)); } case Instruction::ExtractElement: if (const auto *EEI = dyn_cast(I)) { @@ -2816,7 +2816,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I, APInt DemandedVecElts = APInt::getAllOnes(NumElts); if (CIdx && CIdx->getValue().ult(NumElts)) DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); - return isKnownNonZero(Vec, DemandedVecElts, Depth, Q); + return isKnownNonZero(Vec, DemandedVecElts, Q, Depth); } } break; @@ -2831,12 +2831,12 @@ static bool isKnownNonZeroFromOperator(const Operator *I, break; // If demanded elements for both vecs are non-zero, the shuffle is non-zero. 
return (DemandedRHS.isZero() || - isKnownNonZero(Shuf->getOperand(1), DemandedRHS, Depth, Q)) && + isKnownNonZero(Shuf->getOperand(1), DemandedRHS, Q, Depth)) && (DemandedLHS.isZero() || - isKnownNonZero(Shuf->getOperand(0), DemandedLHS, Depth, Q)); + isKnownNonZero(Shuf->getOperand(0), DemandedLHS, Q, Depth)); } case Instruction::Freeze: - return isKnownNonZero(I->getOperand(0), Depth, Q) && + return isKnownNonZero(I->getOperand(0), Q, Depth) && isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT, Depth); case Instruction::Load: { @@ -2886,7 +2886,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I, if (Call->isReturnNonNull()) return true; if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) - return isKnownNonZero(RP, Depth, Q); + return isKnownNonZero(RP, Q, Depth); } else { if (MDNode *Ranges = Q.IIQ.getMetadata(Call, LLVMContext::MD_range)) return rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth)); @@ -2896,7 +2896,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I, return true; } if (const Value *RV = Call->getReturnedArgOperand()) - if (RV->getType() == I->getType() && isKnownNonZero(RV, Depth, Q)) + if (RV->getType() == I->getType() && isKnownNonZero(RV, Q, Depth)) return true; } @@ -2908,7 +2908,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I, case Intrinsic::bitreverse: case Intrinsic::bswap: case Intrinsic::ctpop: - return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q); + return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth); // NB: We don't do usub_sat here as in any case we can prove its // non-zero, we will fold it to `sub nuw` in InstCombine. 
case Intrinsic::ssub_sat: @@ -2924,11 +2924,11 @@ static bool isKnownNonZeroFromOperator(const Operator *I, case Intrinsic::vector_reduce_umin: case Intrinsic::vector_reduce_smax: case Intrinsic::vector_reduce_smin: - return isKnownNonZero(II->getArgOperand(0), Depth, Q); + return isKnownNonZero(II->getArgOperand(0), Q, Depth); case Intrinsic::umax: case Intrinsic::uadd_sat: - return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) || - isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q); + return isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth) || + isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth); case Intrinsic::smax: { // If either arg is strictly positive the result is non-zero. Otherwise // the result is non-zero if both ops are non-zero. @@ -2936,7 +2936,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I, const KnownBits &OpKnown) { if (!OpNonZero.has_value()) OpNonZero = OpKnown.isNonZero() || - isKnownNonZero(Op, DemandedElts, Depth, Q); + isKnownNonZero(Op, DemandedElts, Q, Depth); return *OpNonZero; }; // Avoid re-computing isKnownNonZero. @@ -2971,8 +2971,8 @@ static bool isKnownNonZeroFromOperator(const Operator *I, } [[fallthrough]]; case Intrinsic::umin: - return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q) && - isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q); + return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth) && + isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth); case Intrinsic::cttz: return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q) .Zero[0]; @@ -2983,12 +2983,12 @@ static bool isKnownNonZeroFromOperator(const Operator *I, case Intrinsic::fshl: // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0. 
if (II->getArgOperand(0) == II->getArgOperand(1)) - return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q); + return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth); break; case Intrinsic::vscale: return true; case Intrinsic::experimental_get_vector_length: - return isKnownNonZero(I->getOperand(0), Depth, Q); + return isKnownNonZero(I->getOperand(0), Q, Depth); default: break; } @@ -3010,8 +3010,8 @@ static bool isKnownNonZeroFromOperator(const Operator *I, /// specified, perform context-sensitive analysis and return true if the /// pointer couldn't possibly be null at the specified instruction. /// Supports values with integer or pointer type and vectors of integers. -bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, - const SimplifyQuery &Q) { +bool isKnownNonZero(const Value *V, const APInt &DemandedElts, + const SimplifyQuery &Q, unsigned Depth) { Type *Ty = V->getType(); #ifndef NDEBUG @@ -3101,12 +3101,12 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, return false; } -bool llvm::isKnownNonZero(const Value *V, unsigned Depth, - const SimplifyQuery &Q) { +bool llvm::isKnownNonZero(const Value *V, const SimplifyQuery &Q, + unsigned Depth) { auto *FVTy = dyn_cast(V->getType()); APInt DemandedElts = FVTy ? 
APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); - return ::isKnownNonZero(V, DemandedElts, Depth, Q); + return ::isKnownNonZero(V, DemandedElts, Q, Depth); } /// If the pair of operators are the same invertible function, return the @@ -3253,7 +3253,7 @@ static bool isModifyingBinopOfNonZero(const Value *V1, const Value *V2, Op = BO->getOperand(0); else return false; - return isKnownNonZero(Op, Depth + 1, Q); + return isKnownNonZero(Op, Q, Depth + 1); } return false; } @@ -3266,7 +3266,7 @@ static bool isNonEqualMul(const Value *V1, const Value *V2, unsigned Depth, const APInt *C; return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) && (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) && - !C->isZero() && !C->isOne() && isKnownNonZero(V1, Depth + 1, Q); + !C->isZero() && !C->isOne() && isKnownNonZero(V1, Q, Depth + 1); } return false; } @@ -3279,7 +3279,7 @@ static bool isNonEqualShl(const Value *V1, const Value *V2, unsigned Depth, const APInt *C; return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) && (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) && - !C->isZero() && isKnownNonZero(V1, Depth + 1, Q); + !C->isZero() && isKnownNonZero(V1, Q, Depth + 1); } return false; } @@ -5032,6 +5032,19 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, break; } + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: + case Intrinsic::vector_reduce_fmaximum: + case Intrinsic::vector_reduce_fminimum: { + // reduce min/max will choose an element from one of the vector elements, + // so we can infer and class information that is common to all elements. + Known = computeKnownFPClass(II->getArgOperand(0), II->getFastMathFlags(), + InterestedClasses, Depth + 1, Q); + // Can only propagate sign if output is never NaN. 
+ if (!Known.isKnownNeverNaN()) + Known.SignBit.reset(); + break; + } case Intrinsic::trunc: case Intrinsic::floor: case Intrinsic::ceil: diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 22dbb3198a9f17..e657872c382848 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2314,7 +2314,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, // Bail if the value is never zero. Use &Op = CountZeros->getOperandUse(0); - if (isKnownNonZero(Op, /*Depth=*/0, *DL)) + if (isKnownNonZero(Op, *DL)) return false; // The intrinsic will be sunk behind a compare against zero and branch. diff --git a/llvm/lib/CodeGen/MachineDebugify.cpp b/llvm/lib/CodeGen/MachineDebugify.cpp index c264e199cf4722..bffdd51bfbca76 100644 --- a/llvm/lib/CodeGen/MachineDebugify.cpp +++ b/llvm/lib/CodeGen/MachineDebugify.cpp @@ -65,6 +65,7 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI, // all the others. Function *DbgValF = M.getFunction("llvm.dbg.value"); DbgValueInst *EarliestDVI = nullptr; + DbgVariableRecord *EarliestDVR = nullptr; DenseMap Line2Var; DIExpression *Expr = nullptr; if (DbgValF) { @@ -80,6 +81,20 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI, Expr = DVI->getExpression(); } } + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) { + if (!DVR.isDbgValue()) + continue; + unsigned Line = DVR.getDebugLoc().getLine(); + assert(Line != 0 && "debugify should not insert line 0 locations"); + Line2Var[Line] = DVR.getVariable(); + if (!EarliestDVR || Line < EarliestDVR->getDebugLoc().getLine()) + EarliestDVR = &DVR; + Expr = DVR.getExpression(); + } + } + } if (Line2Var.empty()) return true; @@ -109,7 +124,8 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI, // Find a suitable local variable for the DBG_VALUE. 
unsigned Line = MI.getDebugLoc().getLine(); if (!Line2Var.count(Line)) - Line = EarliestDVI->getDebugLoc().getLine(); + Line = EarliestDVI ? EarliestDVI->getDebugLoc().getLine() + : EarliestDVR->getDebugLoc().getLine(); DILocalVariable *LocalVar = Line2Var[Line]; assert(LocalVar && "No variable for current line?"); VarSet.insert(LocalVar); diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp index eb3894dbb3c254..cec50322bb9f90 100644 --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -343,7 +343,7 @@ template ErrorOr MCPseudoProbeDecoder::readUnencodedNumber() { if (Data + sizeof(T) > End) { return std::error_code(); } - T Val = endian::readNext(Data); + T Val = endian::readNext(Data); return ErrorOr(Val); } diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index a5abf63b010f7f..f9ba80bd99c857 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -1135,9 +1135,9 @@ static T swapToHostOrder(const unsigned char *&D, llvm::endianness Orig) { using namespace support; if (Orig == llvm::endianness::little) - return endian::readNext(D); + return endian::readNext(D); else - return endian::readNext(D); + return endian::readNext(D); } static std::unique_ptr allocValueProfData(uint32_t TotalSize) { diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index a35366a106a322..8574a96a1b06fc 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -115,10 +115,9 @@ readBinaryIdsInternal(const MemoryBuffer &DataBuffer, uint64_t BILen = 0; if (Endian == llvm::endianness::little) - BILen = - endian::readNext(BI); + BILen = endian::readNext(BI); else - BILen = endian::readNext(BI); + BILen = endian::readNext(BI); if (BILen == 0) return make_error(instrprof_error::malformed, @@ -923,8 +922,7 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, // Read 
hash. if (D + sizeof(uint64_t) >= End) return data_type(); - uint64_t Hash = - endian::readNext(D); + uint64_t Hash = endian::readNext(D); // Initialize number of counters for GET_VERSION(FormatVersion) == 1. uint64_t CountsSize = N / sizeof(uint64_t) - 1; @@ -932,8 +930,7 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) { if (D + sizeof(uint64_t) > End) return data_type(); - CountsSize = - endian::readNext(D); + CountsSize = endian::readNext(D); } // Read counter values. if (D + CountsSize * sizeof(uint64_t) > End) @@ -943,15 +940,14 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, CounterBuffer.reserve(CountsSize); for (uint64_t J = 0; J < CountsSize; ++J) CounterBuffer.push_back( - endian::readNext(D)); + endian::readNext(D)); // Read bitmap bytes for GET_VERSION(FormatVersion) > 10. if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version10) { uint64_t BitmapBytes = 0; if (D + sizeof(uint64_t) > End) return data_type(); - BitmapBytes = - endian::readNext(D); + BitmapBytes = endian::readNext(D); // Read bitmap byte values. if (D + BitmapBytes * sizeof(uint8_t) > End) return data_type(); @@ -959,8 +955,7 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, BitmapByteBuffer.reserve(BitmapBytes); for (uint64_t J = 0; J < BitmapBytes; ++J) BitmapByteBuffer.push_back(static_cast( - endian::readNext( - D))); + endian::readNext(D))); } DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer), @@ -1256,8 +1251,7 @@ Error IndexedInstrProfReader::readHeader() { // memprof::MemProfVersion0 or the MemProf version number in // memprof::MemProfVersion1. 
const uint64_t FirstWord = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); memprof::IndexedVersion Version = memprof::Version0; if (FirstWord == memprof::Version1) { @@ -1282,17 +1276,15 @@ Error IndexedInstrProfReader::readHeader() { const uint64_t RecordTableOffset = Version == memprof::Version0 ? FirstWord - : support::endian::readNext(Ptr); + : support::endian::readNext( + Ptr); // The offset in the stream right before invoking // FrameTableGenerator.Emit. const uint64_t FramePayloadOffset = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); // The value returned from FrameTableGenerator.Emit. const uint64_t FrameTableOffset = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); // Read the schema. auto SchemaOr = memprof::readMemProfSchema(Ptr); @@ -1330,8 +1322,7 @@ Error IndexedInstrProfReader::readHeader() { const unsigned char *Ptr = Start + BinaryIdOffset; // Read binary ids size. BinaryIdsSize = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); if (BinaryIdsSize % sizeof(uint64_t)) return error(instrprof_error::bad_header); // Set the binary ids start. @@ -1348,8 +1339,7 @@ Error IndexedInstrProfReader::readHeader() { const unsigned char *Ptr = Start + VTableNamesOffset; CompressedVTableNamesLen = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); // Writer first writes the length of compressed string, and then the actual // content. 
@@ -1369,29 +1359,24 @@ Error IndexedInstrProfReader::readHeader() { if (Ptr + 2 * sizeof(uint64_t) > PtrEnd) return error(instrprof_error::truncated); const uint64_t NumTraces = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); TemporalProfTraceStreamSize = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); for (unsigned i = 0; i < NumTraces; i++) { // Expect at least two 64 bit fields: Weight and NumFunctions if (Ptr + 2 * sizeof(uint64_t) > PtrEnd) return error(instrprof_error::truncated); TemporalProfTraceTy Trace; Trace.Weight = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); const uint64_t NumFunctions = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); // Expect at least NumFunctions 64 bit fields if (Ptr + NumFunctions * sizeof(uint64_t) > PtrEnd) return error(instrprof_error::truncated); for (unsigned j = 0; j < NumFunctions; j++) { const uint64_t NameRef = - support::endian::readNext(Ptr); + support::endian::readNext(Ptr); Trace.FunctionNameRefs.push_back(NameRef); } TemporalProfTraces.push_back(std::move(Trace)); diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index 97414505f1c134..8e0402dd16e680 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -144,14 +144,14 @@ static IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema, // Read the meminfo nodes. const uint64_t NumNodes = - endian::readNext(Ptr); + endian::readNext(Ptr); for (uint64_t I = 0; I < NumNodes; I++) { IndexedAllocationInfo Node; const uint64_t NumFrames = - endian::readNext(Ptr); + endian::readNext(Ptr); for (uint64_t J = 0; J < NumFrames; J++) { const FrameId Id = - endian::readNext(Ptr); + endian::readNext(Ptr); Node.CallStack.push_back(Id); } Node.CSId = hashCallStack(Node.CallStack); @@ -162,15 +162,15 @@ static IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema, // Read the callsite information. 
const uint64_t NumCtxs = - endian::readNext(Ptr); + endian::readNext(Ptr); for (uint64_t J = 0; J < NumCtxs; J++) { const uint64_t NumFrames = - endian::readNext(Ptr); + endian::readNext(Ptr); llvm::SmallVector Frames; Frames.reserve(NumFrames); for (uint64_t K = 0; K < NumFrames; K++) { const FrameId Id = - endian::readNext(Ptr); + endian::readNext(Ptr); Frames.push_back(Id); } Record.CallSites.push_back(Frames); @@ -188,11 +188,10 @@ static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema, // Read the meminfo nodes. const uint64_t NumNodes = - endian::readNext(Ptr); + endian::readNext(Ptr); for (uint64_t I = 0; I < NumNodes; I++) { IndexedAllocationInfo Node; - Node.CSId = - endian::readNext(Ptr); + Node.CSId = endian::readNext(Ptr); Node.Info.deserialize(Schema, Ptr); Ptr += PortableMemInfoBlock::serializedSize(); Record.AllocSites.push_back(Node); @@ -200,10 +199,10 @@ static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema, // Read the callsite information. const uint64_t NumCtxs = - endian::readNext(Ptr); + endian::readNext(Ptr); for (uint64_t J = 0; J < NumCtxs; J++) { CallStackId CSId = - endian::readNext(Ptr); + endian::readNext(Ptr); Record.CallSiteIds.push_back(CSId); } @@ -224,6 +223,24 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, llvm_unreachable("unsupported MemProf version"); } +MemProfRecord IndexedMemProfRecord::toMemProfRecord( + std::function(const CallStackId)> Callback) + const { + MemProfRecord Record; + + for (const memprof::IndexedAllocationInfo &IndexedAI : AllocSites) { + memprof::AllocationInfo AI; + AI.Info = IndexedAI.Info; + AI.CallStack = Callback(IndexedAI.CSId); + Record.AllocSites.push_back(AI); + } + + for (memprof::CallStackId CSId : CallSiteIds) + Record.CallSites.push_back(Callback(CSId)); + + return Record; +} + GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) { // Canonicalize the function name to drop suffixes such as ".llvm.". 
Note // we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop @@ -245,7 +262,7 @@ Expected readMemProfSchema(const unsigned char *&Buffer) { const unsigned char *Ptr = Buffer; const uint64_t NumSchemaIds = - endian::readNext(Ptr); + endian::readNext(Ptr); if (NumSchemaIds > static_cast(Meta::Size)) { return make_error(instrprof_error::malformed, "memprof schema invalid"); @@ -254,7 +271,7 @@ Expected readMemProfSchema(const unsigned char *&Buffer) { MemProfSchema Result; for (size_t I = 0; I < NumSchemaIds; I++) { const uint64_t Tag = - endian::readNext(Ptr); + endian::readNext(Ptr); if (Tag >= static_cast(Meta::Size)) { return make_error(instrprof_error::malformed, "memprof schema invalid"); diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp index 580867a9083fde..b4d2c6f043f6df 100644 --- a/llvm/lib/ProfileData/MemProfReader.cpp +++ b/llvm/lib/ProfileData/MemProfReader.cpp @@ -86,7 +86,7 @@ llvm::SmallVector readSegmentEntries(const char *Ptr) { using namespace support; const uint64_t NumItemsToRead = - endian::readNext(Ptr); + endian::readNext(Ptr); llvm::SmallVector Items; for (uint64_t I = 0; I < NumItemsToRead; I++) { Items.push_back(*reinterpret_cast( @@ -100,11 +100,11 @@ readMemInfoBlocks(const char *Ptr) { using namespace support; const uint64_t NumItemsToRead = - endian::readNext(Ptr); + endian::readNext(Ptr); llvm::SmallVector> Items; for (uint64_t I = 0; I < NumItemsToRead; I++) { const uint64_t Id = - endian::readNext(Ptr); + endian::readNext(Ptr); const MemInfoBlock MIB = *reinterpret_cast(Ptr); Items.push_back({Id, MIB}); // Only increment by size of MIB since readNext implicitly increments. 
@@ -117,20 +117,20 @@ CallStackMap readStackInfo(const char *Ptr) { using namespace support; const uint64_t NumItemsToRead = - endian::readNext(Ptr); + endian::readNext(Ptr); CallStackMap Items; for (uint64_t I = 0; I < NumItemsToRead; I++) { const uint64_t StackId = - endian::readNext(Ptr); + endian::readNext(Ptr); const uint64_t NumPCs = - endian::readNext(Ptr); + endian::readNext(Ptr); SmallVector CallStack; CallStack.reserve(NumPCs); for (uint64_t J = 0; J < NumPCs; J++) { CallStack.push_back( - endian::readNext(Ptr)); + endian::readNext(Ptr)); } Items[StackId] = CallStack; @@ -183,6 +183,28 @@ std::string getBuildIdString(const SegmentEntry &Entry) { } } // namespace +MemProfReader::MemProfReader( + llvm::DenseMap FrameIdMap, + llvm::MapVector ProfData) + : IdToFrame(std::move(FrameIdMap)), + FunctionProfileData(std::move(ProfData)) { + // Populate CSId in each IndexedAllocationInfo and IndexedMemProfRecord + // while storing CallStack in CSIdToCallStack. + for (auto &KV : FunctionProfileData) { + IndexedMemProfRecord &Record = KV.second; + for (auto &AS : Record.AllocSites) { + CallStackId CSId = hashCallStack(AS.CallStack); + AS.CSId = CSId; + CSIdToCallStack.insert({CSId, AS.CallStack}); + } + for (auto &CS : Record.CallSites) { + CallStackId CSId = hashCallStack(CS); + Record.CallSiteIds.push_back(CSId); + CSIdToCallStack.insert({CSId, CS}); + } + } +} + Expected> RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary, bool KeepName) { @@ -445,6 +467,7 @@ Error RawMemProfReader::mapRawProfileToRecords() { } CallStackId CSId = hashCallStack(Callstack); + CSIdToCallStack.insert({CSId, Callstack}); // We attach the memprof record to each function bottom-up including the // first non-inline frame. 
@@ -467,7 +490,10 @@ Error RawMemProfReader::mapRawProfileToRecords() { auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()}); IndexedMemProfRecord &Record = Result.first->second; for (LocationPtr Loc : Locs) { + CallStackId CSId = hashCallStack(*Loc); + CSIdToCallStack.insert({CSId, *Loc}); Record.CallSites.push_back(*Loc); + Record.CallSiteIds.push_back(CSId); } } diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index 98d0aa794529c5..f91a0e6177ea02 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -503,7 +503,7 @@ ErrorOr SampleProfileReaderBinary::readUnencodedNumber() { } using namespace support; - T Val = endian::readNext(Data); + T Val = endian::readNext(Data); return Val; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index f283af6fa07d3e..db69d50799e70b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -59,6 +59,12 @@ unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) { AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { + // Always lower memset, memcpy, and memmove intrinsics to load/store + // instructions, rather then generating calls to memset, mempcy or memmove. + MaxStoresPerMemset = MaxStoresPerMemsetOptSize = ~0U; + MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = ~0U; + MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = ~0U; + // Lower floating point store/load to integer store/load to reduce the number // of patterns in tablegen. 
setOperationAction(ISD::LOAD, MVT::f32, Promote); diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 173995f05b51cc..d93709ac03420e 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -326,8 +326,8 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB, .setMemRefs(MMOLo); if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) { - // FIXME: Zdinx RV32 can not work on unaligned memory. - assert(!STI->hasFastUnalignedAccess()); + // FIXME: Zdinx RV32 can not work on unaligned scalar memory. + assert(!STI->enableUnalignedScalarMem()); assert(MBBI->getOperand(2).getOffset() % 8 == 0); MBBI->getOperand(2).setOffset(MBBI->getOperand(2).getOffset() + 4); diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 59962216e0c041..561187c39a4a04 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1183,10 +1183,15 @@ def FeatureTrailingSeqCstFence : SubtargetFeature<"seq-cst-trailing-fence", "true", "Enable trailing fence for seq-cst store.">; -def FeatureFastUnalignedAccess - : SubtargetFeature<"fast-unaligned-access", "HasFastUnalignedAccess", - "true", "Has reasonably performant unaligned " - "loads and stores (both scalar and vector)">; +def FeatureUnalignedScalarMem + : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem", + "true", "Has reasonably performant unaligned scalar " + "loads and stores">; + +def FeatureUnalignedVectorMem + : SubtargetFeature<"unaligned-vector-mem", "EnableUnalignedVectorMem", + "true", "Has reasonably performant unaligned vector " + "loads and stores">; def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", "UsePostRAScheduler", "true", "Schedule again after register allocation">; diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp 
b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 71672ed7b4ae7f..cb41577c5d9435 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -435,6 +435,33 @@ void RISCVFrameLowering::adjustStackForRVV(MachineFunction &MF, Flag, getStackAlign()); } +static void appendScalableVectorExpression(const TargetRegisterInfo &TRI, + SmallVectorImpl &Expr, + int FixedOffset, int ScalableOffset, + llvm::raw_string_ostream &Comment) { + unsigned DwarfVLenB = TRI.getDwarfRegNum(RISCV::VLENB, true); + uint8_t Buffer[16]; + if (FixedOffset) { + Expr.push_back(dwarf::DW_OP_consts); + Expr.append(Buffer, Buffer + encodeSLEB128(FixedOffset, Buffer)); + Expr.push_back((uint8_t)dwarf::DW_OP_plus); + Comment << (FixedOffset < 0 ? " - " : " + ") << std::abs(FixedOffset); + } + + Expr.push_back((uint8_t)dwarf::DW_OP_consts); + Expr.append(Buffer, Buffer + encodeSLEB128(ScalableOffset, Buffer)); + + Expr.push_back((uint8_t)dwarf::DW_OP_bregx); + Expr.append(Buffer, Buffer + encodeULEB128(DwarfVLenB, Buffer)); + Expr.push_back(0); + + Expr.push_back((uint8_t)dwarf::DW_OP_mul); + Expr.push_back((uint8_t)dwarf::DW_OP_plus); + + Comment << (ScalableOffset < 0 ? 
" - " : " + ") << std::abs(ScalableOffset) + << " * vlenb"; +} + static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, Register Reg, uint64_t FixedOffset, @@ -452,30 +479,38 @@ static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, else Comment << printReg(Reg, &TRI); - uint8_t buffer[16]; - if (FixedOffset) { - Expr.push_back(dwarf::DW_OP_consts); - Expr.append(buffer, buffer + encodeSLEB128(FixedOffset, buffer)); - Expr.push_back((uint8_t)dwarf::DW_OP_plus); - Comment << " + " << FixedOffset; - } + appendScalableVectorExpression(TRI, Expr, FixedOffset, ScalableOffset, + Comment); - Expr.push_back((uint8_t)dwarf::DW_OP_consts); - Expr.append(buffer, buffer + encodeSLEB128(ScalableOffset, buffer)); + SmallString<64> DefCfaExpr; + uint8_t Buffer[16]; + DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression); + DefCfaExpr.append(Buffer, Buffer + encodeULEB128(Expr.size(), Buffer)); + DefCfaExpr.append(Expr.str()); - unsigned DwarfVlenb = TRI.getDwarfRegNum(RISCV::VLENB, true); - Expr.push_back((uint8_t)dwarf::DW_OP_bregx); - Expr.append(buffer, buffer + encodeULEB128(DwarfVlenb, buffer)); - Expr.push_back(0); + return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(), + Comment.str()); +} - Expr.push_back((uint8_t)dwarf::DW_OP_mul); - Expr.push_back((uint8_t)dwarf::DW_OP_plus); +static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI, + Register Reg, uint64_t FixedOffset, + uint64_t ScalableOffset) { + assert(ScalableOffset != 0 && "Did not need to adjust CFA for RVV"); + SmallString<64> Expr; + std::string CommentBuffer; + llvm::raw_string_ostream Comment(CommentBuffer); + Comment << printReg(Reg, &TRI) << " @ cfa"; - Comment << " + " << ScalableOffset << " * vlenb"; + // Build up the expression (FixedOffset + ScalableOffset * VLENB). 
+ appendScalableVectorExpression(TRI, Expr, FixedOffset, ScalableOffset, + Comment); SmallString<64> DefCfaExpr; - DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression); - DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer)); + uint8_t Buffer[16]; + unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true); + DefCfaExpr.push_back(dwarf::DW_CFA_expression); + DefCfaExpr.append(Buffer, Buffer + encodeULEB128(DwarfReg, Buffer)); + DefCfaExpr.append(Buffer, Buffer + encodeULEB128(Expr.size(), Buffer)); DefCfaExpr.append(Expr.str()); return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(), @@ -671,6 +706,9 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, .addCFIIndex(CFIIndex) .setMIFlag(MachineInstr::FrameSetup); } + + std::advance(MBBI, getRVVCalleeSavedInfo(MF, CSI).size()); + emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF)); } if (hasFP(MF)) { @@ -1492,6 +1530,41 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( return true; } +void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, bool HasFP) const { + MachineFunction *MF = MBB.getParent(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); + RISCVMachineFunctionInfo *RVFI = MF->getInfo(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + DebugLoc DL = MBB.findDebugLoc(MI); + + const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, MFI.getCalleeSavedInfo()); + if (RVVCSI.empty()) + return; + + uint64_t FixedSize = getStackSizeWithRVVPadding(*MF); + if (!HasFP) { + uint64_t ScalarLocalVarSize = + MFI.getStackSize() - RVFI->getCalleeSavedStackSize() - + RVFI->getRVPushStackSize() - RVFI->getVarArgsSaveSize() + + RVFI->getRVVPadding(); + FixedSize -= ScalarLocalVarSize; + } + + for (auto &CS : RVVCSI) { + // Insert the spill to the stack frame. 
+ int FI = CS.getFrameIdx(); + if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector) { + unsigned CFIIndex = MF->addFrameInst( + createDefCFAOffset(*STI.getRegisterInfo(), CS.getReg(), -FixedSize, + MFI.getObjectOffset(FI) / 8)); + BuildMI(MBB, MI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + } + } +} + bool RISCVFrameLowering::restoreCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef CSI, const TargetRegisterInfo *TRI) const { diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h index 210f8c1064724a..28ab4aff3b9d51 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -88,6 +88,9 @@ class RISCVFrameLowering : public TargetFrameLowering { void adjustStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t Amount, MachineInstr::MIFlag Flag) const; + void emitCalleeSavedRVVPrologCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + bool HasFP) const; std::pair assignRVVStackObjectOffsets(MachineFunction &MF) const; }; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 765838aafb58d2..b0deb1d2669952 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1924,7 +1924,7 @@ bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, // replace. If we don't support unaligned scalar mem, prefer the constant // pool. // TODO: Can the caller pass down the alignment? - if (!Subtarget.hasFastUnalignedAccess()) + if (!Subtarget.enableUnalignedScalarMem()) return true; // Prefer to keep the load if it would require many instructions. 
@@ -13408,7 +13408,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, if (VT != Subtarget.getXLenVT()) return SDValue(); - if (!Subtarget.hasStdExtZba()) + if (!Subtarget.hasStdExtZba() && !Subtarget.hasVendorXTHeadBa()) return SDValue(); ConstantSDNode *CNode = dyn_cast(N->getOperand(1)); @@ -13416,6 +13416,12 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, return SDValue(); uint64_t MulAmt = CNode->getZExtValue(); + // 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C) + // Matched in tablegen, avoid perturbing patterns. + for (uint64_t Divisor : {3, 5, 9}) + if (MulAmt % Divisor == 0 && isPowerOf2_64(MulAmt / Divisor)) + return SDValue(); + // If this is a power 2 + 2/4/8, we can use a shift followed by a single // shXadd. First check if this a sum of two power of 2s because that's // easy. Then count how many zeros are up to the first bit. @@ -13431,6 +13437,43 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2); } } + + // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x) + // Matched in tablegen, avoid perturbing patterns. 
+ switch (MulAmt) { + case 11: + case 13: + case 19: + case 21: + case 25: + case 27: + case 29: + case 37: + case 41: + case 45: + case 73: + case 91: + return SDValue(); + default: + break; + } + + // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X)) + if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) { + unsigned ScaleShift = llvm::countr_zero(MulAmt - 1); + if (ScaleShift >= 1 && ScaleShift < 4) { + unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2))); + SDLoc DL(N); + SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(ShiftAmt, DL, VT)); + SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(ScaleShift, DL, VT)); + return DAG.getNode( + ISD::ADD, DL, VT, Shift1, + DAG.getNode(ISD::ADD, DL, VT, Shift2, N->getOperand(0))); + } + } + return SDValue(); } @@ -15794,7 +15837,7 @@ static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, if (WiderElementSize > ST.getELen()/8) return false; - if (!ST.hasFastUnalignedAccess() && BaseAlign < WiderElementSize) + if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize) return false; for (unsigned i = 0; i < Index->getNumOperands(); i++) { @@ -20620,8 +20663,8 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( unsigned *Fast) const { if (!VT.isVector()) { if (Fast) - *Fast = Subtarget.hasFastUnalignedAccess(); - return Subtarget.hasFastUnalignedAccess(); + *Fast = Subtarget.enableUnalignedScalarMem(); + return Subtarget.enableUnalignedScalarMem(); } // All vector implementations must support element alignment @@ -20637,8 +20680,8 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( // misaligned accesses. TODO: Work through the codegen implications of // allowing such accesses to be formed, and considered fast. 
if (Fast) - *Fast = Subtarget.hasFastUnalignedAccess(); - return Subtarget.hasFastUnalignedAccess(); + *Fast = Subtarget.enableUnalignedVectorMem(); + return Subtarget.enableUnalignedVectorMem(); } @@ -20673,7 +20716,7 @@ EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op, // Do we have sufficient alignment for our preferred VT? If not, revert // to largest size allowed by our alignment criteria. - if (PreferredVT != MVT::i8 && !Subtarget.hasFastUnalignedAccess()) { + if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) { Align RequiredAlign(PreferredVT.getStoreSize()); if (Op.isFixedDstAlign()) RequiredAlign = std::min(RequiredAlign, Op.getDstAlign()); @@ -20865,7 +20908,7 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType, if (!isLegalElementTypeForRVV(ScalarType)) return false; - if (!Subtarget.hasFastUnalignedAccess() && + if (!Subtarget.enableUnalignedVectorMem() && Alignment < ScalarType.getStoreSize()) return false; diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index a14f9a28354737..a54a1148cf28b9 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1522,9 +1522,7 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI, } } - if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm()) - return false; - + assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm()); auto PriorVType = PrevMI.getOperand(2).getImm(); auto VType = MI.getOperand(2).getImm(); return areCompatibleVTYPEs(PriorVType, VType, Used); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 668062c8d33f6f..8331fc0b8c3024 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -361,15 +361,12 @@ void RISCVInstrInfo::copyPhysRegVector( return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V, RISCV::PseudoVMV_V_V_M1, 
RISCV::PseudoVMV_V_I_M1}; }; - auto FindRegWithEncoding = [&TRI](const TargetRegisterClass &RegClass, - uint16_t Encoding) { - ArrayRef Regs = RegClass.getRegisters(); - const auto *FoundReg = llvm::find_if(Regs, [&](MCPhysReg Reg) { - return TRI->getEncodingValue(Reg) == Encoding; - }); - // We should be always able to find one valid register. - assert(FoundReg != Regs.end()); - return *FoundReg; + auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass, + uint16_t Encoding) { + MCRegister Reg = RISCV::V0 + Encoding; + if (&RegClass == &RISCV::VRRegClass) + return Reg; + return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass); }; while (I != NumRegs) { // For non-segment copying, we only do this once as the registers are always @@ -3016,7 +3013,7 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) #define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \ - CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E16) \ + CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \ CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \ CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64) diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 739b50749e1323..f9a557e02bfe1a 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -257,7 +257,8 @@ def SIFIVE_P450 : RISCVProcessorModel<"sifive-p450", SiFiveP400Model, FeatureStdExtZbb, FeatureStdExtZbs, FeatureStdExtZfhmin, - FeatureFastUnalignedAccess], + FeatureUnalignedScalarMem, + FeatureUnalignedVectorMem], [TuneNoDefaultUnroll, TuneConditionalCompressedMoveFusion, TuneLUIADDIFusion, @@ -295,7 +296,8 @@ def SIFIVE_P670 : RISCVProcessorModel<"sifive-p670", SiFiveP600Model, FeatureStdExtZvkng, FeatureStdExtZvksc, FeatureStdExtZvksg, - FeatureFastUnalignedAccess], + FeatureUnalignedScalarMem, + FeatureUnalignedVectorMem], 
[TuneNoDefaultUnroll, TuneConditionalCompressedMoveFusion, TuneLUIADDIFusion, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index e0c0e6517b6f1f..2f9281ab892447 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -228,7 +228,7 @@ class RISCVTTIImpl : public BasicTTIImplBase { return false; EVT ElemType = DataTypeVT.getScalarType(); - if (!ST->hasFastUnalignedAccess() && Alignment < ElemType.getStoreSize()) + if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize()) return false; return TLI->isLegalElementTypeForRVV(ElemType); @@ -253,7 +253,7 @@ class RISCVTTIImpl : public BasicTTIImplBase { return false; EVT ElemType = DataTypeVT.getScalarType(); - if (!ST->hasFastUnalignedAccess() && Alignment < ElemType.getStoreSize()) + if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize()) return false; return TLI->isLegalElementTypeForRVV(ElemType); diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td index 5e792427cca282..4d68f93efeac17 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.td +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td @@ -693,38 +693,38 @@ let DecoderNamespace = "SparcV8", Predicates = [HasNoV9] in { } let rd = 0 in { - let Defs = [CPSR] in { - def STCSRrr : F3_1<3, 0b110101, (outs (MEMrr $rs1, $rs2):$addr), (ins), + let mayStore = 1, Uses = [CPSR] in { + def STCSRrr : F3_1<3, 0b110101, (outs), (ins (MEMrr $rs1, $rs2):$addr), "st %csr, [$addr]", [], IIC_st>; - def STCSRri : F3_2<3, 0b110101, (outs (MEMri $rs1, $simm13):$addr), (ins), + def STCSRri : F3_2<3, 0b110101, (outs), (ins (MEMri $rs1, $simm13):$addr), "st %csr, [$addr]", [], IIC_st>; } - let Defs = [CPQ] in { - def STDCQrr : F3_1<3, 0b110110, (outs (MEMrr $rs1, $rs2):$addr), (ins), + let mayStore = 1, Uses = [CPQ] in { + def STDCQrr : F3_1<3, 0b110110, (outs), (ins (MEMrr $rs1, 
$rs2):$addr), "std %cq, [$addr]", [], IIC_std>; - def STDCQri : F3_2<3, 0b110110, (outs (MEMri $rs1, $simm13):$addr), (ins), + def STDCQri : F3_2<3, 0b110110, (outs), (ins (MEMri $rs1, $simm13):$addr), "std %cq, [$addr]", [], IIC_std>; } } let rd = 0 in { - let Defs = [FSR] in { - def STFSRrr : F3_1<3, 0b100101, (outs (MEMrr $rs1, $rs2):$addr), (ins), + let mayStore = 1, Uses = [FSR] in { + def STFSRrr : F3_1<3, 0b100101, (outs), (ins (MEMrr $rs1, $rs2):$addr), "st %fsr, [$addr]", [], IIC_st>; - def STFSRri : F3_2<3, 0b100101, (outs (MEMri $rs1, $simm13):$addr), (ins), + def STFSRri : F3_2<3, 0b100101, (outs), (ins (MEMri $rs1, $simm13):$addr), "st %fsr, [$addr]", [], IIC_st>; } - let Defs = [FQ] in { - def STDFQrr : F3_1<3, 0b100110, (outs (MEMrr $rs1, $rs2):$addr), (ins), + let mayStore = 1, Defs = [FQ] in { + def STDFQrr : F3_1<3, 0b100110, (outs), (ins (MEMrr $rs1, $rs2):$addr), "std %fq, [$addr]", [], IIC_std>; - def STDFQri : F3_2<3, 0b100110, (outs (MEMri $rs1, $simm13):$addr), (ins), + def STDFQri : F3_2<3, 0b100110, (outs), (ins (MEMri $rs1, $simm13):$addr), "std %fq, [$addr]", [], IIC_std>; } } -let rd = 1, Defs = [FSR] in { - def STXFSRrr : F3_1<3, 0b100101, (outs (MEMrr $rs1, $rs2):$addr), (ins), +let rd = 1, mayStore = 1, Uses = [FSR] in { + def STXFSRrr : F3_1<3, 0b100101, (outs), (ins (MEMrr $rs1, $rs2):$addr), "stx %fsr, [$addr]", []>, Requires<[HasV9]>; - def STXFSRri : F3_2<3, 0b100101, (outs (MEMri $rs1, $simm13):$addr), (ins), + def STXFSRri : F3_2<3, 0b100101, (outs), (ins (MEMri $rs1, $simm13):$addr), "stx %fsr, [$addr]", []>, Requires<[HasV9]>; } diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp index a7fe329b064ee1..8ddc742004292b 100644 --- a/llvm/lib/Target/TargetMachine.cpp +++ b/llvm/lib/Target/TargetMachine.cpp @@ -43,6 +43,12 @@ bool TargetMachine::isLargeGlobalValue(const GlobalValue *GVal) const { if (getTargetTriple().getArch() != Triple::x86_64) return false; + // Remaining logic below is 
ELF-specific. For other object file formats where + // the large code model is mostly used for JIT compilation, just look at the + // code model. + if (!getTargetTriple().isOSBinFormatELF()) + return getCodeModel() == CodeModel::Large; + auto *GO = GVal->getAliaseeObject(); // Be conservative if we can't find an underlying GlobalObject. @@ -51,9 +57,20 @@ bool TargetMachine::isLargeGlobalValue(const GlobalValue *GVal) const { auto *GV = dyn_cast(GO); + auto IsPrefix = [](StringRef Name, StringRef Prefix) { + return Name.consume_front(Prefix) && (Name.empty() || Name[0] == '.'); + }; + // Functions/GlobalIFuncs are only large under the large code model. - if (!GV) + if (!GV) { + // Handle explicit sections as we do for GlobalVariables with an explicit + // section, see comments below. + if (GO->hasSection()) { + StringRef Name = GO->getSection(); + return IsPrefix(Name, ".ltext"); + } return getCodeModel() == CodeModel::Large; + } if (GV->isThreadLocal()) return false; @@ -73,11 +90,8 @@ bool TargetMachine::isLargeGlobalValue(const GlobalValue *GVal) const { // data sections. The code model attribute overrides this above. if (GV->hasSection()) { StringRef Name = GV->getSection(); - auto IsPrefix = [&](StringRef Prefix) { - StringRef S = Name; - return S.consume_front(Prefix) && (S.empty() || S[0] == '.'); - }; - return IsPrefix(".lbss") || IsPrefix(".ldata") || IsPrefix(".lrodata"); + return IsPrefix(Name, ".lbss") || IsPrefix(Name, ".ldata") || + IsPrefix(Name, ".lrodata"); } // Respect large data threshold for medium and large code models. 
diff --git a/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/llvm/lib/Target/X86/X86InstrCMovSetCC.td index 27a0c889a4da3e..e27aa4115990e9 100644 --- a/llvm/lib/Target/X86/X86InstrCMovSetCC.td +++ b/llvm/lib/Target/X86/X86InstrCMovSetCC.td @@ -58,8 +58,8 @@ let SchedRW = [WriteCMOV.Folded, WriteCMOV.ReadAfterFold] in { } let SchedRW = [WriteCMOV, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], Predicates = [HasCMOV, HasCF, In64BitMode], mayStore = 1 in - def mr : ITy<0x40, MRMDestMemCC, t, (outs t.MemOperand:$dst), - (ins t.RegClass:$src1, ccode:$cond), + def mr : ITy<0x40, MRMDestMemCC, t, (outs), + (ins t.MemOperand:$dst, t.RegClass:$src1, ccode:$cond), "cfcmov${cond}", unaryop_ndd_args, []>, UseEFLAGS, NF; } diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index f27d8d64a10404..41b66aafe7d343 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -2453,7 +2453,7 @@ bool AANonNull::isImpliedByIR(Attributor &A, const IRPosition &IRP, if (llvm::any_of(Worklist, [&](AA::ValueAndContext VAC) { return !isKnownNonZero( - VAC.getValue(), /*Depth=*/0, + VAC.getValue(), SimplifyQuery(A.getDataLayout(), DT, AC, VAC.getCtxI())); })) return false; diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 14612b251d1a42..7ebf265e17ba1f 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -1175,7 +1175,7 @@ static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, Value *RetVal = FlowsToReturn[i]; // If this value is locally known to be non-null, we're good - if (isKnownNonZero(RetVal, /*Depth=*/0, DL)) + if (isKnownNonZero(RetVal, DL)) continue; // Otherwise, we need to look upwards since we can't make any local diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 07c50d866544b3..c59b867b10e7d1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -988,7 +988,7 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) { if (C->isOne()) { if (match(Op0, m_ZExt(m_Add(m_Value(X), m_AllOnes())))) { const SimplifyQuery Q = SQ.getWithInstruction(&Add); - if (llvm::isKnownNonZero(X, /*Depth=*/0, Q)) + if (llvm::isKnownNonZero(X, Q)) return new ZExtInst(X, Ty); } } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 2c0c4ee46e8098..d311690be64f16 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1039,9 +1039,9 @@ static Value *foldUnsignedUnderflowCheck(ICmpInst *ZeroICmp, match(ZeroCmpOp, m_c_Add(m_Specific(A), m_Value(B))) && (ZeroICmp->hasOneUse() || UnsignedICmp->hasOneUse())) { auto GetKnownNonZeroAndOther = [&](Value *&NonZero, Value *&Other) { - if (!isKnownNonZero(NonZero, /*Depth=*/0, Q)) + if (!isKnownNonZero(NonZero, Q)) std::swap(NonZero, Other); - return isKnownNonZero(NonZero, /*Depth=*/0, Q); + return isKnownNonZero(NonZero, Q); }; // Given ZeroCmpOp = (A + B) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index ba5db854647a42..60e4be883f513b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -601,8 +601,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { // then change the 'ZeroIsPoison' parameter to 'true' // because we know the zero behavior can't affect the result. 
if (!Known.One.isZero() || - isKnownNonZero(Op0, /*Depth=*/0, - IC.getSimplifyQuery().getWithInstruction(&II))) { + isKnownNonZero(Op0, IC.getSimplifyQuery().getWithInstruction(&II))) { if (!match(II.getArgOperand(1), m_One())) return IC.replaceOperand(II, 1, IC.Builder.getTrue()); } @@ -2067,8 +2066,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // See if we can deduce non-null. if (!CI.hasRetAttr(Attribute::NonNull) && (Known.isNonZero() || - isKnownNonZero(II, /*Depth=*/0, - getSimplifyQuery().getWithInstruction(II)))) { + isKnownNonZero(II, getSimplifyQuery().getWithInstruction(II)))) { CI.addRetAttr(Attribute::NonNull); Changed = true; } @@ -3664,8 +3662,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { for (Value *V : Call.args()) { if (V->getType()->isPointerTy() && !Call.paramHasAttr(ArgNo, Attribute::NonNull) && - isKnownNonZero(V, /*Depth=*/0, - getSimplifyQuery().getWithInstruction(&Call))) + isKnownNonZero(V, getSimplifyQuery().getWithInstruction(&Call))) ArgNos.push_back(ArgNo); ArgNo++; } @@ -3845,7 +3842,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { // isKnownNonNull -> nonnull attribute if (!GCR.hasRetAttr(Attribute::NonNull) && - isKnownNonZero(DerivedPtr, /*Depth=*/0, + isKnownNonZero(DerivedPtr, getSimplifyQuery().getWithInstruction(&Call))) { GCR.addRetAttr(Attribute::NonNull); // We discovered new fact, re-check users. 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 437e9b92c7032f..d242d3f443def9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1977,11 +1977,25 @@ Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) { } Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) { - return commonCastTransforms(CI); + if (Instruction *R = commonCastTransforms(CI)) + return R; + if (!CI.hasNonNeg() && isKnownNonNegative(CI.getOperand(0), SQ)) { + CI.setNonNeg(); + return &CI; + } + return nullptr; } Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) { - return commonCastTransforms(CI); + if (Instruction *R = commonCastTransforms(CI)) + return R; + if (isKnownNonNegative(CI.getOperand(0), SQ)) { + auto UI = + CastInst::Create(Instruction::UIToFP, CI.getOperand(0), CI.getType()); + UI->setNonNeg(true); + return UI; + } + return nullptr; } Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index ee783eed190a7c..de909077017432 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1273,12 +1273,12 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) { // if X non-zero and NoOverflow(X * Y) // (icmp eq/ne Y) - if (!XKnown.One.isZero() || isKnownNonZero(X, /*Depth=*/0, Q)) + if (!XKnown.One.isZero() || isKnownNonZero(X, Q)) return new ICmpInst(Pred, Y, Cmp.getOperand(1)); // if Y non-zero and NoOverflow(X * Y) // (icmp eq/ne X) - if (!YKnown.One.isZero() || isKnownNonZero(Y, /*Depth=*/0, Q)) + if (!YKnown.One.isZero() || isKnownNonZero(Y, Q)) return new ICmpInst(Pred, X, Cmp.getOperand(1)); } // Note, we are skipping cases: @@ -3087,7 +3087,7 @@ Instruction 
*InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp, // (X + -1) X <=u C (if X is never null) if (Pred == CmpInst::ICMP_ULT && C2->isAllOnes()) { const SimplifyQuery Q = SQ.getWithInstruction(&Cmp); - if (llvm::isKnownNonZero(X, /*Depth=*/0, Q)) + if (llvm::isKnownNonZero(X, Q)) return new ICmpInst(ICmpInst::ICMP_ULE, X, ConstantInt::get(Ty, C)); } @@ -4275,7 +4275,7 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst::Predicate Pred, Value *Op0, // Look for: x & ~Mask pred ~Mask if (isMaskOrZero(X, /*Not=*/true, Q)) { - return !ICmpInst::isSigned(Pred) || isKnownNonZero(X, /*Depth=*/0, Q); + return !ICmpInst::isSigned(Pred) || isKnownNonZero(X, Q); } return false; } @@ -4779,7 +4779,7 @@ static Instruction *foldICmpXorXX(ICmpInst &I, const SimplifyQuery &Q, // icmp (X ^ Y_NonZero) s>= X --> icmp (X ^ Y_NonZero) s> X // icmp (X ^ Y_NonZero) s<= X --> icmp (X ^ Y_NonZero) s< X CmpInst::Predicate PredOut = CmpInst::getStrictPredicate(Pred); - if (PredOut != Pred && isKnownNonZero(A, /*Depth=*/0, Q)) + if (PredOut != Pred && isKnownNonZero(A, Q)) return new ICmpInst(PredOut, Op0, Op1); return nullptr; @@ -5062,11 +5062,11 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, return new ICmpInst(Pred, C, D); // (A - B) u>=/u< A --> B u>/u<= A iff B != 0 if (A == Op1 && (Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_ULT) && - isKnownNonZero(B, /*Depth=*/0, Q)) + isKnownNonZero(B, Q)) return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), B, A); // C u<=/u> (C - D) --> C u= D iff B != 0 if (C == Op0 && (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) && - isKnownNonZero(D, /*Depth=*/0, Q)) + isKnownNonZero(D, Q)) return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), C, D); // icmp (A-B), (C-B) -> icmp A, C for equalities or if there is no overflow. 
@@ -5108,13 +5108,13 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, // X * Z eq/ne Y * Z -> X eq/ne Y if (ZKnown.countMaxTrailingZeros() == 0) return new ICmpInst(Pred, X, Y); - NonZero = !ZKnown.One.isZero() || isKnownNonZero(Z, /*Depth=*/0, Q); + NonZero = !ZKnown.One.isZero() || isKnownNonZero(Z, Q); // if Z != 0 and nsw(X * Z) and nsw(Y * Z) // X * Z eq/ne Y * Z -> X eq/ne Y if (NonZero && BO0 && BO1 && Op0HasNSW && Op1HasNSW) return new ICmpInst(Pred, X, Y); } else - NonZero = isKnownNonZero(Z, /*Depth=*/0, Q); + NonZero = isKnownNonZero(Z, Q); // If Z != 0 and nuw(X * Z) and nuw(Y * Z) // X * Z u{lt/le/gt/ge}/eq/ne Y * Z -> X u{lt/le/gt/ge}/eq/ne Y diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 48372381a0d1cd..7b86fcde8937ba 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -624,31 +624,38 @@ Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) { Value *Op1 = I.getOperand(1); Value *X, *Y; Constant *C; + BinaryOperator *Op0BinOp; // Reassociate constant RHS with another constant to form constant // expression. - if (match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP()) { + if (match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP() && + match(Op0, m_AllowReassoc(m_BinOp(Op0BinOp)))) { + // Everything in this scope folds I with Op0, intersecting their FMF. 
+ FastMathFlags FMF = I.getFastMathFlags() & Op0BinOp->getFastMathFlags(); + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + Builder.setFastMathFlags(FMF); Constant *C1; if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) { // (C1 / X) * C --> (C * C1) / X Constant *CC1 = ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL); if (CC1 && CC1->isNormalFP()) - return BinaryOperator::CreateFDivFMF(CC1, X, &I); + return BinaryOperator::CreateFDivFMF(CC1, X, FMF); } if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) { + // FIXME: This seems like it should also be checking for arcp // (X / C1) * C --> X * (C / C1) Constant *CDivC1 = ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C1, DL); if (CDivC1 && CDivC1->isNormalFP()) - return BinaryOperator::CreateFMulFMF(X, CDivC1, &I); + return BinaryOperator::CreateFMulFMF(X, CDivC1, FMF); // If the constant was a denormal, try reassociating differently. // (X / C1) * C --> X / (C1 / C) Constant *C1DivC = ConstantFoldBinaryOpOperands(Instruction::FDiv, C1, C, DL); if (C1DivC && Op0->hasOneUse() && C1DivC->isNormalFP()) - return BinaryOperator::CreateFDivFMF(X, C1DivC, &I); + return BinaryOperator::CreateFDivFMF(X, C1DivC, FMF); } // We do not need to match 'fadd C, X' and 'fsub X, C' because they are @@ -658,26 +665,33 @@ Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) { // (X + C1) * C --> (X * C) + (C * C1) if (Constant *CC1 = ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL)) { - Value *XC = Builder.CreateFMulFMF(X, C, &I); - return BinaryOperator::CreateFAddFMF(XC, CC1, &I); + Value *XC = Builder.CreateFMul(X, C); + return BinaryOperator::CreateFAddFMF(XC, CC1, FMF); } } if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) { // (C1 - X) * C --> (C * C1) - (X * C) if (Constant *CC1 = ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL)) { - Value *XC = Builder.CreateFMulFMF(X, C, &I); - return BinaryOperator::CreateFSubFMF(CC1, XC, &I); + Value *XC = 
Builder.CreateFMul(X, C); + return BinaryOperator::CreateFSubFMF(CC1, XC, FMF); } } } Value *Z; if (match(&I, - m_c_FMul(m_OneUse(m_FDiv(m_Value(X), m_Value(Y))), m_Value(Z)))) { - // Sink division: (X / Y) * Z --> (X * Z) / Y - Value *NewFMul = Builder.CreateFMulFMF(X, Z, &I); - return BinaryOperator::CreateFDivFMF(NewFMul, Y, &I); + m_c_FMul(m_AllowReassoc(m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))), + m_Value(Z)))) { + BinaryOperator *DivOp = cast(((Z == Op0) ? Op1 : Op0)); + FastMathFlags FMF = I.getFastMathFlags() & DivOp->getFastMathFlags(); + if (FMF.allowReassoc()) { + // Sink division: (X / Y) * Z --> (X * Z) / Y + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + Builder.setFastMathFlags(FMF); + auto *NewFMul = Builder.CreateFMul(X, Z); + return BinaryOperator::CreateFDivFMF(NewFMul, Y, FMF); + } } // sqrt(X) * sqrt(Y) -> sqrt(X * Y) diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index 9838e2aa9f3a24..52803e9bea451e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -1537,8 +1537,7 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) { Instruction *CtxI = PN.getIncomingBlock(I)->getTerminator(); Value *VA = PN.getIncomingValue(I); - if (isKnownNonZero(VA, 0, - getSimplifyQuery().getWithInstruction(CtxI))) { + if (isKnownNonZero(VA, getSimplifyQuery().getWithInstruction(CtxI))) { if (!NonZeroConst) NonZeroConst = getAnyNonZeroConstInt(PN); if (NonZeroConst != VA) { diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 4c00f2a0ea1761..5a144cc7378962 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1431,7 +1431,7 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign( if 
(OpsKnown[OpNo].hasKnownBits() && OpsKnown[OpNo].getKnownBits(SQ).isNonZero()) return true; - return isKnownNonZero(IntOps[OpNo], /*Depth=*/0, SQ); + return isKnownNonZero(IntOps[OpNo], SQ); }; auto IsNonNeg = [&](unsigned OpNo) -> bool { diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index a72b0ee9a08e01..ee3531bbd68df3 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1281,7 +1281,7 @@ struct MemorySanitizerVisitor : public InstVisitor { // ignored. return; } - if (llvm::isKnownNonZero(ConvertedShadow, /*Depth=*/0, DL)) { + if (llvm::isKnownNonZero(ConvertedShadow, DL)) { // Copy origin as the value is definitely uninitialized. paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize, OriginAlignment); @@ -1427,7 +1427,7 @@ struct MemorySanitizerVisitor : public InstVisitor { // Skip, value is initialized or const shadow is ignored. continue; } - if (llvm::isKnownNonZero(ConvertedShadow, /*Depth=*/0, DL)) { + if (llvm::isKnownNonZero(ConvertedShadow, DL)) { // Report as the value is definitely uninitialized. 
insertWarningFn(IRB, ShadowData.Origin); if (!MS.Recover) diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp index 200bad22148f06..fcc82eadac36cf 100644 --- a/llvm/lib/Transforms/Utils/Debugify.cpp +++ b/llvm/lib/Transforms/Utils/Debugify.cpp @@ -87,10 +87,6 @@ bool llvm::applyDebugifyMetadata( return false; } - bool NewDebugMode = M.IsNewDbgInfoFormat; - if (NewDebugMode) - M.convertFromNewDbgValues(); - DIBuilder DIB(M); LLVMContext &Ctx = M.getContext(); auto *Int32Ty = Type::getInt32Ty(Ctx); @@ -214,9 +210,6 @@ bool llvm::applyDebugifyMetadata( if (!M.getModuleFlag(DIVersionKey)) M.addModuleFlag(Module::Warning, DIVersionKey, DEBUG_METADATA_VERSION); - if (NewDebugMode) - M.convertToNewDbgValues(); - return true; } @@ -311,10 +304,6 @@ bool llvm::collectDebugInfoMetadata(Module &M, return false; } - bool NewDebugMode = M.IsNewDbgInfoFormat; - if (NewDebugMode) - M.convertFromNewDbgValues(); - uint64_t FunctionsCnt = DebugInfoBeforePass.DIFunctions.size(); // Visit each instruction. for (Function &F : Functions) { @@ -349,20 +338,23 @@ bool llvm::collectDebugInfoMetadata(Module &M, // Cllect dbg.values and dbg.declare. if (DebugifyLevel > Level::Locations) { - if (auto *DVI = dyn_cast(&I)) { + auto HandleDbgVariable = [&](auto *DbgVar) { if (!SP) - continue; + return; // Skip inlined variables. - if (I.getDebugLoc().getInlinedAt()) - continue; + if (DbgVar->getDebugLoc().getInlinedAt()) + return; // Skip undef values. - if (DVI->isKillLocation()) - continue; + if (DbgVar->isKillLocation()) + return; - auto *Var = DVI->getVariable(); + auto *Var = DbgVar->getVariable(); DebugInfoBeforePass.DIVariables[Var]++; - continue; - } + }; + for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) + HandleDbgVariable(&DVR); + if (auto *DVI = dyn_cast(&I)) + HandleDbgVariable(DVI); } // Skip debug instructions other than dbg.value and dbg.declare. 
@@ -379,9 +371,6 @@ bool llvm::collectDebugInfoMetadata(Module &M, } } - if (NewDebugMode) - M.convertToNewDbgValues(); - return true; } @@ -561,10 +550,6 @@ bool llvm::checkDebugInfoMetadata(Module &M, return false; } - bool NewDebugMode = M.IsNewDbgInfoFormat; - if (NewDebugMode) - M.convertFromNewDbgValues(); - // Map the debug info holding DIs after a pass. DebugInfoPerPass DebugInfoAfterPass; @@ -599,20 +584,23 @@ bool llvm::checkDebugInfoMetadata(Module &M, // Collect dbg.values and dbg.declares. if (DebugifyLevel > Level::Locations) { - if (auto *DVI = dyn_cast(&I)) { + auto HandleDbgVariable = [&](auto *DbgVar) { if (!SP) - continue; + return; // Skip inlined variables. - if (I.getDebugLoc().getInlinedAt()) - continue; + if (DbgVar->getDebugLoc().getInlinedAt()) + return; // Skip undef values. - if (DVI->isKillLocation()) - continue; + if (DbgVar->isKillLocation()) + return; - auto *Var = DVI->getVariable(); + auto *Var = DbgVar->getVariable(); DebugInfoAfterPass.DIVariables[Var]++; - continue; - } + }; + for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) + HandleDbgVariable(&DVR); + if (auto *DVI = dyn_cast(&I)) + HandleDbgVariable(DVI); } // Skip debug instructions other than dbg.value and dbg.declare. @@ -675,16 +663,14 @@ bool llvm::checkDebugInfoMetadata(Module &M, // the debugging information from the previous pass. DebugInfoBeforePass = DebugInfoAfterPass; - if (NewDebugMode) - M.convertToNewDbgValues(); - LLVM_DEBUG(dbgs() << "\n\n"); return Result; } namespace { -/// Return true if a mis-sized diagnostic is issued for \p DVI. -bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) { +/// Return true if a mis-sized diagnostic is issued for \p DbgVal. +template +bool diagnoseMisSizedDbgValue(Module &M, DbgValTy *DbgVal) { // The size of a dbg.value's value operand should match the size of the // variable it corresponds to. 
// @@ -693,22 +679,22 @@ bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) { // For now, don't try to interpret anything more complicated than an empty // DIExpression. Eventually we should try to handle OP_deref and fragments. - if (DVI->getExpression()->getNumElements()) + if (DbgVal->getExpression()->getNumElements()) return false; - Value *V = DVI->getVariableLocationOp(0); + Value *V = DbgVal->getVariableLocationOp(0); if (!V) return false; Type *Ty = V->getType(); uint64_t ValueOperandSize = getAllocSizeInBits(M, Ty); - std::optional DbgVarSize = DVI->getFragmentSizeInBits(); + std::optional DbgVarSize = DbgVal->getFragmentSizeInBits(); if (!ValueOperandSize || !DbgVarSize) return false; bool HasBadSize = false; if (Ty->isIntegerTy()) { - auto Signedness = DVI->getVariable()->getSignedness(); + auto Signedness = DbgVal->getVariable()->getSignedness(); if (Signedness && *Signedness == DIBasicType::Signedness::Signed) HasBadSize = ValueOperandSize < *DbgVarSize; } else { @@ -718,7 +704,7 @@ bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) { if (HasBadSize) { dbg() << "ERROR: dbg.value operand has size " << ValueOperandSize << ", but its variable has size " << *DbgVarSize << ": "; - DVI->print(dbg()); + DbgVal->print(dbg()); dbg() << "\n"; } return HasBadSize; @@ -735,10 +721,6 @@ bool checkDebugifyMetadata(Module &M, return false; } - bool NewDebugMode = M.IsNewDbgInfoFormat; - if (NewDebugMode) - M.convertFromNewDbgValues(); - auto getDebugifyOperand = [&](unsigned Idx) -> unsigned { return mdconst::extract(NMD->getOperand(Idx)->getOperand(0)) ->getZExtValue(); @@ -780,18 +762,23 @@ bool checkDebugifyMetadata(Module &M, } // Find missing variables and mis-sized debug values. 
- for (Instruction &I : instructions(F)) { - auto *DVI = dyn_cast(&I); - if (!DVI) - continue; - + auto CheckForMisSized = [&](auto *DbgVal) { unsigned Var = ~0U; - (void)to_integer(DVI->getVariable()->getName(), Var, 10); + (void)to_integer(DbgVal->getVariable()->getName(), Var, 10); assert(Var <= OriginalNumVars && "Unexpected name for DILocalVariable"); - bool HasBadSize = diagnoseMisSizedDbgValue(M, DVI); + bool HasBadSize = diagnoseMisSizedDbgValue(M, DbgVal); if (!HasBadSize) MissingVars.reset(Var - 1); HasErrors |= HasBadSize; + }; + for (Instruction &I : instructions(F)) { + for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) + if (DVR.isDbgValue() || DVR.isDbgAssign()) + CheckForMisSized(&DVR); + auto *DVI = dyn_cast(&I); + if (!DVI) + continue; + CheckForMisSized(DVI); } } @@ -820,9 +807,6 @@ bool checkDebugifyMetadata(Module &M, if (Strip) Ret = stripDebugifyMetadata(M); - if (NewDebugMode) - M.convertToNewDbgValues(); - return Ret; } @@ -1052,10 +1036,6 @@ FunctionPass *createCheckDebugifyFunctionPass( PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M, ModuleAnalysisManager &) { - bool NewDebugMode = M.IsNewDbgInfoFormat; - if (NewDebugMode) - M.convertFromNewDbgValues(); - if (Mode == DebugifyMode::SyntheticDebugInfo) checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass, "CheckModuleDebugify", Strip, StatsMap); @@ -1065,9 +1045,6 @@ PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M, "CheckModuleDebugify (original debuginfo)", NameOfWrappedPass, OrigDIVerifyBugsReportFilePath); - if (NewDebugMode) - M.convertToNewDbgValues(); - return PreservedAnalyses::all(); } diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index baec51a07fcbfc..a42ef0c4e6ae9e 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -1019,14 +1019,12 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ, const SmallPtrSetImpl &SuccPreds, BasicBlock *&CommonPred) { 
- // When Succ has no phis, BB may be merged into Succ directly. We don't need - // to redirect the predecessors of BB in this case. - if (Succ->phis().empty()) + // There must be phis in BB, otherwise BB will be merged into Succ directly + if (BB->phis().empty() || Succ->phis().empty()) return false; - // BB must have multiple different predecessors, so that at least one of - // predecessors can be redirected to Succ, except the common predecessor. - if (BB->getUniquePredecessor() || pred_empty(BB)) + // BB must have predecessors not shared that can be redirected to Succ + if (!BB->hasNPredecessorsOrMore(2)) return false; // Get single common predecessors of both BB and Succ diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 9d816c5220532c..73c5d636782294 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1034,6 +1034,15 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) { } } +Value *llvm::createAnyOfOp(IRBuilderBase &Builder, Value *StartVal, + RecurKind RK, Value *Left, Value *Right) { + if (auto VTy = dyn_cast(Left->getType())) + StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal); + Value *Cmp = + Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp"); + return Builder.CreateSelect(Cmp, Left, Right, "rdx.select"); +} + Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right) { Type *Ty = Left->getType(); @@ -1142,13 +1151,16 @@ Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src, NewVal = SI->getTrueValue(); } + // Create a splat vector with the new value and compare this to the vector + // we want to reduce. 
+ ElementCount EC = cast(Src->getType())->getElementCount(); + Value *Right = Builder.CreateVectorSplat(EC, InitVal); + Value *Cmp = + Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp"); + // If any predicate is true it means that we want to select the new value. - Value *AnyOf = - Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src; - // The compares in the loop may yield poison, which propagates through the - // bitwise ORs. Freeze it here before the condition is used. - AnyOf = Builder.CreateFreeze(AnyOf); - return Builder.CreateSelect(AnyOf, NewVal, InitVal, "rdx.select"); + Cmp = Builder.CreateOrReduce(Cmp); + return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select"); } Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src, diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index f376b5f7d68d4a..40d0f6b75d69b0 100644 --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -459,7 +459,7 @@ static void convertMetadataToAssumes(LoadInst *LI, Value *Val, // we can only do this if the value is known non-poison. 
if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && LI->getMetadata(LLVMContext::MD_noundef) && - !isKnownNonZero(Val, /*Depth=*/0, SimplifyQuery(DL, DT, AC, LI))) + !isKnownNonZero(Val, SimplifyQuery(DL, DT, AC, LI))) addAssumeNonNull(AC, LI); } diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 7e9e91606fe22d..2e68a9c01898c8 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -305,7 +305,7 @@ static void annotateNonNullAndDereferenceable(CallInst *CI, ArrayRef A if (ConstantInt *LenC = dyn_cast(Size)) { annotateNonNullNoUndefBasedOnAccess(CI, ArgNos); annotateDereferenceableBytes(CI, ArgNos, LenC->getZExtValue()); - } else if (isKnownNonZero(Size, /*Depth=*/0, DL)) { + } else if (isKnownNonZero(Size, DL)) { annotateNonNullNoUndefBasedOnAccess(CI, ArgNos); const APInt *X, *Y; uint64_t DerefMin = 1; @@ -394,7 +394,7 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilderBase &B) { Value *Size = CI->getArgOperand(2); uint64_t Len; annotateNonNullNoUndefBasedOnAccess(CI, 0); - if (isKnownNonZero(Size, /*Depth=*/0, DL)) + if (isKnownNonZero(Size, DL)) annotateNonNullNoUndefBasedOnAccess(CI, 1); // We don't do anything if length is not constant. @@ -613,7 +613,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) { if (Str1P == Str2P) // strncmp(x,x,n) -> 0 return ConstantInt::get(CI->getType(), 0); - if (isKnownNonZero(Size, /*Depth=*/0, DL)) + if (isKnownNonZero(Size, DL)) annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); // Get the length argument if it is constant. 
uint64_t Length; @@ -749,7 +749,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) { Value *LibCallSimplifier::optimizeStrLCpy(CallInst *CI, IRBuilderBase &B) { Value *Size = CI->getArgOperand(2); - if (isKnownNonZero(Size, /*Depth=*/0, DL)) + if (isKnownNonZero(Size, DL)) // Like snprintf, the function stores into the destination only when // the size argument is nonzero. annotateNonNullNoUndefBasedOnAccess(CI, 0); @@ -833,7 +833,7 @@ Value *LibCallSimplifier::optimizeStringNCpy(CallInst *CI, bool RetEnd, Value *Src = CI->getArgOperand(1); Value *Size = CI->getArgOperand(2); - if (isKnownNonZero(Size, /*Depth=*/0, DL)) { + if (isKnownNonZero(Size, DL)) { // Both st{p,r}ncpy(D, S, N) access the source and destination arrays // only when N is nonzero. annotateNonNullNoUndefBasedOnAccess(CI, 0); @@ -926,7 +926,7 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B, Type *CharTy = B.getIntNTy(CharSize); if (isOnlyUsedInZeroEqualityComparison(CI) && - (!Bound || isKnownNonZero(Bound, /*Depth=*/0, DL))) { + (!Bound || isKnownNonZero(Bound, DL))) { // Fold strlen: // strlen(x) != 0 --> *x != 0 // strlen(x) == 0 --> *x == 0 @@ -1047,7 +1047,7 @@ Value *LibCallSimplifier::optimizeStrNLen(CallInst *CI, IRBuilderBase &B) { if (Value *V = optimizeStringLength(CI, B, 8, Bound)) return V; - if (isKnownNonZero(Bound, /*Depth=*/0, DL)) + if (isKnownNonZero(Bound, DL)) annotateNonNullNoUndefBasedOnAccess(CI, 0); return nullptr; } @@ -1291,7 +1291,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) { Value *SrcStr = CI->getArgOperand(0); Value *Size = CI->getArgOperand(2); - if (isKnownNonZero(Size, /*Depth=*/0, DL)) { + if (isKnownNonZero(Size, DL)) { annotateNonNullNoUndefBasedOnAccess(CI, 0); if (isOnlyUsedInEqualityComparison(CI, SrcStr)) return memChrToCharCompare(CI, Size, B, DL); @@ -2976,7 +2976,7 @@ Value *LibCallSimplifier::optimizeStrToInt(CallInst *CI, IRBuilderBase &B, // It would be 
readonly too, except that it still may write to errno. CI->addParamAttr(0, Attribute::NoCapture); EndPtr = nullptr; - } else if (!isKnownNonZero(EndPtr, /*Depth=*/0, DL)) + } else if (!isKnownNonZero(EndPtr, DL)) return nullptr; StringRef Str; @@ -3402,7 +3402,7 @@ Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilderBase &B) { return V; } - if (isKnownNonZero(CI->getOperand(1), /*Depth=*/0, DL)) + if (isKnownNonZero(CI->getOperand(1), DL)) annotateNonNullNoUndefBasedOnAccess(CI, 0); return nullptr; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index ece2a34f180cb4..ebca2d855a4676 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -68,7 +68,9 @@ class VPBuilder { public: VPBuilder() = default; VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); } - VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); } + VPBuilder(VPRecipeBase *InsertPt) { + setInsertPoint(InsertPt->getParent(), InsertPt->getIterator()); + } /// Clear the insertion point: created instructions will not be inserted into /// a block. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5535cc55e93216..44885a95bd1020 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3051,8 +3051,9 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue( } // Create phi nodes to merge from the backedge-taken check block. - PHINode *BCResumeVal = PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val", - LoopScalarPreHeader->getFirstNonPHI()); + PHINode *BCResumeVal = + PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val", + LoopScalarPreHeader->getTerminator()->getIterator()); // Copy original phi DL over to the new one. 
BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc()); @@ -7450,6 +7451,7 @@ static void createAndCollectMergePhiForReduction( auto *PhiR = cast(RedResult->getOperand(0)); const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); + TrackingVH ReductionStartValue = RdxDesc.getRecurrenceStartValue(); Value *FinalValue = State.get(RedResult, VPIteration(State.UF - 1, VPLane::getFirstLane())); auto *ResumePhi = @@ -7474,7 +7476,7 @@ static void createAndCollectMergePhiForReduction( BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming), Incoming); else - BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming); + BCBlockPhi->addIncoming(ReductionStartValue, Incoming); } auto *OrigPhi = cast(PhiR->getUnderlyingValue()); @@ -7767,10 +7769,11 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton( // Now, compare the remaining count and if there aren't enough iterations to // execute the vectorized epilogue skip to the scalar part. - LoopVectorPreHeader->setName("vec.epilog.ph"); - BasicBlock *VecEpilogueIterationCountCheck = - SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->begin(), DT, LI, - nullptr, "vec.epilog.iter.check", true); + BasicBlock *VecEpilogueIterationCountCheck = LoopVectorPreHeader; + VecEpilogueIterationCountCheck->setName("vec.epilog.iter.check"); + LoopVectorPreHeader = + SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, + LI, nullptr, "vec.epilog.ph"); emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader, VecEpilogueIterationCountCheck); @@ -8893,10 +8896,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { // A ComputeReductionResult recipe is added to the middle block, also for // in-loop reductions which compute their result in-loop, because generating // the subsequent bc.merge.rdx phi is driven by ComputeReductionResult recipes. 
-// -// Adjust AnyOf reductions; replace the reduction phi for the selected value -// with a boolean reduction phi node to check if the condition is true in any -// iteration. The final value is selected by the final ComputeReductionResult. void LoopVectorizationPlanner::adjustRecipesForReductions( VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder, ElementCount MinVF) { @@ -9071,41 +9070,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( continue; const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - // Adjust AnyOf reductions; replace the reduction phi for the selected value - // with a boolean reduction phi node to check if the condition is true in - // any iteration. The final value is selected by the final - // ComputeReductionResult. - if (RecurrenceDescriptor::isAnyOfRecurrenceKind( - RdxDesc.getRecurrenceKind())) { - auto *Select = cast(*find_if(PhiR->users(), [](VPUser *U) { - return isa(U) || - (isa(U) && - cast(U)->getUnderlyingInstr()->getOpcode() == - Instruction::Select); - })); - VPValue *Cmp = Select->getOperand(0); - // If the compare is checking the reduction PHI node, adjust it to check - // the start value. - if (VPRecipeBase *CmpR = Cmp->getDefiningRecipe()) { - for (unsigned I = 0; I != CmpR->getNumOperands(); ++I) - if (CmpR->getOperand(I) == PhiR) - CmpR->setOperand(I, PhiR->getStartValue()); - } - VPBuilder::InsertPointGuard Guard(Builder); - Builder.setInsertPoint(Select); - - // If the true value of the select is the reduction phi, the new value is - // selected if the negated condition is true in any iteration. - if (Select->getOperand(1) == PhiR) - Cmp = Builder.createNot(Cmp); - VPValue *Or = Builder.createOr(PhiR, Cmp); - Select->getVPSingleValue()->replaceAllUsesWith(Or); - - // Convert the reduction phi to operate on bools. 
- PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse( - OrigLoop->getHeader()->getContext()))); - } - // If tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the beginning of the // dedicated latch block. @@ -9138,9 +9102,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( // then extend the loop exit value to enable InstCombine to evaluate the // entire expression in the smaller type. Type *PhiTy = PhiR->getStartValue()->getLiveInIRValue()->getType(); - if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType() && - !RecurrenceDescriptor::isAnyOfRecurrenceKind( - RdxDesc.getRecurrenceKind())) { + if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!"); Type *RdxTy = RdxDesc.getRecurrenceType(); auto *Trunc = @@ -9722,7 +9684,7 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks, } // The scalar cost should only be 0 when vectorizing with a user specified VF/IC. In those cases, runtime checks should always be generated. - double ScalarC = *VF.ScalarCost.getValue(); + uint64_t ScalarC = *VF.ScalarCost.getValue(); if (ScalarC == 0) return true; @@ -9749,7 +9711,7 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks, // RtC + VecC * (TC / VF) + EpiC < ScalarC * TC // // Now we can compute the minimum required trip count TC as - // (RtC + EpiC) / (ScalarC - (VecC / VF)) < TC + // VF * (RtC + EpiC) / (ScalarC * VF - VecC) < TC // // For now we assume the epilogue cost EpiC = 0 for simplicity. 
Note that // the computations are performed on doubles, not integers and the result @@ -9761,9 +9723,9 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks, AssumedMinimumVscale = *VScale; IntVF *= AssumedMinimumVscale; } - double VecCOverVF = double(*VF.Cost.getValue()) / IntVF; - double RtC = *CheckCost.getValue(); - double MinTC1 = RtC / (ScalarC - VecCOverVF); + uint64_t RtC = *CheckCost.getValue(); + uint64_t Div = ScalarC * IntVF - *VF.Cost.getValue(); + uint64_t MinTC1 = Div == 0 ? 0 : divideCeil(RtC * IntVF, Div); // Second, compute a minimum iteration count so that the cost of the // runtime checks is only a fraction of the total scalar loop cost. This @@ -9772,12 +9734,12 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks, // * TC. To bound the runtime check to be a fraction 1/X of the scalar // cost, compute // RtC < ScalarC * TC * (1 / X) ==> RtC * X / ScalarC < TC - double MinTC2 = RtC * 10 / ScalarC; + uint64_t MinTC2 = divideCeil(RtC * 10, ScalarC); // Now pick the larger minimum. If it is not a multiple of VF and a scalar // epilogue is allowed, choose the next closest multiple of VF. This should // partly compensate for ignoring the epilogue cost. - uint64_t MinTC = std::ceil(std::max(MinTC1, MinTC2)); + uint64_t MinTC = std::max(MinTC1, MinTC2); if (SEL == CM_ScalarEpilogueAllowed) MinTC = alignTo(MinTC, IntVF); VF.MinProfitableTripCount = ElementCount::getFixed(MinTC); @@ -10181,19 +10143,9 @@ bool LoopVectorizePass::processLoop(Loop *L) { Value *ResumeV = nullptr; // TODO: Move setting of resume values to prepareToExecute. 
if (auto *ReductionPhi = dyn_cast(&R)) { - const RecurrenceDescriptor &RdxDesc = - ReductionPhi->getRecurrenceDescriptor(); - RecurKind RK = RdxDesc.getRecurrenceKind(); - ResumeV = ReductionResumeValues.find(&RdxDesc)->second; - if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) { - // VPReductionPHIRecipes for AnyOf reductions expect a boolean as - // start value; compare the final value from the main vector loop - // to the start value. - IRBuilder<> Builder( - cast(ResumeV)->getParent()->getFirstNonPHI()); - ResumeV = Builder.CreateICmpNE(ResumeV, - RdxDesc.getRecurrenceStartValue()); - } + ResumeV = ReductionResumeValues + .find(&ReductionPhi->getRecurrenceDescriptor()) + ->second; } else { // Create induction resume values for both widened pointer and // integer/fp inductions and update the start value of the induction diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index c63b500f546f3b..7694627c3b0430 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1134,6 +1134,7 @@ class BoUpSLP { MustGather.clear(); EntryToLastInstruction.clear(); ExternalUses.clear(); + ExternalUsesAsGEPs.clear(); for (auto &Iter : BlocksSchedules) { BlockScheduling *BS = Iter.second.get(); BS->clear(); @@ -3154,6 +3155,10 @@ class BoUpSLP { /// after vectorization. UserList ExternalUses; + /// A list of GEPs which can be reaplced by scalar GEPs instead of + /// extractelement instructions. + SmallPtrSet ExternalUsesAsGEPs; + /// Values used only by @llvm.assume calls. 
SmallPtrSet EphValues; @@ -5541,6 +5546,7 @@ void BoUpSLP::buildExternalUses( << FoundLane << " from " << *Scalar << ".\n"); ScalarToExtUses.try_emplace(Scalar, ExternalUses.size()); ExternalUses.emplace_back(Scalar, nullptr, FoundLane); + continue; } for (User *U : Scalar->users()) { LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n"); @@ -9925,6 +9931,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { SmallVector DemandedElts; SmallDenseSet UsedInserts; DenseSet> VectorCasts; + std::optional> ValueToExtUses; for (ExternalUser &EU : ExternalUses) { // We only add extract cost once for the same scalar. if (!isa_and_nonnull(EU.User) && @@ -10033,12 +10040,40 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { } } } + // Leave the GEPs as is, they are free in most cases and better to keep them + // as GEPs. + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + if (auto *GEP = dyn_cast(EU.Scalar)) { + if (!ValueToExtUses) { + ValueToExtUses.emplace(); + for_each(enumerate(ExternalUses), [&](const auto &P) { + ValueToExtUses->try_emplace(P.value().Scalar, P.index()); + }); + } + // Can use original GEP, if no operands vectorized or they are marked as + // externally used already. + bool CanBeUsedAsGEP = all_of(GEP->operands(), [&](Value *V) { + if (!getTreeEntry(V)) + return true; + auto It = ValueToExtUses->find(V); + if (It != ValueToExtUses->end()) { + // Replace all uses to avoid compiler crash. + ExternalUses[It->second].User = nullptr; + return true; + } + return false; + }); + if (CanBeUsedAsGEP) { + ExtractCost += TTI->getInstructionCost(GEP, CostKind); + ExternalUsesAsGEPs.insert(EU.Scalar); + continue; + } + } // If we plan to rewrite the tree in a smaller type, we will need to sign // extend the extracted value back to the original type. Here, we account // for the extract and the added cost of the sign extend if needed. 
auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth); - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; auto It = MinBWs.find(getTreeEntry(EU.Scalar)); if (It != MinBWs.end()) { auto *MinTy = IntegerType::get(F->getContext(), It->second.first); @@ -13161,6 +13196,8 @@ Value *BoUpSLP::vectorizeTree( if (Scalar->getType() != Vec->getType()) { Value *Ex = nullptr; Value *ExV = nullptr; + auto *GEP = dyn_cast(Scalar); + bool ReplaceGEP = GEP && ExternalUsesAsGEPs.contains(GEP); auto It = ScalarToEEs.find(Scalar); if (It != ScalarToEEs.end()) { // No need to emit many extracts, just move the only one in the @@ -13186,6 +13223,15 @@ Value *BoUpSLP::vectorizeTree( if (const TreeEntry *ETE = getTreeEntry(V)) V = ETE->VectorizedValue; Ex = Builder.CreateExtractElement(V, ES->getIndexOperand()); + } else if (ReplaceGEP) { + // Leave the GEPs as is, they are free in most cases and better to + // keep them as GEPs. + auto *CloneGEP = GEP->clone(); + CloneGEP->insertBefore(*Builder.GetInsertBlock(), + Builder.GetInsertPoint()); + if (GEP->hasName()) + CloneGEP->takeName(GEP); + Ex = CloneGEP; } else { Ex = Builder.CreateExtractElement(Vec, Lane); } @@ -13224,6 +13270,8 @@ Value *BoUpSLP::vectorizeTree( assert((ExternallyUsedValues.count(Scalar) || any_of(Scalar->users(), [&](llvm::User *U) { + if (ExternalUsesAsGEPs.contains(U)) + return true; TreeEntry *UseEntry = getTreeEntry(U); return UseEntry && (UseEntry->State == TreeEntry::Vectorize || @@ -14639,10 +14687,16 @@ bool BoUpSLP::collectValuesToDemote( assert((ID == Intrinsic::smin || ID == Intrinsic::smax) && "Expected min/max intrinsics only."); unsigned SignBits = OrigBitWidth - BitWidth; + APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth - 1); return SignBits <= ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, nullptr, DT) && + (!isKnownNonNegative(I->getOperand(0), SimplifyQuery(*DL)) || + MaskedValueIsZero(I->getOperand(0), Mask, + SimplifyQuery(*DL))) && SignBits <= 
ComputeNumSignBits(I->getOperand(1), *DL, 0, AC, - nullptr, DT); + nullptr, DT) && + (!isKnownNonNegative(I->getOperand(1), SimplifyQuery(*DL)) || + MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL))); }); }; if (ID != Intrinsic::abs) { @@ -15155,8 +15209,8 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef Stores, Type *ValueTy = StoreTy; if (auto *Trunc = dyn_cast(Store->getValueOperand())) ValueTy = Trunc->getSrcTy(); - unsigned MinVF = TTI->getStoreMinimumVF( - R.getMinVF(DL->getTypeSizeInBits(StoreTy)), StoreTy, ValueTy); + unsigned MinVF = PowerOf2Ceil(TTI->getStoreMinimumVF( + R.getMinVF(DL->getTypeStoreSizeInBits(StoreTy)), StoreTy, ValueTy)); if (MaxVF < MinVF) { LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 625319954e9b7b..9f242a1bee8f6c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -501,8 +501,6 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { // Reduce all of the unrolled parts into a single vector. 
Value *ReducedPartRdx = RdxParts[0]; unsigned Op = RecurrenceDescriptor::getOpcode(RK); - if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) - Op = Instruction::Or; if (PhiR->isOrdered()) { ReducedPartRdx = RdxParts[State.UF - 1]; @@ -515,16 +513,19 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { if (Op != Instruction::ICmp && Op != Instruction::FCmp) ReducedPartRdx = Builder.CreateBinOp( (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx"); - else + else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) { + TrackingVH ReductionStartValue = + RdxDesc.getRecurrenceStartValue(); + ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK, + ReducedPartRdx, RdxPart); + } else ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart); } } // Create the reduction after the loop. Note that inloop reductions create // the target reduction in the loop using a Reduction recipe. - if ((State.VF.isVector() || - RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) && - !PhiR->isInLoop()) { + if (State.VF.isVector() && !PhiR->isInLoop()) { ReducedPartRdx = createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi); // If the reduction can be performed in a smaller type, we need to extend diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index da3a768552fc5e..3f8d4f4fe7d647 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -361,8 +361,8 @@ class VPDef { VPWidenMemoryInstructionSC, VPWidenSC, VPWidenSelectSC, - // START: Phi-like recipes. Need to be kept together. VPBlendSC, + // START: Phi-like recipes. Need to be kept together. VPWidenPHISC, VPPredInstPHISC, // START: SubclassID for recipes that inherit VPHeaderPHIRecipe. 
@@ -376,7 +376,7 @@ class VPDef { VPReductionPHISC, // END: SubclassID for recipes that inherit VPHeaderPHIRecipe // END: Phi-like recipes - VPFirstPHISC = VPBlendSC, + VPFirstPHISC = VPWidenPHISC, VPFirstHeaderPHISC = VPCanonicalIVPHISC, VPLastHeaderPHISC = VPReductionPHISC, VPLastPHISC = VPReductionPHISC, diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index e0e2f50c89adad..4918cee1fa82a3 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -886,7 +886,7 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) { SafeToSpeculate = isSafeToSpeculativelyExecuteWithOpcode( *FunctionalOpcode, &VPI, nullptr, &AC, &DT); if (!SafeToSpeculate && - !isKnownNonZero(EVL, /*Depth=*/0, SimplifyQuery(*DL, &DT, &AC, &VPI))) + !isKnownNonZero(EVL, SimplifyQuery(*DL, &DT, &AC, &VPI))) return false; Value *ScalarVal = diff --git a/llvm/test/CodeGen/AArch64/and-sink.ll b/llvm/test/CodeGen/AArch64/and-sink.ll index a57e9d54f3078e..f298a55dab721e 100644 --- a/llvm/test/CodeGen/AArch64/and-sink.ll +++ b/llvm/test/CodeGen/AArch64/and-sink.ll @@ -11,14 +11,15 @@ define dso_local i32 @and_sink1(i32 %a, i1 %c) { ; CHECK-LABEL: and_sink1: ; CHECK: // %bb.0: -; CHECK-NEXT: tbz w1, #0, .LBB0_2 +; CHECK-NEXT: tbz w1, #0, .LBB0_3 ; CHECK-NEXT: // %bb.1: // %bb0 -; CHECK-NEXT: tst w0, #0x4 ; CHECK-NEXT: adrp x8, A -; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: str wzr, [x8, :lo12:A] +; CHECK-NEXT: tbnz w0, #2, .LBB0_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: .LBB0_3: // %bb2 ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll index dde3e81833a63d..6449c3e11d6672 100644 --- a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ 
b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -13,10 +13,10 @@ define i32 @combine_gt_ge_10() #0 { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:a ; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] -; CHECK-NEXT: ldr w9, [x8] +; CHECK-NEXT: ldr w8, [x8] +; CHECK-NEXT: cmp w8, #10 ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] -; CHECK-NEXT: cmp w9, #10 ; CHECK-NEXT: b.le .LBB0_3 ; CHECK-NEXT: // %bb.1: // %land.lhs.true ; CHECK-NEXT: adrp x9, :got:c @@ -29,17 +29,18 @@ define i32 @combine_gt_ge_10() #0 { ; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_3: // %lor.lhs.false -; CHECK-NEXT: cmp w9, #10 -; CHECK-NEXT: b.lt .LBB0_5 +; CHECK-NEXT: b.lt .LBB0_6 ; CHECK-NEXT: .LBB0_4: // %land.lhs.true3 ; CHECK-NEXT: adrp x9, :got:d ; CHECK-NEXT: ldr x9, [x9, :got_lo12:d] ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: ldr w9, [x9] ; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: b.ne .LBB0_6 +; CHECK-NEXT: // %bb.5: +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB0_5: +; CHECK-NEXT: .LBB0_6: // %if.end ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: @@ -144,10 +145,10 @@ define i32 @combine_lt_ge_5() #0 { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:a ; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] -; CHECK-NEXT: ldr w9, [x8] +; CHECK-NEXT: ldr w8, [x8] +; CHECK-NEXT: cmp w8, #5 ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] -; CHECK-NEXT: cmp w9, #5 ; CHECK-NEXT: b.ge .LBB2_3 ; CHECK-NEXT: // %bb.1: // %land.lhs.true ; CHECK-NEXT: adrp x9, :got:c @@ -160,17 +161,18 @@ define i32 @combine_lt_ge_5() #0 { ; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_3: // %lor.lhs.false -; CHECK-NEXT: cmp w9, #5 -; CHECK-NEXT: b.gt .LBB2_5 +; CHECK-NEXT: b.gt .LBB2_6 ; CHECK-NEXT: .LBB2_4: // %land.lhs.true3 ; CHECK-NEXT: adrp x9, :got:d ; CHECK-NEXT: ldr x9, [x9, :got_lo12:d] ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: ldr w9, [x9] ; CHECK-NEXT: 
cmp w8, w9 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: b.ne .LBB2_6 +; CHECK-NEXT: // %bb.5: +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB2_5: +; CHECK-NEXT: .LBB2_6: // %if.end ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: @@ -497,17 +499,24 @@ define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 { ; CHECK-NEXT: // %bb.3: // %while.cond.while.end_crit_edge ; CHECK-NEXT: ldr w8, [x19] ; CHECK-NEXT: .LBB7_4: // %while.end -; CHECK-NEXT: adrp x9, :got:b -; CHECK-NEXT: adrp x10, :got:d -; CHECK-NEXT: ldr x9, [x9, :got_lo12:b] -; CHECK-NEXT: ldr x10, [x10, :got_lo12:d] -; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: cmp w8, #1 +; CHECK-NEXT: b.gt .LBB7_7 +; CHECK-NEXT: // %bb.5: // %land.lhs.true +; CHECK-NEXT: adrp x8, :got:b +; CHECK-NEXT: adrp x9, :got:d +; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] +; CHECK-NEXT: ldr x9, [x9, :got_lo12:d] +; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: ldr w9, [x9] -; CHECK-NEXT: ldr w10, [x10] -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: ccmp w8, #2, #0, eq -; CHECK-NEXT: mov w8, #123 // =0x7b -; CHECK-NEXT: csel w0, w8, wzr, lt +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB7_7 +; CHECK-NEXT: // %bb.6: +; CHECK-NEXT: mov w0, #123 // =0x7b +; CHECK-NEXT: b .LBB7_8 +; CHECK-NEXT: .LBB7_7: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: .LBB7_8: // %return +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w19 @@ -555,42 +564,52 @@ return: ; preds = %if.end, %land.lhs.t define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 { ; CHECK-LABEL: do_nothing_if_compares_can_not_be_adjusted_to_each_other: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x30, x19, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: adrp x8, :got:a ; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: b.gt .LBB8_4 +; CHECK-NEXT: b.gt .LBB8_3 ; CHECK-NEXT: // %bb.1: // %while.body.preheader -; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: sub w19, w8, #1 ; CHECK-NEXT: .LBB8_2: // %while.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: bl do_something ; CHECK-NEXT: adds w19, w19, #1 ; CHECK-NEXT: b.mi .LBB8_2 -; CHECK-NEXT: // %bb.3: -; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w19 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .LBB8_4: // %while.end +; CHECK-NEXT: .LBB8_3: // %while.end +; CHECK-NEXT: adrp x8, :got:c +; CHECK-NEXT: ldr x8, [x8, :got_lo12:c] +; CHECK-NEXT: ldr w8, [x8] +; CHECK-NEXT: cmn w8, #2 +; CHECK-NEXT: b.lt .LBB8_6 +; CHECK-NEXT: // %bb.4: // %land.lhs.true ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: adrp x9, :got:d -; CHECK-NEXT: adrp x10, :got:c ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] ; CHECK-NEXT: ldr x9, [x9, :got_lo12:d] -; CHECK-NEXT: ldr x10, [x10, :got_lo12:c] ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: ldr w9, [x9] -; CHECK-NEXT: ldr w10, [x10] ; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: mov w8, #-3 // =0xfffffffd -; CHECK-NEXT: ccmp w10, w8, #4, eq -; CHECK-NEXT: mov w8, #123 // =0x7b -; CHECK-NEXT: csel w0, w8, wzr, gt +; CHECK-NEXT: b.ne .LBB8_6 +; CHECK-NEXT: // %bb.5: +; CHECK-NEXT: mov w0, #123 // =0x7b +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: ret +; 
CHECK-NEXT: .LBB8_6: // %if.end +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret entry: %0 = load i32, ptr @a, align 4 @@ -763,14 +782,12 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 { ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: csel x9, x0, xzr, gt ; CHECK-NEXT: str x9, [x1] -; CHECK-NEXT: b.le .LBB11_3 +; CHECK-NEXT: b.le .LBB11_2 ; CHECK-NEXT: // %bb.1: // %lor.lhs.false ; CHECK-NEXT: cmp w8, #2 -; CHECK-NEXT: b.ge .LBB11_5 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB11_3: // %land.lhs.true +; CHECK-NEXT: b.ge .LBB11_4 +; CHECK-NEXT: b .LBB11_6 +; CHECK-NEXT: .LBB11_2: // %land.lhs.true ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: adrp x9, :got:c ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] @@ -778,11 +795,11 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 { ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: ldr w9, [x9] ; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB11_5 -; CHECK-NEXT: // %bb.4: +; CHECK-NEXT: b.ne .LBB11_4 +; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB11_5: // %land.lhs.true3 +; CHECK-NEXT: .LBB11_4: // %land.lhs.true3 ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: adrp x9, :got:d ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] @@ -790,7 +807,12 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 { ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: ldr w9, [x9] ; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: b.ne .LBB11_6 +; CHECK-NEXT: // %bb.5: +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB11_6: // %if.end +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: %0 = load i32, ptr @a, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-libcall.ll b/llvm/test/CodeGen/AMDGPU/memcpy-libcall.ll new file mode 100644 index 
00000000000000..358f42dfe8dd5a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/memcpy-libcall.ll @@ -0,0 +1,2696 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 %s -o - | FileCheck %s + +%struct.S = type { [32 x i32] } + +@shared = addrspace(3) global %struct.S undef, align 4 + +define amdgpu_kernel void @memcpy_p0_p0_minsize(ptr %dest, ptr readonly %src) #0 { +; CHECK-LABEL: memcpy_p0_p0_minsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, s2 +; CHECK-NEXT: v_mov_b32_e32 v1, s3 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] +; CHECK-NEXT: v_mov_b32_e32 v3, s1 +; CHECK-NEXT: v_mov_b32_e32 v2, s0 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:1 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:1 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:2 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:2 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:3 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:3 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:4 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:4 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:5 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:5 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:6 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:6 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:7 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:7 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:8 +; CHECK-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:8 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:9 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:9 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:10 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:10 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:11 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:11 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:12 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:12 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:13 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:13 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:14 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:14 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:15 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:15 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:16 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:17 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:17 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:18 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:18 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:19 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:19 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:20 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:20 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:21 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 
offset:21 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:22 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:22 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:23 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:23 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:24 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:24 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:25 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:25 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:26 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:26 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:27 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:27 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:28 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:28 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:29 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:29 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:30 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:30 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:31 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:31 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:32 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:33 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:33 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:34 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:34 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:35 
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:35 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:36 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:36 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:37 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:37 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:38 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:38 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:39 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:39 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:40 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:40 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:41 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:41 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:42 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:42 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:43 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:43 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:44 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:44 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:45 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:45 +; CHECK-NEXT: flat_load_ubyte v0, v[0:1] offset:46 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v0 offset:46 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 47, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p1_p1_minsize(ptr addrspace(1) %dest, ptr addrspace(1) %src) #0 { +; CHECK-LABEL: 
memcpy_p1_p1_minsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_dwordx2 v[0:1], v4, s[2:3] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:32 +; CHECK-NEXT: global_load_dwordx2 v[0:1], v4, s[2:3] offset:39 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:39 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dest, ptr addrspace(1) %src, i64 47, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p1_p4_minsize(ptr addrspace(1) %global, ptr addrspace(4) %0) #0 { +; CHECK-LABEL: memcpy_p1_p4_minsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:32 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:48 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: 
global_store_dwordx4 v4, v[0:3], s[0:1] offset:64 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:80 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:96 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:112 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) %global, ptr addrspace(4) %0, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p5_p4_minsize(ptr addrspace(5) %local, ptr addrspace(4) %0) #0 { +; CHECK-LABEL: memcpy_p5_p4_minsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3] +; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1] +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; CHECK-NEXT: s_load_dword s2, s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: s_add_u32 s8, s8, s7 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:1 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:2 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:3 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:4 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:5 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:6 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:7 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:8 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:9 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:10 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:11 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:12 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:13 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] 
offset:14 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:15 +; CHECK-NEXT: s_addc_u32 s9, s9, 0 +; CHECK-NEXT: v_mov_b32_e32 v1, s2 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:16 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:17 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:18 +; CHECK-NEXT: s_waitcnt vmcnt(18) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:19 +; CHECK-NEXT: s_waitcnt vmcnt(19) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:1 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:20 +; CHECK-NEXT: s_waitcnt vmcnt(20) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:2 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:21 +; CHECK-NEXT: s_waitcnt vmcnt(21) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:3 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:22 +; CHECK-NEXT: s_waitcnt vmcnt(22) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:4 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:23 +; CHECK-NEXT: s_waitcnt vmcnt(23) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:5 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:24 +; CHECK-NEXT: s_waitcnt vmcnt(24) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:6 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:25 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:7 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:26 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:8 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:27 +; CHECK-NEXT: s_waitcnt vmcnt(27) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:9 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:28 +; CHECK-NEXT: s_waitcnt vmcnt(28) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 
0 offen offset:10 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:29 +; CHECK-NEXT: s_waitcnt vmcnt(29) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:11 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:30 +; CHECK-NEXT: s_waitcnt vmcnt(30) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:12 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:31 +; CHECK-NEXT: s_waitcnt vmcnt(31) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:13 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(32) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:14 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:33 +; CHECK-NEXT: s_waitcnt vmcnt(33) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:15 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:34 +; CHECK-NEXT: s_waitcnt vmcnt(34) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:16 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:35 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:17 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:36 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:18 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:37 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:19 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:38 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:20 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:39 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:21 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:40 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:22 +; CHECK-NEXT: global_load_ubyte 
v5, v0, s[0:1] offset:41 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:23 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:42 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:24 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:43 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:25 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:44 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:26 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:45 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:27 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:46 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:28 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:47 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:29 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:30 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:49 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:31 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:50 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:32 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:51 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:33 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:52 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:34 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:53 +; CHECK-NEXT: s_waitcnt 
vmcnt(36) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:35 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:54 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:36 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:55 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:37 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:56 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:38 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:57 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:39 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:58 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:40 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:59 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:41 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:42 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:61 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:43 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:62 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:44 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:63 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:45 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:46 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:65 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, 
s[8:11], 0 offen offset:47 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:66 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:48 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:67 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:49 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:68 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:50 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:69 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:51 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:70 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:52 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:71 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:53 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:72 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:54 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:73 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:55 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:74 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:56 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:75 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:57 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:76 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:58 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:77 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:59 +; CHECK-NEXT: 
global_load_ubyte v4, v0, s[0:1] offset:78 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:60 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:79 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:61 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:62 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:81 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:63 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:82 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:64 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:83 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:65 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:84 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:66 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:85 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:67 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:86 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:68 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:87 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:69 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:88 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:70 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:71 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:89 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: global_load_ubyte v16, v0, 
s[0:1] offset:90 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:72 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:91 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:73 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:92 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:74 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:93 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:75 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:94 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:76 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:95 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:77 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:78 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:97 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:79 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:98 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:80 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:99 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:81 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:100 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:82 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:101 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:83 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:102 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; 
CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:84 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:103 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:85 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:104 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:86 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:105 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:87 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:106 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:88 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:107 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:89 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:90 +; CHECK-NEXT: s_waitcnt vmcnt(34) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:91 +; CHECK-NEXT: s_waitcnt vmcnt(33) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:92 +; CHECK-NEXT: s_waitcnt vmcnt(32) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:93 +; CHECK-NEXT: s_waitcnt vmcnt(31) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:94 +; CHECK-NEXT: s_waitcnt vmcnt(30) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:95 +; CHECK-NEXT: s_waitcnt vmcnt(29) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(28) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:97 +; CHECK-NEXT: s_waitcnt vmcnt(27) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:98 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:99 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: buffer_store_byte v7, v1, 
s[8:11], 0 offen offset:100 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:108 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:109 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:110 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:111 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:112 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:113 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:114 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:115 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:116 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:117 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:118 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:119 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:101 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:120 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:102 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:121 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:103 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:122 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:104 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:123 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:105 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:124 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:106 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:125 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:107 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:126 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: global_load_ubyte v21, v0, s[0:1] offset:127 +; CHECK-NEXT: 
s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:108 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:109 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:110 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:111 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:113 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:114 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:115 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:116 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:117 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:118 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:119 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:120 +; CHECK-NEXT: s_waitcnt vmcnt(24) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:121 +; CHECK-NEXT: s_waitcnt vmcnt(23) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:122 +; CHECK-NEXT: s_waitcnt vmcnt(22) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:123 +; CHECK-NEXT: s_waitcnt vmcnt(21) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:124 +; CHECK-NEXT: s_waitcnt vmcnt(20) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:125 +; CHECK-NEXT: s_waitcnt vmcnt(19) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:126 +; CHECK-NEXT: s_waitcnt 
vmcnt(19) +; CHECK-NEXT: buffer_store_byte v21, v1, s[8:11], 0 offen offset:127 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %local, ptr addrspace(4) %0, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p0_p5_minsize(ptr %generic, ptr addrspace(5) %src) #0 { +; CHECK-LABEL: memcpy_p0_p5_minsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3] +; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1] +; CHECK-NEXT: s_load_dword s0, s[4:5], 0x8 +; CHECK-NEXT: s_add_u32 s8, s8, s7 +; CHECK-NEXT: s_addc_u32 s9, s9, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v2, s0 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:1 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:2 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:3 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:4 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:5 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:6 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:7 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:8 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:9 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:10 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:11 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:12 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:13 +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:14 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:15 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:16 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:17 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, s0 
+; CHECK-NEXT: v_mov_b32_e32 v1, s1 +; CHECK-NEXT: s_waitcnt vmcnt(17) +; CHECK-NEXT: flat_store_byte v[0:1], v3 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:18 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:1 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:19 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:2 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:20 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:3 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:21 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:4 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:22 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:5 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:23 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:6 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:24 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:7 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:25 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:8 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:26 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:9 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:27 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:10 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:28 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:11 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:29 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:12 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:30 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:13 +; 
CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:31 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:14 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:32 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:15 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:33 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:16 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:34 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:17 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:35 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:18 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:36 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:19 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:37 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:20 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:38 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:21 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:39 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:22 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:40 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:23 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:41 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:24 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:42 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:25 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:43 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:26 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:44 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: 
flat_store_byte v[0:1], v12 offset:27 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:45 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:28 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:46 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:29 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:47 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:30 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:48 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:31 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:49 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:32 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:50 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:33 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:51 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:34 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:52 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:35 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:53 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:36 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:54 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:37 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:55 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:38 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:56 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:39 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:57 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:40 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:58 +; 
CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:41 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:59 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:42 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:60 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:43 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:61 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:44 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:62 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:45 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:63 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:46 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:64 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:47 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:65 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:48 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:66 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:49 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:67 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:50 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:68 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:51 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:69 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:52 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:70 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:53 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:71 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:54 +; CHECK-NEXT: buffer_load_ubyte v3, v2, 
s[8:11], 0 offen offset:72 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:55 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:73 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:56 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:74 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:57 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:75 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:58 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:76 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:59 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:77 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:60 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:78 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:61 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:79 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:62 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:80 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:63 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:81 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:64 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:82 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:65 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:83 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:66 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:84 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:67 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:85 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:68 +; 
CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:86 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:69 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:87 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:70 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:88 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:71 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:89 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:72 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:90 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:73 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:74 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:91 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:92 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:75 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:93 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:76 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:94 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:77 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:95 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:78 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:96 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:79 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:97 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:80 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:98 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:81 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:99 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: 
flat_store_byte v[0:1], v13 offset:82 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:100 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:83 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:101 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:84 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:102 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:85 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:103 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:86 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:104 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:87 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:105 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:88 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:106 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:89 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:107 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:90 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:108 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:91 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:92 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:93 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:94 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:95 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:96 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:97 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:98 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:99 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:100 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:101 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:109 +; CHECK-NEXT: buffer_load_ubyte v5, v2, 
s[8:11], 0 offen offset:110 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:111 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:112 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:113 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:114 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:115 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:116 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:117 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:118 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:119 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:102 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:120 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:103 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:121 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:104 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:122 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:105 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:123 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:106 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:124 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:107 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:125 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:108 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:126 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: buffer_load_ubyte v21, v2, s[8:11], 0 offen offset:127 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:109 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:110 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:111 +; CHECK-NEXT: flat_store_byte 
v[0:1], v7 offset:112 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:113 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:114 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:115 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:116 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:117 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:118 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:119 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:120 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:121 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:122 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:123 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:124 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:125 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:126 +; CHECK-NEXT: flat_store_byte v[0:1], v21 offset:127 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p0.p5.i64(ptr %generic, ptr addrspace(5) %src, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p3_p4_minsize(ptr addrspace(4) %0) #0 { +; CHECK-LABEL: memcpy_p3_p4_minsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v24, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_dwordx4 v[0:3], v24, s[0:1] offset:112 +; CHECK-NEXT: global_load_dwordx4 v[4:7], v24, s[0:1] offset:96 +; CHECK-NEXT: global_load_dwordx4 v[8:11], v24, s[0:1] offset:80 +; CHECK-NEXT: global_load_dwordx4 v[12:15], v24, s[0:1] offset:64 +; CHECK-NEXT: global_load_dwordx4 v[16:19], v24, s[0:1] offset:48 +; CHECK-NEXT: global_load_dwordx4 v[20:23], v24, s[0:1] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: ds_write2_b64 v24, v[0:1], v[2:3] offset0:14 offset1:15 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: ds_write2_b64 v24, v[4:5], v[6:7] offset0:12 offset1:13 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v24, s[0:1] offset:16 +; CHECK-NEXT: global_load_dwordx4 v[4:7], v24, s[0:1] +; CHECK-NEXT: s_waitcnt vmcnt(5) 
+; CHECK-NEXT: ds_write2_b64 v24, v[8:9], v[10:11] offset0:10 offset1:11 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: ds_write2_b64 v24, v[12:13], v[14:15] offset0:8 offset1:9 +; CHECK-NEXT: s_waitcnt vmcnt(3) +; CHECK-NEXT: ds_write2_b64 v24, v[16:17], v[18:19] offset0:6 offset1:7 +; CHECK-NEXT: s_waitcnt vmcnt(2) +; CHECK-NEXT: ds_write2_b64 v24, v[20:21], v[22:23] offset0:4 offset1:5 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: ds_write2_b64 v24, v[0:1], v[2:3] offset0:2 offset1:3 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: ds_write2_b64 v24, v[4:5], v[6:7] offset1:1 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) @shared, ptr addrspace(4) %0, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p0_p3_minsize(ptr %generic) #0 { +; CHECK-LABEL: memcpy_p0_p3_minsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:127 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:126 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:125 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:124 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: v_mov_b32_e32 v1, s1 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:127 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:126 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:123 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:125 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:124 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:122 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:121 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:123 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:120 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:119 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:122 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:121 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:118 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 
offset:120 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:119 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:117 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:116 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:118 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:115 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:114 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:117 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:116 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:113 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:115 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:114 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:112 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:111 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:113 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:110 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:109 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:112 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:111 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:108 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:110 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:109 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:107 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:106 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:108 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:105 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:104 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:107 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:106 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:103 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:105 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:104 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:102 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:101 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:103 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:100 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:99 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:102 +; CHECK-NEXT: 
flat_store_byte v[0:1], v5 offset:101 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:98 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:100 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:99 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:97 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:96 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:98 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:95 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:94 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:97 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:96 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:93 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:95 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:94 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:92 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:91 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:93 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:90 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:89 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:92 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:91 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:88 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:90 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:89 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:87 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:86 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:88 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:85 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:84 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:87 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:86 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:83 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:85 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:84 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:82 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:81 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:83 +; CHECK-NEXT: ds_read_u8 v3, v2 
offset:80 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:79 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:82 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:81 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:78 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:80 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:79 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:77 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:76 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:78 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:75 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:74 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:77 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:76 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:73 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:75 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:74 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:72 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:71 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:73 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:70 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:69 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:72 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:71 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:68 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:70 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:69 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:67 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:66 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:68 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:65 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:64 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:67 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:66 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:63 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:65 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:64 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:62 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:61 +; 
CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:63 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:60 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:59 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:62 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:61 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:58 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:60 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:59 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:57 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:56 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:58 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:55 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:54 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:57 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:56 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:53 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:55 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:54 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:52 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:51 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:53 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:50 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:49 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:52 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:51 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:48 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:50 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:49 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:47 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:46 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:48 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:45 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:44 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:47 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:46 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:43 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:45 +; CHECK-NEXT: 
flat_store_byte v[0:1], v6 offset:44 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:42 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:41 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:43 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:40 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:39 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:42 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:41 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:38 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:40 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:39 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:37 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:36 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:38 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:35 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:34 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:37 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:36 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:33 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:35 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:34 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:32 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:31 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:33 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:30 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:29 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:32 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:31 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:28 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:30 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:29 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:27 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:26 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:28 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:25 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:24 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:27 +; CHECK-NEXT: flat_store_byte v[0:1], v5 
offset:26 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:23 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:25 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:24 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:22 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:21 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:23 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:20 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:19 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:22 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:21 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:18 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:20 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:19 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:16 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:17 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:18 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:8 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:9 +; CHECK-NEXT: ds_read_u8 v7, v2 offset:10 +; CHECK-NEXT: ds_read_u8 v8, v2 offset:11 +; CHECK-NEXT: ds_read_u8 v9, v2 offset:12 +; CHECK-NEXT: ds_read_u8 v10, v2 offset:13 +; CHECK-NEXT: ds_read_u8 v11, v2 offset:14 +; CHECK-NEXT: ds_read_u8 v12, v2 offset:15 +; CHECK-NEXT: ds_read_u8 v13, v2 +; CHECK-NEXT: ds_read_u8 v14, v2 offset:1 +; CHECK-NEXT: ds_read_u8 v15, v2 offset:2 +; CHECK-NEXT: ds_read_u8 v16, v2 offset:3 +; CHECK-NEXT: ds_read_u8 v17, v2 offset:4 +; CHECK-NEXT: ds_read_u8 v18, v2 offset:5 +; CHECK-NEXT: ds_read_u8 v19, v2 offset:6 +; CHECK-NEXT: ds_read_u8 v2, v2 offset:7 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:17 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:16 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:15 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:14 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:13 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:12 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:11 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:10 +; CHECK-NEXT: 
flat_store_byte v[0:1], v6 offset:9 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:8 +; CHECK-NEXT: flat_store_byte v[0:1], v2 offset:7 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:6 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:5 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:4 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:3 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:2 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:1 +; CHECK-NEXT: flat_store_byte v[0:1], v13 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p0.p3.i64(ptr %generic, ptr addrspace(3) @shared, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p0_p0_optsize(ptr %dest, ptr %src) #1 { +; CHECK-LABEL: memcpy_p0_p0_optsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, s2 +; CHECK-NEXT: v_mov_b32_e32 v1, s3 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] +; CHECK-NEXT: v_mov_b32_e32 v3, s1 +; CHECK-NEXT: v_mov_b32_e32 v2, s0 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:1 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:1 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:2 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:2 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:3 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:3 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:4 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:4 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:5 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:5 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:6 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: 
flat_store_byte v[2:3], v4 offset:6 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:7 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:7 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:8 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:8 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:9 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:9 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:10 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:10 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:11 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:11 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:12 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:12 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:13 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:13 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:14 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:14 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:15 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:15 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:16 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:17 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:17 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:18 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:18 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:19 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:19 +; CHECK-NEXT: flat_load_ubyte 
v4, v[0:1] offset:20 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:20 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:21 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:21 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:22 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:22 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:23 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:23 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:24 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:24 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:25 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:25 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:26 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:26 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:27 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:27 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:28 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:28 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:29 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:29 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:30 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:30 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:31 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:31 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:32 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:33 +; CHECK-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:33 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:34 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:34 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:35 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:35 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:36 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:36 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:37 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:37 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:38 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:38 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:39 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:39 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:40 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:40 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:41 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:41 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:42 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:42 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:43 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:43 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:44 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:44 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:45 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:45 +; CHECK-NEXT: flat_load_ubyte v0, v[0:1] offset:46 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v0 offset:46 
+; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 47, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p1_p1_optsize(ptr addrspace(1) %dest, ptr addrspace(1) %src) #1 { +; CHECK-LABEL: memcpy_p1_p1_optsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_dwordx2 v[0:1], v4, s[2:3] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:32 +; CHECK-NEXT: global_load_dwordx2 v[0:1], v4, s[2:3] offset:39 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:39 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dest, ptr addrspace(1) %src, i64 47, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p1_p4_optsize(ptr addrspace(1) %global, ptr addrspace(4) %0) #1 { +; CHECK-LABEL: memcpy_p1_p4_optsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:32 +; CHECK-NEXT: global_load_dwordx4 v[0:3], 
v4, s[2:3] offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:48 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:64 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:80 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:96 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:112 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) %global, ptr addrspace(4) %0, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p5_p4_optsize(ptr addrspace(5) %local, ptr addrspace(4) %0) #1 { +; CHECK-LABEL: memcpy_p5_p4_optsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3] +; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1] +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; CHECK-NEXT: s_load_dword s2, s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: s_add_u32 s8, s8, s7 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:1 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:2 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:3 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:4 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:5 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:6 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:7 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:8 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:9 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:10 
+; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:11 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:12 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:13 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:14 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:15 +; CHECK-NEXT: s_addc_u32 s9, s9, 0 +; CHECK-NEXT: v_mov_b32_e32 v1, s2 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:16 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:17 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:18 +; CHECK-NEXT: s_waitcnt vmcnt(18) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:19 +; CHECK-NEXT: s_waitcnt vmcnt(19) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:1 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:20 +; CHECK-NEXT: s_waitcnt vmcnt(20) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:2 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:21 +; CHECK-NEXT: s_waitcnt vmcnt(21) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:3 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:22 +; CHECK-NEXT: s_waitcnt vmcnt(22) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:4 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:23 +; CHECK-NEXT: s_waitcnt vmcnt(23) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:5 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:24 +; CHECK-NEXT: s_waitcnt vmcnt(24) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:6 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:25 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:7 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:26 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:8 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:27 +; CHECK-NEXT: 
s_waitcnt vmcnt(27) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:9 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:28 +; CHECK-NEXT: s_waitcnt vmcnt(28) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:10 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:29 +; CHECK-NEXT: s_waitcnt vmcnt(29) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:11 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:30 +; CHECK-NEXT: s_waitcnt vmcnt(30) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:12 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:31 +; CHECK-NEXT: s_waitcnt vmcnt(31) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:13 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(32) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:14 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:33 +; CHECK-NEXT: s_waitcnt vmcnt(33) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:15 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:34 +; CHECK-NEXT: s_waitcnt vmcnt(34) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:16 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:35 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:17 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:36 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:18 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:37 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:19 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:38 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:20 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:39 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: 
buffer_store_byte v4, v1, s[8:11], 0 offen offset:21 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:40 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:22 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:41 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:23 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:42 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:24 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:43 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:25 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:44 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:26 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:45 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:27 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:46 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:28 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:47 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:29 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:30 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:49 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:31 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:50 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:32 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:51 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:33 
+; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:52 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:34 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:53 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:35 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:54 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:36 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:55 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:37 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:56 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:38 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:57 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:39 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:58 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:40 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:59 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:41 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:42 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:61 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:43 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:62 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:44 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:63 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:45 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:64 
+; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:46 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:65 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:47 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:66 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:48 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:67 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:49 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:68 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:50 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:69 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:51 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:70 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:52 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:71 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:53 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:72 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:54 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:73 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:55 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:74 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:56 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:75 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:57 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:76 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; 
CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:58 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:77 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:59 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:78 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:60 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:79 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:61 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:62 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:81 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:63 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:82 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:64 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:83 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:65 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:84 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:66 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:85 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:67 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:86 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:68 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:87 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:69 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:88 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen 
offset:70 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:71 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:89 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:90 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:72 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:91 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:73 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:92 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:74 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:93 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:75 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:94 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:76 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:95 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:77 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:78 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:97 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:79 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:98 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:80 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:99 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:81 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:100 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:82 +; CHECK-NEXT: 
global_load_ubyte v8, v0, s[0:1] offset:101 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:83 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:102 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:84 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:103 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:85 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:104 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:86 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:105 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:87 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:106 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:88 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:107 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:89 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:90 +; CHECK-NEXT: s_waitcnt vmcnt(34) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:91 +; CHECK-NEXT: s_waitcnt vmcnt(33) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:92 +; CHECK-NEXT: s_waitcnt vmcnt(32) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:93 +; CHECK-NEXT: s_waitcnt vmcnt(31) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:94 +; CHECK-NEXT: s_waitcnt vmcnt(30) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:95 +; CHECK-NEXT: s_waitcnt vmcnt(29) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(28) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:97 +; CHECK-NEXT: s_waitcnt vmcnt(27) +; 
CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:98 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:99 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:100 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:108 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:109 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:110 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:111 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:112 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:113 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:114 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:115 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:116 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:117 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:118 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:119 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:101 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:120 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:102 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:121 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:103 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:122 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:104 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:123 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:105 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:124 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:106 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:125 +; CHECK-NEXT: s_waitcnt 
vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:107 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:126 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: global_load_ubyte v21, v0, s[0:1] offset:127 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:108 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:109 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:110 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:111 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:113 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:114 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:115 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:116 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:117 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:118 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:119 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:120 +; CHECK-NEXT: s_waitcnt vmcnt(24) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:121 +; CHECK-NEXT: s_waitcnt vmcnt(23) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:122 +; CHECK-NEXT: s_waitcnt vmcnt(22) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:123 +; CHECK-NEXT: s_waitcnt vmcnt(21) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:124 
+; CHECK-NEXT: s_waitcnt vmcnt(20) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:125 +; CHECK-NEXT: s_waitcnt vmcnt(19) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:126 +; CHECK-NEXT: s_waitcnt vmcnt(19) +; CHECK-NEXT: buffer_store_byte v21, v1, s[8:11], 0 offen offset:127 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %local, ptr addrspace(4) %0, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p0_p5_optsize(ptr %generic, ptr addrspace(5) %src) #1 { +; CHECK-LABEL: memcpy_p0_p5_optsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3] +; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1] +; CHECK-NEXT: s_load_dword s0, s[4:5], 0x8 +; CHECK-NEXT: s_add_u32 s8, s8, s7 +; CHECK-NEXT: s_addc_u32 s9, s9, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v2, s0 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:1 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:2 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:3 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:4 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:5 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:6 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:7 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:8 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:9 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:10 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:11 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:12 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:13 +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:14 +; CHECK-NEXT: buffer_load_ubyte v18, v2, 
s[8:11], 0 offen offset:15 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:16 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:17 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: v_mov_b32_e32 v1, s1 +; CHECK-NEXT: s_waitcnt vmcnt(17) +; CHECK-NEXT: flat_store_byte v[0:1], v3 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:18 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:1 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:19 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:2 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:20 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:3 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:21 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:4 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:22 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:5 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:23 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:6 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:24 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:7 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:25 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:8 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:26 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:9 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:27 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:10 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:28 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:11 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen 
offset:29 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:12 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:30 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:13 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:31 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:14 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:32 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:15 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:33 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:16 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:34 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:17 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:35 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:18 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:36 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:19 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:37 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:20 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:38 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:21 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:39 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:22 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:40 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:23 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:41 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:24 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:42 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:25 +; CHECK-NEXT: 
buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:43 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:26 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:44 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:27 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:45 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:28 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:46 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:29 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:47 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:30 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:48 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:31 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:49 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:32 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:50 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:33 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:51 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:34 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:52 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:35 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:53 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:36 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:54 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:37 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:55 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:38 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:56 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte 
v[0:1], v6 offset:39 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:57 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:40 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:58 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:41 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:59 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:42 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:60 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:43 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:61 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:44 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:62 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:45 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:63 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:46 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:64 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:47 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:65 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:48 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:66 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:49 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:67 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:50 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:68 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:51 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:69 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:52 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:70 +; CHECK-NEXT: s_nop 0 +; 
CHECK-NEXT: flat_store_byte v[0:1], v20 offset:53 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:71 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:54 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:72 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:55 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:73 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:56 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:74 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:57 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:75 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:58 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:76 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:59 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:77 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:60 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:78 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:61 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:79 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:62 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:80 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:63 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:81 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:64 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:82 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:65 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:83 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:66 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen 
offset:84 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:67 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:85 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:68 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:86 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:69 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:87 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:70 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:88 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:71 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:89 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:72 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:90 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:73 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:74 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:91 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:92 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:75 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:93 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:76 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:94 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:77 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:95 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:78 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:96 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:79 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:97 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:80 +; CHECK-NEXT: 
buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:98 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:81 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:99 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:82 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:100 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:83 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:101 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:84 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:102 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:85 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:103 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:86 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:104 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:87 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:105 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:88 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:106 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:89 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:107 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:90 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:108 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:91 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:92 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:93 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:94 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:95 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:96 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:97 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:98 +; CHECK-NEXT: flat_store_byte 
v[0:1], v12 offset:99 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:100 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:101 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:109 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:110 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:111 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:112 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:113 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:114 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:115 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:116 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:117 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:118 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:119 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:102 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:120 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:103 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:121 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:104 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:122 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:105 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:123 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:106 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:124 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:107 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:125 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:108 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:126 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: buffer_load_ubyte v21, v2, s[8:11], 0 
offen offset:127 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:109 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:110 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:111 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:112 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:113 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:114 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:115 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:116 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:117 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:118 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:119 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:120 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:121 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:122 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:123 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:124 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:125 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:126 +; CHECK-NEXT: flat_store_byte v[0:1], v21 offset:127 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p0.p5.i64(ptr %generic, ptr addrspace(5) %src, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p3_p4_optsize(ptr addrspace(4) %0) #1 { +; CHECK-LABEL: memcpy_p3_p4_optsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v24, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_dwordx4 v[0:3], v24, s[0:1] offset:112 +; CHECK-NEXT: global_load_dwordx4 v[4:7], v24, s[0:1] offset:96 +; CHECK-NEXT: global_load_dwordx4 v[8:11], v24, s[0:1] offset:80 +; CHECK-NEXT: global_load_dwordx4 v[12:15], v24, s[0:1] offset:64 +; CHECK-NEXT: global_load_dwordx4 v[16:19], v24, s[0:1] offset:48 +; CHECK-NEXT: global_load_dwordx4 v[20:23], v24, s[0:1] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: ds_write2_b64 v24, v[0:1], v[2:3] offset0:14 offset1:15 +; CHECK-NEXT: 
s_waitcnt vmcnt(4) +; CHECK-NEXT: ds_write2_b64 v24, v[4:5], v[6:7] offset0:12 offset1:13 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v24, s[0:1] offset:16 +; CHECK-NEXT: global_load_dwordx4 v[4:7], v24, s[0:1] +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: ds_write2_b64 v24, v[8:9], v[10:11] offset0:10 offset1:11 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: ds_write2_b64 v24, v[12:13], v[14:15] offset0:8 offset1:9 +; CHECK-NEXT: s_waitcnt vmcnt(3) +; CHECK-NEXT: ds_write2_b64 v24, v[16:17], v[18:19] offset0:6 offset1:7 +; CHECK-NEXT: s_waitcnt vmcnt(2) +; CHECK-NEXT: ds_write2_b64 v24, v[20:21], v[22:23] offset0:4 offset1:5 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: ds_write2_b64 v24, v[0:1], v[2:3] offset0:2 offset1:3 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: ds_write2_b64 v24, v[4:5], v[6:7] offset1:1 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) @shared, ptr addrspace(4) %0, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p0_p3_optsize(ptr %generic) #1 { +; CHECK-LABEL: memcpy_p0_p3_optsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:127 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:126 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:125 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:124 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: v_mov_b32_e32 v1, s1 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:127 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:126 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:123 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:125 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:124 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:122 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:121 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:123 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:120 +; CHECK-NEXT: ds_read_u8 v6, 
v2 offset:119 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:122 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:121 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:118 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:120 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:119 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:117 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:116 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:118 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:115 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:114 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:117 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:116 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:113 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:115 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:114 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:112 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:111 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:113 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:110 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:109 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:112 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:111 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:108 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:110 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:109 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:107 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:106 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:108 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:105 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:104 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:107 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:106 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:103 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:105 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:104 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:102 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:101 +; 
CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:103 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:100 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:99 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:102 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:101 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:98 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:100 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:99 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:97 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:96 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:98 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:95 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:94 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:97 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:96 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:93 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:95 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:94 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:92 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:91 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:93 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:90 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:89 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:92 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:91 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:88 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:90 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:89 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:87 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:86 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:88 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:85 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:84 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:87 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:86 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:83 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:85 +; CHECK-NEXT: 
flat_store_byte v[0:1], v6 offset:84 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:82 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:81 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:83 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:80 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:79 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:82 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:81 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:78 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:80 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:79 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:77 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:76 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:78 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:75 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:74 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:77 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:76 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:73 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:75 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:74 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:72 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:71 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:73 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:70 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:69 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:72 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:71 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:68 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:70 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:69 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:67 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:66 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:68 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:65 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:64 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:67 +; CHECK-NEXT: flat_store_byte v[0:1], v5 
offset:66 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:63 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:65 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:64 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:62 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:61 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:63 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:60 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:59 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:62 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:61 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:58 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:60 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:59 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:57 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:56 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:58 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:55 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:54 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:57 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:56 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:53 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:55 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:54 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:52 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:51 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:53 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:50 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:49 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:52 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:51 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:48 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:50 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:49 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:47 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:46 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:48 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:45 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:44 +; 
CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:47 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:46 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:43 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:45 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:44 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:42 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:41 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:43 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:40 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:39 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:42 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:41 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:38 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:40 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:39 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:37 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:36 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:38 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:35 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:34 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:37 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:36 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:33 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:35 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:34 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:32 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:31 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:33 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:30 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:29 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:32 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:31 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:28 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:30 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:29 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:27 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:26 +; CHECK-NEXT: 
flat_store_byte v[0:1], v4 offset:28 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:25 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:24 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:27 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:26 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:23 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:25 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:24 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:22 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:21 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:23 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:20 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:19 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:22 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:21 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:18 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:20 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:19 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:16 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:17 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:18 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:8 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:9 +; CHECK-NEXT: ds_read_u8 v7, v2 offset:10 +; CHECK-NEXT: ds_read_u8 v8, v2 offset:11 +; CHECK-NEXT: ds_read_u8 v9, v2 offset:12 +; CHECK-NEXT: ds_read_u8 v10, v2 offset:13 +; CHECK-NEXT: ds_read_u8 v11, v2 offset:14 +; CHECK-NEXT: ds_read_u8 v12, v2 offset:15 +; CHECK-NEXT: ds_read_u8 v13, v2 +; CHECK-NEXT: ds_read_u8 v14, v2 offset:1 +; CHECK-NEXT: ds_read_u8 v15, v2 offset:2 +; CHECK-NEXT: ds_read_u8 v16, v2 offset:3 +; CHECK-NEXT: ds_read_u8 v17, v2 offset:4 +; CHECK-NEXT: ds_read_u8 v18, v2 offset:5 +; CHECK-NEXT: ds_read_u8 v19, v2 offset:6 +; CHECK-NEXT: ds_read_u8 v2, v2 offset:7 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:17 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:16 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:15 +; CHECK-NEXT: flat_store_byte 
v[0:1], v11 offset:14 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:13 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:12 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:11 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:10 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:9 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:8 +; CHECK-NEXT: flat_store_byte v[0:1], v2 offset:7 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:6 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:5 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:4 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:3 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:2 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:1 +; CHECK-NEXT: flat_store_byte v[0:1], v13 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p0.p3.i64(ptr %generic, ptr addrspace(3) @shared, i64 128, i1 false) + ret void +} + +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #2 + +declare void @llvm.memcpy.p0.p5.i64(ptr noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2 + +declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2 + +declare void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2 + +declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2 + +declare void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2 + +declare void @llvm.memcpy.p0.p3.i64(ptr noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2 + +attributes #0 = { minsize } +attributes #1 = { optsize } +attributes #2 = { nocallback nofree nounwind willreturn 
memory(argmem: readwrite) } diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables-x.mir b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables-x.mir index eaa627966347fb..40ea01189f2cd9 100644 --- a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables-x.mir +++ b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables-x.mir @@ -1,5 +1,6 @@ # REQUIRES: x86-registered-target # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-check-debugify -o - %s 2>&1 | FileCheck %s +# RUN: llc --experimental-debuginfo-iterators=false -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-check-debugify -o - %s 2>&1 | FileCheck %s --- | ; ModuleID = 'check-line-and-variables.mir' source_filename = "check-line-and-variables.c" diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.ll b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.ll index 9033fd2f147c47..56c7cf45705a78 100644 --- a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.ll +++ b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.ll @@ -1,4 +1,5 @@ ; RUN: llc -debugify-check-and-strip-all-safe -o - %s 2>&1 | FileCheck %s +; RUN: llc --experimental-debuginfo-iterators=false -debugify-check-and-strip-all-safe -o - %s 2>&1 | FileCheck %s ; ModuleID = 'main.c' source_filename = "main.c" diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir index 9eb722258b7031..0805a7f4cfc6ce 100644 --- a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir +++ b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir @@ -1,6 +1,8 @@ # REQUIRES: x86-registered-target # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-debugify,dead-mi-elimination,mir-check-debugify -o - %s 2>&1 | FileCheck %s # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-debugify,mir-check-debugify -o - %s 2>&1 | FileCheck %s 
--check-prefix=CHECK-PASS +# RUN: llc --experimental-debuginfo-iterators=false -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-debugify,dead-mi-elimination,mir-check-debugify -o - %s 2>&1 | FileCheck %s +# RUN: llc --experimental-debuginfo-iterators=false -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-debugify,mir-check-debugify -o - %s 2>&1 | FileCheck %s --check-prefix=CHECK-PASS --- | ; ModuleID = 'check-line-and-variables.mir' source_filename = "check-line-and-variables.ll" diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/locations-and-values.mir b/llvm/test/CodeGen/Generic/MIRDebugify/locations-and-values.mir index 59dcff9efd4d54..3035fb8eab3f82 100644 --- a/llvm/test/CodeGen/Generic/MIRDebugify/locations-and-values.mir +++ b/llvm/test/CodeGen/Generic/MIRDebugify/locations-and-values.mir @@ -2,6 +2,10 @@ # RUN: llc -run-pass=mir-debugify -debugify-level=locations -o - %s | FileCheck --check-prefixes=ALL --implicit-check-not=dbg.value %s # RUN: llc -run-pass=mir-debugify,mir-strip-debug,mir-debugify -o - %s | FileCheck --check-prefixes=ALL,VALUE %s # RUN: llc -run-pass=mir-debugify,mir-strip-debug -o - %s | FileCheck --check-prefix=STRIP %s +# RUN: llc --experimental-debuginfo-iterators=false -run-pass=mir-debugify -o - %s | FileCheck --check-prefixes=ALL,VALUE %s +# RUN: llc --experimental-debuginfo-iterators=false -run-pass=mir-debugify -debugify-level=locations -o - %s | FileCheck --check-prefixes=ALL --implicit-check-not=dbg.value %s +# RUN: llc --experimental-debuginfo-iterators=false -run-pass=mir-debugify,mir-strip-debug,mir-debugify -o - %s | FileCheck --check-prefixes=ALL,VALUE %s +# RUN: llc --experimental-debuginfo-iterators=false -run-pass=mir-debugify,mir-strip-debug -o - %s | FileCheck --check-prefix=STRIP %s --- | ; ModuleID = 'loc-only.ll' diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/multifunction-module.mir b/llvm/test/CodeGen/Generic/MIRDebugify/multifunction-module.mir index fe4fcc1a15bb82..8079db926e1b05 100644 --- 
a/llvm/test/CodeGen/Generic/MIRDebugify/multifunction-module.mir +++ b/llvm/test/CodeGen/Generic/MIRDebugify/multifunction-module.mir @@ -1,6 +1,5 @@ -# FIXME: Remove rm after a few weeks. -# RUN: rm -f %S/multifunction-module.s # RUN: llc -run-pass=mir-debugify,mir-check-debugify -o - %s 2>&1 | FileCheck %s +# RUN: llc --experimental-debuginfo-iterators=false -run-pass=mir-debugify,mir-check-debugify -o - %s 2>&1 | FileCheck %s # CHECK: Machine IR debug info check: PASS # CHECK-NOT: Assertion `Var <= NumVars && "Unexpected name for DILocalVariable"' diff --git a/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll b/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll index c33c81841be65e..dddc4bd953d7ac 100644 --- a/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll +++ b/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll @@ -10,13 +10,12 @@ define i32 @fred(ptr %a0) #0 { ; CHECK-LABEL: fred: ; CHECK: // %bb.0: // %b0 ; CHECK-NEXT: { -; CHECK-NEXT: r1:0 = combine(r0,#0) -; CHECK-NEXT: if (p0) jumpr r31 +; CHECK-NEXT: if (p0) jump:nt .LBB0_2 ; CHECK-NEXT: } -; CHECK-NEXT: .LBB0_1: // %b2 +; CHECK-NEXT: // %bb.1: // %b2 ; CHECK-NEXT: { ; CHECK-NEXT: r3:2 = combine(#0,#0) -; CHECK-NEXT: r1:0 = memd(r1+#0) +; CHECK-NEXT: r1:0 = memd(r0+#0) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: p0 = vcmph.eq(r1:0,r3:2) @@ -28,7 +27,16 @@ define i32 @fred(ptr %a0) #0 { ; CHECK-NEXT: r0 = and(r0,#1) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: r0 = !cmp.eq(r0,#11) +; CHECK-NEXT: p0 = cmp.eq(r0,#11) +; CHECK-NEXT: r0 = #1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) r0 = #0 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +; CHECK-NEXT: .LBB0_2: // %b14 +; CHECK-NEXT: { +; CHECK-NEXT: r0 = #0 ; CHECK-NEXT: jumpr r31 ; CHECK-NEXT: } b0: diff --git a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll index 10103f071462c5..48fa69e1045656 100644 --- a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll +++ b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll @@ -551,9 +551,8 @@ define i64 
@add_mul_combine_infinite_loop(i64 %x) { ; RV32IMB-NEXT: sh3add a1, a1, a2 ; RV32IMB-NEXT: sh1add a0, a0, a0 ; RV32IMB-NEXT: slli a2, a0, 3 -; RV32IMB-NEXT: li a3, 1 -; RV32IMB-NEXT: slli a3, a3, 11 -; RV32IMB-NEXT: sh3add a0, a0, a3 +; RV32IMB-NEXT: addi a0, a2, 2047 +; RV32IMB-NEXT: addi a0, a0, 1 ; RV32IMB-NEXT: sltu a2, a0, a2 ; RV32IMB-NEXT: add a1, a1, a2 ; RV32IMB-NEXT: ret @@ -562,8 +561,8 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) { ; RV64IMB: # %bb.0: ; RV64IMB-NEXT: addi a0, a0, 86 ; RV64IMB-NEXT: sh1add a0, a0, a0 -; RV64IMB-NEXT: slli a0, a0, 3 -; RV64IMB-NEXT: addi a0, a0, -16 +; RV64IMB-NEXT: li a1, -16 +; RV64IMB-NEXT: sh3add a0, a0, a1 ; RV64IMB-NEXT: ret %tmp0 = mul i64 %x, 24 %tmp1 = add i64 %tmp0, 2048 diff --git a/llvm/test/CodeGen/RISCV/memcpy-inline.ll b/llvm/test/CodeGen/RISCV/memcpy-inline.ll index 343695ee37da84..833e07351eec77 100644 --- a/llvm/test/CodeGen/RISCV/memcpy-inline.ll +++ b/llvm/test/CodeGen/RISCV/memcpy-inline.ll @@ -3,9 +3,9 @@ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32 ; RUN: llc < %s -mtriple=riscv64 \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64 -; RUN: llc < %s -mtriple=riscv32 -mattr=+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem \ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST -; RUN: llc < %s -mtriple=riscv64 -mattr=+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+unaligned-scalar-mem \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST ; ---------------------------------------------------------------------- diff --git a/llvm/test/CodeGen/RISCV/memcpy.ll b/llvm/test/CodeGen/RISCV/memcpy.ll index 12ec0881b20d9f..02f582339d0b78 100644 --- a/llvm/test/CodeGen/RISCV/memcpy.ll +++ b/llvm/test/CodeGen/RISCV/memcpy.ll @@ -3,9 +3,9 @@ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32 ; RUN: llc < %s -mtriple=riscv64 \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64 -; RUN: llc < %s -mtriple=riscv32 
-mattr=+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem \ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST -; RUN: llc < %s -mtriple=riscv64 -mattr=+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+unaligned-scalar-mem \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } diff --git a/llvm/test/CodeGen/RISCV/memset-inline.ll b/llvm/test/CodeGen/RISCV/memset-inline.ll index cc22b77c641e27..55fe81a58805ed 100644 --- a/llvm/test/CodeGen/RISCV/memset-inline.ll +++ b/llvm/test/CodeGen/RISCV/memset-inline.ll @@ -3,9 +3,9 @@ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32 ; RUN: llc < %s -mtriple=riscv64 -mattr=+m \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64 -; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+unaligned-scalar-mem \ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+unaligned-scalar-mem \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll index af341dbaadeabd..364e8c7b38dacc 100644 --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -465,6 +465,192 @@ define i32 @mulhu_constant(i32 %a) nounwind { ret i32 %4 } +define i32 @muli32_p14(i32 %a) nounwind { +; RV32I-LABEL: muli32_p14: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 14 +; RV32I-NEXT: tail __mulsi3 +; +; RV32IM-LABEL: muli32_p14: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a1, 14 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: muli32_p14: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 14 +; RV64I-NEXT: call 
__muldi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: muli32_p14: +; RV64IM: # %bb.0: +; RV64IM-NEXT: li a1, 14 +; RV64IM-NEXT: mulw a0, a0, a1 +; RV64IM-NEXT: ret + %1 = mul i32 %a, 14 + ret i32 %1 +} + +define i32 @muli32_p28(i32 %a) nounwind { +; RV32I-LABEL: muli32_p28: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 28 +; RV32I-NEXT: tail __mulsi3 +; +; RV32IM-LABEL: muli32_p28: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a1, 28 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: muli32_p28: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 28 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: muli32_p28: +; RV64IM: # %bb.0: +; RV64IM-NEXT: li a1, 28 +; RV64IM-NEXT: mulw a0, a0, a1 +; RV64IM-NEXT: ret + %1 = mul i32 %a, 28 + ret i32 %1 +} + +define i32 @muli32_p30(i32 %a) nounwind { +; RV32I-LABEL: muli32_p30: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 30 +; RV32I-NEXT: tail __mulsi3 +; +; RV32IM-LABEL: muli32_p30: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a1, 30 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: muli32_p30: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 30 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: muli32_p30: +; RV64IM: # %bb.0: +; RV64IM-NEXT: li a1, 30 +; RV64IM-NEXT: mulw a0, a0, a1 +; RV64IM-NEXT: ret + %1 = mul i32 %a, 30 + ret i32 %1 +} + +define i32 @muli32_p56(i32 %a) nounwind { +; RV32I-LABEL: muli32_p56: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 56 +; RV32I-NEXT: tail __mulsi3 +; +; RV32IM-LABEL: muli32_p56: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a1, 56 +; RV32IM-NEXT: mul a0, a0, a1 +; 
RV32IM-NEXT: ret +; +; RV64I-LABEL: muli32_p56: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 56 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: muli32_p56: +; RV64IM: # %bb.0: +; RV64IM-NEXT: li a1, 56 +; RV64IM-NEXT: mulw a0, a0, a1 +; RV64IM-NEXT: ret + %1 = mul i32 %a, 56 + ret i32 %1 +} + +define i32 @muli32_p60(i32 %a) nounwind { +; RV32I-LABEL: muli32_p60: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 60 +; RV32I-NEXT: tail __mulsi3 +; +; RV32IM-LABEL: muli32_p60: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a1, 60 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: muli32_p60: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 60 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: muli32_p60: +; RV64IM: # %bb.0: +; RV64IM-NEXT: li a1, 60 +; RV64IM-NEXT: mulw a0, a0, a1 +; RV64IM-NEXT: ret + %1 = mul i32 %a, 60 + ret i32 %1 +} + +define i32 @muli32_p62(i32 %a) nounwind { +; RV32I-LABEL: muli32_p62: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 62 +; RV32I-NEXT: tail __mulsi3 +; +; RV32IM-LABEL: muli32_p62: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a1, 62 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: muli32_p62: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 62 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: muli32_p62: +; RV64IM: # %bb.0: +; RV64IM-NEXT: li a1, 62 +; RV64IM-NEXT: mulw a0, a0, a1 +; RV64IM-NEXT: ret + %1 = mul i32 %a, 62 + ret i32 %1 +} + define i32 @muli32_p65(i32 %a) nounwind { ; RV32I-LABEL: 
muli32_p65: ; RV32I: # %bb.0: @@ -600,6 +786,8 @@ define i64 @muli64_p63(i64 %a) nounwind { ret i64 %1 } + + define i32 @muli32_m63(i32 %a) nounwind { ; RV32I-LABEL: muli32_m63: ; RV32I: # %bb.0: @@ -1145,10 +1333,10 @@ define i128 @muli128_m3840(i128 %a) nounwind { ; RV32I-NEXT: sltu a7, a6, a4 ; RV32I-NEXT: sub t0, t1, t0 ; RV32I-NEXT: mv t1, a7 -; RV32I-NEXT: beq a5, a3, .LBB30_2 +; RV32I-NEXT: beq a5, a3, .LBB36_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu t1, a5, a3 -; RV32I-NEXT: .LBB30_2: +; RV32I-NEXT: .LBB36_2: ; RV32I-NEXT: sub a2, a2, a1 ; RV32I-NEXT: sltu a1, a2, t1 ; RV32I-NEXT: sub a1, t0, a1 @@ -1261,10 +1449,10 @@ define i128 @muli128_m63(i128 %a) nounwind { ; RV32I-NEXT: slli t0, a1, 6 ; RV32I-NEXT: or a7, t0, a7 ; RV32I-NEXT: mv t0, a5 -; RV32I-NEXT: beq a1, a7, .LBB31_2 +; RV32I-NEXT: beq a1, a7, .LBB37_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu t0, a1, a7 -; RV32I-NEXT: .LBB31_2: +; RV32I-NEXT: .LBB37_2: ; RV32I-NEXT: srli t1, a1, 26 ; RV32I-NEXT: slli t2, a6, 6 ; RV32I-NEXT: or t1, t2, t1 diff --git a/llvm/test/CodeGen/RISCV/pr56110.ll b/llvm/test/CodeGen/RISCV/pr56110.ll index c795b17419f564..fa441f5fc3aef4 100644 --- a/llvm/test/CodeGen/RISCV/pr56110.ll +++ b/llvm/test/CodeGen/RISCV/pr56110.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=riscv32 | FileCheck %s -; RUN: llc < %s -mtriple=riscv32 -mattr=+fast-unaligned-access | FileCheck %s +; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem | FileCheck %s define void @foo_set(ptr nocapture noundef %a, i32 noundef %v) { ; CHECK-LABEL: foo_set: diff --git a/llvm/test/CodeGen/RISCV/riscv-func-target-feature.ll b/llvm/test/CodeGen/RISCV/riscv-func-target-feature.ll index a03dadbc1d1160..d627ae9c90394e 100644 --- a/llvm/test/CodeGen/RISCV/riscv-func-target-feature.ll +++ b/llvm/test/CodeGen/RISCV/riscv-func-target-feature.ll @@ -36,7 +36,7 @@ entry: } ; CHECK-NOT: .option push -define void @test5() 
"target-features"="+fast-unaligned-access" { +define void @test5() "target-features"="+unaligned-scalar-mem" { ; CHECK-LABEL: test5 ; CHECK-NOT: .option pop entry: diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll index 6f56babf28f5ec..1450c86c76d05f 100644 --- a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll +++ b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll @@ -268,6 +268,23 @@ define i64 @mul96(i64 %a) { ret i64 %c } +define i64 @mul137(i64 %a) { +; RV64I-LABEL: mul137: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 137 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: mul137: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a1, a0, a0, 3 +; RV64XTHEADBA-NEXT: slli a0, a0, 7 +; RV64XTHEADBA-NEXT: add a0, a0, a1 +; RV64XTHEADBA-NEXT: ret + %c = mul i64 %a, 137 + ret i64 %c +} + define i64 @mul160(i64 %a) { ; RV64I-LABEL: mul160: ; RV64I: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index 0d1d4838c61133..b4c80b60e0bad5 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -567,6 +567,87 @@ define i64 @mul96(i64 %a) { ret i64 %c } +define i64 @mul119(i64 %a) { +; CHECK-LABEL: mul119: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 119 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: ret + %c = mul i64 %a, 119 + ret i64 %c +} + +define i64 @mul123(i64 %a) { +; CHECK-LABEL: mul123: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 123 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: ret + %c = mul i64 %a, 123 + ret i64 %c +} + +define i64 @mul125(i64 %a) { +; CHECK-LABEL: mul125: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 125 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: ret + %c = mul i64 %a, 125 + ret i64 %c +} + +define i64 @mul131(i64 %a) { +; RV64I-LABEL: mul131: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 131 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: mul131: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh1add a1, a0, a0 +; 
RV64ZBA-NEXT: slli a0, a0, 7 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: ret + %c = mul i64 %a, 131 + ret i64 %c +} + +define i64 @mul133(i64 %a) { +; RV64I-LABEL: mul133: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 133 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: mul133: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh2add a1, a0, a0 +; RV64ZBA-NEXT: slli a0, a0, 7 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: ret + %c = mul i64 %a, 133 + ret i64 %c +} + +define i64 @mul137(i64 %a) { +; RV64I-LABEL: mul137: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 137 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: mul137: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh3add a1, a0, a0 +; RV64ZBA-NEXT: slli a0, a0, 7 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: ret + %c = mul i64 %a, 137 + ret i64 %c +} + define i64 @mul160(i64 %a) { ; RV64I-LABEL: mul160: ; RV64I: # %bb.0: @@ -2430,3 +2511,25 @@ define ptr @test_gep_gep_dont_crash(ptr %p, i64 %a1, i64 %a2) { %gep2 = getelementptr i64, ptr %gep1, i64 %a1 ret ptr %gep2 } + +define i64 @regression(i32 signext %x, i32 signext %y) { +; RV64I-LABEL: regression: +; RV64I: # %bb.0: +; RV64I-NEXT: subw a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: li a1, 3 +; RV64I-NEXT: slli a1, a1, 35 +; RV64I-NEXT: mulhu a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: regression: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: subw a0, a0, a1 +; RV64ZBA-NEXT: slli.uw a0, a0, 3 +; RV64ZBA-NEXT: sh1add a0, a0, a0 +; RV64ZBA-NEXT: ret + %sub = sub i32 %x, %y + %ext = zext i32 %sub to i64 + %res = mul nuw nsw i64 %ext, 24 + ret i64 %res +} diff --git a/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll b/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll new file mode 100644 index 00000000000000..c99388cbdaf441 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s \ 
+; RUN: | FileCheck -check-prefix=OMIT-FP %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs -frame-pointer=all < %s \ +; RUN: | FileCheck -check-prefix=NO-OMIT-FP %s + +define riscv_vector_cc @test_vector_callee_cfi( %va) { +; OMIT-FP-LABEL: test_vector_callee_cfi: +; OMIT-FP: # %bb.0: # %entry +; OMIT-FP-NEXT: addi sp, sp, -16 +; OMIT-FP-NEXT: .cfi_def_cfa_offset 16 +; OMIT-FP-NEXT: csrr a0, vlenb +; OMIT-FP-NEXT: slli a0, a0, 3 +; OMIT-FP-NEXT: sub sp, sp, a0 +; OMIT-FP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; OMIT-FP-NEXT: csrr a0, vlenb +; OMIT-FP-NEXT: li a1, 6 +; OMIT-FP-NEXT: mul a0, a0, a1 +; OMIT-FP-NEXT: add a0, sp, a0 +; OMIT-FP-NEXT: addi a0, a0, 16 +; OMIT-FP-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; OMIT-FP-NEXT: csrr a0, vlenb +; OMIT-FP-NEXT: slli a0, a0, 2 +; OMIT-FP-NEXT: add a0, sp, a0 +; OMIT-FP-NEXT: addi a0, a0, 16 +; OMIT-FP-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill +; OMIT-FP-NEXT: addi a0, sp, 16 +; OMIT-FP-NEXT: vs4r.v v4, (a0) # Unknown-size Folded Spill +; OMIT-FP-NEXT: .cfi_escape 0x10, 0x61, 0x08, 0x11, 0x7e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v1 @ cfa - 2 * vlenb +; OMIT-FP-NEXT: .cfi_escape 0x10, 0x62, 0x08, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2m2 @ cfa - 4 * vlenb +; OMIT-FP-NEXT: .cfi_escape 0x10, 0x64, 0x08, 0x11, 0x78, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4m4 @ cfa - 8 * vlenb +; OMIT-FP-NEXT: #APP +; OMIT-FP-NEXT: #NO_APP +; OMIT-FP-NEXT: csrr a0, vlenb +; OMIT-FP-NEXT: li a1, 6 +; OMIT-FP-NEXT: mul a0, a0, a1 +; OMIT-FP-NEXT: add a0, sp, a0 +; OMIT-FP-NEXT: addi a0, a0, 16 +; OMIT-FP-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; OMIT-FP-NEXT: csrr a0, vlenb +; OMIT-FP-NEXT: slli a0, a0, 2 +; OMIT-FP-NEXT: add a0, sp, a0 +; OMIT-FP-NEXT: addi a0, a0, 16 +; OMIT-FP-NEXT: vl2r.v v2, (a0) # Unknown-size Folded Reload +; OMIT-FP-NEXT: addi a0, sp, 16 +; OMIT-FP-NEXT: vl4r.v v4, 
(a0) # Unknown-size Folded Reload +; OMIT-FP-NEXT: csrr a0, vlenb +; OMIT-FP-NEXT: slli a0, a0, 3 +; OMIT-FP-NEXT: add sp, sp, a0 +; OMIT-FP-NEXT: addi sp, sp, 16 +; OMIT-FP-NEXT: ret +; +; NO-OMIT-FP-LABEL: test_vector_callee_cfi: +; NO-OMIT-FP: # %bb.0: # %entry +; NO-OMIT-FP-NEXT: addi sp, sp, -32 +; NO-OMIT-FP-NEXT: .cfi_def_cfa_offset 32 +; NO-OMIT-FP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; NO-OMIT-FP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; NO-OMIT-FP-NEXT: .cfi_offset ra, -8 +; NO-OMIT-FP-NEXT: .cfi_offset s0, -16 +; NO-OMIT-FP-NEXT: addi s0, sp, 32 +; NO-OMIT-FP-NEXT: .cfi_def_cfa s0, 0 +; NO-OMIT-FP-NEXT: csrr a0, vlenb +; NO-OMIT-FP-NEXT: slli a0, a0, 3 +; NO-OMIT-FP-NEXT: sub sp, sp, a0 +; NO-OMIT-FP-NEXT: csrr a0, vlenb +; NO-OMIT-FP-NEXT: slli a0, a0, 1 +; NO-OMIT-FP-NEXT: sub a0, s0, a0 +; NO-OMIT-FP-NEXT: addi a0, a0, -32 +; NO-OMIT-FP-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; NO-OMIT-FP-NEXT: csrr a0, vlenb +; NO-OMIT-FP-NEXT: slli a0, a0, 2 +; NO-OMIT-FP-NEXT: sub a0, s0, a0 +; NO-OMIT-FP-NEXT: addi a0, a0, -32 +; NO-OMIT-FP-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill +; NO-OMIT-FP-NEXT: csrr a0, vlenb +; NO-OMIT-FP-NEXT: slli a0, a0, 3 +; NO-OMIT-FP-NEXT: sub a0, s0, a0 +; NO-OMIT-FP-NEXT: addi a0, a0, -32 +; NO-OMIT-FP-NEXT: vs4r.v v4, (a0) # Unknown-size Folded Spill +; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x61, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v1 @ cfa - 32 - 2 * vlenb +; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x62, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2m2 @ cfa - 32 - 4 * vlenb +; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x64, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4m4 @ cfa - 32 - 8 * vlenb +; NO-OMIT-FP-NEXT: #APP +; NO-OMIT-FP-NEXT: #NO_APP +; NO-OMIT-FP-NEXT: csrr a0, vlenb +; NO-OMIT-FP-NEXT: slli a0, a0, 1 +; NO-OMIT-FP-NEXT: sub a0, s0, a0 +; NO-OMIT-FP-NEXT: addi a0, a0, -32 +; NO-OMIT-FP-NEXT: vl1r.v v1, 
(a0) # Unknown-size Folded Reload +; NO-OMIT-FP-NEXT: csrr a0, vlenb +; NO-OMIT-FP-NEXT: slli a0, a0, 2 +; NO-OMIT-FP-NEXT: sub a0, s0, a0 +; NO-OMIT-FP-NEXT: addi a0, a0, -32 +; NO-OMIT-FP-NEXT: vl2r.v v2, (a0) # Unknown-size Folded Reload +; NO-OMIT-FP-NEXT: csrr a0, vlenb +; NO-OMIT-FP-NEXT: slli a0, a0, 3 +; NO-OMIT-FP-NEXT: sub a0, s0, a0 +; NO-OMIT-FP-NEXT: addi a0, a0, -32 +; NO-OMIT-FP-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload +; NO-OMIT-FP-NEXT: addi sp, s0, -32 +; NO-OMIT-FP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; NO-OMIT-FP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; NO-OMIT-FP-NEXT: addi sp, sp, 32 +; NO-OMIT-FP-NEXT: ret +entry: + call void asm sideeffect "", + "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() + + ret %va +} diff --git a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll index f244810e739d93..ff35043dbd7e75 100644 --- a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll +++ b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+fast-unaligned-access -target-abi=ilp32 \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+unaligned-vector-mem -target-abi=ilp32 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v,+fast-unaligned-access -target-abi=lp64 \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+unaligned-vector-mem -target-abi=lp64 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define void @constant_forward_stride(ptr %s, ptr %d) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index 1d3c22a02efc0f..ab6df1d3e883fd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -628,6 +628,7 @@ define void @insert_v2i64_nxv16i64_hi(ptr %psv, ptr %out) { ; RV32-NEXT: vs8r.v v8, (a0) ; RV32-NEXT: vs8r.v v16, (a1) ; RV32-NEXT: addi sp, s0, -80 +; RV32-NEXT: .cfi_def_cfa sp, 80 ; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 80 @@ -661,6 +662,7 @@ define void @insert_v2i64_nxv16i64_hi(ptr %psv, ptr %out) { ; RV64-NEXT: vs8r.v v8, (a0) ; RV64-NEXT: vs8r.v v16, (a1) ; RV64-NEXT: addi sp, s0, -80 +; RV64-NEXT: .cfi_def_cfa sp, 80 ; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 80 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll index 657d52354aa39f..f0fcc482e2207e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,CHECK-NO-MISALIGN,RV32 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,CHECK-NO-MISALIGN,RV64 -; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+fast-unaligned-access -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64,RV64-MISALIGN +; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+unaligned-vector-mem -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64,RV64-MISALIGN ; RUN: llc -mtriple=riscv64 -mattr=+f,+zfh,+zve64f,+zvl128b,+zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,CHECK-NO-MISALIGN,ZVE64F diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll index fffc4d6c08335c..36c36a13964c92 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll @@ -3,9 +3,9 @@ ; RUN: | FileCheck %s --check-prefixes=SLOW,RV32-SLOW ; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=SLOW,RV64-SLOW -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+fast-unaligned-access -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+unaligned-vector-mem -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=FAST,RV32-FAST -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+fast-unaligned-access -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+unaligned-vector-mem -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=FAST,RV64-FAST define <4 x i32> @load_v4i32_align1(ptr %ptr) { diff --git a/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll index 485f94ee2a1026..53598c609107b0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll @@ -3,9 +3,9 @@ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64 -; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem \ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST -; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST ; ---------------------------------------------------------------------- diff --git a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll index 
0e7e914cf68e8a..accc18519d6260 100644 --- a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll @@ -3,9 +3,9 @@ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32 ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64 -; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+unaligned-scalar-mem,+unaligned-vector-mem \ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+unaligned-scalar-mem,+unaligned-vector-mem \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } diff --git a/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll b/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll index f488baf5a9d9fe..1491bb6c337a02 100644 --- a/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll +++ b/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll @@ -3,9 +3,9 @@ ; RUN: -verify-machineinstrs | FileCheck %s ; RUN: llc -mtriple riscv64 -mattr=+d,+zfh,+zvfh,+v < %s \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: llc -mtriple riscv32 -mattr=+d,+zfh,+zvfh,+v,+fast-unaligned-access < %s \ +; RUN: llc -mtriple riscv32 -mattr=+d,+zfh,+zvfh,+v,+unaligned-vector-mem < %s \ ; RUN: -verify-machineinstrs | FileCheck --check-prefix=FAST %s -; RUN: llc -mtriple riscv64 -mattr=+d,+zfh,+zvfh,+v,+fast-unaligned-access < %s \ +; RUN: llc -mtriple riscv64 -mattr=+d,+zfh,+zvfh,+v,+unaligned-vector-mem < %s \ ; RUN: -verify-machineinstrs | FileCheck --check-prefix=FAST %s diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll index 599b0d08629eaf..ce0d8fedbfb88f 100644 --- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll +++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll @@ 
-3,9 +3,9 @@ ; RUN: | FileCheck -check-prefixes=ALL,SLOW,RV32I %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=ALL,SLOW,RV64I %s -; RUN: llc -mtriple=riscv32 -mattr=+fast-unaligned-access -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=ALL,FAST,RV32I-FAST %s -; RUN: llc -mtriple=riscv64 -mattr=+fast-unaligned-access -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=ALL,FAST,RV64I-FAST %s ; A collection of cases showing codegen for unaligned loads and stores diff --git a/llvm/test/CodeGen/X86/code-model-elf-text-sections.ll b/llvm/test/CodeGen/X86/code-model-elf-text-sections.ll index 016c9a4d7b8390..66a6fd37675427 100644 --- a/llvm/test/CodeGen/X86/code-model-elf-text-sections.ll +++ b/llvm/test/CodeGen/X86/code-model-elf-text-sections.ll @@ -13,9 +13,20 @@ ; RUN: llvm-readelf -S %t | FileCheck %s --check-prefix=LARGE-DS ; SMALL: .text {{.*}} AX {{.*}} +; SMALL: .ltext {{.*}} AXl {{.*}} +; SMALL: .ltext.2 {{.*}} AXl {{.*}} +; SMALL: .foo {{.*}} AX {{.*}} ; SMALL-DS: .text.func {{.*}} AX {{.*}} +; SMALL-DS: .ltext {{.*}} AXl {{.*}} +; SMALL-DS: .ltext.2 {{.*}} AXl {{.*}} +; SMALL-DS: .foo {{.*}} AX {{.*}} ; LARGE: .ltext {{.*}} AXl {{.*}} +; LARGE: .ltext.2 {{.*}} AXl {{.*}} +; LARGE: .foo {{.*}} AX {{.*}} ; LARGE-DS: .ltext.func {{.*}} AXl {{.*}} +; LARGE-DS: .ltext {{.*}} AXl {{.*}} +; LARGE-DS: .ltext.2 {{.*}} AXl {{.*}} +; LARGE-DS: .foo {{.*}} AX {{.*}} target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64--linux" @@ -23,3 +34,15 @@ target triple = "x86_64--linux" define void @func() { ret void } + +define void @ltext() section ".ltext" { + ret void +} + +define void @ltext2() section ".ltext.2" { + ret void +} + +define void @foo() section ".foo" { + ret void +} diff --git 
a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll index 0c76c14afb0aee..4859a8e0eaaa51 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll @@ -305,6 +305,37 @@ define <4 x float> @combine_vpermilvar_4f32_as_insertps(<4 x float> %a0) { ret <4 x float> %2 } +define <8 x i32> @combine_blend_of_permutes_v8i32(<4 x i64> %a0, <4 x i64> %a1) { +; AVX1-LABEL: combine_blend_of_permutes_v8i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1] +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4],ymm0[5,6],ymm1[7] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX2-LABEL: combine_blend_of_permutes_v8i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1] +; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,3,0,1] +; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4],ymm0[5,6],ymm1[7] +; AVX2-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: combine_blend_of_permutes_v8i32: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512-NEXT: vpmovsxbd {{.*#+}} ymm2 = [4,21,6,23,16,1,2,19] +; AVX512-NEXT: vpermt2d %zmm1, %zmm2, %zmm0 +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512-NEXT: ret{{[l|q]}} + %s0 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> + %s1 = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> + %x0 = bitcast <4 x i64> %s0 to <8 x i32> + %x1 = bitcast <4 x i64> %s1 to <8 x i32> + %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <8 x i32> + ret <8 x i32> %r +} + define <2 x double> @constant_fold_vpermilvar_pd() { ; CHECK-LABEL: constant_fold_vpermilvar_pd: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll index 
f53b1eeaf8f54b..e87e810971e119 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll @@ -973,3 +973,47 @@ define <8 x i64> @combine_vpermvar_insertion_as_broadcast_v8i64(i64 %a0) { %2 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %1, <8 x i64> zeroinitializer) ret <8 x i64> %2 } + +define <16 x i32> @blend_of_permutes_v16i32(<8 x i64> %a0, <8x i64> %a1) { +; X86-AVX512F-LABEL: blend_of_permutes_v16i32: +; X86-AVX512F: # %bb.0: +; X86-AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5] +; X86-AVX512F-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5] +; X86-AVX512F-NEXT: movw $-25958, %ax # imm = 0x9A9A +; X86-AVX512F-NEXT: kmovw %eax, %k1 +; X86-AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} +; X86-AVX512F-NEXT: retl +; +; X86-AVX512BW-LABEL: blend_of_permutes_v16i32: +; X86-AVX512BW: # %bb.0: +; X86-AVX512BW-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5] +; X86-AVX512BW-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5] +; X86-AVX512BW-NEXT: movw $-25958, %ax # imm = 0x9A9A +; X86-AVX512BW-NEXT: kmovd %eax, %k1 +; X86-AVX512BW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} +; X86-AVX512BW-NEXT: retl +; +; X64-AVX512F-LABEL: blend_of_permutes_v16i32: +; X64-AVX512F: # %bb.0: +; X64-AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5] +; X64-AVX512F-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5] +; X64-AVX512F-NEXT: movw $-25958, %ax # imm = 0x9A9A +; X64-AVX512F-NEXT: kmovw %eax, %k1 +; X64-AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} +; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: blend_of_permutes_v16i32: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5] +; X64-AVX512BW-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5] +; X64-AVX512BW-NEXT: movw $-25958, %ax # imm = 0x9A9A +; X64-AVX512BW-NEXT: kmovd %eax, %k1 +; X64-AVX512BW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} +; X64-AVX512BW-NEXT: retq + %s0 = shufflevector <8 x i64> 
%a0, <8 x i64> undef, <8 x i32> + %s1 = shufflevector <8 x i64> %a1, <8 x i64> undef, <8 x i32> + %x0 = bitcast <8 x i64> %s0 to <16 x i32> + %x1 = bitcast <8 x i64> %s1 to <16 x i32> + %r = shufflevector <16 x i32> %x0, <16 x i32> %x1, <16 x i32> + ret <16 x i32> %r +} diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll index 5eb017bc80ca58..33851f56fe8de5 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll @@ -22,6 +22,21 @@ define <16 x i8> @combine_vpshufb_as_movzx(<16 x i8> %a0) { ret <16 x i8> %res0 } +define <4 x i32> @combine_blend_of_permutes_v4i32(<2 x i64> %a0, <2 x i64> %a1) { +; SSE-LABEL: combine_blend_of_permutes_v4i32: +; SSE: # %bb.0: +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] +; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7] +; SSE-NEXT: retq + %s0 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> + %s1 = shufflevector <2 x i64> %a1, <2 x i64> undef, <2 x i32> + %x0 = bitcast <2 x i64> %s0 to <4 x i32> + %x1 = bitcast <2 x i64> %s1 to <4 x i32> + %r = shufflevector <4 x i32> %x0, <4 x i32> %x1, <4 x i32> + ret <4 x i32> %r +} + define <16 x i8> @PR50049(ptr %p1, ptr %p2) { ; SSE-LABEL: PR50049: ; SSE: # %bb.0: diff --git a/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll b/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll index 4990979f10c53c..55e436b1a93b22 100644 --- a/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll +++ b/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll @@ -1,4 +1,5 @@ ; RUN: opt -passes=check-debugify < %s 2>&1 | FileCheck %s +; RUN: opt --experimental-debuginfo-iterators=false -passes=check-debugify < %s 2>&1 | FileCheck %s define <2 x i64> @test-fun(<2 x i64> %A) !dbg !6 { %and = and <2 x i64> %A, , !dbg !14 diff --git a/llvm/test/DebugInfo/debugify-each.ll 
b/llvm/test/DebugInfo/debugify-each.ll index e9241dedb69600..7685b57b5dd15f 100644 --- a/llvm/test/DebugInfo/debugify-each.ll +++ b/llvm/test/DebugInfo/debugify-each.ll @@ -40,6 +40,40 @@ ; RUN: opt -debugify-each -passes=globalopt -S -o /dev/null < %s 2> %t ; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS-ONE +; Repeat the same checks with debug intrinsics enabled. +; RUN: opt --experimental-debuginfo-iterators=false -debugify-each -O3 -S -o /dev/null < %s 2> %t +; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS +; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS +; RUN: opt --experimental-debuginfo-iterators=false -disable-output -debugify-each -passes='default' %s 2> %t +; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS +; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS + +; RUN: opt --experimental-debuginfo-iterators=false -enable-debugify -debugify-each -O3 -S -o /dev/null < %s 2> %t +; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS +; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS + +; RUN: opt --experimental-debuginfo-iterators=false -debugify-each -passes='instrprof,instrprof,sroa,sccp' -S -o /dev/null < %s 2> %t +; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS +; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS + +; RUN: opt --experimental-debuginfo-iterators=false -debugify-each -O1 < %s | opt -O2 -o /dev/null + +; RUN: opt --experimental-debuginfo-iterators=false -disable-output -debugify-quiet -debugify-each -O1 < %s 2>&1 | count 0 + +; RUN: opt --experimental-debuginfo-iterators=false -O1 < %s -S -o %t.before +; RUN: opt --experimental-debuginfo-iterators=false -O1 -debugify-each < %s -S -o %t.after +; RUN: diff %t.before %t.after + +; RUN: opt --experimental-debuginfo-iterators=false -O1 < %s | llvm-dis -o %t.before +; RUN: opt --experimental-debuginfo-iterators=false -O1 -debugify-each < %s | llvm-dis -o %t.after +; RUN: diff %t.before 
%t.after + +; RUN: opt --experimental-debuginfo-iterators=false -debugify-each -passes=instsimplify -S -o /dev/null < %s 2> %t +; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS-ONE + +; RUN: opt --experimental-debuginfo-iterators=false -debugify-each -passes=globalopt -S -o /dev/null < %s 2> %t +; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS-ONE + define void @foo(i32 %arg) { call i32 asm "bswap $0", "=r,r"(i32 %arg) ret void diff --git a/llvm/test/DebugInfo/debugify-export.ll b/llvm/test/DebugInfo/debugify-export.ll index 6e5952d433da9b..30333ca908b0d2 100644 --- a/llvm/test/DebugInfo/debugify-export.ll +++ b/llvm/test/DebugInfo/debugify-export.ll @@ -1,6 +1,9 @@ ; RUN: opt %s -disable-output -debugify-each -debugify-quiet -debugify-export - -passes=globalopt | FileCheck %s ; RUN: opt %s -disable-output -debugify-each -debugify-quiet -debugify-export - -passes=globalopt | FileCheck %s +; RUN: opt --experimental-debuginfo-iterators=false %s -disable-output -debugify-each -debugify-quiet -debugify-export - -passes=globalopt | FileCheck %s +; RUN: opt --experimental-debuginfo-iterators=false %s -disable-output -debugify-each -debugify-quiet -debugify-export - -passes=globalopt | FileCheck %s + ; CHECK: Pass Name ; CHECK-SAME: # of missing debug values ; CHECK-SAME: # of missing locations diff --git a/llvm/test/DebugInfo/debugify-ignore-phi.ll b/llvm/test/DebugInfo/debugify-ignore-phi.ll index 322ccafa22ac81..643df1d9604852 100644 --- a/llvm/test/DebugInfo/debugify-ignore-phi.ll +++ b/llvm/test/DebugInfo/debugify-ignore-phi.ll @@ -1,4 +1,5 @@ ; RUN: opt -passes=check-debugify < %s -S 2>&1 | FileCheck %s +; RUN: opt --experimental-debuginfo-iterators=false -passes=check-debugify < %s -S 2>&1 | FileCheck %s define void @test_phi(i1 %cond) !dbg !6 { br i1 %cond, label %1, label %2, !dbg !11 diff --git a/llvm/test/DebugInfo/debugify-original-no-dbg-info.ll b/llvm/test/DebugInfo/debugify-original-no-dbg-info.ll index 
941b294fb85567..4cbbfc5c215e28 100644 --- a/llvm/test/DebugInfo/debugify-original-no-dbg-info.ll +++ b/llvm/test/DebugInfo/debugify-original-no-dbg-info.ll @@ -1,4 +1,5 @@ ; RUN: opt -verify-debuginfo-preserve -passes=instcombine -S -o - < %s 2>&1 | FileCheck %s +; RUN: opt --experimental-debuginfo-iterators=false -verify-debuginfo-preserve -passes=instcombine -S -o - < %s 2>&1 | FileCheck %s ; CHECK: ModuleDebugify (original debuginfo): Skipping module without debug info ; CHECK-NEXT: CheckModuleDebugify (original debuginfo): Skipping module without debug info diff --git a/llvm/test/DebugInfo/debugify-report-missing-locs-only.ll b/llvm/test/DebugInfo/debugify-report-missing-locs-only.ll index 1c5daa19c64841..04b7636f025a07 100644 --- a/llvm/test/DebugInfo/debugify-report-missing-locs-only.ll +++ b/llvm/test/DebugInfo/debugify-report-missing-locs-only.ll @@ -1,4 +1,5 @@ ; RUN: opt -passes=check-debugify < %s -S -o - 2>&1 | FileCheck %s -implicit-check-not "WARNING: Instruction with empty DebugLoc in function bar" +; RUN: opt --experimental-debuginfo-iterators=false -passes=check-debugify < %s -S -o - 2>&1 | FileCheck %s -implicit-check-not "WARNING: Instruction with empty DebugLoc in function bar" ; CHECK: WARNING: Instruction with empty DebugLoc in function foo -- ret void define void @foo() !dbg !6 { diff --git a/llvm/test/DebugInfo/debugify.ll b/llvm/test/DebugInfo/debugify.ll index 5ce6795d41b6bf..191015f8259339 100644 --- a/llvm/test/DebugInfo/debugify.ll +++ b/llvm/test/DebugInfo/debugify.ll @@ -25,6 +25,33 @@ ; RUN: opt -enable-debugify -O1 < %s | opt -O2 -o /dev/null ; RUN: opt -passes=debugify,mem2reg,check-debugify < %s | opt -O2 -o /dev/null +;; Perform the same checks again for intrinsic debug info +; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify -S -o - < %s | FileCheck %s +; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify -S -o - < %s | FileCheck %s + +; RUN: opt --experimental-debuginfo-iterators=false 
-passes=debugify,debugify -S -o - < %s 2>&1 | \ +; RUN: FileCheck %s -check-prefix=CHECK-REPEAT +; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,debugify -S -o - < %s 2>&1 | \ +; RUN: FileCheck %s -check-prefix=CHECK-REPEAT + +; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,check-debugify -S -o - < %s | \ +; RUN: FileCheck %s -implicit-check-not="CheckModuleDebugify: FAIL" +; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,check-debugify -S -o - < %s | \ +; RUN: FileCheck %s -implicit-check-not="CheckModuleDebugify: FAIL" +; RUN: opt --experimental-debuginfo-iterators=false -enable-debugify -passes=verify -S -o - < %s | \ +; RUN: FileCheck %s -implicit-check-not="CheckModuleDebugify: FAIL" + +; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,strip,check-debugify -S -o - < %s 2>&1 | \ +; RUN: FileCheck %s -check-prefix=CHECK-WARN + +; RUN: opt --experimental-debuginfo-iterators=false -enable-debugify -passes=strip -S -o - < %s 2>&1 | \ +; RUN: FileCheck %s -check-prefix=CHECK-WARN + +; RUN: opt --experimental-debuginfo-iterators=false -enable-debugify -S -o - < %s 2>&1 | FileCheck %s -check-prefix=PASS + +; RUN: opt --experimental-debuginfo-iterators=false -enable-debugify -O1 < %s | opt -O2 -o /dev/null +; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,mem2reg,check-debugify < %s | opt -O2 -o /dev/null + ; CHECK-LABEL: define void @foo define void @foo() { ; CHECK: ret void, !dbg ![[RET1:.*]] diff --git a/llvm/test/DebugInfo/pr37964.ll b/llvm/test/DebugInfo/pr37964.ll index 9581f1a6b35dc5..63db67d2bd37f7 100644 --- a/llvm/test/DebugInfo/pr37964.ll +++ b/llvm/test/DebugInfo/pr37964.ll @@ -1,4 +1,5 @@ ; RUN: opt -disable-output -debugify-each -passes=gvn < %s 2>&1 | FileCheck %s +; RUN: opt --experimental-debuginfo-iterators=false -disable-output -debugify-each -passes=gvn < %s 2>&1 | FileCheck %s ; CHECK-NOT: ERROR: Instruction with empty DebugLoc in function _Z3bazv 
-- {{%.*}} = phi ; CHECK: CheckFunctionDebugify [GVNPass]: PASS diff --git a/llvm/test/DebugInfo/salvage-cast-debug-info.ll b/llvm/test/DebugInfo/salvage-cast-debug-info.ll index 4676aee3d4e480..b72f717a4f2de7 100644 --- a/llvm/test/DebugInfo/salvage-cast-debug-info.ll +++ b/llvm/test/DebugInfo/salvage-cast-debug-info.ll @@ -1,5 +1,5 @@ ; RUN: opt %s -passes=debugify,early-cse -earlycse-debug-hash -S | FileCheck %s -; RUN: opt %s -passes=debugify,early-cse -earlycse-debug-hash -S --try-experimental-debuginfo-iterators | FileCheck %s +; RUN: opt --experimental-debuginfo-iterators=false %s -passes=debugify,early-cse -earlycse-debug-hash -S | FileCheck %s define i32 @foo(i64 %nose, i32 %more) { ; CHECK-LABEL: @foo( ; CHECK: call void @llvm.dbg.value(metadata i64 %nose, metadata [[V1:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned diff --git a/llvm/test/DebugInfo/verify-di-preserve.ll b/llvm/test/DebugInfo/verify-di-preserve.ll index a2f1b1dd78dc5a..92fc62a0b34c47 100644 --- a/llvm/test/DebugInfo/verify-di-preserve.ll +++ b/llvm/test/DebugInfo/verify-di-preserve.ll @@ -1,10 +1,10 @@ ; RUN: opt %s -verify-debuginfo-preserve -passes=instcombine -disable-output 2>&1 | FileCheck --check-prefix=VERIFY %s -; RUN: opt --try-experimental-debuginfo-iterators %s -verify-debuginfo-preserve -passes=instcombine -disable-output 2>&1 | FileCheck --check-prefix=VERIFY %s +; RUN: opt --experimental-debuginfo-iterators=false %s -verify-debuginfo-preserve -passes=instcombine -disable-output 2>&1 | FileCheck --check-prefix=VERIFY %s ; VERIFY: CheckModuleDebugify (original debuginfo): ; RUN: opt %s -verify-each-debuginfo-preserve -O2 -disable-output 2>&1 | FileCheck --check-prefix=VERIFY-EACH %s -; RUN: opt %s --try-experimental-debuginfo-iterators -verify-each-debuginfo-preserve -O2 -disable-output 2>&1 | FileCheck --check-prefix=VERIFY-EACH %s +; RUN: opt %s --experimental-debuginfo-iterators=false 
-verify-each-debuginfo-preserve -O2 -disable-output 2>&1 | FileCheck --check-prefix=VERIFY-EACH %s ; VERIFY-EACH: DeadArgumentEliminationPass ; VERIFY-EACH: GlobalDCEPass diff --git a/llvm/test/TableGen/def-multiple-operands.td b/llvm/test/TableGen/def-multiple-operands.td new file mode 100644 index 00000000000000..b747c58907505a --- /dev/null +++ b/llvm/test/TableGen/def-multiple-operands.td @@ -0,0 +1,37 @@ +// RUN: llvm-tblgen -gen-instr-info -I %p/../../include %s | FileCheck %s + +include "llvm/Target/Target.td" + +def archInstrInfo : InstrInfo {} + +def arch : Target { + let InstructionSet = archInstrInfo; +} + +def R0 : Register<"r0">; +def P0 : Register<"p0">; +def R32 : RegisterClass<"MyNS", [i32], 0, (add R0)>; +def P1 : RegisterClass<"MyNS", [i1], 0, (add P0)>; + +def Reg3Opnd : Operand { + let MIOperandInfo = (ops R32, R32, P1); +} + +// The following checks verify that 'MCInstrDesc' entry for 'InstA' has the +// expected 'NumOperands' and 'NumDefs', i.e. 'InstA' should have 3 defs out of +// 4 operands. + +// CHECK: archInstrTable {{.* = \{}} +// CHECK: {{\{}} +// CHECK: {{\{}} [[ID:[0-9]+]], 4, 3, 13, {{.+\}, \/\/}} +// CHECK-SAME: Inst #[[ID]] = InstA +def InstA : Instruction { + let Namespace = "MyNS"; + let Size = 13; + // InstA should have 3 defs out of 4 operands. 
+ let OutOperandList = (outs Reg3Opnd:$dst); + let InOperandList = (ins i32imm:$c); + field bits<8> Inst; + field bits<8> SoftFail = 0; + let hasSideEffects = false; +} diff --git a/llvm/test/Transforms/InstCombine/add-sitofp.ll b/llvm/test/Transforms/InstCombine/add-sitofp.ll index 2bdc808d9771c4..f1afcaf5f85d2a 100644 --- a/llvm/test/Transforms/InstCombine/add-sitofp.ll +++ b/llvm/test/Transforms/InstCombine/add-sitofp.ll @@ -6,7 +6,7 @@ define double @x(i32 %a, i32 %b) { ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[A:%.*]], 24 ; CHECK-NEXT: [[N:%.*]] = and i32 [[M]], [[B:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[N]], 1 -; CHECK-NEXT: [[P:%.*]] = uitofp i32 [[TMP1]] to double +; CHECK-NEXT: [[P:%.*]] = uitofp nneg i32 [[TMP1]] to double ; CHECK-NEXT: ret double [[P]] ; %m = lshr i32 %a, 24 @@ -20,7 +20,7 @@ define double @test(i32 %a) { ; CHECK-LABEL: @test( ; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[A_AND]], 1 -; CHECK-NEXT: [[RES:%.*]] = uitofp i32 [[TMP1]] to double +; CHECK-NEXT: [[RES:%.*]] = uitofp nneg i32 [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; ; Drop two highest bits to guarantee that %a + 1 doesn't overflow @@ -33,7 +33,7 @@ define double @test(i32 %a) { define float @test_neg(i32 %a) { ; CHECK-LABEL: @test_neg( ; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 -; CHECK-NEXT: [[A_AND_FP:%.*]] = sitofp i32 [[A_AND]] to float +; CHECK-NEXT: [[A_AND_FP:%.*]] = uitofp nneg i32 [[A_AND]] to float ; CHECK-NEXT: [[RES:%.*]] = fadd float [[A_AND_FP]], 1.000000e+00 ; CHECK-NEXT: ret float [[RES]] ; @@ -49,7 +49,7 @@ define double @test_2(i32 %a, i32 %b) { ; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 ; CHECK-NEXT: [[B_AND:%.*]] = and i32 [[B:%.*]], 1073741823 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[A_AND]], [[B_AND]] -; CHECK-NEXT: [[RES:%.*]] = uitofp i32 [[TMP1]] to double +; CHECK-NEXT: [[RES:%.*]] = uitofp nneg i32 [[TMP1]] to double ; CHECK-NEXT: 
ret double [[RES]] ; ; Drop two highest bits to guarantee that %a + %b doesn't overflow @@ -67,8 +67,8 @@ define float @test_2_neg(i32 %a, i32 %b) { ; CHECK-LABEL: @test_2_neg( ; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 ; CHECK-NEXT: [[B_AND:%.*]] = and i32 [[B:%.*]], 1073741823 -; CHECK-NEXT: [[A_AND_FP:%.*]] = sitofp i32 [[A_AND]] to float -; CHECK-NEXT: [[B_AND_FP:%.*]] = sitofp i32 [[B_AND]] to float +; CHECK-NEXT: [[A_AND_FP:%.*]] = uitofp nneg i32 [[A_AND]] to float +; CHECK-NEXT: [[B_AND_FP:%.*]] = uitofp nneg i32 [[B_AND]] to float ; CHECK-NEXT: [[RES:%.*]] = fadd float [[A_AND_FP]], [[B_AND_FP]] ; CHECK-NEXT: ret float [[RES]] ; @@ -89,7 +89,7 @@ define float @test_3(i32 %a, i32 %b) { ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[A:%.*]], 24 ; CHECK-NEXT: [[N:%.*]] = and i32 [[M]], [[B:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[N]], 1 -; CHECK-NEXT: [[P:%.*]] = uitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[P:%.*]] = uitofp nneg i32 [[TMP1]] to float ; CHECK-NEXT: ret float [[P]] ; %m = lshr i32 %a, 24 @@ -104,7 +104,7 @@ define <4 x double> @test_4(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], ; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i32> [[A_AND]], [[B_AND]] -; CHECK-NEXT: [[RES:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double> +; CHECK-NEXT: [[RES:%.*]] = uitofp nneg <4 x i32> [[TMP1]] to <4 x double> ; CHECK-NEXT: ret <4 x double> [[RES]] ; ; Drop two highest bits to guarantee that %a + %b doesn't overflow @@ -122,8 +122,8 @@ define <4 x float> @test_4_neg(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @test_4_neg( ; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], ; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], -; CHECK-NEXT: [[A_AND_FP:%.*]] = sitofp <4 x i32> [[A_AND]] to <4 x float> -; CHECK-NEXT: [[B_AND_FP:%.*]] = sitofp <4 x i32> [[B_AND]] to <4 x float> +; CHECK-NEXT: [[A_AND_FP:%.*]] = uitofp nneg <4 x i32> [[A_AND]] to 
<4 x float> +; CHECK-NEXT: [[B_AND_FP:%.*]] = uitofp nneg <4 x i32> [[B_AND]] to <4 x float> ; CHECK-NEXT: [[RES:%.*]] = fadd <4 x float> [[A_AND_FP]], [[B_AND_FP]] ; CHECK-NEXT: ret <4 x float> [[RES]] ; diff --git a/llvm/test/Transforms/InstCombine/binop-itofp.ll b/llvm/test/Transforms/InstCombine/binop-itofp.ll index d72a54e8babc9f..097a8196af80f8 100644 --- a/llvm/test/Transforms/InstCombine/binop-itofp.ll +++ b/llvm/test/Transforms/InstCombine/binop-itofp.ll @@ -21,7 +21,7 @@ define half @test_ui_ui_i8_add_fail_overflow(i8 noundef %x_in, i8 noundef %y_in) ; CHECK-LABEL: @test_ui_ui_i8_add_fail_overflow( ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 127 ; CHECK-NEXT: [[Y:%.*]] = and i8 [[Y_IN:%.*]], -127 -; CHECK-NEXT: [[XF:%.*]] = uitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = uitofp i8 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -49,7 +49,7 @@ define half @test_ui_ui_i8_add_C(i8 noundef %x_in) { define half @test_ui_ui_i8_add_C_fail_no_repr(i8 noundef %x_in) { ; CHECK-LABEL: @test_ui_ui_i8_add_C_fail_no_repr( ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 127 -; CHECK-NEXT: [[XF:%.*]] = uitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], 0xH57F8 ; CHECK-NEXT: ret half [[R]] ; @@ -62,7 +62,7 @@ define half @test_ui_ui_i8_add_C_fail_no_repr(i8 noundef %x_in) { define half @test_ui_ui_i8_add_C_fail_overflow(i8 noundef %x_in) { ; CHECK-LABEL: @test_ui_ui_i8_add_C_fail_overflow( ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 127 -; CHECK-NEXT: [[XF:%.*]] = uitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], 0xH5808 ; CHECK-NEXT: ret half [[R]] ; @@ -110,7 +110,7 @@ define half @test_ui_si_i8_add(i8 noundef %x_in, i8 noundef %y_in) { ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 63 ; CHECK-NEXT: [[Y:%.*]] = 
and i8 [[Y_IN:%.*]], 63 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i8 %x_in, 63 @@ -140,7 +140,7 @@ define half @test_ui_si_i8_add_overflow(i8 noundef %x_in, i8 noundef %y_in) { define half @test_ui_ui_i8_sub_C(i8 noundef %x_in) { ; CHECK-LABEL: @test_ui_ui_i8_sub_C( ; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X_IN:%.*]], 127 -; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = or i8 %x_in, 128 @@ -166,7 +166,7 @@ define half @test_si_si_i8_sub(i8 noundef %x_in, i8 noundef %y_in) { ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 63 ; CHECK-NEXT: [[Y:%.*]] = or i8 [[Y_IN:%.*]], -64 ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = sitofp i8 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i8 %x_in, 63 @@ -181,7 +181,7 @@ define half @test_si_si_i8_sub_fail_overflow(i8 noundef %x_in, i8 noundef %y_in) ; CHECK-LABEL: @test_si_si_i8_sub_fail_overflow( ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 63 ; CHECK-NEXT: [[Y:%.*]] = or i8 [[Y_IN:%.*]], -65 -; CHECK-NEXT: [[XF:%.*]] = sitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = sitofp i8 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fsub half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -198,7 +198,7 @@ define half @test_si_si_i8_sub_C(i8 noundef %x_in) { ; CHECK-LABEL: @test_si_si_i8_sub_C( ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 63 ; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[X]], 64 -; CHECK-NEXT: [[R:%.*]] = sitofp i8 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i8 %x_in, 63 @@ -283,7 +283,7 @@ define half @test_ui_ui_i8_mul_C(i8 noundef 
%x_in) { define half @test_ui_ui_i8_mul_C_fail_overlow(i8 noundef %x_in) { ; CHECK-LABEL: @test_ui_ui_i8_mul_C_fail_overlow( ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 14 -; CHECK-NEXT: [[XF:%.*]] = uitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], 0xH4CC0 ; CHECK-NEXT: ret half [[R]] ; @@ -315,7 +315,7 @@ define half @test_si_si_i8_mul_fail_maybe_zero(i8 noundef %x_in, i8 noundef %y_i ; CHECK-LABEL: @test_si_si_i8_mul_fail_maybe_zero( ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 7 ; CHECK-NEXT: [[Y:%.*]] = or i8 [[Y_IN:%.*]], -8 -; CHECK-NEXT: [[XF:%.*]] = sitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = sitofp i8 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -332,7 +332,7 @@ define half @test_si_si_i8_mul_C_fail_no_repr(i8 noundef %x_in) { ; CHECK-LABEL: @test_si_si_i8_mul_C_fail_no_repr( ; CHECK-NEXT: [[XX:%.*]] = and i8 [[X_IN:%.*]], 6 ; CHECK-NEXT: [[X:%.*]] = or disjoint i8 [[XX]], 1 -; CHECK-NEXT: [[XF:%.*]] = sitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], 0xHC780 ; CHECK-NEXT: ret half [[R]] ; @@ -347,7 +347,7 @@ define half @test_si_si_i8_mul_C_fail_overflow(i8 noundef %x_in) { ; CHECK-LABEL: @test_si_si_i8_mul_C_fail_overflow( ; CHECK-NEXT: [[XX:%.*]] = and i8 [[X_IN:%.*]], 6 ; CHECK-NEXT: [[X:%.*]] = or disjoint i8 [[XX]], 1 -; CHECK-NEXT: [[XF:%.*]] = sitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], 0xHCCC0 ; CHECK-NEXT: ret half [[R]] ; @@ -365,7 +365,7 @@ define half @test_ui_si_i8_mul(i8 noundef %x_in, i8 noundef %y_in) { ; CHECK-NEXT: [[YY:%.*]] = and i8 [[Y_IN:%.*]], 7 ; CHECK-NEXT: [[Y:%.*]] = add nuw nsw i8 [[YY]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = uitofp 
i8 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %xx = and i8 %x_in, 6 @@ -384,7 +384,7 @@ define half @test_ui_si_i8_mul_fail_maybe_zero(i8 noundef %x_in, i8 noundef %y_i ; CHECK-NEXT: [[X:%.*]] = add nuw nsw i8 [[XX]], 1 ; CHECK-NEXT: [[Y:%.*]] = and i8 [[Y_IN:%.*]], 7 ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %xx = and i8 %x_in, 7 @@ -401,7 +401,7 @@ define half @test_ui_si_i8_mul_fail_signed(i8 noundef %x_in, i8 noundef %y_in) { ; CHECK-NEXT: [[XX:%.*]] = and i8 [[X_IN:%.*]], 7 ; CHECK-NEXT: [[X:%.*]] = add nuw nsw i8 [[XX]], 1 ; CHECK-NEXT: [[Y:%.*]] = or i8 [[Y_IN:%.*]], -4 -; CHECK-NEXT: [[XF:%.*]] = sitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = uitofp i8 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -420,7 +420,7 @@ define half @test_ui_ui_i16_add(i16 noundef %x_in, i16 noundef %y_in) { ; CHECK-NEXT: [[X:%.*]] = and i16 [[X_IN:%.*]], 2047 ; CHECK-NEXT: [[Y:%.*]] = and i16 [[Y_IN:%.*]], 2047 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i16 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = uitofp i16 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i16 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i16 %x_in, 2047 @@ -435,8 +435,8 @@ define half @test_ui_ui_i16_add_fail_not_promotable(i16 noundef %x_in, i16 nound ; CHECK-LABEL: @test_ui_ui_i16_add_fail_not_promotable( ; CHECK-NEXT: [[X:%.*]] = and i16 [[X_IN:%.*]], 2049 ; CHECK-NEXT: [[Y:%.*]] = and i16 [[Y_IN:%.*]], 2047 -; CHECK-NEXT: [[XF:%.*]] = uitofp i16 [[X]] to half -; CHECK-NEXT: [[YF:%.*]] = uitofp i16 [[Y]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i16 [[X]] to half +; CHECK-NEXT: [[YF:%.*]] = uitofp nneg i16 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fadd half 
[[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] ; @@ -463,7 +463,7 @@ define half @test_ui_ui_i16_add_C(i16 noundef %x_in) { define half @test_ui_ui_i16_add_C_fail_overflow(i16 noundef %x_in) { ; CHECK-LABEL: @test_ui_ui_i16_add_C_fail_overflow( ; CHECK-NEXT: [[X:%.*]] = and i16 [[X_IN:%.*]], 2047 -; CHECK-NEXT: [[XF:%.*]] = uitofp i16 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i16 [[X]] to half ; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], 0xH7BD0 ; CHECK-NEXT: ret half [[R]] ; @@ -541,7 +541,7 @@ define half @test_si_si_i16_sub_fail_no_promotion(i16 noundef %x_in, i16 noundef ; CHECK-LABEL: @test_si_si_i16_sub_fail_no_promotion( ; CHECK-NEXT: [[X:%.*]] = and i16 [[X_IN:%.*]], 2047 ; CHECK-NEXT: [[Y:%.*]] = or i16 [[Y_IN:%.*]], -2049 -; CHECK-NEXT: [[XF:%.*]] = sitofp i16 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i16 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = sitofp i16 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fsub half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -575,7 +575,7 @@ define half @test_ui_si_i16_sub_fail_maybe_signed(i16 noundef %x_in, i16 noundef ; CHECK-NEXT: [[X:%.*]] = or i16 [[X_IN:%.*]], -2048 ; CHECK-NEXT: [[Y:%.*]] = and i16 [[Y_IN:%.*]], 2047 ; CHECK-NEXT: [[XF:%.*]] = uitofp i16 [[X]] to half -; CHECK-NEXT: [[YF:%.*]] = sitofp i16 [[Y]] to half +; CHECK-NEXT: [[YF:%.*]] = uitofp nneg i16 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fsub half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] ; @@ -607,8 +607,8 @@ define half @test_ui_ui_i16_mul_fail_no_promotion(i16 noundef %x_in, i16 noundef ; CHECK-LABEL: @test_ui_ui_i16_mul_fail_no_promotion( ; CHECK-NEXT: [[X:%.*]] = and i16 [[X_IN:%.*]], 4095 ; CHECK-NEXT: [[Y:%.*]] = and i16 [[Y_IN:%.*]], 3 -; CHECK-NEXT: [[XF:%.*]] = uitofp i16 [[X]] to half -; CHECK-NEXT: [[YF:%.*]] = uitofp i16 [[Y]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i16 [[X]] to half +; CHECK-NEXT: [[YF:%.*]] = uitofp nneg i16 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], [[YF]] ; CHECK-NEXT: ret half 
[[R]] ; @@ -643,7 +643,7 @@ define half @test_si_si_i16_mul_fail_overflow(i16 noundef %x_in, i16 noundef %y_ ; CHECK-NEXT: [[XX:%.*]] = and i16 [[X_IN:%.*]], 126 ; CHECK-NEXT: [[X:%.*]] = or disjoint i16 [[XX]], 1 ; CHECK-NEXT: [[Y:%.*]] = or i16 [[Y_IN:%.*]], -257 -; CHECK-NEXT: [[XF:%.*]] = sitofp i16 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i16 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = sitofp i16 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -690,7 +690,7 @@ define half @test_ui_si_i16_mul(i16 noundef %x_in, i16 noundef %y_in) { ; CHECK-NEXT: [[YY:%.*]] = and i16 [[Y_IN:%.*]], 126 ; CHECK-NEXT: [[Y:%.*]] = or disjoint i16 [[YY]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i16 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = uitofp i16 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i16 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %xx = and i16 %x_in, 126 @@ -723,7 +723,7 @@ define half @test_ui_ui_i12_add_fail_overflow(i12 noundef %x_in, i12 noundef %y_ ; CHECK-LABEL: @test_ui_ui_i12_add_fail_overflow( ; CHECK-NEXT: [[X:%.*]] = and i12 [[X_IN:%.*]], 2047 ; CHECK-NEXT: [[Y:%.*]] = and i12 [[Y_IN:%.*]], -2047 -; CHECK-NEXT: [[XF:%.*]] = uitofp i12 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i12 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = uitofp i12 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -821,7 +821,7 @@ define half @test_si_si_i12_sub(i12 noundef %x_in, i12 noundef %y_in) { ; CHECK-NEXT: [[X:%.*]] = and i12 [[X_IN:%.*]], 1023 ; CHECK-NEXT: [[Y:%.*]] = or i12 [[Y_IN:%.*]], -1024 ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i12 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = sitofp i12 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i12 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i12 %x_in, 1023 @@ -850,7 +850,7 @@ define half @test_ui_ui_i12_mul(i12 noundef %x_in, i12 noundef %y_in) { ; CHECK-NEXT: [[X:%.*]] = and i12 [[X_IN:%.*]], 
31 ; CHECK-NEXT: [[Y:%.*]] = and i12 [[Y_IN:%.*]], 63 ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i12 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = uitofp i12 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i12 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i12 %x_in, 31 @@ -883,7 +883,7 @@ define half @test_ui_ui_i12_mul_C(i12 noundef %x_in) { ; CHECK-LABEL: @test_ui_ui_i12_mul_C( ; CHECK-NEXT: [[X:%.*]] = shl i12 [[X_IN:%.*]], 6 ; CHECK-NEXT: [[TMP1:%.*]] = and i12 [[X]], 1984 -; CHECK-NEXT: [[R:%.*]] = uitofp i12 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i12 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i12 %x_in, 31 @@ -915,7 +915,7 @@ define half @test_si_si_i12_mul_fail_overflow(i12 noundef %x_in, i12 noundef %y_ ; CHECK-NEXT: [[XX:%.*]] = and i12 [[X_IN:%.*]], 30 ; CHECK-NEXT: [[X:%.*]] = or disjoint i12 [[XX]], 1 ; CHECK-NEXT: [[Y:%.*]] = or i12 [[Y_IN:%.*]], -128 -; CHECK-NEXT: [[XF:%.*]] = sitofp i12 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i12 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = sitofp i12 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -933,7 +933,7 @@ define half @test_si_si_i12_mul_fail_maybe_non_zero(i12 noundef %x_in, i12 nound ; CHECK-LABEL: @test_si_si_i12_mul_fail_maybe_non_zero( ; CHECK-NEXT: [[X:%.*]] = and i12 [[X_IN:%.*]], 30 ; CHECK-NEXT: [[Y:%.*]] = or i12 [[Y_IN:%.*]], -128 -; CHECK-NEXT: [[XF:%.*]] = sitofp i12 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i12 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = sitofp i12 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -950,7 +950,7 @@ define half @test_si_si_i12_mul_C(i12 noundef %x_in) { ; CHECK-LABEL: @test_si_si_i12_mul_C( ; CHECK-NEXT: [[X:%.*]] = or i12 [[X_IN:%.*]], -64 ; CHECK-NEXT: [[TMP1:%.*]] = mul nsw i12 [[X]], -16 -; CHECK-NEXT: [[R:%.*]] = sitofp i12 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i12 [[TMP1]] to half ; 
CHECK-NEXT: ret half [[R]] ; %x = or i12 %x_in, -64 @@ -979,7 +979,7 @@ define half @test_ui_si_i12_mul_nsw(i12 noundef %x_in, i12 noundef %y_in) { ; CHECK-NEXT: [[YY:%.*]] = and i12 [[Y_IN:%.*]], 30 ; CHECK-NEXT: [[Y:%.*]] = or disjoint i12 [[YY]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i12 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = uitofp i12 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i12 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %xx = and i12 %x_in, 31 diff --git a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll index 9da9eb36d381f0..1dd0b17e9f46dd 100644 --- a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll +++ b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll @@ -472,7 +472,7 @@ define float @ui32_clamp_and_cast_to_float(i32 %x) { ; CHECK-LABEL: @ui32_clamp_and_cast_to_float( ; CHECK-NEXT: [[LO_CMP:%.*]] = icmp eq i32 [[X:%.*]], 0 ; CHECK-NEXT: [[MIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[X]], i32 255) -; CHECK-NEXT: [[MIN:%.*]] = uitofp i32 [[MIN1]] to float +; CHECK-NEXT: [[MIN:%.*]] = uitofp nneg i32 [[MIN1]] to float ; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], float 1.000000e+00, float [[MIN]] ; CHECK-NEXT: ret float [[R]] ; @@ -488,7 +488,7 @@ define float @ui64_clamp_and_cast_to_float(i64 %x) { ; CHECK-LABEL: @ui64_clamp_and_cast_to_float( ; CHECK-NEXT: [[LO_CMP:%.*]] = icmp eq i64 [[X:%.*]], 0 ; CHECK-NEXT: [[MIN1:%.*]] = call i64 @llvm.umin.i64(i64 [[X]], i64 255) -; CHECK-NEXT: [[MIN:%.*]] = uitofp i64 [[MIN1]] to float +; CHECK-NEXT: [[MIN:%.*]] = uitofp nneg i64 [[MIN1]] to float ; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], float 1.000000e+00, float [[MIN]] ; CHECK-NEXT: ret float [[R]] ; @@ -504,7 +504,7 @@ define float @mixed_clamp_to_float_1(i32 %x) { ; CHECK-LABEL: @mixed_clamp_to_float_1( ; CHECK-NEXT: [[SI_MIN:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255) ; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.smax.i32(i32 [[SI_MIN]], i32 1) -; CHECK-NEXT: 
[[R:%.*]] = sitofp i32 [[R1]] to float +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[R1]] to float ; CHECK-NEXT: ret float [[R]] ; %si_min_cmp = icmp sgt i32 %x, 255 @@ -539,7 +539,7 @@ define float @mixed_clamp_to_float_2(i32 %x) { ; CHECK-LABEL: @mixed_clamp_to_float_2( ; CHECK-NEXT: [[SI_MIN:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255) ; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.smax.i32(i32 [[SI_MIN]], i32 1) -; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[R1]] to float +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[R1]] to float ; CHECK-NEXT: ret float [[R]] ; %si_min_cmp = icmp sgt i32 %x, 255 @@ -572,7 +572,7 @@ define <2 x float> @mixed_clamp_to_float_vec(<2 x i32> %x) { ; CHECK-LABEL: @mixed_clamp_to_float_vec( ; CHECK-NEXT: [[SI_MIN:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) ; CHECK-NEXT: [[R1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[SI_MIN]], <2 x i32> ) -; CHECK-NEXT: [[R:%.*]] = sitofp <2 x i32> [[R1]] to <2 x float> +; CHECK-NEXT: [[R:%.*]] = uitofp nneg <2 x i32> [[R1]] to <2 x float> ; CHECK-NEXT: ret <2 x float> [[R]] ; %si_min_cmp = icmp sgt <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll index 129d7811cfb867..916955e34efacb 100644 --- a/llvm/test/Transforms/InstCombine/fast-math.ll +++ b/llvm/test/Transforms/InstCombine/fast-math.ll @@ -562,7 +562,7 @@ define float @fdiv1(float %x) { ; CHECK-NEXT: [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FD7303B60000000 ; CHECK-NEXT: ret float [[DIV1]] ; - %div = fdiv float %x, 0x3FF3333340000000 + %div = fdiv fast float %x, 0x3FF3333340000000 %div1 = fdiv fast float %div, 0x4002666660000000 ret float %div1 ; 0x3FF3333340000000 = 1.2f @@ -603,7 +603,7 @@ define float @fdiv3(float %x) { ; CHECK-NEXT: [[DIV1:%.*]] = fdiv fast float [[TMP1]], 0x47EFFFFFE0000000 ; CHECK-NEXT: ret float [[DIV1]] ; - %div = fdiv float %x, 0x47EFFFFFE0000000 + %div = fdiv fast float %x, 0x47EFFFFFE0000000 %div1 = fdiv fast 
float %div, 0x4002666660000000 ret float %div1 } diff --git a/llvm/test/Transforms/InstCombine/fmul-pow.ll b/llvm/test/Transforms/InstCombine/fmul-pow.ll index 63458e136074c9..84592d220d62c4 100644 --- a/llvm/test/Transforms/InstCombine/fmul-pow.ll +++ b/llvm/test/Transforms/InstCombine/fmul-pow.ll @@ -85,8 +85,8 @@ define double @pow_ab_recip_a_reassoc(double %a, double %b) { ; CHECK-NEXT: [[M:%.*]] = call reassoc double @llvm.pow.f64(double [[A:%.*]], double [[TMP1]]) ; CHECK-NEXT: ret double [[M]] ; - %r = fdiv double 1.0, %a - %p = call double @llvm.pow.f64(double %a, double %b) + %r = fdiv reassoc double 1.0, %a + %p = call reassoc double @llvm.pow.f64(double %a, double %b) %m = fmul reassoc double %r, %p ret double %m } @@ -99,8 +99,8 @@ define double @pow_ab_recip_a_reassoc_commute(double %a, double %b) { ; CHECK-NEXT: [[M:%.*]] = call reassoc double @llvm.pow.f64(double [[A:%.*]], double [[TMP1]]) ; CHECK-NEXT: ret double [[M]] ; - %r = fdiv double 1.0, %a - %p = call double @llvm.pow.f64(double %a, double %b) + %r = fdiv reassoc double 1.0, %a + %p = call reassoc double @llvm.pow.f64(double %a, double %b) %m = fmul reassoc double %p, %r ret double %m } @@ -109,14 +109,14 @@ define double @pow_ab_recip_a_reassoc_commute(double %a, double %b) { define double @pow_ab_recip_a_reassoc_use1(double %a, double %b) { ; CHECK-LABEL: @pow_ab_recip_a_reassoc_use1( -; CHECK-NEXT: [[R:%.*]] = fdiv double 1.000000e+00, [[A:%.*]] -; CHECK-NEXT: [[P:%.*]] = call double @llvm.pow.f64(double [[A]], double [[B:%.*]]) +; CHECK-NEXT: [[R:%.*]] = fdiv reassoc double 1.000000e+00, [[A:%.*]] +; CHECK-NEXT: [[P:%.*]] = call reassoc double @llvm.pow.f64(double [[A]], double [[B:%.*]]) ; CHECK-NEXT: [[M:%.*]] = fmul reassoc double [[R]], [[P]] ; CHECK-NEXT: call void @use(double [[R]]) ; CHECK-NEXT: ret double [[M]] ; - %r = fdiv double 1.0, %a - %p = call double @llvm.pow.f64(double %a, double %b) + %r = fdiv reassoc double 1.0, %a + %p = call reassoc double @llvm.pow.f64(double %a, 
double %b) %m = fmul reassoc double %r, %p call void @use(double %r) ret double %m @@ -126,13 +126,13 @@ define double @pow_ab_recip_a_reassoc_use1(double %a, double %b) { define double @pow_ab_recip_a_reassoc_use2(double %a, double %b) { ; CHECK-LABEL: @pow_ab_recip_a_reassoc_use2( -; CHECK-NEXT: [[P:%.*]] = call double @llvm.pow.f64(double [[A:%.*]], double [[B:%.*]]) +; CHECK-NEXT: [[P:%.*]] = call reassoc double @llvm.pow.f64(double [[A:%.*]], double [[B:%.*]]) ; CHECK-NEXT: [[M:%.*]] = fdiv reassoc double [[P]], [[A]] ; CHECK-NEXT: call void @use(double [[P]]) ; CHECK-NEXT: ret double [[M]] ; - %r = fdiv double 1.0, %a - %p = call double @llvm.pow.f64(double %a, double %b) + %r = fdiv reassoc double 1.0, %a + %p = call reassoc double @llvm.pow.f64(double %a, double %b) %m = fmul reassoc double %r, %p call void @use(double %p) ret double %m @@ -142,15 +142,15 @@ define double @pow_ab_recip_a_reassoc_use2(double %a, double %b) { define double @pow_ab_recip_a_reassoc_use3(double %a, double %b) { ; CHECK-LABEL: @pow_ab_recip_a_reassoc_use3( -; CHECK-NEXT: [[R:%.*]] = fdiv double 1.000000e+00, [[A:%.*]] -; CHECK-NEXT: [[P:%.*]] = call double @llvm.pow.f64(double [[A]], double [[B:%.*]]) +; CHECK-NEXT: [[R:%.*]] = fdiv reassoc double 1.000000e+00, [[A:%.*]] +; CHECK-NEXT: [[P:%.*]] = call reassoc double @llvm.pow.f64(double [[A]], double [[B:%.*]]) ; CHECK-NEXT: [[M:%.*]] = fmul reassoc double [[R]], [[P]] ; CHECK-NEXT: call void @use(double [[R]]) ; CHECK-NEXT: call void @use(double [[P]]) ; CHECK-NEXT: ret double [[M]] ; - %r = fdiv double 1.0, %a - %p = call double @llvm.pow.f64(double %a, double %b) + %r = fdiv reassoc double 1.0, %a + %p = call reassoc double @llvm.pow.f64(double %a, double %b) %m = fmul reassoc double %r, %p call void @use(double %r) call void @use(double %p) diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll index f6435f0032891e..39f9e74f899d18 100644 --- a/llvm/test/Transforms/InstCombine/fmul.ll 
+++ b/llvm/test/Transforms/InstCombine/fmul.ll @@ -633,15 +633,15 @@ define float @log2half(float %x, float %y) { define float @log2half_commute(float %x1, float %y) { ; CHECK-LABEL: @log2half_commute( +; CHECK-NEXT: [[X1:%.*]] = fmul fast float [[X2:%.*]], 0x3FC24924A0000000 ; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.log2.f32(float [[Y:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[X1:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[X1]] ; CHECK-NEXT: [[TMP3:%.*]] = fsub fast float [[TMP2]], [[X1]] -; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[TMP3]], 0x3FC24924A0000000 -; CHECK-NEXT: ret float [[MUL]] +; CHECK-NEXT: ret float [[TMP3]] ; - %x = fdiv float %x1, 7.0 ; thwart complexity-based canonicalization - %halfy = fmul float %y, 0.5 - %log2 = call float @llvm.log2.f32(float %halfy) + %x = fdiv fast float %x1, 7.0 ; thwart complexity-based canonicalization + %halfy = fmul fast float %y, 0.5 + %log2 = call fast float @llvm.log2.f32(float %halfy) %mul = fmul fast float %x, %log2 ret float %mul } @@ -652,12 +652,50 @@ define float @fdiv_constant_numerator_fmul(float %x) { ; CHECK-LABEL: @fdiv_constant_numerator_fmul( ; CHECK-NEXT: [[T3:%.*]] = fdiv reassoc float 1.200000e+07, [[X:%.*]] ; CHECK-NEXT: ret float [[T3]] +; + %t1 = fdiv reassoc float 2.0e+3, %x + %t3 = fmul reassoc float %t1, 6.0e+3 + ret float %t3 +} + +; C1/X * C2 => (C1*C2) / X with mixed fast-math flags + +define float @fdiv_constant_numerator_fmul_mixed(float %x) { +; CHECK-LABEL: @fdiv_constant_numerator_fmul_mixed( +; CHECK-NEXT: [[T3:%.*]] = fdiv reassoc float 1.200000e+07, [[X:%.*]] +; CHECK-NEXT: ret float [[T3]] +; + %t1 = fdiv reassoc float 2.0e+3, %x + %t3 = fmul fast float %t1, 6.0e+3 + ret float %t3 +} + +; C1/X * C2 => (C1*C2) / X with full fast-math flags + +define float @fdiv_constant_numerator_fmul_fast(float %x) { +; CHECK-LABEL: @fdiv_constant_numerator_fmul_fast( +; CHECK-NEXT: [[T3:%.*]] = fdiv fast float 1.200000e+07, [[X:%.*]] +; 
CHECK-NEXT: ret float [[T3]] +; + %t1 = fdiv fast float 2.0e+3, %x + %t3 = fmul fast float %t1, 6.0e+3 + ret float %t3 +} + +; C1/X * C2 => (C1*C2) / X with no fast-math flags on the fdiv + +define float @fdiv_constant_numerator_fmul_precdiv(float %x) { +; CHECK-LABEL: @fdiv_constant_numerator_fmul_precdiv( +; CHECK-NEXT: [[T1:%.*]] = fdiv float 2.000000e+03, [[X:%.*]] +; CHECK-NEXT: [[T4:%.*]] = fmul reassoc float [[T1]], 6.000000e+03 +; CHECK-NEXT: ret float [[T4]] ; %t1 = fdiv float 2.0e+3, %x %t3 = fmul reassoc float %t1, 6.0e+3 ret float %t3 } + ; C1/X * C2 => (C1*C2) / X is disabled if C1/X has multiple uses @fmul2_external = external global float @@ -682,7 +720,7 @@ define float @fdiv_constant_denominator_fmul(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00 ; CHECK-NEXT: ret float [[T3]] ; - %t1 = fdiv float %x, 2.0e+3 + %t1 = fdiv reassoc float %x, 2.0e+3 %t3 = fmul reassoc float %t1, 6.0e+3 ret float %t3 } @@ -692,7 +730,7 @@ define <4 x float> @fdiv_constant_denominator_fmul_vec(<4 x float> %x) { ; CHECK-NEXT: [[T3:%.*]] = fmul reassoc <4 x float> [[X:%.*]], ; CHECK-NEXT: ret <4 x float> [[T3]] ; - %t1 = fdiv <4 x float> %x, + %t1 = fdiv reassoc <4 x float> %x, %t3 = fmul reassoc <4 x float> %t1, ret <4 x float> %t3 } @@ -705,7 +743,7 @@ define <4 x float> @fdiv_constant_denominator_fmul_vec_constexpr(<4 x float> %x) ; CHECK-NEXT: ret <4 x float> [[T3]] ; %constExprMul = bitcast i128 trunc (i160 bitcast (<5 x float> to i160) to i128) to <4 x float> - %t1 = fdiv <4 x float> %x, + %t1 = fdiv reassoc <4 x float> %x, %t3 = fmul reassoc <4 x float> %t1, %constExprMul ret <4 x float> %t3 } @@ -734,7 +772,7 @@ define float @fdiv_constant_denominator_fmul_denorm(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fmul fast float [[X:%.*]], 0x3760620000000000 ; CHECK-NEXT: ret float [[T3]] ; - %t1 = fdiv float %x, 2.0e+3 + %t1 = fdiv fast float %x, 2.0e+3 %t3 = fmul fast float %t1, 0x3810000000000000 ret float %t3 } @@ -748,7 +786,7 @@ define 
float @fdiv_constant_denominator_fmul_denorm_try_harder(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fdiv reassoc float [[X:%.*]], 0x47E8000000000000 ; CHECK-NEXT: ret float [[T3]] ; - %t1 = fdiv float %x, 3.0 + %t1 = fdiv reassoc float %x, 3.0 %t3 = fmul reassoc float %t1, 0x3810000000000000 ret float %t3 } @@ -776,7 +814,7 @@ define float @fmul_fadd_distribute(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fadd reassoc float [[TMP1]], 6.000000e+00 ; CHECK-NEXT: ret float [[T3]] ; - %t2 = fadd float %x, 2.0 + %t2 = fadd reassoc float %x, 2.0 %t3 = fmul reassoc float %t2, 3.0 ret float %t3 } @@ -787,7 +825,7 @@ define <2 x float> @fmul_fadd_distribute_vec(<2 x float> %x) { ; CHECK-NEXT: [[T3:%.*]] = fadd reassoc <2 x float> [[TMP1]], ; CHECK-NEXT: ret <2 x float> [[T3]] ; - %t1 = fadd <2 x float> , %x + %t1 = fadd reassoc <2 x float> , %x %t3 = fmul reassoc <2 x float> %t1, ret <2 x float> %t3 } @@ -798,7 +836,7 @@ define @fmul_fadd_distribute_scalablevec( [[TMP1]], shufflevector ( insertelement ( poison, float 1.200000e+07, i64 0), poison, zeroinitializer) ; CHECK-NEXT: ret [[T3]] ; - %t1 = fadd splat (float 2.0e+3), %x + %t1 = fadd reassoc splat (float 2.0e+3), %x %t3 = fmul reassoc %t1, splat (float 6.0e+3) @@ -813,7 +851,7 @@ define float @fmul_fsub_distribute1(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fadd reassoc float [[TMP1]], -6.000000e+00 ; CHECK-NEXT: ret float [[T3]] ; - %t2 = fsub float %x, 2.0 + %t2 = fsub reassoc float %x, 2.0 %t3 = fmul reassoc float %t2, 3.0 ret float %t3 } @@ -826,7 +864,7 @@ define float @fmul_fsub_distribute2(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fsub reassoc float 6.000000e+00, [[TMP1]] ; CHECK-NEXT: ret float [[T3]] ; - %t2 = fsub float 2.0, %x + %t2 = fsub reassoc float 2.0, %x %t3 = fmul reassoc float %t2, 3.0 ret float %t3 } @@ -840,8 +878,8 @@ define float @fmul_fadd_fmul_distribute(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fadd fast float [[TMP1]], 1.000000e+01 ; CHECK-NEXT: ret float [[T3]] ; - %t1 = fmul float %x, 6.0 - %t2 = fadd float 
%t1, 2.0 + %t1 = fmul fast float %x, 6.0 + %t2 = fadd fast float %t1, 2.0 %t3 = fmul fast float %t2, 5.0 ret float %t3 } @@ -872,8 +910,8 @@ define double @fmul_fadd_fdiv_distribute2(double %x) { ; CHECK-NEXT: [[T3:%.*]] = fadd reassoc double [[TMP1]], 0x34000000000000 ; CHECK-NEXT: ret double [[T3]] ; - %t1 = fdiv double %x, 3.0 - %t2 = fadd double %t1, 5.0 + %t1 = fdiv reassoc double %x, 3.0 + %t2 = fadd reassoc double %t1, 5.0 %t3 = fmul reassoc double %t2, 0x10000000000000 ret double %t3 } @@ -887,8 +925,8 @@ define double @fmul_fadd_fdiv_distribute3(double %x) { ; CHECK-NEXT: [[T3:%.*]] = fadd reassoc double [[TMP1]], 0x34000000000000 ; CHECK-NEXT: ret double [[T3]] ; - %t1 = fdiv double %x, 3.0 - %t2 = fadd double %t1, 5.0 + %t1 = fdiv reassoc double %x, 3.0 + %t2 = fadd reassoc double %t1, 5.0 %t3 = fmul reassoc double %t2, 0x10000000000000 ret double %t3 } @@ -902,8 +940,8 @@ define float @fmul_fsub_fmul_distribute(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fsub fast float 1.000000e+01, [[TMP1]] ; CHECK-NEXT: ret float [[T3]] ; - %t1 = fmul float %x, 6.0 - %t2 = fsub float 2.0, %t1 + %t1 = fmul fast float %x, 6.0 + %t2 = fsub fast float 2.0, %t1 %t3 = fmul fast float %t2, 5.0 ret float %t3 } @@ -932,8 +970,8 @@ define float @fmul_fsub_fmul_distribute2(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fadd fast float [[TMP1]], -1.000000e+01 ; CHECK-NEXT: ret float [[T3]] ; - %t1 = fmul float %x, 6.0 - %t2 = fsub float %t1, 2.0 + %t1 = fmul fast float %x, 6.0 + %t2 = fsub fast float %t1, 2.0 %t3 = fmul fast float %t2, 5.0 ret float %t3 } @@ -986,8 +1024,8 @@ define double @fmul_fdivs_factor_common_denominator(double %x, double %y, double ; CHECK-NEXT: [[MUL:%.*]] = fdiv fast double [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret double [[MUL]] ; - %div1 = fdiv double %x, %z - %div2 = fdiv double %y, %z + %div1 = fdiv fast double %x, %z + %div2 = fdiv fast double %y, %z %mul = fmul fast double %div1, %div2 ret double %mul } @@ -999,8 +1037,8 @@ define double @fmul_fdivs_factor(double 
%x, double %y, double %z, double %w) { ; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc double [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret double [[MUL]] ; - %div1 = fdiv double %x, %y - %div2 = fdiv double %z, %w + %div1 = fdiv reassoc double %x, %y + %div2 = fdiv reassoc double %z, %w %mul = fmul reassoc double %div1, %div2 ret double %mul } @@ -1011,7 +1049,7 @@ define double @fmul_fdiv_factor(double %x, double %y, double %z) { ; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc double [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret double [[MUL]] ; - %div = fdiv double %x, %y + %div = fdiv reassoc double %x, %y %mul = fmul reassoc double %div, %z ret double %mul } @@ -1022,7 +1060,7 @@ define double @fmul_fdiv_factor_constant1(double %x, double %y) { ; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc double [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret double [[MUL]] ; - %div = fdiv double %x, %y + %div = fdiv reassoc double %x, %y %mul = fmul reassoc double %div, 42.0 ret double %mul } @@ -1033,7 +1071,7 @@ define <2 x float> @fmul_fdiv_factor_constant2(<2 x float> %x, <2 x float> %y) { ; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc <2 x float> [[TMP1]], ; CHECK-NEXT: ret <2 x float> [[MUL]] ; - %div = fdiv <2 x float> %x, + %div = fdiv reassoc <2 x float> %x, %mul = fmul reassoc <2 x float> %div, %y ret <2 x float> %mul } diff --git a/llvm/test/Transforms/InstCombine/fpcast.ll b/llvm/test/Transforms/InstCombine/fpcast.ll index ac4b88fcddd7ec..d2c932ba447e4e 100644 --- a/llvm/test/Transforms/InstCombine/fpcast.ll +++ b/llvm/test/Transforms/InstCombine/fpcast.ll @@ -170,7 +170,7 @@ define half @sint_to_fptrunc(i32 %x) { define half @masked_sint_to_fptrunc1(i32 %x) { ; CHECK-LABEL: @masked_sint_to_fptrunc1( ; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215 -; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[M]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[M]] to half ; CHECK-NEXT: ret half [[R]] ; %m = and i32 %x, 16777215 @@ -182,7 +182,7 @@ define half @masked_sint_to_fptrunc1(i32 %x) { define half 
@masked_sint_to_fptrunc2(i32 %x) { ; CHECK-LABEL: @masked_sint_to_fptrunc2( ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8 -; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[M]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[M]] to half ; CHECK-NEXT: ret half [[R]] ; %m = lshr i32 %x, 8 @@ -194,7 +194,7 @@ define half @masked_sint_to_fptrunc2(i32 %x) { define half @masked_sint_to_fptrunc3(i32 %x) { ; CHECK-LABEL: @masked_sint_to_fptrunc3( ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 7 -; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[M]] to float +; CHECK-NEXT: [[F:%.*]] = uitofp nneg i32 [[M]] to float ; CHECK-NEXT: [[R:%.*]] = fptrunc float [[F]] to half ; CHECK-NEXT: ret half [[R]] ; @@ -218,7 +218,7 @@ define double @sint_to_fpext(i32 %x) { define double @masked_sint_to_fpext1(i32 %x) { ; CHECK-LABEL: @masked_sint_to_fpext1( ; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215 -; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[M]] to double +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[M]] to double ; CHECK-NEXT: ret double [[R]] ; %m = and i32 %x, 16777215 @@ -230,7 +230,7 @@ define double @masked_sint_to_fpext1(i32 %x) { define double @masked_sint_to_fpext2(i32 %x) { ; CHECK-LABEL: @masked_sint_to_fpext2( ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8 -; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[M]] to double +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[M]] to double ; CHECK-NEXT: ret double [[R]] ; %m = lshr i32 %x, 8 @@ -242,7 +242,7 @@ define double @masked_sint_to_fpext2(i32 %x) { define double @masked_sint_to_fpext3(i32 %x) { ; CHECK-LABEL: @masked_sint_to_fpext3( ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 7 -; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[M]] to float +; CHECK-NEXT: [[F:%.*]] = uitofp nneg i32 [[M]] to float ; CHECK-NEXT: [[R:%.*]] = fpext float [[F]] to double ; CHECK-NEXT: ret double [[R]] ; @@ -266,7 +266,7 @@ define half @uint_to_fptrunc(i32 %x) { define half @masked_uint_to_fptrunc1(i32 %x) { ; CHECK-LABEL: @masked_uint_to_fptrunc1( ; CHECK-NEXT: [[M:%.*]] = and 
i32 [[X:%.*]], 16777215 -; CHECK-NEXT: [[R:%.*]] = uitofp i32 [[M]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[M]] to half ; CHECK-NEXT: ret half [[R]] ; %m = and i32 %x, 16777215 @@ -278,7 +278,7 @@ define half @masked_uint_to_fptrunc1(i32 %x) { define half @masked_uint_to_fptrunc2(i32 %x) { ; CHECK-LABEL: @masked_uint_to_fptrunc2( ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8 -; CHECK-NEXT: [[R:%.*]] = uitofp i32 [[M]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[M]] to half ; CHECK-NEXT: ret half [[R]] ; %m = lshr i32 %x, 8 @@ -290,7 +290,7 @@ define half @masked_uint_to_fptrunc2(i32 %x) { define half @masked_uint_to_fptrunc3(i32 %x) { ; CHECK-LABEL: @masked_uint_to_fptrunc3( ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 7 -; CHECK-NEXT: [[F:%.*]] = uitofp i32 [[M]] to float +; CHECK-NEXT: [[F:%.*]] = uitofp nneg i32 [[M]] to float ; CHECK-NEXT: [[R:%.*]] = fptrunc float [[F]] to half ; CHECK-NEXT: ret half [[R]] ; @@ -314,7 +314,7 @@ define double @uint_to_fpext(i32 %x) { define double @masked_uint_to_fpext1(i32 %x) { ; CHECK-LABEL: @masked_uint_to_fpext1( ; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215 -; CHECK-NEXT: [[R:%.*]] = uitofp i32 [[M]] to double +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[M]] to double ; CHECK-NEXT: ret double [[R]] ; %m = and i32 %x, 16777215 @@ -326,7 +326,7 @@ define double @masked_uint_to_fpext1(i32 %x) { define double @masked_uint_to_fpext2(i32 %x) { ; CHECK-LABEL: @masked_uint_to_fpext2( ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8 -; CHECK-NEXT: [[R:%.*]] = uitofp i32 [[M]] to double +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[M]] to double ; CHECK-NEXT: ret double [[R]] ; %m = lshr i32 %x, 8 @@ -338,7 +338,7 @@ define double @masked_uint_to_fpext2(i32 %x) { define double @masked_uint_to_fpext3(i32 %x) { ; CHECK-LABEL: @masked_uint_to_fpext3( ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 7 -; CHECK-NEXT: [[F:%.*]] = uitofp i32 [[M]] to float +; CHECK-NEXT: [[F:%.*]] = uitofp nneg i32 [[M]] to 
float ; CHECK-NEXT: [[R:%.*]] = fpext float [[F]] to double ; CHECK-NEXT: ret double [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll index 85a21332b07889..8b4249b2c25a92 100644 --- a/llvm/test/Transforms/InstCombine/known-bits.ll +++ b/llvm/test/Transforms/InstCombine/known-bits.ll @@ -1223,7 +1223,7 @@ define i8 @known_reduce_and(<2 x i8> %xx) { ; CHECK-NEXT: ret i8 1 ; %x = or <2 x i8> %xx, - %v = call i8 @llvm.vector.reduce.or(<2 x i8> %x) + %v = call i8 @llvm.vector.reduce.and(<2 x i8> %x) %r = and i8 %v, 1 ret i8 %r } @@ -1231,12 +1231,12 @@ define i8 @known_reduce_and(<2 x i8> %xx) { define i8 @known_reduce_and_fail(<2 x i8> %xx) { ; CHECK-LABEL: @known_reduce_and_fail( ; CHECK-NEXT: [[X:%.*]] = or <2 x i8> [[XX:%.*]], -; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> [[X]]) +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> [[X]]) ; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 2 ; CHECK-NEXT: ret i8 [[R]] ; %x = or <2 x i8> %xx, - %v = call i8 @llvm.vector.reduce.or(<2 x i8> %x) + %v = call i8 @llvm.vector.reduce.and(<2 x i8> %x) %r = and i8 %v, 2 ret i8 %r } diff --git a/llvm/test/Transforms/InstCombine/known-fpclass-reduce-signbit.ll b/llvm/test/Transforms/InstCombine/known-fpclass-reduce-signbit.ll new file mode 100644 index 00000000000000..f46ea9db751ff4 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/known-fpclass-reduce-signbit.ll @@ -0,0 +1,119 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -S -passes=instcombine | FileCheck %s + +define i1 @vector_reduce_maximum_signbit(<4 x double> nofpclass(nan nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_maximum_signbit +; CHECK-SAME: (<4 x double> nofpclass(nan nzero) [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call double 
@llvm.vector.reduce.fmaximum.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + +define i1 @vector_reduce_maximum_signbit_fail_maybe_nan(<4 x double> nofpclass(nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_maximum_signbit_fail_maybe_nan +; CHECK-SAME: (<4 x double> nofpclass(nzero) [[X:%.*]]) { +; CHECK-NEXT: [[X_ABS:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[X]]) +; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> [[X_ABS]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[OP]], 0.000000e+00 +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + + +define i1 @vector_reduce_minimum_signbit(<4 x double> nofpclass(nan nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_minimum_signbit +; CHECK-SAME: (<4 x double> nofpclass(nan nzero) [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + +define i1 @vector_reduce_minimum_signbit_fail_maybe_nan(<4 x double> nofpclass(nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_minimum_signbit_fail_maybe_nan +; CHECK-SAME: (<4 x double> nofpclass(nzero) [[X:%.*]]) { +; CHECK-NEXT: [[X_ABS:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[X]]) +; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> [[X_ABS]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[OP]], 0.000000e+00 +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + +define i1 @vector_reduce_max_signbit(<4 x double> nofpclass(nan 
nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_max_signbit +; CHECK-SAME: (<4 x double> nofpclass(nan nzero) [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + +define i1 @vector_reduce_max_signbit_fail_maybe_nan(<4 x double> nofpclass(nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_max_signbit_fail_maybe_nan +; CHECK-SAME: (<4 x double> nofpclass(nzero) [[X:%.*]]) { +; CHECK-NEXT: [[X_ABS:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[X]]) +; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[X_ABS]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[OP]], 0.000000e+00 +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + + +define i1 @vector_reduce_min_signbit(<4 x double> nofpclass(nan nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_min_signbit +; CHECK-SAME: (<4 x double> nofpclass(nan nzero) [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + +define i1 @vector_reduce_min_signbit_fail_maybe_nan(<4 x double> nofpclass(nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_min_signbit_fail_maybe_nan +; CHECK-SAME: (<4 x double> nofpclass(nzero) [[X:%.*]]) { +; CHECK-NEXT: [[X_ABS:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[X]]) +; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[X_ABS]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[OP]], 0.000000e+00 +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> 
%x) + %op = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + + + +define i1 @vector_reduce_min_signbit_nnan_from_fmf(<4 x double> nofpclass(nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_min_signbit_nnan_from_fmf +; CHECK-SAME: (<4 x double> nofpclass(nzero) [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + + diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll index 8391fe33eb9b59..bbbbf9eb6eafe4 100644 --- a/llvm/test/Transforms/InstCombine/minmax-fold.ll +++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll @@ -131,7 +131,7 @@ define i64 @t9(i32 %a) { define float @t10(i32 %x) { ; CHECK-LABEL: @t10( ; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 255) -; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[R1]] to float +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[R1]] to float ; CHECK-NEXT: ret float [[R]] ; %f_x = sitofp i32 %x to float @@ -143,7 +143,7 @@ define float @t10(i32 %x) { define float @t11(i64 %x) { ; CHECK-LABEL: @t11( ; CHECK-NEXT: [[R1:%.*]] = call i64 @llvm.smax.i64(i64 [[X:%.*]], i64 255) -; CHECK-NEXT: [[R:%.*]] = sitofp i64 [[R1]] to float +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i64 [[R1]] to float ; CHECK-NEXT: ret float [[R]] ; %f_x = sitofp i64 %x to float @@ -526,7 +526,7 @@ falselabel: define double @PR31751_umin1(i32 %x) { ; CHECK-LABEL: @PR31751_umin1( ; CHECK-NEXT: [[SEL:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 2147483647) -; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double +; CHECK-NEXT: [[CONV:%.*]] = uitofp nneg i32 [[SEL]] to double ; CHECK-NEXT: ret double [[CONV]] ; %cmp = icmp slt i32 %x, 0 @@ -538,7 +538,7 @@ define double @PR31751_umin1(i32 %x) { define double @PR31751_umin2(i32 %x) { ; 
CHECK-LABEL: @PR31751_umin2( ; CHECK-NEXT: [[SEL:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 2147483647) -; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double +; CHECK-NEXT: [[CONV:%.*]] = uitofp nneg i32 [[SEL]] to double ; CHECK-NEXT: ret double [[CONV]] ; %cmp = icmp ult i32 %x, 2147483647 @@ -550,7 +550,7 @@ define double @PR31751_umin2(i32 %x) { define double @PR31751_umin3(i32 %x) { ; CHECK-LABEL: @PR31751_umin3( ; CHECK-NEXT: [[SEL:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 2147483647) -; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double +; CHECK-NEXT: [[CONV:%.*]] = uitofp nneg i32 [[SEL]] to double ; CHECK-NEXT: ret double [[CONV]] ; %cmp = icmp ugt i32 %x, 2147483647 diff --git a/llvm/test/Transforms/InstCombine/minmax-fp.ll b/llvm/test/Transforms/InstCombine/minmax-fp.ll index f89e8a18e63440..b9e46caa63753a 100644 --- a/llvm/test/Transforms/InstCombine/minmax-fp.ll +++ b/llvm/test/Transforms/InstCombine/minmax-fp.ll @@ -257,7 +257,7 @@ define double @t16(i32 %x) { define double @t17(i32 %x) { ; CHECK-LABEL: @t17( ; CHECK-NEXT: [[SEL1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 2) -; CHECK-NEXT: [[SEL:%.*]] = sitofp i32 [[SEL1]] to double +; CHECK-NEXT: [[SEL:%.*]] = uitofp nneg i32 [[SEL1]] to double ; CHECK-NEXT: ret double [[SEL]] ; %cmp = icmp sgt i32 %x, 2 diff --git a/llvm/test/Transforms/InstCombine/pr27236.ll b/llvm/test/Transforms/InstCombine/pr27236.ll index 61ea344b1bdbd4..67c320d3524664 100644 --- a/llvm/test/Transforms/InstCombine/pr27236.ll +++ b/llvm/test/Transforms/InstCombine/pr27236.ll @@ -4,7 +4,7 @@ define float @test1(i32 %scale) { ; CHECK-LABEL: @test1( ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[SCALE:%.*]], i32 1) -; CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[TMP2:%.*]] = uitofp nneg i32 [[TMP1]] to float ; CHECK-NEXT: ret float [[TMP2]] ; %1 = icmp sgt i32 1, %scale diff --git a/llvm/test/Transforms/InstCombine/sitofp.ll 
b/llvm/test/Transforms/InstCombine/sitofp.ll index cc6b6425eb03c8..51eff39cd900e2 100644 --- a/llvm/test/Transforms/InstCombine/sitofp.ll +++ b/llvm/test/Transforms/InstCombine/sitofp.ll @@ -256,7 +256,7 @@ define i25 @consider_lowbits_masked_input(i25 %A) { define i32 @overflow_masked_input(i32 %A) { ; CHECK-LABEL: @overflow_masked_input( ; CHECK-NEXT: [[M:%.*]] = and i32 [[A:%.*]], 16777217 -; CHECK-NEXT: [[B:%.*]] = uitofp i32 [[M]] to float +; CHECK-NEXT: [[B:%.*]] = uitofp nneg i32 [[M]] to float ; CHECK-NEXT: [[C:%.*]] = fptoui float [[B]] to i32 ; CHECK-NEXT: ret i32 [[C]] ; diff --git a/llvm/test/Transforms/InstSimplify/known-never-infinity.ll b/llvm/test/Transforms/InstSimplify/known-never-infinity.ll index 74039d3ffd56ca..4d662c08b1a7a1 100644 --- a/llvm/test/Transforms/InstSimplify/known-never-infinity.ll +++ b/llvm/test/Transforms/InstSimplify/known-never-infinity.ll @@ -1109,6 +1109,106 @@ define float @fcmp_ult_neginf_implies_class_assert(float %arg) { ret float %mul_by_zero } +define i1 @isKnownNeverInfinity_vector_reduce_maximum(<4 x double> %x) { +; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_maximum +; CHECK-SAME: (<4 x double> [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %ninf.x = fadd ninf <4 x double> %x, + %op = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %ninf.x) + %cmp = fcmp une double %op, 0x7ff0000000000000 + ret i1 %cmp +} + +define i1 @isKnownNeverInfinity_vector_reduce_maximum_fail(<4 x double> %x) { +; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_maximum_fail +; CHECK-SAME: (<4 x double> [[X:%.*]]) { +; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], +; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> [[NINF_X]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000 +; CHECK-NEXT: ret i1 [[CMP]] +; + %ninf.x = fadd <4 x double> %x, + %op = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %ninf.x) + %cmp = fcmp une double 
%op, 0x7ff0000000000000 + ret i1 %cmp +} + +define i1 @isKnownNeverInfinity_vector_reduce_minimum(<4 x double> %x) { +; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_minimum +; CHECK-SAME: (<4 x double> [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %ninf.x = fadd ninf <4 x double> %x, + %op = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %ninf.x) + %cmp = fcmp une double %op, 0x7ff0000000000000 + ret i1 %cmp +} + +define i1 @isKnownNeverInfinity_vector_reduce_minimum_fail(<4 x double> %x) { +; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_minimum_fail +; CHECK-SAME: (<4 x double> [[X:%.*]]) { +; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], +; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> [[NINF_X]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000 +; CHECK-NEXT: ret i1 [[CMP]] +; + %ninf.x = fadd <4 x double> %x, + %op = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %ninf.x) + %cmp = fcmp une double %op, 0x7ff0000000000000 + ret i1 %cmp +} + +define i1 @isKnownNeverInfinity_vector_reduce_fmax(<4 x double> %x) { +; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmax +; CHECK-SAME: (<4 x double> [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %ninf.x = fadd ninf <4 x double> %x, + %op = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %ninf.x) + %cmp = fcmp une double %op, 0x7ff0000000000000 + ret i1 %cmp +} + +define i1 @isKnownNeverInfinity_vector_reduce_fmax_fail(<4 x double> %x) { +; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmax_fail +; CHECK-SAME: (<4 x double> [[X:%.*]]) { +; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], +; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[NINF_X]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000 +; CHECK-NEXT: ret i1 [[CMP]] +; + %ninf.x = fadd <4 x double> %x, + %op = call double 
@llvm.vector.reduce.fmax.v4f64(<4 x double> %ninf.x) + %cmp = fcmp une double %op, 0x7ff0000000000000 + ret i1 %cmp +} + +define i1 @isKnownNeverInfinity_vector_reduce_fmin(<4 x double> %x) { +; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmin +; CHECK-SAME: (<4 x double> [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %ninf.x = fadd ninf <4 x double> %x, + %op = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %ninf.x) + %cmp = fcmp une double %op, 0x7ff0000000000000 + ret i1 %cmp +} + +define i1 @isKnownNeverInfinity_vector_reduce_fmin_fail(<4 x double> %x) { +; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmin_fail +; CHECK-SAME: (<4 x double> [[X:%.*]]) { +; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], +; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[NINF_X]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000 +; CHECK-NEXT: ret i1 [[CMP]] +; + %ninf.x = fadd <4 x double> %x, + %op = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %ninf.x) + %cmp = fcmp une double %op, 0x7ff0000000000000 + ret i1 %cmp +} + declare double @llvm.arithmetic.fence.f64(double) declare double @llvm.canonicalize.f64(double) declare double @llvm.ceil.f64(double) diff --git a/llvm/test/Transforms/JumpThreading/pr79175.ll b/llvm/test/Transforms/JumpThreading/pr79175.ll index cce30ce079999c..2c7ee0770cdc73 100644 --- a/llvm/test/Transforms/JumpThreading/pr79175.ll +++ b/llvm/test/Transforms/JumpThreading/pr79175.ll @@ -17,11 +17,11 @@ define i32 @test(i64 %idx, i32 %val) { ; CHECK: cond.end: ; CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt i32 [[VAL]], 0 ; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[CMP_I]] -; CHECK-NEXT: br i1 [[COND_FR]], label [[TMP0:%.*]], label [[COND_END_THREAD]] -; CHECK: 0: -; CHECK-NEXT: br label [[COND_END_THREAD]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[COND_END_THREAD]], label [[TMP0:%.*]] ; CHECK: cond.end.thread: -; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[VAL]], 
[[COND_END]] ], [ 0, [[TMP0]] ], [ 0, [[FOR_BODY]] ] +; CHECK-NEXT: br label [[TMP0]] +; CHECK: 0: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 0, [[COND_END_THREAD]] ], [ [[VAL]], [[COND_END]] ] ; CHECK-NEXT: [[F_IDX:%.*]] = getelementptr inbounds i32, ptr @f, i64 [[IDX]] ; CHECK-NEXT: store i32 [[TMP1]], ptr [[F_IDX]], align 4 ; CHECK-NEXT: [[F_RELOAD:%.*]] = load i32, ptr @f, align 4 diff --git a/llvm/test/Transforms/JumpThreading/select.ll b/llvm/test/Transforms/JumpThreading/select.ll index 27ebf4c25da509..4ec55a66bb8ac1 100644 --- a/llvm/test/Transforms/JumpThreading/select.ll +++ b/llvm/test/Transforms/JumpThreading/select.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -S -passes="jump-threading" -debug-only=branch-prob < %s 2>&1 | FileCheck %s -; RUN: opt -S -passes="require,jump-threading" -debug-only=branch-prob -disable-output < %s 2>&1 | FileCheck -check-prefix=CHECK-BPI %s +; RUN: opt -S -passes="require,jump-threading" -debug-only=branch-prob < %s 2>&1 | FileCheck -check-prefixes=CHECK,CHECK-BPI %s ; REQUIRES: asserts ; CHECK-BPI-LABEL: ---- Branch Probability Info : unfold1 ---- @@ -21,7 +21,7 @@ declare void @quux() ; booleans where at least one operand is true/false/undef. ;. -; CHECK: @anchor = constant [3 x ptr] [ptr blockaddress(@test_indirectbr, %L1), ptr inttoptr (i32 1 to ptr), ptr blockaddress(@test_indirectbr, %L3)] +; CHECK: @[[ANCHOR:[a-zA-Z0-9_$"\\.-]+]] = constant [3 x ptr] [ptr blockaddress(@test_indirectbr, [[L1:%.*]]), ptr inttoptr (i32 1 to ptr), ptr blockaddress(@test_indirectbr, [[L3:%.*]])] ;. 
define void @test_br(i1 %cond, i1 %value) nounwind { ; CHECK-LABEL: @test_br( @@ -66,8 +66,8 @@ define void @test_switch(i1 %cond, i8 %value) nounwind { ; CHECK-NEXT: call void @quux() ; CHECK-NEXT: [[EXPR:%.*]] = select i1 [[COND]], i8 1, i8 [[VALUE:%.*]] ; CHECK-NEXT: switch i8 [[EXPR]], label [[L3:%.*]] [ -; CHECK-NEXT: i8 1, label [[L1]] -; CHECK-NEXT: i8 2, label [[L2:%.*]] +; CHECK-NEXT: i8 1, label [[L1]] +; CHECK-NEXT: i8 2, label [[L2:%.*]] ; CHECK-NEXT: ] ; CHECK: L1: ; CHECK-NEXT: call void @foo() @@ -192,8 +192,8 @@ define void @test_switch_cmp(i1 %cond, i32 %val, i8 %value) nounwind { ; CHECK: 0: ; CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[VALUE:%.*]], [[L0]] ] ; CHECK-NEXT: switch i8 [[TMP1]], label [[L3:%.*]] [ -; CHECK-NEXT: i8 1, label [[L1]] -; CHECK-NEXT: i8 2, label [[L2:%.*]] +; CHECK-NEXT: i8 1, label [[L1]] +; CHECK-NEXT: i8 2, label [[L2:%.*]] ; CHECK-NEXT: ] ; CHECK: L1: ; CHECK-NEXT: call void @foo() @@ -237,8 +237,8 @@ define void @test_switch_default(ptr nocapture %status) nounwind { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[STATUS:%.*]], align 4 ; CHECK-NEXT: switch i32 [[TMP0]], label [[L2:%.*]] [ -; CHECK-NEXT: i32 5061, label [[L2_THREAD:%.*]] -; CHECK-NEXT: i32 0, label [[L2]] +; CHECK-NEXT: i32 5061, label [[L2_THREAD:%.*]] +; CHECK-NEXT: i32 0, label [[L2]] ; CHECK-NEXT: ] ; CHECK: L2.thread: ; CHECK-NEXT: store i32 10025, ptr [[STATUS]], align 4 @@ -377,21 +377,21 @@ define i32 @unfold3(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) noun ; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT_THREAD4:%.*]], label [[COND_FALSE_I:%.*]] ; CHECK: cond.false.i: ; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]] -; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD4]], label [[COND_FALSE_6_I:%.*]] +; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_6_I:%.*]] ; CHECK: cond.false.6.i: ; CHECK-NEXT: [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]] ; CHECK-NEXT: br i1 [[CMP8_I]], 
label [[DOTEXIT_THREAD4]], label [[COND_FALSE_10_I:%.*]] ; CHECK: cond.false.10.i: ; CHECK-NEXT: [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]] -; CHECK-NEXT: br i1 [[CMP13_I]], label [[DOTEXIT_THREAD4]], label [[DOTEXIT:%.*]] +; CHECK-NEXT: br i1 [[CMP13_I]], label [[DOTEXIT_THREAD]], label [[DOTEXIT:%.*]] ; CHECK: .exit: ; CHECK-NEXT: [[PHITMP:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]] ; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[PHITMP]] -; CHECK-NEXT: br i1 [[COND_FR]], label [[DOTEXIT_THREAD:%.*]], label [[DOTEXIT_THREAD4]] -; CHECK: 0: -; CHECK-NEXT: br label [[DOTEXIT_THREAD4]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD4]] ; CHECK: .exit.thread: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[ADD3]], [[DOTEXIT]] ], [ [[J]], [[DOTEXIT_THREAD]] ], [ [[J]], [[COND_FALSE_I]] ], [ [[J]], [[COND_FALSE_10_I]] ], [ [[ADD3]], [[ENTRY:%.*]] ], [ [[ADD3]], [[COND_FALSE_6_I]] ] +; CHECK-NEXT: br label [[DOTEXIT_THREAD4]] +; CHECK: .exit.thread4: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[J]], [[DOTEXIT_THREAD]] ], [ [[ADD3]], [[DOTEXIT]] ], [ [[ADD3]], [[ENTRY:%.*]] ], [ [[ADD3]], [[COND_FALSE_6_I]] ] ; CHECK-NEXT: ret i32 [[TMP0]] ; entry: @@ -430,23 +430,23 @@ define i32 @unfold4(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) noun ; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_I:%.*]] ; CHECK: cond.false.i: ; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]] -; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_6_I:%.*]] +; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD5:%.*]], label [[COND_FALSE_6_I:%.*]] ; CHECK: cond.false.6.i: ; CHECK-NEXT: [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]] ; CHECK-NEXT: br i1 [[CMP8_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_10_I:%.*]] ; CHECK: cond.false.10.i: ; CHECK-NEXT: [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]] -; CHECK-NEXT: br i1 [[CMP13_I]], label [[DOTEXIT_THREAD]], label [[DOTEXIT:%.*]] +; CHECK-NEXT: br i1 
[[CMP13_I]], label [[DOTEXIT_THREAD5]], label [[DOTEXIT:%.*]] ; CHECK: .exit: ; CHECK-NEXT: [[CMP19_I:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]] ; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP19_I]] to i32 ; CHECK-NEXT: [[LNOT_I18:%.*]] = icmp eq i32 [[CONV]], 1 ; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[LNOT_I18]] -; CHECK-NEXT: br i1 [[COND_FR]], label [[TMP1:%.*]], label [[DOTEXIT_THREAD]] -; CHECK: 0: -; CHECK-NEXT: br label [[DOTEXIT_THREAD]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD5]] ; CHECK: .exit.thread: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[ADD3]], [[DOTEXIT]] ], [ [[J]], [[TMP1]] ], [ [[J]], [[ENTRY:%.*]] ], [ [[J]], [[COND_FALSE_6_I]] ], [ [[ADD3]], [[COND_FALSE_I]] ], [ [[ADD3]], [[COND_FALSE_10_I]] ] +; CHECK-NEXT: br label [[DOTEXIT_THREAD5]] +; CHECK: .exit.thread5: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[J]], [[DOTEXIT_THREAD]] ], [ [[ADD3]], [[DOTEXIT]] ], [ [[ADD3]], [[COND_FALSE_I]] ], [ [[ADD3]], [[COND_FALSE_10_I]] ] ; CHECK-NEXT: ret i32 [[TMP0]] ; entry: @@ -560,10 +560,10 @@ define void @test_func(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr ; CHECK: if.end: ; CHECK-NEXT: [[LOCAL_VAR_0:%.*]] = phi i32 [ [[TMP1]], [[FOR_BODY]] ] ; CHECK-NEXT: switch i32 [[LOCAL_VAR_0]], label [[SW_DEFAULT]] [ -; CHECK-NEXT: i32 2, label [[SW_BB]] -; CHECK-NEXT: i32 4, label [[SW_BB7]] -; CHECK-NEXT: i32 5, label [[SW_BB8:%.*]] -; CHECK-NEXT: i32 7, label [[SW_BB9:%.*]] +; CHECK-NEXT: i32 2, label [[SW_BB]] +; CHECK-NEXT: i32 4, label [[SW_BB7]] +; CHECK-NEXT: i32 5, label [[SW_BB8:%.*]] +; CHECK-NEXT: i32 7, label [[SW_BB9:%.*]] ; CHECK-NEXT: ] ; CHECK: sw.bb: ; CHECK-NEXT: call void @foo() @@ -674,5 +674,3 @@ if.end: ; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1984} ; CHECK: [[PROF1]] = !{!"branch_weights", i64 1073741824, i64 3221225472} ;. -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CHECK-BPI: {{.*}} diff --git a/llvm/test/Transforms/JumpThreading/thread-prob-7.ll b/llvm/test/Transforms/JumpThreading/thread-prob-7.ll index 4623a579be48f6..8c9d89871d00b3 100644 --- a/llvm/test/Transforms/JumpThreading/thread-prob-7.ll +++ b/llvm/test/Transforms/JumpThreading/thread-prob-7.ll @@ -14,15 +14,15 @@ define i32 @func0(i32 %a0, i32 %a1) !prof !0 { ; CHECK-NEXT: br i1 [[CMP1]], label [[BB_JOIN_THREAD:%.*]], label [[TEST2_FALSE:%.*]], !prof [[PROF2:![0-9]+]] ; CHECK: test2_false: ; CHECK-NEXT: call void @foobar() -; CHECK-NEXT: br label [[BB_JOIN_THREAD]] +; CHECK-NEXT: br label [[TMP0:%.*]] ; CHECK: bb_join: ; CHECK-NEXT: [[C:%.*]] = phi i1 [ [[CX]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[C]] -; CHECK-NEXT: br i1 [[COND_FR]], label [[BB_JOIN_THREAD1:%.*]], label [[BB_JOIN_THREAD]], !prof [[PROF3:![0-9]+]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[BB_JOIN_THREAD]], label [[TMP0]], !prof [[PROF3:![0-9]+]] ; CHECK: bb_join.thread: -; CHECK-NEXT: br label [[BB_JOIN_THREAD]] +; CHECK-NEXT: br label [[TMP0]] ; CHECK: 0: -; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 7, [[BB_JOIN]] ], [ 7, [[TEST2_FALSE]] ], [ 42, [[TEST2]] ], [ 42, [[BB_JOIN_THREAD1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 42, [[BB_JOIN_THREAD]] ], [ 7, [[BB_JOIN]] ], [ 7, [[TEST2_FALSE]] ] ; CHECK-NEXT: ret i32 [[TMP1]] ; entry: diff --git a/llvm/test/Transforms/JumpThreading/uncond-no-phi.ll b/llvm/test/Transforms/JumpThreading/uncond-no-phi.ll deleted file mode 100644 index 6104e8f8778bc0..00000000000000 --- a/llvm/test/Transforms/JumpThreading/uncond-no-phi.ll +++ /dev/null @@ -1,123 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -passes=jump-threading -S < %s | FileCheck %s - -define i1 @if_else(i1 %c, i1 %c1) { -; CHECK-LABEL: define i1 @if_else( -; CHECK-SAME: i1 [[C:%.*]], i1 [[C1:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], 
label [[RETURN:%.*]] -; CHECK: then: -; CHECK-NEXT: call void @dummy() -; CHECK-NEXT: br i1 [[C1]], label [[ELSE:%.*]], label [[RETURN]] -; CHECK: else: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: return: -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i1 [ false, [[THEN]] ], [ true, [[ENTRY:%.*]] ], [ true, [[ELSE]] ] -; CHECK-NEXT: ret i1 [[RETVAL_0]] -; -entry: - br i1 %c, label %then, label %else - -then: - call void @dummy() - br i1 %c1, label %else, label %return - -else: - br label %return - -return: - %retval.0 = phi i1 [ true, %else ], [ false, %then ] - ret i1 %retval.0 -} - -define i8 @switch_uncond(i8 %arg) { -; CHECK-LABEL: define i8 @switch_uncond( -; CHECK-SAME: i8 [[ARG:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: switch i8 [[ARG]], label [[DEFAULT:%.*]] [ -; CHECK-NEXT: i8 0, label [[BB1:%.*]] -; CHECK-NEXT: i8 1, label [[BB3:%.*]] -; CHECK-NEXT: i8 2, label [[BB2:%.*]] -; CHECK-NEXT: i8 3, label [[END:%.*]] -; CHECK-NEXT: ] -; CHECK: default: -; CHECK-NEXT: unreachable -; CHECK: bb: -; CHECK-NEXT: call void @dummy() -; CHECK-NEXT: br label [[END]] -; CHECK: bb1: -; CHECK-NEXT: call void @dummy() -; CHECK-NEXT: br label [[END]] -; CHECK: bb2: -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i8 [ 1, [[ENTRY:%.*]] ], [ 0, [[BB3]] ], [ 0, [[BB1]] ], [ 0, [[BB2]] ] -; CHECK-NEXT: ret i8 [[PHI]] -; -entry: - switch i8 %arg, label %default [ - i8 0, label %bb - i8 1, label %bb1 - i8 2, label %bb2 - i8 3, label %end - ] - -default: - unreachable - -bb: - call void @dummy() - br label %bb2 - -bb1: - call void @dummy() - br label %bb2 - -; Predecessors of %bb2 are %bb and %bb1, they are not identical. -; So we can thread %bb2. 
-bb2: - br label %end - -end: - %phi = phi i8 [ 0, %bb2 ], [ 1, %entry ] - ret i8 %phi -} - -define i8 @switch_uncond_fail(i8 %arg) { -; CHECK-LABEL: define i8 @switch_uncond_fail( -; CHECK-SAME: i8 [[ARG:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: switch i8 [[ARG]], label [[DEFAULT:%.*]] [ -; CHECK-NEXT: i8 0, label [[BB:%.*]] -; CHECK-NEXT: i8 1, label [[BB]] -; CHECK-NEXT: i8 2, label [[END:%.*]] -; CHECK-NEXT: ] -; CHECK: default: -; CHECK-NEXT: br label [[END]] -; CHECK: bb: -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i8 [ 0, [[BB]] ], [ 1, [[ENTRY:%.*]] ], [ 2, [[DEFAULT]] ] -; CHECK-NEXT: ret i8 [[PHI]] -; -entry: - switch i8 %arg, label %default [ - i8 0, label %bb - i8 1, label %bb - i8 2, label %end - ] - -default: - br label %end - -; Predecessor of %bb is only %entry (though there are two in predecessor list), -; thus it's unthreadable. -bb: - br label %end - -end: - %phi = phi i8 [ 0, %bb ], [ 1, %entry ], [ 2, %default ] - ret i8 %phi -} - -declare void @dummy() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll index 2470bca1e17b99..1c26ee8479e578 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll @@ -8,41 +8,39 @@ target triple = "aarch64-linux-gnu" define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @select_const_i32_from_icmp ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq [[VEC_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, 
zeroinitializer) -; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor [[VEC_ICMP]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or [[VEC_PHI]], [[NOT]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select [[VEC_ICMP]], [[VEC_PHI]], shufflevector ( insertelement ( poison, i32 7, i64 0), poison, zeroinitializer) ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR]], i32 7, i32 3 +; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne [[VEC_SEL]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[FIN_ICMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3 ; CHECK-VF4IC4-LABEL: @select_const_i32_from_icmp ; CHECK-VF4IC4: vector.body: -; CHECK-VF4IC4: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] -; CHECK-VF4IC4: [[VEC_PHI2:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] -; CHECK-VF4IC4: [[VEC_PHI3:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] -; CHECK-VF4IC4: [[VEC_PHI4:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] +; CHECK-VF4IC4: [[VEC_PHI1:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] +; CHECK-VF4IC4: [[VEC_PHI2:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] +; CHECK-VF4IC4: [[VEC_PHI3:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] +; CHECK-VF4IC4: [[VEC_PHI4:%.*]] = phi [ shufflevector ( insertelement 
( poison, i32 3, i64 0), poison, zeroinitializer), %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] ; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp eq {{.*}}, shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) ; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq {{.*}}, shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) ; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq {{.*}}, shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) ; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq {{.*}}, shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) -; CHECK-VF4IC4-NEXT: [[NOT1:%.*]] = xor [[VEC_ICMP1]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-VF4IC4-NEXT: [[NOT2:%.*]] = xor [[VEC_ICMP2]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-VF4IC4-NEXT: [[NOT3:%.*]] = xor [[VEC_ICMP3]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-VF4IC4-NEXT: [[NOT4:%.*]] = xor [[VEC_ICMP4]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-VF4IC4-NEXT: [[VEC_SEL1:%.*]] = or [[VEC_PHI1]], [[NOT1]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL2:%.*]] = or [[VEC_PHI2]], [[NOT2]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = or [[VEC_PHI3]], [[NOT3]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL4:%.*]] = or [[VEC_PHI4]], [[NOT4]] +; CHECK-VF4IC4-NEXT: [[VEC_SEL1]] = select [[VEC_ICMP1]], [[VEC_PHI1]], shufflevector ( insertelement ( poison, i32 7, i64 0), poison, zeroinitializer) +; CHECK-VF4IC4-NEXT: [[VEC_SEL2]] = select [[VEC_ICMP2]], [[VEC_PHI2]], shufflevector ( insertelement ( poison, i32 7, i64 0), poison, zeroinitializer) +; CHECK-VF4IC4-NEXT: [[VEC_SEL3]] = select [[VEC_ICMP3]], [[VEC_PHI3]], shufflevector ( insertelement ( poison, i32 7, i64 0), poison, zeroinitializer) +; CHECK-VF4IC4-NEXT: [[VEC_SEL4]] = select [[VEC_ICMP4]], [[VEC_PHI4]], 
shufflevector ( insertelement ( poison, i32 7, i64 0), poison, zeroinitializer) ; CHECK-VF4IC4: middle.block: -; CHECK-VF4IC4-NEXT: [[OR1:%.*]] = or [[VEC_SEL2]], [[VEC_SEL1]] -; CHECK-VF4IC4-NEXT: [[OR2:%.*]] = or [[VEC_SEL3]], [[OR1]] -; CHECK-VF4IC4-NEXT: [[OR3:%.*]] = or [[VEC_SEL4]], [[OR2]] -; CHECK-VF4IC4-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[OR3]]) -; CHECK-VF4IC4-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC4-NEXT: {{.*}} = select i1 [[FR]], i32 7, i32 3 +; CHECK-VF4IC4-NEXT: [[VEC_ICMP5:%.*]] = icmp ne [[VEC_SEL1]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) +; CHECK-VF4IC4-NEXT: [[VEC_SEL5:%.*]] = select [[VEC_ICMP5]], [[VEC_SEL1]], [[VEC_SEL2]] +; CHECK-VF4IC4-NEXT: [[VEC_ICMP6:%.*]] = icmp ne [[VEC_SEL5]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) +; CHECK-VF4IC4-NEXT: [[VEC_SEL6:%.*]] = select [[VEC_ICMP6]], [[VEC_SEL5]], [[VEC_SEL3]] +; CHECK-VF4IC4-NEXT: [[VEC_ICMP7:%.*]] = icmp ne [[VEC_SEL6]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) +; CHECK-VF4IC4-NEXT: [[VEC_SEL7:%.*]] = select [[VEC_ICMP7]], [[VEC_SEL6]], [[VEC_SEL4]] +; CHECK-VF4IC4-NEXT: [[FIN_ICMP:%.*]] = icmp ne [[VEC_SEL7]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) +; CHECK-VF4IC4-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[FIN_ICMP]]) +; CHECK-VF4IC4-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3 entry: br label %for.body @@ -64,18 +62,21 @@ exit: ; preds = %for.body define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @select_i32_from_icmp ; CHECK-VF4IC1: vector.ph: -; CHECK-VF4IC1-NOT: shufflevector -; CHECK-VF4IC1-NOT: shufflevector +; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement poison, i32 %a, i64 0 +; CHECK-VF4IC1-NEXT: [[SPLAT_OF_A:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer +; CHECK-VF4IC1-NEXT: 
[[TMP2:%.*]] = insertelement poison, i32 %b, i64 0 +; CHECK-VF4IC1-NEXT: [[SPLAT_OF_B:%.*]] = shufflevector [[TMP2]], poison, zeroinitializer ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ [[SPLAT_OF_A]], %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq [[VEC_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) -; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor [[VEC_ICMP]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or [[VEC_PHI]], [[NOT]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select [[VEC_ICMP]], [[VEC_PHI]], [[SPLAT_OF_B]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR]], i32 %b, i32 %a +; CHECK-VF4IC1-NEXT: [[FIN_INS:%.*]] = insertelement poison, i32 %a, i64 0 +; CHECK-VF4IC1-NEXT: [[FIN_SPLAT:%.*]] = shufflevector [[FIN_INS]], poison, zeroinitializer +; CHECK-VF4IC1-NEXT: [[FIN_CMP:%.*]] = icmp ne [[VEC_SEL]], [[FIN_SPLAT]] +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[FIN_CMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 %b, i32 %a ; CHECK-VF4IC4-LABEL: @select_i32_from_icmp ; CHECK-VF4IC4: vector.body: @@ -100,15 +101,14 @@ exit: ; preds = %for.body define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @select_const_i32_from_fcmp ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer), 
%vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = fcmp fast ueq [[VEC_LOAD]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor [[VEC_ICMP]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or [[VEC_PHI]], [[NOT]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select [[VEC_ICMP]], [[VEC_PHI]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR]], i32 1, i32 2 +; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne [[VEC_SEL]], shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[FIN_ICMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 2 ; CHECK-VF4IC4-LABEL: @select_const_i32_from_fcmp ; CHECK-VF4IC4: vector.body: @@ -156,17 +156,17 @@ exit: ; preds = %for.body define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @pred_select_const_i32_from_icmp ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load ; CHECK-VF4IC1: [[MASK:%.*]] = icmp sgt [[VEC_LOAD]], shufflevector ( insertelement ( poison, i32 35, i64 0), poison, zeroinitializer) ; CHECK-VF4IC1: [[MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr {{%.*}}, i32 4, [[MASK]], poison) ; CHECK-VF4IC1-NEXT: 
[[VEC_ICMP:%.*]] = icmp eq [[MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) -; CHECK-VF4IC1-NEXT: [[VEC_SEL_TMP:%.*]] = or [[VEC_PHI]], [[VEC_ICMP]] -; CHECK-VF4IC1: [[VEC_SEL:%.*]] = select [[MASK]], [[VEC_SEL_TMP]], [[VEC_PHI]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL_TMP:%.*]] = select [[VEC_ICMP]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), [[VEC_PHI]] +; CHECK-VF4IC1: [[VEC_SEL:%.*]] = select [[MASK]], [[VEC_SEL_TMP]], [[VEC_PHI]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR]], i32 1, i32 0 +; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne [[VEC_SEL]], zeroinitializer +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[FIN_ICMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 0 ; CHECK-VF4IC4-LABEL: @pred_select_const_i32_from_icmp ; CHECK-VF4IC4: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll index 2b58acbfe9cc98..8a2dc0abb0de8e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S \ ; RUN: < %s | FileCheck %s ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 \ @@ -6,59 +7,109 @@ target triple = "riscv64" define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 { -; CHECK-LABEL: @select_icmp +; CHECK-LABEL: define i32 @select_icmp( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], ptr nocapture readonly [[C:%.*]], i64 
[[N:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[NOT:%.*]] = xor <4 
x i1> [[TMP4]], -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_PHI]], <4 x i32> [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i32 [[Y]], i32 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[COND:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt 
i32 [[TMP7]], [[X]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[A]], i32 [[Y]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[COND_LCSSA]] ; -; SCALABLE-LABEL: @select_icmp +; SCALABLE-LABEL: define i32 @select_icmp( +; SCALABLE-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], ptr nocapture readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y]], i64 0 +; 
SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP4]] -; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; SCALABLE-NEXT: [[TMP8:%.*]] = icmp slt [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; SCALABLE-NEXT: [[NOT:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[TMP9]] = or [[VEC_PHI]], [[NOT]] -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP6]] +; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; SCALABLE-NEXT: [[TMP9:%.*]] = icmp slt [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; SCALABLE-NEXT: [[TMP10]] = select [[TMP9]], [[VEC_PHI]], [[BROADCAST_SPLAT2]] +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; SCALABLE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 
+; SCALABLE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 +; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne [[TMP10]], zeroinitializer +; SCALABLE-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[RDX_SELECT_CMP]]) +; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP12]], i32 [[Y]], i32 0 +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; SCALABLE: scalar.ph: +; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SCALABLE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] +; SCALABLE: for.body: +; SCALABLE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[A:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[COND:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] +; SCALABLE-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; SCALABLE-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP13]], [[X]] +; SCALABLE-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[A]], i32 [[Y]] +; SCALABLE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; SCALABLE: for.end: +; SCALABLE-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: ret i32 
[[COND_LCSSA]] ; entry: br label %for.body @@ -79,59 +130,109 @@ for.end: } define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 { -; CHECK-LABEL: @select_fcmp +; CHECK-LABEL: define i32 @select_fcmp( +; CHECK-SAME: float [[X:%.*]], i32 [[Y:%.*]], ptr nocapture readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds float, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_PHI]], <4 x i32> [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i32 [[Y]], i32 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ 
[[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[COND:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast olt float [[TMP7]], [[X]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[A]], i32 [[Y]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[COND_LCSSA]] ; -; SCALABLE-LABEL: @select_fcmp +; SCALABLE-LABEL: define i32 @select_fcmp( +; SCALABLE-SAME: float [[X:%.*]], i32 [[Y:%.*]], ptr nocapture readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[X:%.*]], i64 0 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 
@llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[X]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[TMP4]] -; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 -; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast olt [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; SCALABLE-NEXT: [[NOT:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[TMP9]] = or [[VEC_PHI]], [[NOT]] -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP6]] +; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr 
[[TMP7]], i32 0 +; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; SCALABLE-NEXT: [[TMP9:%.*]] = fcmp fast olt [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; SCALABLE-NEXT: [[TMP10]] = select [[TMP9]], [[VEC_PHI]], [[BROADCAST_SPLAT2]] +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; SCALABLE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 +; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne [[TMP10]], zeroinitializer +; SCALABLE-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[RDX_SELECT_CMP]]) +; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP12]], i32 [[Y]], i32 0 +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; SCALABLE: scalar.ph: +; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SCALABLE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] +; SCALABLE: for.body: +; SCALABLE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[A:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[COND:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] +; SCALABLE-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; SCALABLE-NEXT: [[CMP1:%.*]] = fcmp fast olt float [[TMP13]], [[X]] +; SCALABLE-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[A]], i32 [[Y]] +; SCALABLE-NEXT: [[INDVARS_IV_NEXT]] 
= add nuw nsw i64 [[INDVARS_IV]], 1 +; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; SCALABLE: for.end: +; SCALABLE-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: ret i32 [[COND_LCSSA]] ; entry: br label %for.body @@ -152,55 +253,101 @@ for.end: } define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { -; CHECK-LABEL: @select_const_i32_from_icmp +; CHECK-LABEL: define i32 @select_const_i32_from_icmp( +; CHECK-SAME: ptr nocapture readonly [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; 
CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 7, i32 3 +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 7, i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 3, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP15:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 3 +; CHECK-NEXT: [[TMP12]] = select i1 [[TMP11]], i32 [[TMP8]], i32 7 +; CHECK-NEXT: [[TMP13]] = add nuw nsw i64 [[TMP15]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], [[N]] +; CHECK-NEXT: br i1 [[TMP14]], label 
[[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP12]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[DOTLCSSA]] ; -; SCALABLE-LABEL: @select_const_i32_from_icmp +; SCALABLE-LABEL: define i32 @select_const_i32_from_icmp( +; SCALABLE-SAME: ptr nocapture readonly [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP4]] +; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP4]] 
; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[NOT:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[TMP9]] = or [[VEC_PHI]], [[NOT]] +; SCALABLE-NEXT: [[TMP9]] = select [[TMP8]], [[VEC_PHI]], shufflevector ( insertelement ( poison, i32 7, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 7, i32 3 +; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne [[TMP9]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[RDX_SELECT_CMP]]) +; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 7, i32 3 +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; SCALABLE: scalar.ph: +; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SCALABLE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 3, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] +; SCALABLE: for.body: +; SCALABLE-NEXT: [[TMP21:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[TMP14:%.*]] = phi i32 [ 
[[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP21]] +; SCALABLE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 3 +; SCALABLE-NEXT: [[TMP18]] = select i1 [[TMP17]], i32 [[TMP14]], i32 7 +; SCALABLE-NEXT: [[TMP19]] = add nuw nsw i64 [[TMP21]], 1 +; SCALABLE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP19]], [[N]] +; SCALABLE-NEXT: br i1 [[TMP20]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; SCALABLE: exit: +; SCALABLE-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP18]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: ret i32 [[DOTLCSSA]] ; entry: br label %for.body @@ -221,55 +368,113 @@ exit: ; preds = %for.body } define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 { -; CHECK-LABEL: @select_i32_from_icmp +; CHECK-LABEL: define i32 @select_i32_from_icmp( +; CHECK-SAME: ptr nocapture readonly [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i32> [[MINMAX_IDENT_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; 
CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %b, i32 %a +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP5]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) 
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 [[B]], i32 [[A]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[A]], [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP15:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 3 +; CHECK-NEXT: [[TMP12]] = select i1 [[TMP11]], i32 [[TMP8]], i32 [[B]] +; CHECK-NEXT: [[TMP13]] = add nuw nsw i64 [[TMP15]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], [[N]] +; CHECK-NEXT: br i1 [[TMP14]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP12]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[DOTLCSSA]] ; -; SCALABLE-LABEL: @select_i32_from_icmp +; SCALABLE-LABEL: define i32 @select_i32_from_icmp( +; SCALABLE-SAME: ptr nocapture readonly [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 
[[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 +; SCALABLE-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement poison, i32 [[A]], i64 0 +; SCALABLE-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector [[MINMAX_IDENT_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[B]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP4]] +; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP4]] ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[NOT:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[TMP9]] = or [[VEC_PHI]], [[NOT]] +; SCALABLE-NEXT: [[TMP9]] 
= select [[TMP8]], [[VEC_PHI]], [[BROADCAST_SPLAT]] ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %b, i32 %a +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[A]], i64 0 +; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne [[TMP9]], [[DOTSPLAT]] +; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[RDX_SELECT_CMP]]) +; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 [[B]], i32 [[A]] +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; SCALABLE: scalar.ph: +; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SCALABLE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[A]], [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] +; SCALABLE: for.body: +; SCALABLE-NEXT: [[TMP21:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[TMP14:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP21]] +; SCALABLE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 3 +; SCALABLE-NEXT: [[TMP18]] = select i1 [[TMP17]], i32 [[TMP14]], i32 [[B]] +; SCALABLE-NEXT: [[TMP19]] = add nuw nsw i64 [[TMP21]], 1 +; SCALABLE-NEXT: 
[[TMP20:%.*]] = icmp eq i64 [[TMP19]], [[N]] +; SCALABLE-NEXT: br i1 [[TMP20]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; SCALABLE: exit: +; SCALABLE-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP18]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: ret i32 [[DOTLCSSA]] ; entry: br label %for.body @@ -290,55 +495,101 @@ exit: ; preds = %for.body } define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { -; CHECK-LABEL: @select_const_i32_from_fcmp +; CHECK-LABEL: define i32 @select_const_i32_from_fcmp( +; CHECK-SAME: ptr nocapture readonly [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], -; 
CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 2 +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 1, i32 2 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 2, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP15:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ueq float [[TMP10]], 3.000000e+00 +; CHECK-NEXT: [[TMP12]] = select i1 [[TMP11]], i32 [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP13]] = add nuw nsw i64 [[TMP15]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], [[N]] +; CHECK-NEXT: br i1 [[TMP14]], label [[EXIT]], label [[FOR_BODY]], 
!llvm.loop [[LOOP11:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP12]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[DOTLCSSA]] ; -; SCALABLE-LABEL: @select_const_i32_from_fcmp +; SCALABLE-LABEL: define i32 @select_const_i32_from_fcmp( +; SCALABLE-SAME: ptr nocapture readonly [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V:%.*]], i64 [[TMP4]] +; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP4]] ; SCALABLE-NEXT: 
[[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast ueq [[WIDE_LOAD]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[NOT:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[TMP9]] = or [[VEC_PHI]], [[NOT]] +; SCALABLE-NEXT: [[TMP9]] = select [[TMP8]], [[VEC_PHI]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 2 +; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne [[TMP9]], shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[RDX_SELECT_CMP]]) +; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 1, i32 2 +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; SCALABLE: scalar.ph: +; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SCALABLE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 2, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] +; SCALABLE: for.body: +; SCALABLE-NEXT: [[TMP21:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[TMP14:%.*]] = phi i32 [ 
[[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP21]] +; SCALABLE-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4 +; SCALABLE-NEXT: [[TMP17:%.*]] = fcmp fast ueq float [[TMP16]], 3.000000e+00 +; SCALABLE-NEXT: [[TMP18]] = select i1 [[TMP17]], i32 [[TMP14]], i32 1 +; SCALABLE-NEXT: [[TMP19]] = add nuw nsw i64 [[TMP21]], 1 +; SCALABLE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP19]], [[N]] +; SCALABLE-NEXT: br i1 [[TMP20]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; SCALABLE: exit: +; SCALABLE-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP18]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: ret i32 [[DOTLCSSA]] ; entry: br label %for.body @@ -359,11 +610,41 @@ exit: ; preds = %for.body } define float @select_const_f32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { -; CHECK-LABEL: @select_const_f32_from_icmp -; CHECK-NOT: vector.body +; CHECK-LABEL: define float @select_const_f32_from_icmp( +; CHECK-SAME: ptr nocapture readonly [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi fast float [ 3.000000e+00, [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 3 +; CHECK-NEXT: [[TMP5]] = select fast i1 [[TMP4]], float [[TMP1]], float 7.000000e+00 +; CHECK-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP6]], [[N]] +; CHECK-NEXT: br i1 [[TMP7]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi float [ [[TMP5]], [[FOR_BODY]] ] +; CHECK-NEXT: ret float 
[[DOTLCSSA]] ; -; SCALABLE-LABEL: @select_const_f32_from_icmp -; SCALABLE-NOT: vector.body +; SCALABLE-LABEL: define float @select_const_f32_from_icmp( +; SCALABLE-SAME: ptr nocapture readonly [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] +; SCALABLE: for.body: +; SCALABLE-NEXT: [[TMP0:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[TMP1:%.*]] = phi fast float [ 3.000000e+00, [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; SCALABLE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +; SCALABLE-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 3 +; SCALABLE-NEXT: [[TMP5]] = select fast i1 [[TMP4]], float [[TMP1]], float 7.000000e+00 +; SCALABLE-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 +; SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP6]], [[N]] +; SCALABLE-NEXT: br i1 [[TMP7]], label [[EXIT:%.*]], label [[FOR_BODY]] +; SCALABLE: exit: +; SCALABLE-NEXT: [[DOTLCSSA:%.*]] = phi float [ [[TMP5]], [[FOR_BODY]] ] +; SCALABLE-NEXT: ret float [[DOTLCSSA]] ; entry: br label %for.body @@ -384,63 +665,127 @@ exit: ; preds = %for.body } define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) #0 { -; CHECK-LABEL: @pred_select_const_i32_from_icmp +; CHECK-LABEL: define i32 @pred_select_const_i32_from_icmp( +; CHECK-SAME: ptr noalias nocapture readonly [[SRC1:%.*]], ptr noalias nocapture readonly [[SRC2:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 
[[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[SRC2:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[TMP4]], <4 x i32> poison) -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_MASKED_LOAD]], -; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i1> [[VEC_PHI]], [[TMP8]] -; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP9]], <4 x i1> [[VEC_PHI]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[SRC2]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP5]], i32 4, <4 x i1> [[TMP3]], <4 x i32> poison) +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[WIDE_MASKED_LOAD]], +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> , <4 x i32> [[VEC_PHI]] 
+; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP7]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[PREDPHI]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP12]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 0 +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[PREDPHI]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP9]], i32 1, i32 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], [[FOR_INC]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[I_013]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP10]], 35 +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; CHECK: if.then: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds 
i32, ptr [[SRC2]], i64 [[I_013]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP11]], 2 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP3]], i32 1, i32 [[R_012]] +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[R_1]] = phi i32 [ [[R_012]], [[FOR_BODY]] ], [ [[SPEC_SELECT]], [[IF_THEN]] ] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_013]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[R_1_LCSSA:%.*]] = phi i32 [ [[R_1]], [[FOR_INC]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[R_1_LCSSA]] ; -; SCALABLE-LABEL: @pred_select_const_i32_from_icmp +; SCALABLE-LABEL: define i32 @pred_select_const_i32_from_icmp( +; SCALABLE-SAME: ptr noalias nocapture readonly [[SRC1:%.*]], ptr noalias nocapture readonly [[SRC2:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; SCALABLE-NEXT: br label 
[[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 [[TMP4]] -; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; SCALABLE-NEXT: [[TMP8:%.*]] = icmp sgt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 35, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[SRC2:%.*]], i64 [[TMP4]] -; SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 0 -; SCALABLE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP10]], i32 4, [[TMP8]], poison) +; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[TMP6]] +; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; SCALABLE-NEXT: [[TMP9:%.*]] = icmp sgt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 35, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[SRC2]], i64 [[TMP6]] +; SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 +; SCALABLE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP11]], i32 4, [[TMP9]], poison) ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 
2, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[TMP13:%.*]] = or [[VEC_PHI]], [[TMP12]] -; SCALABLE-NEXT: [[PREDPHI]] = select [[TMP8]], [[TMP13]], [[VEC_PHI]] -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] -; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; SCALABLE-NEXT: [[TMP13:%.*]] = select [[TMP12]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), [[VEC_PHI]] +; SCALABLE-NEXT: [[PREDPHI]] = select [[TMP9]], [[TMP13]], [[VEC_PHI]] +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[PREDPHI]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP18]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 0 +; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne [[PREDPHI]], zeroinitializer +; SCALABLE-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[RDX_SELECT_CMP]]) +; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP15]], i32 1, i32 0 +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; SCALABLE: scalar.ph: +; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SCALABLE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] +; SCALABLE: for.body: +; SCALABLE-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; SCALABLE-NEXT: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], 
[[FOR_INC]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[I_013]] +; SCALABLE-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; SCALABLE-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP16]], 35 +; SCALABLE-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; SCALABLE: if.then: +; SCALABLE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[I_013]] +; SCALABLE-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; SCALABLE-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP17]], 2 +; SCALABLE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP3]], i32 1, i32 [[R_012]] +; SCALABLE-NEXT: br label [[FOR_INC]] +; SCALABLE: for.inc: +; SCALABLE-NEXT: [[R_1]] = phi i32 [ [[R_012]], [[FOR_BODY]] ], [ [[SPEC_SELECT]], [[IF_THEN]] ] +; SCALABLE-NEXT: [[INC]] = add nuw nsw i64 [[I_013]], 1 +; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; SCALABLE: for.end.loopexit: +; SCALABLE-NEXT: [[R_1_LCSSA:%.*]] = phi i32 [ [[R_1]], [[FOR_INC]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: ret i32 [[R_1_LCSSA]] ; entry: br label %for.body @@ -472,3 +817,34 @@ for.end.loopexit: ; preds = %for.inc } attributes #0 = { "target-features"="+f,+v" } +;. 
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} +;. +; SCALABLE: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; SCALABLE: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; SCALABLE: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; SCALABLE: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; SCALABLE: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; SCALABLE: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; SCALABLE: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; SCALABLE: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; SCALABLE: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; SCALABLE: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; SCALABLE: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; SCALABLE: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} +; SCALABLE: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; SCALABLE: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} +;. 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index c55e732c901475..59b8ce42380d9d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -25,7 +25,7 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; AUTO_VEC: vector.ph: ; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[ZEXT]], 2147483616 -; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 ; AUTO_VEC-NEXT: br label [[VECTOR_BODY:%.*]] @@ -201,7 +201,7 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) { ; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; AUTO_VEC: vector.ph: ; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792 -; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to double +; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to double ; AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast double [[DOTCAST]], 3.000000e+00 ; AUTO_VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTO_VEC: vector.body: @@ -366,7 +366,7 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) { ; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; AUTO_VEC: vector.ph: ; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967264 -; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; AUTO_VEC-NEXT: [[TMP1:%.*]] = fmul reassoc float [[DOTCAST]], 4.200000e+01 ; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd reassoc float [[TMP1]], 
1.000000e+00 ; AUTO_VEC-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/blend-in-header.ll b/llvm/test/Transforms/LoopVectorize/blend-in-header.ll new file mode 100644 index 00000000000000..01e223a3243796 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/blend-in-header.ll @@ -0,0 +1,233 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" + +; Test with blend recipe in header VPBB, from +; https://github.com/llvm/llvm-project/issues/88297. +define i64 @pr88297() { +; CHECK-LABEL: define i64 @pr88297() { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 false, label [[LOOP_LATCH]], label [[THEN:%.*]] +; CHECK: then: +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[R:%.*]] = phi i64 [ 1, [[THEN]] ], [ 0, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 
1 +; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt i32 [[IV]], 1000 +; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[R_LCSSA:%.*]] = phi i64 [ [[R]], [[LOOP_LATCH]] ], [ 1, [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[R_LCSSA]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 false, label %loop.latch, label %then + +then: + br label %loop.latch + +loop.latch: + %r = phi i64 [ 1, %then ], [ 0, %loop.header ] + %iv.next = add i32 %iv, 1 + %icmp = icmp sgt i32 %iv, 1000 + br i1 %icmp, label %exit, label %loop.header + +exit: + %r.lcssa = phi i64 [ %r, %loop.latch ] + ret i64 %r.lcssa +} + +define i64 @pr88297_incoming_ops_reordered() { +; CHECK-LABEL: define i64 @pr88297_incoming_ops_reordered() { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 false, label [[LOOP_LATCH]], label [[THEN:%.*]] +; CHECK: then: +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[R:%.*]] = phi i64 [ 0, [[LOOP_HEADER]] ], [ 1, [[THEN]] ] +; CHECK-NEXT: [[IV_NEXT]] 
= add i32 [[IV]], 1 +; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt i32 [[IV]], 1000 +; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[R_LCSSA:%.*]] = phi i64 [ [[R]], [[LOOP_LATCH]] ], [ 1, [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[R_LCSSA]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 false, label %loop.latch, label %then + +then: + br label %loop.latch + +loop.latch: + %r = phi i64 [ 0, %loop.header ], [ 1, %then ] + %iv.next = add i32 %iv, 1 + %icmp = icmp sgt i32 %iv, 1000 + br i1 %icmp, label %exit, label %loop.header + +exit: + %r.lcssa = phi i64 [ %r, %loop.latch ] + ret i64 %r.lcssa +} + +define i64 @invar_cond(i1 %c) { +; CHECK-LABEL: define i64 @invar_cond( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 3 +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; 
CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[THEN:%.*]] +; CHECK: then: +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[R:%.*]] = phi i64 [ 1, [[THEN]] ], [ 0, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt i32 [[IV]], 1000 +; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[R_LCSSA:%.*]] = phi i64 [ [[R]], [[LOOP_LATCH]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[R_LCSSA]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 %c, label %loop.latch, label %then + +then: + br label %loop.latch + +loop.latch: + %r = phi i64 [ 1, %then ], [ 0, %loop.header ] + %iv.next = add i32 %iv, 1 + %icmp = icmp sgt i32 %iv, 1000 + br i1 %icmp, label %exit, label %loop.header + +exit: + %r.lcssa = phi i64 [ %r, %loop.latch ] + ret i64 %r.lcssa +} + +define i64 @invar_cond_incoming_ops_reordered(i1 %c) { +; CHECK-LABEL: define i64 @invar_cond_incoming_ops_reordered( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> , <4 x i64> 
zeroinitializer +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 3 +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[THEN:%.*]] +; CHECK: then: +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[R:%.*]] = phi i64 [ 0, [[LOOP_HEADER]] ], [ 1, [[THEN]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt i32 [[IV]], 1000 +; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[R_LCSSA:%.*]] = phi i64 [ [[R]], [[LOOP_LATCH]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[R_LCSSA]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 %c, label %loop.latch, label %then + +then: + br label %loop.latch + +loop.latch: + %r = phi i64 [ 0, %loop.header ], [ 1, %then ] + %iv.next = add i32 %iv, 1 + %icmp = icmp sgt i32 %iv, 1000 + br i1 %icmp, label %exit, label %loop.header + +exit: + %r.lcssa = phi i64 [ %r, %loop.latch ] + ret i64 %r.lcssa +} +;. 
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +;. diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll index c721da7597b1c5..0b872709ec6c6e 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -19,20 +19,20 @@ define i32 @any_of_reduction_epilog(ptr %src, i64 %N) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]] +; CHECK-NEXT: [[TMP8]] = select <4 x i1> [[TMP4]], <4 x i32> , 
<4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 1, i32 0 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 1, i32 0 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: @@ -42,33 +42,32 @@ define i32 @any_of_reduction_epilog(ptr %src, i64 %N) { ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[BC_MERGE_RDX]], 0 ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] -; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP9]], i64 0 -; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[MINMAX_IDENT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_MERGE_RDX]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i32> [[MINMAX_IDENT_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], 
[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i1> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP17:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX5]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP12]], align 1 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD7]], zeroinitializer -; CHECK-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI6]], [[TMP13]] +; CHECK-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i32> , <4 x i32> [[VEC_PHI6]] ; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], 4 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <4 x i32> [[TMP17]], zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]]) -; CHECK-NEXT: [[TMP17:%.*]] = freeze i1 [[TMP16]] -; CHECK-NEXT: [[RDX_SELECT9:%.*]] = select i1 [[TMP17]], i32 1, i32 0 +; CHECK-NEXT: [[RDX_SELECT9:%.*]] = select i1 [[TMP16]], i32 1, i32 0 ; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ 0, [[ITER_CHECK]] ], [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT9]], 
[[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ 0, [[ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -102,104 +101,6 @@ exit: ret i32 %select } -define i32 @any_of_reduction_epilog_arg_as_start_value(ptr %src, i64 %N, i32 %start) { -; CHECK-LABEL: define i32 @any_of_reduction_epilog_arg_as_start_value( -; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) { -; CHECK-NEXT: iter.check: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] -; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 1, i32 [[START]] -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]] -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] -; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[BC_MERGE_RDX]], [[START]] -; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4 -; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] -; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP9]], i64 0 -; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[MINMAX_IDENT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i1> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 
[[INDEX5]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP12]], align 1 -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD7]], zeroinitializer -; CHECK-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI6]], [[TMP13]] -; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], 4 -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]]) -; CHECK-NEXT: [[TMP17:%.*]] = freeze i1 [[TMP16]] -; CHECK-NEXT: [[RDX_SELECT9:%.*]] = select i1 [[TMP17]], i32 1, i32 [[START]] -; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] -; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ [[START]], [[ITER_CHECK]] ], [ [[START]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1 -; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i8 [[LOAD]], 0 -; CHECK-NEXT: [[SELECT]] = select i1 [[ICMP]], i32 1, i32 [[RED]] -; CHECK-NEXT: [[IV_NEXT]] = add i64 
[[IV]], 1 -; CHECK-NEXT: [[ICMP3:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[ICMP3]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: exit: -; CHECK-NEXT: [[SELECT_LCSSA:%.*]] = phi i32 [ [[SELECT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SELECT_LCSSA]] -; -entry: - br label %loop - -loop: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %red = phi i32 [ %start, %entry ], [ %select, %loop ] - %gep = getelementptr inbounds i8, ptr %src, i64 %iv - %load = load i8, ptr %gep, align 1 - %icmp = icmp eq i8 %load, 0 - %select = select i1 %icmp, i32 1, i32 %red - %iv.next = add i64 %iv, 1 - %icmp3 = icmp eq i64 %iv, %N - br i1 %icmp3, label %exit, label %loop - -exit: - ret i32 %select -} define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-LABEL: define i1 @any_of_reduction_i1_epilog( @@ -223,15 +124,14 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], -; CHECK-NEXT: [[TMP3]] = or <4 x i1> [[VEC_PHI]], [[TMP2]] +; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP1]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP5:%.*]] = call i1 
@llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) -; CHECK-NEXT: [[TMP6:%.*]] = freeze i1 [[TMP5]] +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i1 false, i1 false ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -244,11 +144,10 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ false, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i1 [[BC_MERGE_RDX]], false ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] ; CHECK-NEXT: [[IND_END5:%.*]] = trunc i64 [[N_VEC3]] to i32 -; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[BC_MERGE_RDX]], i64 0 ; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[MINMAX_IDENT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -261,22 +160,21 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i1> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP10:%.*]], 
[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND11:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[VEC_IND11]], [[BROADCAST_SPLAT14]] -; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], -; CHECK-NEXT: [[TMP10]] = or <4 x i1> [[VEC_PHI10]], [[TMP9]] +; CHECK-NEXT: [[TMP10]] = select <4 x i1> [[TMP8]], <4 x i1> [[VEC_PHI10]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX9]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <4 x i32> [[VEC_IND11]], ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) -; CHECK-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]] +; CHECK-NEXT: [[RDX_SELECT_CMP16:%.*]] = icmp ne <4 x i1> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP16]]) ; CHECK-NEXT: [[RDX_SELECT16:%.*]] = select i1 [[TMP13]], i1 false, i1 false ; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N8]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX17:%.*]] = phi i1 [ false, [[ITER_CHECK]] ], [ false, [[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT16]], 
[[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX17:%.*]] = phi i1 [ false, [[ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -287,7 +185,7 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 ; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[CMP_2]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP_2]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i1 [ [[SEL]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[RDX_SELECT16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i1 [[SEL_LCSSA]] @@ -321,7 +219,4 @@ exit: ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} ; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} -; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} -; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} -; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]} ;. 
diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index caea114e3d4487..bd658c31768a84 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -29,7 +29,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644 -; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 @@ -84,7 +84,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483640 -; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 @@ -142,7 +142,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: ; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = 
sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -193,7 +193,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 @@ -276,7 +276,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644 -; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 @@ -331,7 +331,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: 
vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483640 -; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 @@ -389,7 +389,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: ; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] ; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -442,7 +442,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 @@ -526,7 +526,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 
%N) #0 { ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644 -; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], [[INIT:%.*]] ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 @@ -574,7 +574,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483640 -; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], [[INIT:%.*]] ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 @@ -625,7 +625,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: ; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], [[INIT:%.*]] ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -675,7 +675,7 @@ define void @fp_iv_loop2(float %init, ptr noalias 
nocapture %A, i32 %N) #0 { ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], [[INIT:%.*]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 @@ -758,10 +758,10 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], 2147483644 -; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = fmul fast float [[DOTCAST]], -5.000000e-01 ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000 -; VEC4_INTERL1-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] ; VEC4_INTERL1-NEXT: [[IND_END3:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 @@ -835,10 +835,10 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], 
2147483640 -; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = fmul fast float [[DOTCAST]], -5.000000e-01 ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000 -; VEC4_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] ; VEC4_INTERL2-NEXT: [[IND_END3:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 @@ -922,10 +922,10 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: ; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], 2147483646 -; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = fmul fast float [[DOTCAST]], -5.000000e-01 ; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000 -; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] ; VEC1_INTERL2-NEXT: [[IND_END3:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1000,10 +1000,10 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], 
2147483646 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fmul fast float [[DOTCAST]], -5.000000e-01 ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END3:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 @@ -1113,7 +1113,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644 -; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], 1.000000e+00 ; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1158,7 +1158,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483640 -; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float 
[[TMP1]], 1.000000e+00 ; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1206,7 +1206,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: ; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], 1.000000e+00 ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1256,7 +1256,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], 1.000000e+00 ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1319,7 +1319,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804 -; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL1: vector.body: ; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ] @@ -1396,7 +1396,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775800 -; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL2: vector.body: ; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE17:%.*]] ] @@ -1512,7 +1512,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: ; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806 -; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC1_INTERL2: vector.body: ; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] @@ -1570,7 +1570,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
[[PRED_STORE_CONTINUE4:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll index 6a9f83a9e0aa2d..1b4bcf6a3739a1 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll @@ -1,114 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S < %s | FileCheck %s --check-prefix=CHECK-VF2IC1 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC2 define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) { -; CHECK-VF2IC1-LABEL: @pred_select_const_i32_from_icmp( +; CHECK-VF2IC1-LABEL: define i32 @pred_select_const_i32_from_icmp( +; CHECK-VF2IC1-SAME: ptr noalias nocapture readonly [[SRC1:%.*]], ptr noalias nocapture readonly [[SRC2:%.*]], i64 [[N:%.*]]) { +; CHECK-VF2IC1-NEXT: entry: +; CHECK-VF2IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-VF2IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF2IC1: vector.ph: +; CHECK-VF2IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-VF2IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF2IC1-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF2IC1: vector.body: -; CHECK-VF2IC1: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, %vector.ph ], [ [[PREDPHI:%.*]], %pred.load.continue2 ] -; CHECK-VF2IC1: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr {{%.*}}, align 4 -; CHECK-VF2IC1-NEXT: [[TMP4:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], -; CHECK-VF2IC1-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 -; CHECK-VF2IC1-NEXT: br i1 [[TMP5]], label %pred.load.if, label %pred.load.continue +; CHECK-VF2IC1-NEXT: 
[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE2:%.*]] ] +; CHECK-VF2IC1-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_LOAD_CONTINUE2]] ] +; CHECK-VF2IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF2IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[TMP0]] +; CHECK-VF2IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-VF2IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 +; CHECK-VF2IC1-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], +; CHECK-VF2IC1-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 +; CHECK-VF2IC1-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK-VF2IC1: pred.load.if: -; CHECK-VF2IC1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC2:%.*]], i64 {{%.*}} -; CHECK-VF2IC1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -; CHECK-VF2IC1-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0 -; CHECK-VF2IC1-NEXT: br label %pred.load.continue +; CHECK-VF2IC1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP0]] +; CHECK-VF2IC1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-VF2IC1-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP6]], i32 0 +; CHECK-VF2IC1-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK-VF2IC1: pred.load.continue: -; CHECK-VF2IC1-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %vector.body ], [ [[TMP8]], %pred.load.if ] -; CHECK-VF2IC1-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 -; CHECK-VF2IC1-NEXT: br i1 [[TMP10]], label %pred.load.if1, label %pred.load.continue2 +; CHECK-VF2IC1-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] +; CHECK-VF2IC1-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 +; CHECK-VF2IC1-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], 
label [[PRED_LOAD_CONTINUE2]] ; CHECK-VF2IC1: pred.load.if1: -; CHECK-VF2IC1: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 {{%.*}} -; CHECK-VF2IC1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -; CHECK-VF2IC1-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 -; CHECK-VF2IC1-NEXT: br label %pred.load.continue2 +; CHECK-VF2IC1-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF2IC1-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP10]] +; CHECK-VF2IC1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +; CHECK-VF2IC1-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP12]], i32 1 +; CHECK-VF2IC1-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK-VF2IC1: pred.load.continue2: -; CHECK-VF2IC1-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %pred.load.continue ], [ [[TMP14]], %pred.load.if1 ] -; CHECK-VF2IC1-NEXT: [[TMP16:%.*]] = icmp eq <2 x i32> [[TMP15]], -; CHECK-VF2IC1-NEXT: [[TMP17:%.*]] = or <2 x i1> [[VEC_PHI]], [[TMP16]] -; CHECK-VF2IC1-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP17]], <2 x i1> [[VEC_PHI]] -; CHECK-VF2IC1: br i1 {{%.*}}, label %middle.block, label %vector.body +; CHECK-VF2IC1-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] +; CHECK-VF2IC1-NEXT: [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP14]], +; CHECK-VF2IC1-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> , <2 x i32> [[VEC_PHI]] +; CHECK-VF2IC1-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP16]], <2 x i32> [[VEC_PHI]] +; CHECK-VF2IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-VF2IC1-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF2IC1-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF2IC1: middle.block: -; CHECK-VF2IC1-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[PREDPHI]]) -; 
CHECK-VF2IC1-NEXT: [[FR_TMP20:%.*]] = freeze i1 [[TMP20]] -; CHECK-VF2IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP20]], i32 1, i32 0 +; CHECK-VF2IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <2 x i32> [[PREDPHI]], zeroinitializer +; CHECK-VF2IC1-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[RDX_SELECT_CMP]]) +; CHECK-VF2IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP18]], i32 1, i32 0 +; CHECK-VF2IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF2IC1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK-VF2IC1: scalar.ph: -; CHECK-VF2IC1: [[BC_RESUME_VAL:%.*]] = phi i64 [ {{%.*}}, %middle.block ], [ 0, %entry ] -; CHECK-VF2IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %entry ], [ [[RDX_SELECT]], %middle.block ] -; CHECK-VF2IC1-NEXT: br label %for.body +; CHECK-VF2IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF2IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF2IC1-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-VF2IC1: for.body: -; CHECK-VF2IC1: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], %for.inc ], [ [[BC_MERGE_RDX]], %scalar.ph ] -; CHECK-VF2IC1: [[TMP21:%.*]] = load i32, ptr {{%.*}}, align 4 -; CHECK-VF2IC1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP21]], 35 -; CHECK-VF2IC1-NEXT: br i1 [[CMP1]], label %if.then, label %for.inc +; CHECK-VF2IC1-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-VF2IC1-NEXT: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], [[FOR_INC]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-VF2IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[I_013]] +; CHECK-VF2IC1-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-VF2IC1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP19]], 35 +; CHECK-VF2IC1-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK-VF2IC1: if.then: -; 
CHECK-VF2IC1: [[TMP22:%.*]] = load i32, ptr {{%.*}}, align 4 -; CHECK-VF2IC1-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP22]], 2 +; CHECK-VF2IC1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[I_013]] +; CHECK-VF2IC1-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-VF2IC1-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP20]], 2 ; CHECK-VF2IC1-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP3]], i32 1, i32 [[R_012]] -; CHECK-VF2IC1-NEXT: br label %for.inc +; CHECK-VF2IC1-NEXT: br label [[FOR_INC]] ; CHECK-VF2IC1: for.inc: -; CHECK-VF2IC1-NEXT: [[R_1]] = phi i32 [ [[R_012]], %for.body ], [ [[SPEC_SELECT]], %if.then ] +; CHECK-VF2IC1-NEXT: [[R_1]] = phi i32 [ [[R_012]], [[FOR_BODY]] ], [ [[SPEC_SELECT]], [[IF_THEN]] ] +; CHECK-VF2IC1-NEXT: [[INC]] = add nuw nsw i64 [[I_013]], 1 +; CHECK-VF2IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-VF2IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-VF2IC1: for.end.loopexit: -; CHECK-VF2IC1-NEXT: [[R_1_LCSSA:%.*]] = phi i32 [ [[R_1]], %for.inc ], [ [[RDX_SELECT]], %middle.block ] +; CHECK-VF2IC1-NEXT: [[R_1_LCSSA:%.*]] = phi i32 [ [[R_1]], [[FOR_INC]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] ; CHECK-VF2IC1-NEXT: ret i32 [[R_1_LCSSA]] ; -; CHECK-VF1IC2-LABEL: @pred_select_const_i32_from_icmp( +; CHECK-VF1IC2-LABEL: define i32 @pred_select_const_i32_from_icmp( +; CHECK-VF1IC2-SAME: ptr noalias nocapture readonly [[SRC1:%.*]], ptr noalias nocapture readonly [[SRC2:%.*]], i64 [[N:%.*]]) { +; CHECK-VF1IC2-NEXT: entry: +; CHECK-VF1IC2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-VF1IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF1IC2: vector.ph: +; CHECK-VF1IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-VF1IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF1IC2-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF1IC2: vector.body: -; 
CHECK-VF1IC2: [[VEC_PHI:%.*]] = phi i1 [ false, %vector.ph ], [ [[PREDPHI:%.*]], %pred.load.continue3 ] -; CHECK-VF1IC2-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %vector.ph ], [ [[PREDPHI5:%.*]], %pred.load.continue3 ] -; CHECK-VF1IC2: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 {{%.*}} -; CHECK-VF1IC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 {{%.*}} -; CHECK-VF1IC2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -; CHECK-VF1IC2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 -; CHECK-VF1IC2-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], 35 -; CHECK-VF1IC2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], 35 -; CHECK-VF1IC2-NEXT: br i1 [[TMP4]], label %pred.load.if, label %pred.load.continue +; CHECK-VF1IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE3:%.*]] ] +; CHECK-VF1IC2-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_LOAD_CONTINUE3]] ] +; CHECK-VF1IC2-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[PREDPHI4:%.*]], [[PRED_LOAD_CONTINUE3]] ] +; CHECK-VF1IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF1IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF1IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[TMP0]] +; CHECK-VF1IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[TMP1]] +; CHECK-VF1IC2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-VF1IC2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +; CHECK-VF1IC2-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[TMP4]], 35 +; CHECK-VF1IC2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], 35 +; CHECK-VF1IC2-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK-VF1IC2: pred.load.if: -; CHECK-VF1IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC2:%.*]], i64 {{%.*}} -; CHECK-VF1IC2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -; CHECK-VF1IC2-NEXT: br label 
%pred.load.continue +; CHECK-VF1IC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP0]] +; CHECK-VF1IC2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-VF1IC2-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK-VF1IC2: pred.load.continue: -; CHECK-VF1IC2-NEXT: [[TMP8:%.*]] = phi i32 [ poison, %vector.body ], [ [[TMP7]], %pred.load.if ] -; CHECK-VF1IC2-NEXT: br i1 [[TMP5]], label %pred.load.if2, label %pred.load.continue3 +; CHECK-VF1IC2-NEXT: [[TMP10:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_LOAD_IF]] ] +; CHECK-VF1IC2-NEXT: br i1 [[TMP7]], label [[PRED_LOAD_IF2:%.*]], label [[PRED_LOAD_CONTINUE3]] ; CHECK-VF1IC2: pred.load.if2: -; CHECK-VF1IC2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 {{%.*}} -; CHECK-VF1IC2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -; CHECK-VF1IC2-NEXT: br label %pred.load.continue3 +; CHECK-VF1IC2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP1]] +; CHECK-VF1IC2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +; CHECK-VF1IC2-NEXT: br label [[PRED_LOAD_CONTINUE3]] ; CHECK-VF1IC2: pred.load.continue3: -; CHECK-VF1IC2-NEXT: [[TMP11:%.*]] = phi i32 [ poison, %pred.load.continue ], [ [[TMP10]], %pred.load.if2 ] -; CHECK-VF1IC2-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 2 -; CHECK-VF1IC2-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP11]], 2 -; CHECK-VF1IC2-NEXT: [[TMP14:%.*]] = or i1 [[VEC_PHI]], [[TMP12]] -; CHECK-VF1IC2-NEXT: [[TMP15:%.*]] = or i1 [[VEC_PHI2]], [[TMP13]] -; CHECK-VF1IC2-NEXT: [[PREDPHI]] = select i1 [[TMP4]], i1 [[TMP14]], i1 [[VEC_PHI]] -; CHECK-VF1IC2-NEXT: [[PREDPHI5]] = select i1 [[TMP5]], i1 [[TMP15]], i1 [[VEC_PHI2]] -; CHECK-VF1IC2: br i1 {{%.*}}, label %middle.block, label %vector.body +; CHECK-VF1IC2-NEXT: [[TMP13:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF2]] ] +; CHECK-VF1IC2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP10]], 2 +; CHECK-VF1IC2-NEXT: [[TMP15:%.*]] = icmp eq 
i32 [[TMP13]], 2 +; CHECK-VF1IC2-NEXT: [[TMP16:%.*]] = select i1 [[TMP14]], i32 1, i32 [[VEC_PHI]] +; CHECK-VF1IC2-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], i32 1, i32 [[VEC_PHI1]] +; CHECK-VF1IC2-NEXT: [[PREDPHI]] = select i1 [[TMP6]], i32 [[TMP16]], i32 [[VEC_PHI]] +; CHECK-VF1IC2-NEXT: [[PREDPHI4]] = select i1 [[TMP7]], i32 [[TMP17]], i32 [[VEC_PHI1]] +; CHECK-VF1IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-VF1IC2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF1IC2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF1IC2: middle.block: -; CHECK-VF1IC2-NEXT: [[OR:%.*]] = or i1 [[PREDPHI5]], [[PREDPHI]] -; CHECK-VF1IC2-NEXT: [[FR_OR:%.*]] = freeze i1 [[OR]] -; CHECK-VF1IC2-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_OR]], i32 1, i32 0 -; CHECK-VF1IC2: br i1 {{%.*}}, label %for.end.loopexit, label %scalar.ph +; CHECK-VF1IC2-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[PREDPHI]], 0 +; CHECK-VF1IC2-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[PREDPHI]], i32 [[PREDPHI4]] +; CHECK-VF1IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF1IC2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK-VF1IC2: scalar.ph: -; CHECK-VF1IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ {{%.*}}, %middle.block ], [ 0, %entry ] -; CHECK-VF1IC2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %entry ], [ [[RDX_SELECT]], %middle.block ] -; CHECK-VF1IC2-NEXT: br label %for.body +; CHECK-VF1IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF1IC2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC2-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-VF1IC2: for.body: -; CHECK-VF1IC2-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], %for.inc ], [ [[BC_RESUME_VAL]], %scalar.ph ] -; CHECK-VF1IC2-NEXT: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], %for.inc ], [ 
[[BC_MERGE_RDX]], %scalar.ph ] -; CHECK-VF1IC2: [[TMP19:%.*]] = load i32, ptr {{%.*}}, align 4 +; CHECK-VF1IC2-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-VF1IC2-NEXT: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], [[FOR_INC]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-VF1IC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[I_013]] +; CHECK-VF1IC2-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-VF1IC2-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP19]], 35 -; CHECK-VF1IC2-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label %for.inc +; CHECK-VF1IC2-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK-VF1IC2: if.then: -; CHECK-VF1IC2: [[TMP20:%.*]] = load i32, ptr {{%.*}}, align 4 +; CHECK-VF1IC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[I_013]] +; CHECK-VF1IC2-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 ; CHECK-VF1IC2-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP20]], 2 ; CHECK-VF1IC2-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP3]], i32 1, i32 [[R_012]] -; CHECK-VF1IC2-NEXT: br label %for.inc +; CHECK-VF1IC2-NEXT: br label [[FOR_INC]] ; CHECK-VF1IC2: for.inc: -; CHECK-VF1IC2-NEXT: [[R_1]] = phi i32 [ [[R_012]], %for.body ], [ [[SPEC_SELECT]], %if.then ] -; CHECK-VF1IC2: br i1 {{%.*}}, label %for.end.loopexit, label %for.body +; CHECK-VF1IC2-NEXT: [[R_1]] = phi i32 [ [[R_012]], [[FOR_BODY]] ], [ [[SPEC_SELECT]], [[IF_THEN]] ] +; CHECK-VF1IC2-NEXT: [[INC]] = add nuw nsw i64 [[I_013]], 1 +; CHECK-VF1IC2-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-VF1IC2-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-VF1IC2: for.end.loopexit: -; CHECK-VF1IC2-NEXT: [[R_1_LCSSA:%.*]] = phi i32 [ [[R_1]], %for.inc ], [ [[RDX_SELECT]], %middle.block ] +; CHECK-VF1IC2-NEXT: [[R_1_LCSSA:%.*]] = phi i32 [ [[R_1]], [[FOR_INC]] ], [ [[RDX_SELECT]], 
[[MIDDLE_BLOCK]] ] ; CHECK-VF1IC2-NEXT: ret i32 [[R_1_LCSSA]] ; entry: @@ -139,3 +180,14 @@ for.end.loopexit: ; preds = %for.inc %r.1.lcssa = phi i32 [ %r.1, %for.inc ] ret i32 %r.1.lcssa } +;. +; CHECK-VF2IC1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-VF2IC1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-VF2IC1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-VF2IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +;. +; CHECK-VF1IC2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-VF1IC2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-VF1IC2: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-VF1IC2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} +;. diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll index 993b56a05207be..c9f2aaef6d5c8e 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll @@ -5,47 +5,45 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-LABEL: @select_const_i32_from_icmp ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_ICMP]], <4 x i32> [[VEC_PHI]], <4 x i32> ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: 
{{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 +; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3 ; CHECK-VF4IC4: vector.body: -; CHECK-VF4IC4: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] +; CHECK-VF4IC4: [[VEC_PHI1:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] ; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp eq <4 x i32> {{.*}}, ; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq <4 x i32> {{.*}}, ; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq <4 x i32> {{.*}}, ; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq <4 x i32> {{.*}}, -; CHECK-VF4IC4-NEXT: [[NOT1:%.*]] = xor <4 x i1> [[VEC_ICMP1]], -; CHECK-VF4IC4-NEXT: [[NOT2:%.*]] = xor <4 x i1> [[VEC_ICMP2]], -; CHECK-VF4IC4-NEXT: [[NOT3:%.*]] = xor <4 x i1> [[VEC_ICMP3]], -; CHECK-VF4IC4-NEXT: [[NOT4:%.*]] = xor <4 x i1> [[VEC_ICMP4]], -; CHECK-VF4IC4-NEXT: [[VEC_SEL1:%.*]] = or <4 x i1> [[VEC_PHI1]], [[NOT1]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL2:%.*]] = or <4 x i1> [[VEC_PHI2]], [[NOT2]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = or <4 x i1> [[VEC_PHI3]], [[NOT3]] -; CHECK-VF4IC4-NEXT: 
[[VEC_SEL4:%.*]] = or <4 x i1> [[VEC_PHI4]], [[NOT4]] +; CHECK-VF4IC4-NEXT: [[VEC_SEL1:%.*]] = select <4 x i1> [[VEC_ICMP1]], <4 x i32> [[VEC_PHI1]], <4 x i32> +; CHECK-VF4IC4-NEXT: [[VEC_SEL2:%.*]] = select <4 x i1> [[VEC_ICMP2]], <4 x i32> [[VEC_PHI2]], <4 x i32> +; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = select <4 x i1> [[VEC_ICMP3]], <4 x i32> [[VEC_PHI3]], <4 x i32> +; CHECK-VF4IC4-NEXT: [[VEC_SEL4:%.*]] = select <4 x i1> [[VEC_ICMP4]], <4 x i32> [[VEC_PHI4]], <4 x i32> ; CHECK-VF4IC4: middle.block: -; CHECK-VF4IC4-NEXT: [[VEC_SEL5:%.*]] = or <4 x i1> [[VEC_SEL2]], [[VEC_SEL1]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL6:%.*]] = or <4 x i1> [[VEC_SEL3]], [[VEC_SEL5]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL7:%.*]] = or <4 x i1> [[VEC_SEL4]], [[VEC_SEL6]] -; CHECK-VF4IC4-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL7]]) -; CHECK-VF4IC4-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC4-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 +; CHECK-VF4IC4-NEXT: [[VEC_ICMP5:%.*]] = icmp ne <4 x i32> [[VEC_SEL1]], +; CHECK-VF4IC4-NEXT: [[VEC_SEL5:%.*]] = select <4 x i1> [[VEC_ICMP5]], <4 x i32> [[VEC_SEL1]], <4 x i32> [[VEC_SEL2]] +; CHECK-VF4IC4-NEXT: [[VEC_ICMP6:%.*]] = icmp ne <4 x i32> [[VEC_SEL5]], +; CHECK-VF4IC4-NEXT: [[VEC_SEL6:%.*]] = select <4 x i1> [[VEC_ICMP6]], <4 x i32> [[VEC_SEL5]], <4 x i32> [[VEC_SEL3]] +; CHECK-VF4IC4-NEXT: [[VEC_ICMP7:%.*]] = icmp ne <4 x i32> [[VEC_SEL6]], +; CHECK-VF4IC4-NEXT: [[VEC_SEL_FIN:%.*]] = select <4 x i1> [[VEC_ICMP7]], <4 x i32> [[VEC_SEL6]], <4 x i32> [[VEC_SEL4]] +; CHECK-VF4IC4-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL_FIN]], +; CHECK-VF4IC4-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]]) +; CHECK-VF4IC4-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3 ; CHECK-VF1IC4: vector.body: -; CHECK-VF1IC4: [[VEC_PHI1:%.*]] = phi i1 [ false, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %vector.ph 
], [ [[VEC_SEL2:%.*]], %vector.body ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i1 [ false, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] +; CHECK-VF1IC4: [[VEC_PHI1:%.*]] = phi i32 [ 3, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 3, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ 3, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i32 [ 3, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] ; CHECK-VF1IC4: [[VEC_LOAD1:%.*]] = load i32 ; CHECK-VF1IC4-NEXT: [[VEC_LOAD2:%.*]] = load i32 ; CHECK-VF1IC4-NEXT: [[VEC_LOAD3:%.*]] = load i32 @@ -54,20 +52,17 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF1IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq i32 [[VEC_LOAD2]], 3 ; CHECK-VF1IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq i32 [[VEC_LOAD3]], 3 ; CHECK-VF1IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq i32 [[VEC_LOAD4]], 3 -; CHECK-VF1IC4-NEXT: [[NOT1:%.*]] = xor i1 [[VEC_ICMP1]], true -; CHECK-VF1IC4-NEXT: [[NOT2:%.*]] = xor i1 [[VEC_ICMP2]], true -; CHECK-VF1IC4-NEXT: [[NOT3:%.*]] = xor i1 [[VEC_ICMP3]], true -; CHECK-VF1IC4-NEXT: [[NOT4:%.*]] = xor i1 [[VEC_ICMP4]], true -; CHECK-VF1IC4-NEXT: [[VEC_SEL1:%.*]] = or i1 [[VEC_PHI1]], [[NOT1]] -; CHECK-VF1IC4-NEXT: [[VEC_SEL2:%.*]] = or i1 [[VEC_PHI2]], [[NOT2]] -; CHECK-VF1IC4-NEXT: [[VEC_SEL3:%.*]] = or i1 [[VEC_PHI3]], [[NOT3]] -; CHECK-VF1IC4-NEXT: [[VEC_SEL4:%.*]] = or i1 [[VEC_PHI4]], [[NOT4]] +; CHECK-VF1IC4-NEXT: [[VEC_SEL1]] = select i1 [[VEC_ICMP1]], i32 [[VEC_PHI1]], i32 7 +; CHECK-VF1IC4-NEXT: [[VEC_SEL2]] = select i1 [[VEC_ICMP2]], i32 [[VEC_PHI2]], i32 7 +; CHECK-VF1IC4-NEXT: [[VEC_SEL3]] = select i1 [[VEC_ICMP3]], i32 [[VEC_PHI3]], i32 7 +; CHECK-VF1IC4-NEXT: [[VEC_SEL4]] = select i1 [[VEC_ICMP4]], i32 [[VEC_PHI4]], i32 7 ; 
CHECK-VF1IC4: middle.block: -; CHECK-VF1IC4-NEXT: [[VEC_SEL5:%.*]] = or i1 [[VEC_SEL2]], [[VEC_SEL1]] -; CHECK-VF1IC4-NEXT: [[VEC_SEL6:%.*]] = or i1 [[VEC_SEL3]], [[VEC_SEL5]] -; CHECK-VF1IC4-NEXT: [[OR_RDX:%.*]] = or i1 [[VEC_SEL4]], [[VEC_SEL6]] -; CHECK-VF1IC4-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF1IC4-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 +; CHECK-VF1IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp ne i32 [[VEC_SEL1]], 3 +; CHECK-VF1IC4-NEXT: [[VEC_SEL5:%.*]] = select i1 [[VEC_ICMP4]], i32 [[VEC_SEL1]], i32 [[VEC_SEL2]] +; CHECK-VF1IC4-NEXT: [[VEC_ICMP5:%.*]] = icmp ne i32 [[VEC_SEL5]], 3 +; CHECK-VF1IC4-NEXT: [[VEC_SEL6:%.*]] = select i1 [[VEC_ICMP5]], i32 [[VEC_SEL5]], i32 [[VEC_SEL3]] +; CHECK-VF1IC4-NEXT: [[VEC_ICMP6:%.*]] = icmp ne i32 [[VEC_SEL6]], 3 +; CHECK-VF1IC4-NEXT: {{.*}} = select i1 [[VEC_ICMP6]], i32 [[VEC_SEL6]], i32 [[VEC_SEL4]] entry: br label %for.body @@ -91,14 +86,14 @@ exit: ; preds = %for.body define i32 @select_const_i32_from_icmp2(ptr nocapture readonly %v, i64 %n) { ; CHECK-LABEL: @select_const_i32_from_icmp2 ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[VEC_ICMP]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_ICMP]], <4 x i32> , <4 x i32> [[VEC_PHI]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 +; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 
@llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3 entry: br label %for.body @@ -122,18 +117,21 @@ exit: ; preds = %for.body define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) { ; CHECK-LABEL: @select_i32_from_icmp ; CHECK-VF4IC1: vector.ph: -; CHECK-VF4IC1-NOT: shufflevector <4 x i32> -; CHECK-VF4IC1-NOT: shufflevector <4 x i32> +; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 +; CHECK-VF4IC1-NEXT: [[SPLAT_OF_A:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i64 0 +; CHECK-VF4IC1-NEXT: [[SPLAT_OF_B:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[SPLAT_OF_A]], %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_ICMP]], <4 x i32> [[VEC_PHI]], <4 x i32> [[SPLAT_OF_B]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 %b, i32 %a +; CHECK-VF4IC1-NEXT: [[FIN_INS:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 +; CHECK-VF4IC1-NEXT: [[FIN_SPLAT:%.*]] = shufflevector <4 x i32> [[FIN_INS]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: [[FIN_CMP:%.*]] = icmp ne <4 x i32> 
[[VEC_SEL]], [[FIN_SPLAT]] +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_CMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 %b, i32 %a entry: br label %for.body @@ -156,15 +154,14 @@ exit: ; preds = %for.body define i32 @select_const_i32_from_fcmp_fast(ptr nocapture readonly %v, i64 %n) { ; CHECK-LABEL: @select_const_i32_from_fcmp_fast ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x float> ; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp fast ueq <4 x float> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_FCMP]], <4 x i32> [[VEC_PHI]], <4 x i32> ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 1, i32 2 +; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 2 entry: br label %for.body @@ -187,15 +184,14 @@ exit: ; preds = %for.body define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-LABEL: @select_const_i32_from_fcmp ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x float> ; 
CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp ueq <4 x float> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_FCMP]], <4 x i32> [[VEC_PHI]], <4 x i32> ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 1, i32 2 +; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 2 entry: br label %for.body @@ -220,16 +216,18 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) { ; CHECK-VF4IC1: vector.ph: ; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 ; CHECK-VF4IC1-NEXT: [[SPLAT_OF_A:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-VF4IC1-NOT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i64 0 +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i64 0 +; CHECK-VF4IC1-NEXT: [[SPLAT_OF_B:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] -; CHECK-VF4IC1: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[SPLAT_OF_A]], -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[SPLAT_OF_A]], %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_PHI]], +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select 
<4 x i1> [[VEC_ICMP]], <4 x i32> [[VEC_PHI]], <4 x i32> [[SPLAT_OF_B]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 %b, i32 %a +; CHECK-VF4IC1-NEXT: [[FIN_INS:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 +; CHECK-VF4IC1-NEXT: [[FIN_SPLAT:%.*]] = shufflevector <4 x i32> [[FIN_INS]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: [[FIN_CMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], [[FIN_SPLAT]] +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_CMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 %b, i32 %a entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll index 55e61158a79c61..16ab45415b5cc5 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll @@ -8,25 +8,26 @@ define i64 @pr62565_incoming_value_known_undef(i64 %a, ptr %src) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi 
<2 x i64> [ undef, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%*]] = xor <2 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP4]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_PHI]], <2 x i64> [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) -; CHECK-NEXT: [[FR_TMP6:%.*]] = freeze i1 [[TMP6]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 undef +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <2 x i64> [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i64 [[A]], i64 undef ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] @@ -71,25 +72,26 @@ define i64 @pr62565_incoming_value_known_poison(i64 %a, ptr %src) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x 
i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ poison, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP4]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_PHI]], <2 x i64> [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) -; CHECK-NEXT: [[FR_TMP6:%.*]] = freeze i1 [[TMP6]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 poison +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <2 x i64> [[TMP4]], poison +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i64 [[A]], i64 poison ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] @@ -134,25 +136,30 @@ define i64 @pr62565_incoming_value_may_be_poison(i64 
%a, ptr %src, i64 %start) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <2 x i64> [[MINMAX_IDENT_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP4]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_PHI]], <2 x i64> [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) -; CHECK-NEXT: 
[[FR_TMP6:%.*]] = freeze i1 [[TMP6]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 [[START]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <2 x i64> [[TMP4]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i64 [[A]], i64 [[START]] ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll index a90b38c6a96056..fe98ca167a089e 100644 --- a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll @@ -9,6 +9,11 @@ ; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -force-vector-width=4 \ ; RUN: -force-target-supports-scalable-vectors -scalable-vectorization=on -S < %s | FileCheck --check-prefix=NO-VP %s +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ +; RUN: -force-target-supports-scalable-vectors -scalable-vectorization=on -S < %s | FileCheck --check-prefix=NO-VP-DEF %s + ; The target does not support predicated vectorization. 
define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-LABEL: @foo( @@ -80,6 +85,54 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP: for.cond.cleanup: ; NO-VP-NEXT: ret void ; +; NO-VP-DEF-LABEL: @foo( +; NO-VP-DEF-NEXT: entry: +; NO-VP-DEF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-DEF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP0]] +; NO-VP-DEF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; NO-VP-DEF: vector.ph: +; NO-VP-DEF-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-DEF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]] +; NO-VP-DEF-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; NO-VP-DEF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-DEF-NEXT: br label [[VECTOR_BODY:%.*]] +; NO-VP-DEF: vector.body: +; NO-VP-DEF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; NO-VP-DEF-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; NO-VP-DEF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP3]] +; NO-VP-DEF-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 +; NO-VP-DEF-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 +; NO-VP-DEF-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP3]] +; NO-VP-DEF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 +; NO-VP-DEF-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP7]], align 4 +; NO-VP-DEF-NEXT: [[TMP8:%.*]] = add nsw [[WIDE_LOAD1]], [[WIDE_LOAD]] +; NO-VP-DEF-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP3]] +; NO-VP-DEF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 +; NO-VP-DEF-NEXT: store [[TMP8]], ptr [[TMP10]], align 4 +; NO-VP-DEF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]] +; NO-VP-DEF-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-DEF-NEXT: br i1 
[[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-VP-DEF: middle.block: +; NO-VP-DEF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; NO-VP-DEF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; NO-VP-DEF: scalar.ph: +; NO-VP-DEF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; NO-VP-DEF-NEXT: br label [[FOR_BODY:%.*]] +; NO-VP-DEF: for.body: +; NO-VP-DEF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; NO-VP-DEF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; NO-VP-DEF-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-VP-DEF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; NO-VP-DEF-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; NO-VP-DEF-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] +; NO-VP-DEF-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; NO-VP-DEF-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 +; NO-VP-DEF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; NO-VP-DEF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; NO-VP-DEF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; NO-VP-DEF: for.cond.cleanup: +; NO-VP-DEF-NEXT: ret void +; entry: br label %for.body diff --git a/llvm/test/Transforms/PhaseOrdering/thread-uncond-bb.ll b/llvm/test/Transforms/PhaseOrdering/thread-uncond-bb.ll deleted file mode 100644 index 17146d7d5987fc..00000000000000 --- a/llvm/test/Transforms/PhaseOrdering/thread-uncond-bb.ll +++ /dev/null @@ -1,62 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt < %s -O3 -S | FileCheck %s - -define i32 @thread_uncond_bb_cmp(i1 %c, i32 %v) { -; CHECK-LABEL: define i32 @thread_uncond_bb_cmp( -; CHECK-SAME: i1 
[[C:%.*]], i32 [[V:%.*]]) local_unnamed_addr { -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C]], label [[DO_END:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: tail call void @dummy() -; CHECK-NEXT: br label [[DO_END]] -; CHECK: return: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[V]], [[IF_THEN]] ] -; CHECK-NEXT: ret i32 [[RETVAL]] -; -entry: - br i1 %c, label %do.end, label %if.then - -if.then: ; preds = %entry - call void @dummy() - %tobool = icmp eq i32 %v, 0 - br i1 %tobool, label %do.end, label %return - -do.end: ; preds = %entry, %if.then - br label %return - -return: ; preds = %if.then, %do.end - %retval = phi i32 [ 0, %do.end ], [ %v, %if.then ] - ret i32 %retval -} - -define i32 @thread_uncond_bb_cmp_zext(i1 %c, i32 %v) { -; CHECK-LABEL: define i32 @thread_uncond_bb_cmp_zext( -; CHECK-SAME: i1 [[C:%.*]], i32 [[V:%.*]]) local_unnamed_addr { -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C]], label [[DO_END:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: tail call void @dummy() -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[V]], 0 -; CHECK-NEXT: [[SPEC_SELECT:%.*]] = zext i1 [[TOBOOL]] to i32 -; CHECK-NEXT: br label [[DO_END]] -; CHECK: return: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[IF_THEN]] ] -; CHECK-NEXT: ret i32 [[RETVAL]] -; -entry: - br i1 %c, label %do.end, label %if.then - -if.then: ; preds = %entry - call void @dummy() - %tobool = icmp eq i32 %v, 0 - br i1 %tobool, label %do.end, label %return - -do.end: ; preds = %entry, %if.then - br label %return - -return: ; preds = %if.then, %do.end - %retval = phi i32 [ 0, %do.end ], [ 1, %if.then ] - ret i32 %retval -} - -declare void @dummy() diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/smax-unsigned-operand.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/smax-unsigned-operand.ll new file mode 100644 index 00000000000000..5db148ac1b4855 --- /dev/null +++ 
b/llvm/test/Transforms/SLPVectorizer/RISCV/smax-unsigned-operand.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64-unknown-linux-gnu" + +@e = global [2 x i8] zeroinitializer + +define void @main(ptr noalias %p) { +; CHECK-LABEL: define void @main( +; CHECK-SAME: ptr noalias [[P:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[CONV_4:%.*]] = zext i32 0 to i64 +; CHECK-NEXT: [[COND_4:%.*]] = tail call i64 @llvm.smax.i64(i64 [[CONV_4]], i64 0) +; CHECK-NEXT: [[CONV5_4:%.*]] = trunc i64 [[COND_4]] to i8 +; CHECK-NEXT: store i8 [[CONV5_4]], ptr getelementptr inbounds ([11 x i8], ptr @e, i64 0, i64 4), align 1 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: [[CONV_5:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[COND_5:%.*]] = tail call i64 @llvm.smax.i64(i64 [[CONV_5]], i64 1) +; CHECK-NEXT: [[CONV5_5:%.*]] = trunc i64 [[COND_5]] to i8 +; CHECK-NEXT: store i8 [[CONV5_5]], ptr getelementptr inbounds ([11 x i8], ptr @e, i64 0, i64 5), align 1 +; CHECK-NEXT: ret void +; +bb: + %conv.4 = zext i32 0 to i64 + %cond.4 = tail call i64 @llvm.smax.i64(i64 %conv.4, i64 0) + %conv5.4 = trunc i64 %cond.4 to i8 + store i8 %conv5.4, ptr getelementptr inbounds ([11 x i8], ptr @e, i64 0, i64 4), align 1 + %0 = load i32, ptr %p, align 4 + %conv.5 = zext i32 %0 to i64 + %cond.5 = tail call i64 @llvm.smax.i64(i64 %conv.5, i64 1) + %conv5.5 = trunc i64 %cond.5 to i8 + store i8 %conv5.5, ptr getelementptr inbounds ([11 x i8], ptr @e, i64 0, i64 5), align 1 + ret void +} + diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll index 096f57d100a50f..c600d75ed1e8c4 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll @@ -13,7 +13,7 @@ define i32 @fn1() { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 11 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64> ; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[TMP4]], align 8 ; CHECK-NEXT: ret i32 undef @@ -92,7 +92,7 @@ define void @externally_used_ptrs() { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 11 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP5]], [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll index aa679743583064..e459cd8c6955b0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll @@ -13,25 +13,26 @@ define dso_local i32 @g() local_unnamed_addr { ; CHECK: while.body: ; CHECK-NEXT: [[C_022:%.*]] = phi ptr [ [[C_022_BE:%.*]], [[WHILE_BODY_BACKEDGE:%.*]] ], [ undef, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x ptr> [ [[TMP14:%.*]], 
[[WHILE_BODY_BACKEDGE]] ], [ undef, [[ENTRY]] ] -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[C_022]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP9]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> ; CHECK-NEXT: switch i32 [[TMP3]], label [[WHILE_BODY_BACKEDGE]] [ -; CHECK-NEXT: i32 2, label [[SW_BB:%.*]] -; CHECK-NEXT: i32 4, label [[SW_BB6:%.*]] +; CHECK-NEXT: i32 2, label [[SW_BB:%.*]] +; CHECK-NEXT: i32 4, label [[SW_BB6:%.*]] ; CHECK-NEXT: ] ; CHECK: sw.bb: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x ptr> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP4]], i32 1 -; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP9]], align 4 ; CHECK-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 2 +; CHECK-NEXT: store i32 [[TMP7]], ptr [[INCDEC_PTR1]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> ; CHECK-NEXT: br label [[WHILE_BODY_BACKEDGE]] ; CHECK: sw.bb6: ; CHECK-NEXT: [[INCDEC_PTR8:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 2 +; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 1 ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[INCDEC_PTR]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> @@ -39,7 +40,7 @@ define dso_local i32 @g() local_unnamed_addr { ; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP13]], align 4 ; CHECK-NEXT: 
br label [[WHILE_BODY_BACKEDGE]] ; CHECK: while.body.backedge: -; CHECK-NEXT: [[C_022_BE]] = phi ptr [ [[INCDEC_PTR]], [[WHILE_BODY]] ], [ [[INCDEC_PTR8]], [[SW_BB6]] ], [ [[INCDEC_PTR5]], [[SW_BB]] ] +; CHECK-NEXT: [[C_022_BE]] = phi ptr [ [[INCDEC_PTR1]], [[WHILE_BODY]] ], [ [[INCDEC_PTR8]], [[SW_BB6]] ], [ [[INCDEC_PTR5]], [[SW_BB]] ] ; CHECK-NEXT: [[TMP14]] = phi <2 x ptr> [ [[TMP4]], [[WHILE_BODY]] ], [ [[TMP12]], [[SW_BB6]] ], [ [[TMP8]], [[SW_BB]] ] ; CHECK-NEXT: br label [[WHILE_BODY]] ; CHECK: while.end: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll index 3801fa5c787b6d..c40be9690cce1d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll @@ -52,17 +52,14 @@ define void @test(ptr %r, ptr %p, ptr %q) #0 { define void @test2(ptr %a, ptr %b) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 1 -; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 2 -; CHECK-NEXT: [[I1:%.*]] = ptrtoint ptr [[A1]] to i64 -; CHECK-NEXT: [[B3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 3 -; CHECK-NEXT: [[I2:%.*]] = ptrtoint ptr [[B3]] to i64 -; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[A1]], align 8 -; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A2]], align 8 -; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[I1]], [[V1]] -; CHECK-NEXT: [[ADD2:%.*]] = add i64 [[I2]], [[V2]] -; CHECK-NEXT: store i64 [[ADD1]], ptr [[A1]], align 8 -; CHECK-NEXT: store i64 [[ADD2]], ptr [[A2]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> +; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64> +; 
CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr [[A1]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP4]], [[TMP5]] +; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr [[A1]], align 8 ; CHECK-NEXT: ret void ; %a1 = getelementptr inbounds i64, ptr %a, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll index ddc2a1b819041f..30f328293cdaa3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll @@ -9,7 +9,7 @@ define void @"foo"(ptr addrspace(1) %0, ptr addrspace(1) %1) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr addrspace(1)> poison, ptr addrspace(1) [[TMP0:%.*]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr addrspace(1)> [[TMP3]], <4 x ptr addrspace(1)> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <4 x ptr addrspace(1)> [[TMP4]], <4 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr addrspace(1)> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 8 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1:%.*]], i64 8 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p1(<4 x ptr addrspace(1)> [[TMP5]], i32 4, <4 x i1> , <4 x float> poison) ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll index 0125e5fab089b2..e93c5244dfbe2c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll @@ -35,7 +35,7 @@ define void @allocas(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr 
[[V1]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1 ; CHECK-NEXT: store ptr [[TMP4]], ptr [[A:%.*]], align 8 ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8 ; CHECK-NEXT: ret void @@ -127,7 +127,7 @@ define void @stacksave2(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1 ; CHECK-NEXT: store ptr [[TMP4]], ptr [[A:%.*]], align 8 ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/trunc-store-value-ty-not-power-of-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/trunc-store-value-ty-not-power-of-2.ll index 81b4ee40e7fdf3..2f0fad70b593b5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/trunc-store-value-ty-not-power-of-2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/trunc-store-value-ty-not-power-of-2.ll @@ -107,3 +107,36 @@ define void @test_4_trunc_i24_to_i16(i24 %x, ptr %A) { store i16 %t, ptr %gep.3, align 1 ret void } + +%struct.d = type { [3 x i8], [3 x i8], [2 x i8] } + +; Test case for https://github.com/llvm/llvm-project/issues/88640. 
+define void @test_access_i24_directly(ptr %src, ptr noalias %dst) "target-cpu"="btver2" { +; CHECK-LABEL: define void @test_access_i24_directly( +; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[SRC]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i24 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds [[STRUCT_D:%.*]], ptr [[SRC]], i64 0, i32 1 +; CHECK-NEXT: [[BF_LOAD:%.*]] = load i24, ptr [[GEP_SRC]], align 1 +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i24 [[TMP1]], 8388607 +; CHECK-NEXT: [[BF_CLEAR:%.*]] = and i24 [[BF_LOAD]], -8388608 +; CHECK-NEXT: [[BF_SET:%.*]] = or disjoint i24 [[BF_CLEAR]], [[BF_VALUE]] +; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds [[STRUCT_D]], ptr [[DST]], i64 0, i32 1 +; CHECK-NEXT: store i24 [[BF_SET]], ptr [[GEP_DST]], align 1 +; CHECK-NEXT: store i24 0, ptr [[DST]], align 8 +; CHECK-NEXT: ret void +; +entry: + %0 = load i64, ptr %src, align 8 + %1 = trunc i64 %0 to i24 + %gep.src = getelementptr inbounds %struct.d, ptr %src, i64 0, i32 1 + %bf.load = load i24, ptr %gep.src, align 1 + %bf.value = and i24 %1, 8388607 + %bf.clear = and i24 %bf.load, -8388608 + %bf.set = or disjoint i24 %bf.clear, %bf.value + %gep.dst = getelementptr inbounds %struct.d, ptr %dst, i64 0, i32 1 + store i24 %bf.set, ptr %gep.dst, align 1 + store i24 0, ptr %dst, align 8 + ret void +} diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll new file mode 100644 index 00000000000000..e2ff343944cf2a --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll @@ -0,0 +1,204 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- 
-mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX + +declare void @use(<4 x i32>) + +; Shuffle is much cheaper than fdiv. FMF are intersected. + +define <4 x float> @shuf_fdiv_v4f32_yy(<4 x float> %x, <4 x float> %y, <4 x float> %z) { +; CHECK-LABEL: define <4 x float> @shuf_fdiv_v4f32_yy( +; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Z]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fdiv arcp <4 x float> [[TMP2]], [[TMP3]] +; CHECK-NEXT: ret <4 x float> [[R]] +; + %b0 = fdiv fast <4 x float> %x, %y + %b1 = fdiv arcp <4 x float> %z, %y + %r = shufflevector <4 x float> %b0, <4 x float> %b1, <4 x i32> + ret <4 x float> %r +} + +; Common operand is op0 of the binops. + +define <4 x i32> @shuf_add_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: define <4 x i32> @shuf_add_v4i32_xx( +; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[R1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[Z]], <4 x i32> +; CHECK-NEXT: [[R2:%.*]] = add <4 x i32> [[TMP1]], [[R1]] +; CHECK-NEXT: ret <4 x i32> [[R2]] +; + %b0 = add <4 x i32> %x, %y + %b1 = add <4 x i32> %x, %z + %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> + ret <4 x i32> %r +} + +; For commutative instructions, common operand may be swapped. 
+ +define <4 x float> @shuf_fmul_v4f32_xx_swap(<4 x float> %x, <4 x float> %y, <4 x float> %z) { +; CHECK-LABEL: define <4 x float> @shuf_fmul_v4f32_xx_swap( +; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[R]] +; + %b0 = fmul <4 x float> %x, %y + %b1 = fmul <4 x float> %z, %x + %r = shufflevector <4 x float> %b0, <4 x float> %b1, <4 x i32> + ret <4 x float> %r +} + +; For commutative instructions, common operand may be swapped. + +define <2 x i64> @shuf_and_v2i64_yy_swap(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { +; CHECK-LABEL: define <2 x i64> @shuf_and_v2i64_yy_swap( +; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[Y]], <2 x i64> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Z]], <2 x i32> +; CHECK-NEXT: [[R:%.*]] = and <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x i64> [[R]] +; + %b0 = and <2 x i64> %x, %y + %b1 = and <2 x i64> %y, %z + %r = shufflevector <2 x i64> %b0, <2 x i64> %b1, <2 x i32> + ret <2 x i64> %r +} + +; non-commutative binop, but common op0 + +define <4 x i32> @shuf_shl_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: define <4 x i32> @shuf_shl_v4i32_xx( +; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[Z]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shl <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[R]] +; + %b0 = shl <4 x i32> %x, %y + %b1 = 
shl <4 x i32> %x, %z + %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> + ret <4 x i32> %r +} + +; negative test - common operand, but not commutable + +define <4 x i32> @shuf_shl_v4i32_xx_swap(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: define <4 x i32> @shuf_shl_v4i32_xx_swap( +; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[B0:%.*]] = shl <4 x i32> [[X]], [[Y]] +; CHECK-NEXT: [[B1:%.*]] = shl <4 x i32> [[Z]], [[X]] +; CHECK-NEXT: [[R1:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[R1]] +; + %b0 = shl <4 x i32> %x, %y + %b1 = shl <4 x i32> %z, %x + %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> + ret <4 x i32> %r +} + +; negative test - mismatched opcodes + +define <2 x i64> @shuf_sub_add_v2i64_yy(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { +; CHECK-LABEL: define <2 x i64> @shuf_sub_add_v2i64_yy( +; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[B0:%.*]] = sub <2 x i64> [[X]], [[Y]] +; CHECK-NEXT: [[B1:%.*]] = add <2 x i64> [[Z]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i64> [[B0]], <2 x i64> [[B1]], <2 x i32> +; CHECK-NEXT: ret <2 x i64> [[R]] +; + %b0 = sub <2 x i64> %x, %y + %b1 = add <2 x i64> %z, %y + %r = shufflevector <2 x i64> %b0, <2 x i64> %b1, <2 x i32> + ret <2 x i64> %r +} + +; negative test - type change via shuffle + +define <8 x float> @shuf_fmul_v4f32_xx_type(<4 x float> %x, <4 x float> %y, <4 x float> %z) { +; CHECK-LABEL: define <8 x float> @shuf_fmul_v4f32_xx_type( +; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[B0:%.*]] = fmul <4 x float> [[X]], [[Y]] +; CHECK-NEXT: [[B1:%.*]] = fmul <4 x float> [[Z]], [[X]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0]], <4 x float> [[B1]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[R]] +; + %b0 = fmul <4 x float> 
%x, %y + %b1 = fmul <4 x float> %z, %x + %r = shufflevector <4 x float> %b0, <4 x float> %b1, <8 x i32> + ret <8 x float> %r +} + +; negative test - uses + +define <4 x i32> @shuf_lshr_v4i32_yy_use1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: define <4 x i32> @shuf_lshr_v4i32_yy_use1( +; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[B0:%.*]] = lshr <4 x i32> [[X]], [[Y]] +; CHECK-NEXT: call void @use(<4 x i32> [[B0]]) +; CHECK-NEXT: [[B1:%.*]] = lshr <4 x i32> [[Z]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[R]] +; + %b0 = lshr <4 x i32> %x, %y + call void @use(<4 x i32> %b0) + %b1 = lshr <4 x i32> %z, %y + %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> + ret <4 x i32> %r +} + +; negative test - uses + +define <4 x i32> @shuf_mul_v4i32_yy_use2(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: define <4 x i32> @shuf_mul_v4i32_yy_use2( +; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[B0:%.*]] = mul <4 x i32> [[X]], [[Y]] +; CHECK-NEXT: [[B1:%.*]] = mul <4 x i32> [[Z]], [[Y]] +; CHECK-NEXT: call void @use(<4 x i32> [[B1]]) +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[R]] +; + %b0 = mul <4 x i32> %x, %y + %b1 = mul <4 x i32> %z, %y + call void @use(<4 x i32> %b1) + %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> + ret <4 x i32> %r +} + +; negative test - must have matching operand + +define <4 x float> @shuf_fadd_v4f32_no_common_op(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %w) { +; CHECK-LABEL: define <4 x float> @shuf_fadd_v4f32_no_common_op( +; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]], <4 x float> [[W:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[B0:%.*]] = fadd <4 x float> [[X]], [[Y]] +; CHECK-NEXT: 
[[B1:%.*]] = fadd <4 x float> [[Z]], [[W]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0]], <4 x float> [[B1]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[R]] +; + %b0 = fadd <4 x float> %x, %y + %b1 = fadd <4 x float> %z, %w + %r = shufflevector <4 x float> %b0, <4 x float> %b1, <4 x i32> + ret <4 x float> %r +} + +; negative test - binops may be relatively cheap + +define <16 x i16> @shuf_and_v16i16_yy_expensive_shuf(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) { +; CHECK-LABEL: define <16 x i16> @shuf_and_v16i16_yy_expensive_shuf( +; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]], <16 x i16> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[B0:%.*]] = and <16 x i16> [[X]], [[Y]] +; CHECK-NEXT: [[B1:%.*]] = and <16 x i16> [[Y]], [[Z]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <16 x i16> [[B0]], <16 x i16> [[B1]], <16 x i32> +; CHECK-NEXT: ret <16 x i16> [[R]] +; + %b0 = and <16 x i16> %x, %y + %b1 = and <16 x i16> %y, %z + %r = shufflevector <16 x i16> %b0, <16 x i16> %b1, <16 x i32> + ret <16 x i16> %r +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; AVX: {{.*}} +; SSE: {{.*}} diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll index 7d9f7e390b9c04..3a5d2095e2b93b 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll @@ -1,12 +1,13 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX ; standard vector concatenations define <16 x i32> @concat_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: @concat_zext_v8i16_v16i32( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> +; CHECK-LABEL: define <16 x i32> @concat_zext_v8i16_v16i32( +; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> ; CHECK-NEXT: [[R:%.*]] = zext <16 x i16> [[TMP1]] to <16 x i32> ; CHECK-NEXT: ret <16 x i32> [[R]] ; @@ -17,8 +18,9 @@ define <16 x i32> @concat_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { } define <16 x i32> @concat_zext_nneg_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: @concat_zext_nneg_v8i16_v16i32( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> +; CHECK-LABEL: define <16 x i32> @concat_zext_nneg_v8i16_v16i32( +; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> ; CHECK-NEXT: [[R:%.*]] = zext nneg <16 x i16> [[TMP1]] to <16 x i32> ; 
CHECK-NEXT: ret <16 x i32> [[R]] ; @@ -29,14 +31,16 @@ define <16 x i32> @concat_zext_nneg_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { } define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32(<8 x i16> %a0, <8 x i16> %a1) { -; SSE-LABEL: @concat_sext_zext_nneg_v8i16_v8i32( -; SSE-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32> -; SSE-NEXT: [[X1:%.*]] = zext nneg <8 x i16> [[A1:%.*]] to <8 x i32> +; SSE-LABEL: define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32( +; SSE-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0]] to <8 x i32> +; SSE-NEXT: [[X1:%.*]] = zext nneg <8 x i16> [[A1]] to <8 x i32> ; SSE-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> ; SSE-NEXT: ret <16 x i32> [[R]] ; -; AVX-LABEL: @concat_sext_zext_nneg_v8i16_v8i32( -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> +; AVX-LABEL: define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32( +; AVX-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> ; AVX-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32> ; AVX-NEXT: ret <16 x i32> [[R]] ; @@ -47,8 +51,9 @@ define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32(<8 x i16> %a0, <8 x i16> %a } define <16 x i32> @concat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: @concat_sext_v8i16_v16i32( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> +; CHECK-LABEL: define <16 x i32> @concat_sext_v8i16_v16i32( +; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> ; CHECK-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32> ; CHECK-NEXT: ret <16 x i32> [[R]] ; @@ -59,8 +64,9 @@ define <16 x i32> @concat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> 
%a1) { } define <8 x i32> @concat_sext_v4i1_v8i32(<4 x i1> %a0, <4 x i1> %a1) { -; CHECK-LABEL: @concat_sext_v4i1_v8i32( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A0:%.*]], <4 x i1> [[A1:%.*]], <8 x i32> +; CHECK-LABEL: define <8 x i32> @concat_sext_v4i1_v8i32( +; CHECK-SAME: <4 x i1> [[A0:%.*]], <4 x i1> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A0]], <4 x i1> [[A1]], <8 x i32> ; CHECK-NEXT: [[R:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; @@ -71,8 +77,9 @@ define <8 x i32> @concat_sext_v4i1_v8i32(<4 x i1> %a0, <4 x i1> %a1) { } define <8 x i16> @concat_trunc_v4i32_v8i16(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: @concat_trunc_v4i32_v8i16( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <8 x i32> +; CHECK-LABEL: define <8 x i16> @concat_trunc_v4i32_v8i16( +; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> [[A1]], <8 x i32> ; CHECK-NEXT: [[R:%.*]] = trunc <8 x i32> [[TMP1]] to <8 x i16> ; CHECK-NEXT: ret <8 x i16> [[R]] ; @@ -83,8 +90,9 @@ define <8 x i16> @concat_trunc_v4i32_v8i16(<4 x i32> %a0, <4 x i32> %a1) { } define <8 x ptr> @concat_inttoptr_v4i32_v8iptr(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: @concat_inttoptr_v4i32_v8iptr( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <8 x i32> +; CHECK-LABEL: define <8 x ptr> @concat_inttoptr_v4i32_v8iptr( +; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> [[A1]], <8 x i32> ; CHECK-NEXT: [[R:%.*]] = inttoptr <8 x i32> [[TMP1]] to <8 x ptr> ; CHECK-NEXT: ret <8 x ptr> [[R]] ; @@ -95,8 +103,9 @@ define <8 x ptr> @concat_inttoptr_v4i32_v8iptr(<4 x i32> %a0, <4 x i32> %a1) { } define <16 x i64> @concat_ptrtoint_v8i16_v16i32(<8 x ptr> %a0, <8 x ptr> %a1) { -; 
CHECK-LABEL: @concat_ptrtoint_v8i16_v16i32( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[A0:%.*]], <8 x ptr> [[A1:%.*]], <16 x i32> +; CHECK-LABEL: define <16 x i64> @concat_ptrtoint_v8i16_v16i32( +; CHECK-SAME: <8 x ptr> [[A0:%.*]], <8 x ptr> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[A0]], <8 x ptr> [[A1]], <16 x i32> ; CHECK-NEXT: [[R:%.*]] = ptrtoint <16 x ptr> [[TMP1]] to <16 x i64> ; CHECK-NEXT: ret <16 x i64> [[R]] ; @@ -107,14 +116,16 @@ define <16 x i64> @concat_ptrtoint_v8i16_v16i32(<8 x ptr> %a0, <8 x ptr> %a1) { } define <8 x double> @concat_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> %a1) { -; SSE-LABEL: @concat_fpext_v4f32_v8f64( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <8 x i32> +; SSE-LABEL: define <8 x double> @concat_fpext_v4f32_v8f64( +; SSE-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> ; SSE-NEXT: [[R:%.*]] = fpext <8 x float> [[TMP1]] to <8 x double> ; SSE-NEXT: ret <8 x double> [[R]] ; -; AVX-LABEL: @concat_fpext_v4f32_v8f64( -; AVX-NEXT: [[X0:%.*]] = fpext <4 x float> [[A0:%.*]] to <4 x double> -; AVX-NEXT: [[X1:%.*]] = fpext <4 x float> [[A1:%.*]] to <4 x double> +; AVX-LABEL: define <8 x double> @concat_fpext_v4f32_v8f64( +; AVX-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[X0:%.*]] = fpext <4 x float> [[A0]] to <4 x double> +; AVX-NEXT: [[X1:%.*]] = fpext <4 x float> [[A1]] to <4 x double> ; AVX-NEXT: [[R:%.*]] = shufflevector <4 x double> [[X0]], <4 x double> [[X1]], <8 x i32> ; AVX-NEXT: ret <8 x double> [[R]] ; @@ -125,9 +136,10 @@ define <8 x double> @concat_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> %a1) } define <16 x float> @concat_fptrunc_v8f64_v16f32(<8 x double> %a0, <8 x double> %a1) { -; CHECK-LABEL: @concat_fptrunc_v8f64_v16f32( -; CHECK-NEXT: [[X0:%.*]] = fptrunc <8 x double> 
[[A0:%.*]] to <8 x float> -; CHECK-NEXT: [[X1:%.*]] = fptrunc <8 x double> [[A1:%.*]] to <8 x float> +; CHECK-LABEL: define <16 x float> @concat_fptrunc_v8f64_v16f32( +; CHECK-SAME: <8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X0:%.*]] = fptrunc <8 x double> [[A0]] to <8 x float> +; CHECK-NEXT: [[X1:%.*]] = fptrunc <8 x double> [[A1]] to <8 x float> ; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x float> [[X0]], <8 x float> [[X1]], <16 x i32> ; CHECK-NEXT: ret <16 x float> [[R]] ; @@ -140,8 +152,9 @@ define <16 x float> @concat_fptrunc_v8f64_v16f32(<8 x double> %a0, <8 x double> ; commuted vector concatenation define <16 x i32> @rconcat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: @rconcat_sext_v8i16_v16i32( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> +; CHECK-LABEL: define <16 x i32> @rconcat_sext_v8i16_v16i32( +; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> ; CHECK-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32> ; CHECK-NEXT: ret <16 x i32> [[R]] ; @@ -154,8 +167,9 @@ define <16 x i32> @rconcat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { ; interleaved shuffle define <8 x double> @interleave_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> %a1) { -; CHECK-LABEL: @interleave_fpext_v4f32_v8f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <8 x i32> +; CHECK-LABEL: define <8 x double> @interleave_fpext_v4f32_v8f64( +; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> ; CHECK-NEXT: [[R:%.*]] = fpext <8 x float> [[TMP1]] to <8 x double> ; CHECK-NEXT: ret <8 x double> [[R]] ; @@ -168,8 +182,9 @@ define <8 x double> @interleave_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> % ; 
bitcasts (same element count) define <8 x float> @concat_bitcast_v4i32_v8f32(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: @concat_bitcast_v4i32_v8f32( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <8 x i32> +; CHECK-LABEL: define <8 x float> @concat_bitcast_v4i32_v8f32( +; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> [[A1]], <8 x i32> ; CHECK-NEXT: [[R:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float> ; CHECK-NEXT: ret <8 x float> [[R]] ; @@ -182,8 +197,9 @@ define <8 x float> @concat_bitcast_v4i32_v8f32(<4 x i32> %a0, <4 x i32> %a1) { ; bitcasts (lower element count) define <4 x double> @concat_bitcast_v8i16_v4f64(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: @concat_bitcast_v8i16_v4f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> +; CHECK-LABEL: define <4 x double> @concat_bitcast_v8i16_v4f64( +; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> ; CHECK-NEXT: [[R:%.*]] = bitcast <16 x i16> [[TMP1]] to <4 x double> ; CHECK-NEXT: ret <4 x double> [[R]] ; @@ -196,8 +212,9 @@ define <4 x double> @concat_bitcast_v8i16_v4f64(<8 x i16> %a0, <8 x i16> %a1) { ; bitcasts (higher element count) define <16 x i16> @concat_bitcast_v4i32_v16i16(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: @concat_bitcast_v4i32_v16i16( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <8 x i32> +; CHECK-LABEL: define <16 x i16> @concat_bitcast_v4i32_v16i16( +; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> [[A1]], <8 x i32> ; CHECK-NEXT: [[R:%.*]] = bitcast <8 x i32> [[TMP1]] to <16 x i16> ; CHECK-NEXT: ret <16 x i16> [[R]] ; @@ -210,11 +227,12 @@ define <16 x 
i16> @concat_bitcast_v4i32_v16i16(<4 x i32> %a0, <4 x i32> %a1) { ; negative - multiuse define <8 x i16> @concat_trunc_v4i32_v8i16_multiuse(<4 x i32> %a0, <4 x i32> %a1, ptr %a2) { -; CHECK-LABEL: @concat_trunc_v4i32_v8i16_multiuse( -; CHECK-NEXT: [[X0:%.*]] = trunc <4 x i32> [[A0:%.*]] to <4 x i16> -; CHECK-NEXT: [[X1:%.*]] = trunc <4 x i32> [[A1:%.*]] to <4 x i16> +; CHECK-LABEL: define <8 x i16> @concat_trunc_v4i32_v8i16_multiuse( +; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], ptr [[A2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X0:%.*]] = trunc <4 x i32> [[A0]] to <4 x i16> +; CHECK-NEXT: [[X1:%.*]] = trunc <4 x i32> [[A1]] to <4 x i16> ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[X0]], <4 x i16> [[X1]], <8 x i32> -; CHECK-NEXT: store <4 x i16> [[X0]], ptr [[A2:%.*]], align 8 +; CHECK-NEXT: store <4 x i16> [[X0]], ptr [[A2]], align 8 ; CHECK-NEXT: ret <8 x i16> [[R]] ; %x0 = trunc <4 x i32> %a0 to <4 x i16> @@ -227,9 +245,10 @@ define <8 x i16> @concat_trunc_v4i32_v8i16_multiuse(<4 x i32> %a0, <4 x i32> %a1 ; negative - bitcasts (unscalable higher element count) define <16 x i16> @revpair_bitcast_v4i32_v16i16(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: @revpair_bitcast_v4i32_v16i16( -; CHECK-NEXT: [[X0:%.*]] = bitcast <4 x i32> [[A0:%.*]] to <8 x i16> -; CHECK-NEXT: [[X1:%.*]] = bitcast <4 x i32> [[A1:%.*]] to <8 x i16> +; CHECK-LABEL: define <16 x i16> @revpair_bitcast_v4i32_v16i16( +; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X0:%.*]] = bitcast <4 x i32> [[A0]] to <8 x i16> +; CHECK-NEXT: [[X1:%.*]] = bitcast <4 x i32> [[A1]] to <8 x i16> ; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[X0]], <8 x i16> [[X1]], <16 x i32> ; CHECK-NEXT: ret <16 x i16> [[R]] ; @@ -242,9 +261,10 @@ define <16 x i16> @revpair_bitcast_v4i32_v16i16(<4 x i32> %a0, <4 x i32> %a1) { ; negative - bitcasts (unscalable element counts) define <4 x i32> @shuffle_bitcast_v32i40_v4i32(<32 x i40> %a0, <32 x i40> %a1) { -; 
CHECK-LABEL: @shuffle_bitcast_v32i40_v4i32( -; CHECK-NEXT: [[X0:%.*]] = bitcast <32 x i40> [[A0:%.*]] to <40 x i32> -; CHECK-NEXT: [[X1:%.*]] = bitcast <32 x i40> [[A1:%.*]] to <40 x i32> +; CHECK-LABEL: define <4 x i32> @shuffle_bitcast_v32i40_v4i32( +; CHECK-SAME: <32 x i40> [[A0:%.*]], <32 x i40> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X0:%.*]] = bitcast <32 x i40> [[A0]] to <40 x i32> +; CHECK-NEXT: [[X1:%.*]] = bitcast <32 x i40> [[A1]] to <40 x i32> ; CHECK-NEXT: [[R:%.*]] = shufflevector <40 x i32> [[X0]], <40 x i32> [[X1]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; @@ -257,9 +277,10 @@ define <4 x i32> @shuffle_bitcast_v32i40_v4i32(<32 x i40> %a0, <32 x i40> %a1) { ; negative - src type mismatch define <8 x i32> @concat_sext_v4i8_v4i16_v8i32(<4 x i8> %a0, <4 x i16> %a1) { -; CHECK-LABEL: @concat_sext_v4i8_v4i16_v8i32( -; CHECK-NEXT: [[X0:%.*]] = sext <4 x i8> [[A0:%.*]] to <4 x i32> -; CHECK-NEXT: [[X1:%.*]] = sext <4 x i16> [[A1:%.*]] to <4 x i32> +; CHECK-LABEL: define <8 x i32> @concat_sext_v4i8_v4i16_v8i32( +; CHECK-SAME: <4 x i8> [[A0:%.*]], <4 x i16> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X0:%.*]] = sext <4 x i8> [[A0]] to <4 x i32> +; CHECK-NEXT: [[X1:%.*]] = sext <4 x i16> [[A1]] to <4 x i32> ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[X0]], <4 x i32> [[X1]], <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; @@ -272,9 +293,10 @@ define <8 x i32> @concat_sext_v4i8_v4i16_v8i32(<4 x i8> %a0, <4 x i16> %a1) { ; negative - castop mismatch define <16 x i32> @concat_sext_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: @concat_sext_zext_v8i16_v16i32( -; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32> -; CHECK-NEXT: [[X1:%.*]] = zext <8 x i16> [[A1:%.*]] to <8 x i32> +; CHECK-LABEL: define <16 x i32> @concat_sext_zext_v8i16_v16i32( +; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0]] to <8 x i32> +; CHECK-NEXT: [[X1:%.*]] = zext <8 x i16> 
[[A1]] to <8 x i32> ; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> ; CHECK-NEXT: ret <16 x i32> [[R]] ; diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll new file mode 100644 index 00000000000000..b5b5bb997c6c7a --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s + +; TODO: fold to identity + +define <8 x i32> @concat_extract_subvectors(<8 x i32> %x) { +; CHECK-LABEL: define <8 x i32> @concat_extract_subvectors( +; CHECK-SAME: <8 x i32> [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[LO:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[HI:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <4 x i32> [[LO]], <4 x i32> [[HI]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[CONCAT]] +; + %lo = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> + %hi = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> + %concat = shufflevector <4 x i32> %lo, <4 x i32> %hi, <8 x i32> + ret <8 x i32> %concat +} + +; negative test - shuffle contains undef + +define <8 x i32> @concat_extract_subvectors_undef(<8 x i32> %x) { +; CHECK-LABEL: define <8 x i32> @concat_extract_subvectors_undef( +; CHECK-SAME: <8 x i32> [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LO:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> undef, <4 x i32> +; CHECK-NEXT: [[HI:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> undef, <4 x i32> +; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <4 x i32> [[LO]], <4 x i32> [[HI]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[CONCAT]] +; + %lo = 
shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> + %hi = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> + %concat = shufflevector <4 x i32> %lo, <4 x i32> %hi, <8 x i32> + ret <8 x i32> %concat +} + +; negative test - shuffle contains poison + +define <8 x i32> @concat_extract_subvectors_poison(<8 x i32> %x) { +; CHECK-LABEL: define <8 x i32> @concat_extract_subvectors_poison( +; CHECK-SAME: <8 x i32> [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LO:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[HI:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <4 x i32> [[LO]], <4 x i32> [[HI]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[CONCAT]] +; + %lo = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> + %hi = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> + %concat = shufflevector <4 x i32> %lo, <4 x i32> %hi, <8 x i32> + ret <8 x i32> %concat +} diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll index 8337bb37bc549d..c8c9aa161ae289 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll @@ -1,17 +1,21 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX +declare void @use(<4 x i32>) + ; x86 does not have a cheap v16i8 shuffle until SSSE3 (pshufb) define <16 x i8> @bitcast_shuf_narrow_element(<4 x i32> %v) { -; SSE-LABEL: @bitcast_shuf_narrow_element( -; SSE-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> +; SSE-LABEL: define <16 x i8>
@bitcast_shuf_narrow_element( +; SSE-SAME: <4 x i32> [[V:%.*]]) #[[ATTR0:[0-9]+]] { +; SSE-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> ; SSE-NEXT: ret <16 x i8> [[R]] ; -; AVX-LABEL: @bitcast_shuf_narrow_element( -; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> +; AVX-LABEL: define <16 x i8> @bitcast_shuf_narrow_element( +; AVX-SAME: <4 x i32> [[V:%.*]]) #[[ATTR0:[0-9]+]] { +; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V]] to <16 x i8> ; AVX-NEXT: [[R:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: ret <16 x i8> [[R]] ; @@ -23,8 +27,9 @@ define <16 x i8> @bitcast_shuf_narrow_element(<4 x i32> %v) { ; v4f32 is the same cost as v4i32, so this always works define <4 x float> @bitcast_shuf_same_size(<4 x i32> %v) { -; CHECK-LABEL: @bitcast_shuf_same_size( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <4 x float> +; CHECK-LABEL: define <4 x float> @bitcast_shuf_same_size( +; CHECK-SAME: <4 x i32> [[V:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V]] to <4 x float> ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; @@ -36,13 +41,15 @@ define <4 x float> @bitcast_shuf_same_size(<4 x i32> %v) { ; Length-changing shuffles define <16 x i8> @bitcast_shuf_narrow_element_subvector(<2 x i32> %v) { -; SSE-LABEL: @bitcast_shuf_narrow_element_subvector( -; SSE-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[V:%.*]], <2 x i32> poison, <4 x i32> +; SSE-LABEL: define <16 x i8> @bitcast_shuf_narrow_element_subvector( +; SSE-SAME: <2 x i32> [[V:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[V]], <2 x i32> poison, <4 x i32> ; SSE-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> ; SSE-NEXT: ret <16 x i8> [[R]] ; -; AVX-LABEL: @bitcast_shuf_narrow_element_subvector( -; AVX-NEXT: 
[[TMP1:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> +; AVX-LABEL: define <16 x i8> @bitcast_shuf_narrow_element_subvector( +; AVX-SAME: <2 x i32> [[V:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V]] to <8 x i8> ; AVX-NEXT: [[R:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <16 x i32> ; AVX-NEXT: ret <16 x i8> [[R]] ; @@ -52,13 +59,15 @@ define <16 x i8> @bitcast_shuf_narrow_element_subvector(<2 x i32> %v) { } define <16 x i16> @bitcast_shuf_narrow_element_concat_subvectors(<2 x i64> %v) { -; SSE-LABEL: @bitcast_shuf_narrow_element_concat_subvectors( -; SSE-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[V:%.*]], <2 x i64> poison, <4 x i32> +; SSE-LABEL: define <16 x i16> @bitcast_shuf_narrow_element_concat_subvectors( +; SSE-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[V]], <2 x i64> poison, <4 x i32> ; SSE-NEXT: [[R:%.*]] = bitcast <4 x i64> [[SHUF]] to <16 x i16> ; SSE-NEXT: ret <16 x i16> [[R]] ; -; AVX-LABEL: @bitcast_shuf_narrow_element_concat_subvectors( -; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16> +; AVX-LABEL: define <16 x i16> @bitcast_shuf_narrow_element_concat_subvectors( +; AVX-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V]] to <8 x i16> ; AVX-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <16 x i32> ; AVX-NEXT: ret <16 x i16> [[R]] ; @@ -68,8 +77,9 @@ define <16 x i16> @bitcast_shuf_narrow_element_concat_subvectors(<2 x i64> %v) { } define <16 x i8> @bitcast_shuf_extract_subvector(<8 x i32> %v) { -; CHECK-LABEL: @bitcast_shuf_extract_subvector( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i32> [[V:%.*]] to <32 x i8> +; CHECK-LABEL: define <16 x i8> @bitcast_shuf_extract_subvector( +; CHECK-SAME: <8 x i32> [[V:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i32> [[V]] to <32 x i8> ; CHECK-NEXT: [[R:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> poison, <16 x i32> 
; CHECK-NEXT: ret <16 x i8> [[R]] ; @@ -81,8 +91,9 @@ define <16 x i8> @bitcast_shuf_extract_subvector(<8 x i32> %v) { ; Negative test - must cast to vector type define i128 @bitcast_shuf_narrow_element_wrong_type(<4 x i32> %v) { -; CHECK-LABEL: @bitcast_shuf_narrow_element_wrong_type( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-LABEL: define i128 @bitcast_shuf_narrow_element_wrong_type( +; CHECK-SAME: <4 x i32> [[V:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to i128 ; CHECK-NEXT: ret i128 [[R]] ; @@ -94,8 +105,9 @@ define i128 @bitcast_shuf_narrow_element_wrong_type(<4 x i32> %v) { ; Widen shuffle elements define <4 x i32> @bitcast_shuf_wide_element(<8 x i16> %v) { -; CHECK-LABEL: @bitcast_shuf_wide_element( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V:%.*]] to <4 x i32> +; CHECK-LABEL: define <4 x i32> @bitcast_shuf_wide_element( +; CHECK-SAME: <8 x i16> [[V:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V]] to <4 x i32> ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; @@ -104,13 +116,12 @@ define <4 x i32> @bitcast_shuf_wide_element(<8 x i16> %v) { ret <4 x i32> %r } -declare void @use(<4 x i32>) - ; Negative test - don't create an extra shuffle define <16 x i8> @bitcast_shuf_uses(<4 x i32> %v) { -; CHECK-LABEL: @bitcast_shuf_uses( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-LABEL: define <16 x i8> @bitcast_shuf_uses( +; CHECK-SAME: <4 x i32> [[V:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: call void @use(<4 x i32> [[SHUF]]) ; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> ; CHECK-NEXT: ret <16 x i8> [[R]] @@ -125,8 +136,9 @@ define <16 
x i8> @bitcast_shuf_uses(<4 x i32> %v) { ; TODO - can we remove the empty bitcast(bitcast()) ? define <4 x i64> @bitcast_shuf_remove_bitcasts(<2 x i64> %a0, <2 x i64> %a1) { -; CHECK-LABEL: @bitcast_shuf_remove_bitcasts( -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]], <4 x i32> +; CHECK-LABEL: define <4 x i64> @bitcast_shuf_remove_bitcasts( +; CHECK-SAME: <2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i64> [[A0]], <2 x i64> [[A1]], <4 x i32> ; CHECK-NEXT: [[SHUF:%.*]] = bitcast <4 x i64> [[R]] to <8 x i32> ; CHECK-NEXT: [[R1:%.*]] = bitcast <8 x i32> [[SHUF]] to <4 x i64> ; CHECK-NEXT: ret <4 x i64> [[R1]] @@ -141,9 +153,10 @@ define <4 x i64> @bitcast_shuf_remove_bitcasts(<2 x i64> %a0, <2 x i64> %a1) { ; shuffle of 2 operands must reduce bitcasts define <8 x i32> @bitcast_shuf_one_bitcast(<4 x i32> %a0, <2 x i64> %a1) { -; CHECK-LABEL: @bitcast_shuf_one_bitcast( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A1:%.*]] to <4 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[TMP1]], <8 x i32> +; CHECK-LABEL: define <8 x i32> @bitcast_shuf_one_bitcast( +; CHECK-SAME: <4 x i32> [[A0:%.*]], <2 x i64> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A1]] to <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %bc0 = bitcast <4 x i32> %a0 to <2 x i64> @@ -155,8 +168,9 @@ define <8 x i32> @bitcast_shuf_one_bitcast(<4 x i32> %a0, <2 x i64> %a1) { ; Negative test - shuffle of 2 operands must not increase bitcasts define <8 x i32> @bitcast_shuf_too_many_bitcasts(<2 x i64> %a0, <2 x i64> %a1) { -; CHECK-LABEL: @bitcast_shuf_too_many_bitcasts( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]], <4 x i32> +; CHECK-LABEL: define <8 x i32> @bitcast_shuf_too_many_bitcasts( +; CHECK-SAME: <2 x i64> [[A0:%.*]], <2 x 
i64> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[A0]], <2 x i64> [[A1]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i64> [[SHUF]] to <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; @@ -166,8 +180,9 @@ define <8 x i32> @bitcast_shuf_too_many_bitcasts(<2 x i64> %a0, <2 x i64> %a1) { } define <2 x i64> @PR35454_1(<2 x i64> %v) { -; SSE-LABEL: @PR35454_1( -; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> +; SSE-LABEL: define <2 x i64> @PR35454_1( +; SSE-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V]] to <4 x i32> ; SSE-NEXT: [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <16 x i8> ; SSE-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], @@ -176,8 +191,9 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) { ; SSE-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> ; SSE-NEXT: ret <2 x i64> [[BC3]] ; -; AVX-LABEL: @PR35454_1( -; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8> +; AVX-LABEL: define <2 x i64> @PR35454_1( +; AVX-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V]] to <16 x i8> ; AVX-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], ; AVX-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32> @@ -196,8 +212,9 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) { } define <2 x i64> @PR35454_2(<2 x i64> %v) { -; SSE-LABEL: @PR35454_2( -; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> +; SSE-LABEL: define <2 x i64> @PR35454_2( +; SSE-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V]] to <4 x i32> ; SSE-NEXT: [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <8 x i16> ; SSE-NEXT: [[ADD:%.*]] = shl <8 x i16> 
[[BC1]], @@ -206,8 +223,9 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) { ; SSE-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> ; SSE-NEXT: ret <2 x i64> [[BC3]] ; -; AVX-LABEL: @PR35454_2( -; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16> +; AVX-LABEL: define <2 x i64> @PR35454_2( +; AVX-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V]] to <8 x i16> ; AVX-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], ; AVX-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32> @@ -224,187 +242,3 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) { %bc3 = bitcast <4 x i32> %permil1 to <2 x i64> ret <2 x i64> %bc3 } - -; Shuffle is much cheaper than fdiv. FMF are intersected. - -define <4 x float> @shuf_fdiv_v4f32_yy(<4 x float> %x, <4 x float> %y, <4 x float> %z) { -; CHECK-LABEL: @shuf_fdiv_v4f32_yy( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Z:%.*]], <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = fdiv arcp <4 x float> [[TMP1]], [[TMP2]] -; CHECK-NEXT: ret <4 x float> [[R]] -; - %b0 = fdiv fast <4 x float> %x, %y - %b1 = fdiv arcp <4 x float> %z, %y - %r = shufflevector <4 x float> %b0, <4 x float> %b1, <4 x i32> - ret <4 x float> %r -} - -; Common operand is op0 of the binops. 
- -define <4 x i32> @shuf_add_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { -; CHECK-LABEL: @shuf_add_v4i32_xx( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: ret <4 x i32> [[R]] -; - %b0 = add <4 x i32> %x, %y - %b1 = add <4 x i32> %x, %z - %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> - ret <4 x i32> %r -} - -; For commutative instructions, common operand may be swapped. - -define <4 x float> @shuf_fmul_v4f32_xx_swap(<4 x float> %x, <4 x float> %y, <4 x float> %z) { -; CHECK-LABEL: @shuf_fmul_v4f32_xx_swap( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]], <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] -; CHECK-NEXT: ret <4 x float> [[R]] -; - %b0 = fmul <4 x float> %x, %y - %b1 = fmul <4 x float> %z, %x - %r = shufflevector <4 x float> %b0, <4 x float> %b1, <4 x i32> - ret <4 x float> %r -} - -; For commutative instructions, common operand may be swapped. 
- -define <2 x i64> @shuf_and_v2i64_yy_swap(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { -; CHECK-LABEL: @shuf_and_v2i64_yy_swap( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[Y:%.*]], <2 x i64> poison, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X:%.*]], <2 x i64> [[Z:%.*]], <2 x i32> -; CHECK-NEXT: [[R:%.*]] = and <2 x i64> [[TMP1]], [[TMP2]] -; CHECK-NEXT: ret <2 x i64> [[R]] -; - %b0 = and <2 x i64> %x, %y - %b1 = and <2 x i64> %y, %z - %r = shufflevector <2 x i64> %b0, <2 x i64> %b1, <2 x i32> - ret <2 x i64> %r -} - -; non-commutative binop, but common op0 - -define <4 x i32> @shuf_shl_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { -; CHECK-LABEL: @shuf_shl_v4i32_xx( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = shl <4 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: ret <4 x i32> [[R]] -; - %b0 = shl <4 x i32> %x, %y - %b1 = shl <4 x i32> %x, %z - %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> - ret <4 x i32> %r -} - -; negative test - common operand, but not commutable - -define <4 x i32> @shuf_shl_v4i32_xx_swap(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { -; CHECK-LABEL: @shuf_shl_v4i32_xx_swap( -; CHECK-NEXT: [[B0:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B1:%.*]] = shl <4 x i32> [[Z:%.*]], [[X]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[R]] -; - %b0 = shl <4 x i32> %x, %y - %b1 = shl <4 x i32> %z, %x - %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> - ret <4 x i32> %r -} - -; negative test - mismatched opcodes - -define <2 x i64> @shuf_sub_add_v2i64_yy(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { -; CHECK-LABEL: @shuf_sub_add_v2i64_yy( -; CHECK-NEXT: [[B0:%.*]] = sub <2 x i64> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B1:%.*]] = add <2 x i64> 
[[Z:%.*]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i64> [[B0]], <2 x i64> [[B1]], <2 x i32> -; CHECK-NEXT: ret <2 x i64> [[R]] -; - %b0 = sub <2 x i64> %x, %y - %b1 = add <2 x i64> %z, %y - %r = shufflevector <2 x i64> %b0, <2 x i64> %b1, <2 x i32> - ret <2 x i64> %r -} - -; negative test - type change via shuffle - -define <8 x float> @shuf_fmul_v4f32_xx_type(<4 x float> %x, <4 x float> %y, <4 x float> %z) { -; CHECK-LABEL: @shuf_fmul_v4f32_xx_type( -; CHECK-NEXT: [[B0:%.*]] = fmul <4 x float> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B1:%.*]] = fmul <4 x float> [[Z:%.*]], [[X]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0]], <4 x float> [[B1]], <8 x i32> -; CHECK-NEXT: ret <8 x float> [[R]] -; - %b0 = fmul <4 x float> %x, %y - %b1 = fmul <4 x float> %z, %x - %r = shufflevector <4 x float> %b0, <4 x float> %b1, <8 x i32> - ret <8 x float> %r -} - -; negative test - uses - -define <4 x i32> @shuf_lshr_v4i32_yy_use1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { -; CHECK-LABEL: @shuf_lshr_v4i32_yy_use1( -; CHECK-NEXT: [[B0:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: call void @use(<4 x i32> [[B0]]) -; CHECK-NEXT: [[B1:%.*]] = lshr <4 x i32> [[Z:%.*]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[R]] -; - %b0 = lshr <4 x i32> %x, %y - call void @use(<4 x i32> %b0) - %b1 = lshr <4 x i32> %z, %y - %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> - ret <4 x i32> %r -} - -; negative test - uses - -define <4 x i32> @shuf_mul_v4i32_yy_use2(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { -; CHECK-LABEL: @shuf_mul_v4i32_yy_use2( -; CHECK-NEXT: [[B0:%.*]] = mul <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B1:%.*]] = mul <4 x i32> [[Z:%.*]], [[Y]] -; CHECK-NEXT: call void @use(<4 x i32> [[B1]]) -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[R]] -; - %b0 = mul <4 x i32> %x, %y - %b1 = mul 
<4 x i32> %z, %y - call void @use(<4 x i32> %b1) - %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> - ret <4 x i32> %r -} - -; negative test - must have matching operand - -define <4 x float> @shuf_fadd_v4f32_no_common_op(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %w) { -; CHECK-LABEL: @shuf_fadd_v4f32_no_common_op( -; CHECK-NEXT: [[B0:%.*]] = fadd <4 x float> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B1:%.*]] = fadd <4 x float> [[Z:%.*]], [[W:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0]], <4 x float> [[B1]], <4 x i32> -; CHECK-NEXT: ret <4 x float> [[R]] -; - %b0 = fadd <4 x float> %x, %y - %b1 = fadd <4 x float> %z, %w - %r = shufflevector <4 x float> %b0, <4 x float> %b1, <4 x i32> - ret <4 x float> %r -} - -; negative test - binops may be relatively cheap - -define <16 x i16> @shuf_and_v16i16_yy_expensive_shuf(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) { -; CHECK-LABEL: @shuf_and_v16i16_yy_expensive_shuf( -; CHECK-NEXT: [[B0:%.*]] = and <16 x i16> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B1:%.*]] = and <16 x i16> [[Y]], [[Z:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <16 x i16> [[B0]], <16 x i16> [[B1]], <16 x i32> -; CHECK-NEXT: ret <16 x i16> [[R]] -; - %b0 = and <16 x i16> %x, %y - %b1 = and <16 x i16> %y, %z - %r = shufflevector <16 x i16> %b0, <16 x i16> %b1, <16 x i32> - ret <16 x i16> %r -} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll new file mode 100644 index 00000000000000..4eb05b943f5067 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -S | FileCheck %s + +define i32 @phi_after_label(i1 %cc) { +entry: + br i1 %cc, label %then, label %end + +then: + br label %end + +end: + %r = phi i32 [ 0, %entry ], [ 1, %then ] + ret i32 %r +} + +define void @phi_before_label(i32 %bound) { +entry: + br label %loop + +loop: + %ctr = 
phi i32 [ 0, %entry ], [ %ctr.next, %loop ] + %ctr.next = add i32 %ctr, 1 + %cc = icmp ult i32 %ctr.next, %bound + br i1 %cc, label %loop, label %end + +end: + ret void +} + +define i32 @phi_after_label_unnamed(i1 %cc) { +0: + br i1 %cc, label %1, label %2 + +1: + br label %2 + +2: + %r = phi i32 [ 0, %0 ], [ 1, %1 ] + ret i32 %r +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll.expected new file mode 100644 index 00000000000000..1d21ebe547f689 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll.expected @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -S | FileCheck %s + +define i32 @phi_after_label(i1 %cc) { +; CHECK-LABEL: define i32 @phi_after_label( +; CHECK-SAME: i1 [[CC:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[CC]], label [[THEN:%.*]], label [[END:%.*]] +; CHECK: then: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[R:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[THEN]] ] +; CHECK-NEXT: ret i32 [[R]] +; +entry: + br i1 %cc, label %then, label %end + +then: + br label %end + +end: + %r = phi i32 [ 0, %entry ], [ 1, %then ] + ret i32 %r +} + +define void @phi_before_label(i32 %bound) { +; CHECK-LABEL: define void @phi_before_label( +; CHECK-SAME: i32 [[BOUND:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[CTR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[CTR_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[CTR_NEXT]] = add i32 [[CTR]], 1 +; CHECK-NEXT: [[CC:%.*]] = icmp ult i32 [[CTR_NEXT]], [[BOUND]] +; CHECK-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %ctr = phi i32 [ 0, %entry ], [ %ctr.next, %loop ] + %ctr.next = add i32 %ctr, 1 + %cc = icmp ult i32 %ctr.next, 
%bound + br i1 %cc, label %loop, label %end + +end: + ret void +} + +define i32 @phi_after_label_unnamed(i1 %cc) { +; CHECK-LABEL: define i32 @phi_after_label_unnamed( +; CHECK-SAME: i1 [[CC:%.*]]) { +; CHECK-NEXT: br i1 [[CC]], label [[TMP1:%.*]], label [[TMP2:%.*]] +; CHECK: 1: +; CHECK-NEXT: br label [[TMP2]] +; CHECK: 2: +; CHECK-NEXT: [[R:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ 1, [[TMP1]] ] +; CHECK-NEXT: ret i32 [[R]] +; +0: + br i1 %cc, label %1, label %2 + +1: + br label %2 + +2: + %r = phi i32 [ 0, %0 ], [ 1, %1 ] + ret i32 %r +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values_funcs.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values_funcs.ll new file mode 100644 index 00000000000000..b4fd23a3d81ce2 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values_funcs.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 +; RUN: opt < %s -S | FileCheck %s + +; The assumption underlying this test is that there are pre-existing check lines +; but something has changed, and we would like to avoid needless changes of +; meta variable names so that diffs end up being easier to read, e.g. avoid +; changing X_I33 into X_I34 or renumbering the various TMP variables. 
+ +define i32 @func({i32, i32} %x, i32 %y) { + %x.i34 = extractvalue {i32, i32} %x, 0 + %1 = add i32 %y, 1 + %2 = add i32 %x.i34, %1 + %3 = mul i32 %2, 3 + ret i32 %3 +} + +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: { i32, i32 } [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[X_I33:%.*]] = extractvalue { i32, i32 } [[X]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X_I33]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 3 +; CHECK-NEXT: ret i32 [[TMP2]] +; diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values_funcs.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values_funcs.ll.expected new file mode 100644 index 00000000000000..1559319ac013a2 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values_funcs.ll.expected @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 +; RUN: opt < %s -S | FileCheck %s + +; The assumption underlying this test is that there are pre-existing check lines +; but something has changed, and we would like to avoid needless changes of +; meta variable names so that diffs end up being easier to read, e.g. avoid +; changing X_I33 into X_I34 or renumbering the various TMP variables. 
+ +define i32 @func({i32, i32} %x, i32 %y) { + %x.i34 = extractvalue {i32, i32} %x, 0 + %1 = add i32 %y, 1 + %2 = add i32 %x.i34, %1 + %3 = mul i32 %2, 3 + ret i32 %3 +} + +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: { i32, i32 } [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[X_I34:%.*]] = extractvalue { i32, i32 } [[X]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[Y]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[X_I34]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 3 +; CHECK-NEXT: ret i32 [[TMP3]] +; diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/phi-labels.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/phi-labels.test new file mode 100644 index 00000000000000..411c84de1dcba5 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/phi-labels.test @@ -0,0 +1,5 @@ +# RUN: cp -f %S/Inputs/phi-labels.ll %t.ll && %update_test_checks --version 4 %t.ll +# RUN: diff -u %t.ll %S/Inputs/phi-labels.ll.expected +## Check that running the script again does not change the result: +# RUN: %update_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/phi-labels.ll.expected diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values_funcs.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values_funcs.test new file mode 100644 index 00000000000000..5132fb9a26ff43 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values_funcs.test @@ -0,0 +1,2 @@ +# RUN: cp -f %S/Inputs/stable_ir_values_funcs.ll %t.ll && %update_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/stable_ir_values_funcs.ll.expected diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index 8ebd9b511f39fe..8738af91b652b8 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -2110,8 +2110,7 @@ TEST_F(ValueTrackingTest, isNonZeroRecurrence) { )"); const DataLayout &DL = 
M->getDataLayout(); AssumptionCache AC(*F); - EXPECT_TRUE(isKnownNonZero(A, /*Depth=*/0, - SimplifyQuery(DL, /*DT=*/nullptr, &AC, CxtI))); + EXPECT_TRUE(isKnownNonZero(A, SimplifyQuery(DL, /*DT=*/nullptr, &AC, CxtI))); } TEST_F(ValueTrackingTest, KnownNonZeroFromDomCond) { @@ -2135,9 +2134,8 @@ TEST_F(ValueTrackingTest, KnownNonZeroFromDomCond) { DominatorTree DT(*F); const DataLayout &DL = M->getDataLayout(); const SimplifyQuery SQ(DL, &DT, &AC); - EXPECT_EQ(isKnownNonZero(A, /*Depth=*/0, SQ.getWithInstruction(CxtI)), true); - EXPECT_EQ(isKnownNonZero(A, /*Depth=*/0, SQ.getWithInstruction(CxtI2)), - false); + EXPECT_EQ(isKnownNonZero(A, SQ.getWithInstruction(CxtI)), true); + EXPECT_EQ(isKnownNonZero(A, SQ.getWithInstruction(CxtI2)), false); } TEST_F(ValueTrackingTest, KnownNonZeroFromDomCond2) { @@ -2161,9 +2159,8 @@ TEST_F(ValueTrackingTest, KnownNonZeroFromDomCond2) { DominatorTree DT(*F); const DataLayout &DL = M->getDataLayout(); const SimplifyQuery SQ(DL, &DT, &AC); - EXPECT_EQ(isKnownNonZero(A, /*Depth=*/0, SQ.getWithInstruction(CxtI)), true); - EXPECT_EQ(isKnownNonZero(A, /*Depth=*/0, SQ.getWithInstruction(CxtI2)), - false); + EXPECT_EQ(isKnownNonZero(A, SQ.getWithInstruction(CxtI)), true); + EXPECT_EQ(isKnownNonZero(A, SQ.getWithInstruction(CxtI2)), false); } TEST_F(ValueTrackingTest, IsImpliedConditionAnd) { diff --git a/llvm/unittests/CodeGen/RegAllocScoreTest.cpp b/llvm/unittests/CodeGen/RegAllocScoreTest.cpp index ff7146eaf9439a..eae517f9d01cf2 100644 --- a/llvm/unittests/CodeGen/RegAllocScoreTest.cpp +++ b/llvm/unittests/CodeGen/RegAllocScoreTest.cpp @@ -166,19 +166,20 @@ TEST(RegAllocScoreTest, Counts) { ASSERT_EQ(MF->size(), 2U); const auto TotalScore = llvm::calculateRegAllocScore(*MF, MBBFreqMock, IsRemat); - ASSERT_EQ(Freq1, TotalScore.copyCounts()); - ASSERT_EQ(2.0 * Freq1 + Freq2, TotalScore.loadCounts()); - ASSERT_EQ(Freq1 + Freq2, TotalScore.storeCounts()); - ASSERT_EQ(Freq2, TotalScore.loadStoreCounts()); - ASSERT_EQ(Freq1, 
TotalScore.cheapRematCounts()); - ASSERT_EQ(Freq2, TotalScore.expensiveRematCounts()); - ASSERT_EQ(TotalScore.getScore(), - TotalScore.copyCounts() * CopyWeight + - TotalScore.loadCounts() * LoadWeight + - TotalScore.storeCounts() * StoreWeight + - TotalScore.loadStoreCounts() * (LoadWeight + StoreWeight) + - TotalScore.cheapRematCounts() * CheapRematWeight + - TotalScore.expensiveRematCounts() * ExpensiveRematWeight + ASSERT_DOUBLE_EQ(Freq1, TotalScore.copyCounts()); + ASSERT_DOUBLE_EQ(2.0 * Freq1 + Freq2, TotalScore.loadCounts()); + ASSERT_DOUBLE_EQ(Freq1 + Freq2, TotalScore.storeCounts()); + ASSERT_DOUBLE_EQ(Freq2, TotalScore.loadStoreCounts()); + ASSERT_DOUBLE_EQ(Freq1, TotalScore.cheapRematCounts()); + ASSERT_DOUBLE_EQ(Freq2, TotalScore.expensiveRematCounts()); + ASSERT_DOUBLE_EQ( + TotalScore.getScore(), + TotalScore.copyCounts() * CopyWeight + + TotalScore.loadCounts() * LoadWeight + + TotalScore.storeCounts() * StoreWeight + + TotalScore.loadStoreCounts() * (LoadWeight + StoreWeight) + + TotalScore.cheapRematCounts() * CheapRematWeight + + TotalScore.expensiveRematCounts() * ExpensiveRematWeight ); } diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index 9cf307472d656e..7e00a80cacf933 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -21,9 +21,11 @@ using ::llvm::DILineInfo; using ::llvm::DILineInfoSpecifier; using ::llvm::DILocal; using ::llvm::StringRef; +using ::llvm::memprof::CallStackId; using ::llvm::memprof::CallStackMap; using ::llvm::memprof::Frame; using ::llvm::memprof::FrameId; +using ::llvm::memprof::IndexedAllocationInfo; using ::llvm::memprof::IndexedMemProfRecord; using ::llvm::memprof::MemInfoBlock; using ::llvm::memprof::MemProfReader; @@ -36,6 +38,7 @@ using ::llvm::memprof::SegmentEntry; using ::llvm::object::SectionedAddress; using ::llvm::symbolize::SymbolizableModule; using ::testing::Return; +using ::testing::SizeIs; class 
MockSymbolizer : public SymbolizableModule { public: @@ -180,13 +183,13 @@ TEST(MemProf, FillsValue) { // We expect 4 records. We attach alloc site data to foo and bar, i.e. // all frames bottom up until we find a non-inline frame. We attach call site // data to bar, xyz and abc. - ASSERT_EQ(Records.size(), 4U); + ASSERT_THAT(Records, SizeIs(4)); // Check the memprof record for foo. const llvm::GlobalValue::GUID FooId = IndexedMemProfRecord::getGUID("foo"); ASSERT_EQ(Records.count(FooId), 1U); const MemProfRecord &Foo = Records[FooId]; - ASSERT_EQ(Foo.AllocSites.size(), 1U); + ASSERT_THAT(Foo.AllocSites, SizeIs(1)); EXPECT_EQ(Foo.AllocSites[0].Info.getAllocCount(), 1U); EXPECT_THAT(Foo.AllocSites[0].CallStack[0], FrameContains("foo", 5U, 30U, true)); @@ -202,7 +205,7 @@ TEST(MemProf, FillsValue) { const llvm::GlobalValue::GUID BarId = IndexedMemProfRecord::getGUID("bar"); ASSERT_EQ(Records.count(BarId), 1U); const MemProfRecord &Bar = Records[BarId]; - ASSERT_EQ(Bar.AllocSites.size(), 1U); + ASSERT_THAT(Bar.AllocSites, SizeIs(1)); EXPECT_EQ(Bar.AllocSites[0].Info.getAllocCount(), 1U); EXPECT_THAT(Bar.AllocSites[0].CallStack[0], FrameContains("foo", 5U, 30U, true)); @@ -213,8 +216,8 @@ TEST(MemProf, FillsValue) { EXPECT_THAT(Bar.AllocSites[0].CallStack[3], FrameContains("abc", 5U, 30U, false)); - ASSERT_EQ(Bar.CallSites.size(), 1U); - ASSERT_EQ(Bar.CallSites[0].size(), 2U); + ASSERT_THAT(Bar.CallSites, SizeIs(1)); + ASSERT_THAT(Bar.CallSites[0], SizeIs(2)); EXPECT_THAT(Bar.CallSites[0][0], FrameContains("foo", 5U, 30U, true)); EXPECT_THAT(Bar.CallSites[0][1], FrameContains("bar", 51U, 20U, false)); @@ -222,8 +225,8 @@ TEST(MemProf, FillsValue) { const llvm::GlobalValue::GUID XyzId = IndexedMemProfRecord::getGUID("xyz"); ASSERT_EQ(Records.count(XyzId), 1U); const MemProfRecord &Xyz = Records[XyzId]; - ASSERT_EQ(Xyz.CallSites.size(), 1U); - ASSERT_EQ(Xyz.CallSites[0].size(), 2U); + ASSERT_THAT(Xyz.CallSites, SizeIs(1)); + ASSERT_THAT(Xyz.CallSites[0], SizeIs(2)); // 
Expect the entire frame even though in practice we only need the first // entry here. EXPECT_THAT(Xyz.CallSites[0][0], FrameContains("xyz", 5U, 30U, true)); @@ -234,8 +237,8 @@ TEST(MemProf, FillsValue) { ASSERT_EQ(Records.count(AbcId), 1U); const MemProfRecord &Abc = Records[AbcId]; EXPECT_TRUE(Abc.AllocSites.empty()); - ASSERT_EQ(Abc.CallSites.size(), 1U); - ASSERT_EQ(Abc.CallSites[0].size(), 2U); + ASSERT_THAT(Abc.CallSites, SizeIs(1)); + ASSERT_THAT(Abc.CallSites[0], SizeIs(2)); EXPECT_THAT(Abc.CallSites[0][0], FrameContains("xyz", 5U, 30U, true)); EXPECT_THAT(Abc.CallSites[0][1], FrameContains("abc", 5U, 30U, false)); } @@ -390,9 +393,9 @@ TEST(MemProf, SymbolizationFilter) { Records.push_back(KeyRecordPair.second); } - ASSERT_EQ(Records.size(), 1U); - ASSERT_EQ(Records[0].AllocSites.size(), 1U); - ASSERT_EQ(Records[0].AllocSites[0].CallStack.size(), 1U); + ASSERT_THAT(Records, SizeIs(1)); + ASSERT_THAT(Records[0].AllocSites, SizeIs(1)); + ASSERT_THAT(Records[0].AllocSites[0].CallStack, SizeIs(1)); EXPECT_THAT(Records[0].AllocSites[0].CallStack[0], FrameContains("foo", 5U, 30U, false)); } @@ -424,12 +427,135 @@ TEST(MemProf, BaseMemProfReader) { Records.push_back(KeyRecordPair.second); } - ASSERT_EQ(Records.size(), 1U); - ASSERT_EQ(Records[0].AllocSites.size(), 1U); - ASSERT_EQ(Records[0].AllocSites[0].CallStack.size(), 2U); + ASSERT_THAT(Records, SizeIs(1)); + ASSERT_THAT(Records[0].AllocSites, SizeIs(1)); + ASSERT_THAT(Records[0].AllocSites[0].CallStack, SizeIs(2)); EXPECT_THAT(Records[0].AllocSites[0].CallStack[0], FrameContains("foo", 20U, 5U, true)); EXPECT_THAT(Records[0].AllocSites[0].CallStack[1], FrameContains("bar", 10U, 2U, false)); } + +TEST(MemProf, BaseMemProfReaderWithCSIdMap) { + llvm::DenseMap FrameIdMap; + Frame F1(/*Hash=*/IndexedMemProfRecord::getGUID("foo"), /*LineOffset=*/20, + /*Column=*/5, /*IsInlineFrame=*/true); + Frame F2(/*Hash=*/IndexedMemProfRecord::getGUID("bar"), /*LineOffset=*/10, + /*Column=*/2, /*IsInlineFrame=*/false); + 
FrameIdMap.insert({F1.hash(), F1}); + FrameIdMap.insert({F2.hash(), F2}); + + llvm::DenseMap> CSIdMap; + llvm::SmallVector CallStack = {F1.hash(), F2.hash()}; + CallStackId CSId = llvm::memprof::hashCallStack(CallStack); + CSIdMap.insert({CSId, CallStack}); + + llvm::MapVector ProfData; + IndexedMemProfRecord FakeRecord; + MemInfoBlock Block; + Block.AllocCount = 1U, Block.TotalAccessDensity = 4, + Block.TotalLifetime = 200001; + FakeRecord.AllocSites.emplace_back( + /*CS=*/llvm::SmallVector(), + /*CSId=*/llvm::memprof::hashCallStack(CallStack), + /*MB=*/Block); + ProfData.insert({F1.hash(), FakeRecord}); + + MemProfReader Reader(FrameIdMap, CSIdMap, ProfData); + + llvm::SmallVector Records; + for (const auto &KeyRecordPair : Reader) { + Records.push_back(KeyRecordPair.second); + } + + ASSERT_THAT(Records, SizeIs(1)); + ASSERT_THAT(Records[0].AllocSites, SizeIs(1)); + ASSERT_THAT(Records[0].AllocSites[0].CallStack, SizeIs(2)); + EXPECT_THAT(Records[0].AllocSites[0].CallStack[0], + FrameContains("foo", 20U, 5U, true)); + EXPECT_THAT(Records[0].AllocSites[0].CallStack[1], + FrameContains("bar", 10U, 2U, false)); +} + +TEST(MemProf, IndexedMemProfRecordToMemProfRecord) { + // Verify that MemProfRecord can be constructed from IndexedMemProfRecord with + // CallStackIds only. 
+ + llvm::DenseMap FrameIdMap; + Frame F1(1, 0, 0, false); + Frame F2(2, 0, 0, false); + Frame F3(3, 0, 0, false); + Frame F4(4, 0, 0, false); + FrameIdMap.insert({F1.hash(), F1}); + FrameIdMap.insert({F2.hash(), F2}); + FrameIdMap.insert({F3.hash(), F3}); + FrameIdMap.insert({F4.hash(), F4}); + + llvm::DenseMap> CallStackIdMap; + llvm::SmallVector CS1 = {F1.hash(), F2.hash()}; + llvm::SmallVector CS2 = {F1.hash(), F3.hash()}; + llvm::SmallVector CS3 = {F2.hash(), F3.hash()}; + llvm::SmallVector CS4 = {F2.hash(), F4.hash()}; + CallStackIdMap.insert({llvm::memprof::hashCallStack(CS1), CS1}); + CallStackIdMap.insert({llvm::memprof::hashCallStack(CS2), CS2}); + CallStackIdMap.insert({llvm::memprof::hashCallStack(CS3), CS3}); + CallStackIdMap.insert({llvm::memprof::hashCallStack(CS4), CS4}); + + IndexedMemProfRecord IndexedRecord; + IndexedAllocationInfo AI; + AI.CSId = llvm::memprof::hashCallStack(CS1); + IndexedRecord.AllocSites.push_back(AI); + AI.CSId = llvm::memprof::hashCallStack(CS2); + IndexedRecord.AllocSites.push_back(AI); + IndexedRecord.CallSiteIds.push_back(llvm::memprof::hashCallStack(CS3)); + IndexedRecord.CallSiteIds.push_back(llvm::memprof::hashCallStack(CS4)); + + bool CSIdMissing = false; + bool FrameIdMissing = false; + + auto Callback = [&](CallStackId CSId) -> llvm::SmallVector { + llvm::SmallVector CallStack; + llvm::SmallVector FrameIds; + + auto Iter = CallStackIdMap.find(CSId); + if (Iter == CallStackIdMap.end()) + CSIdMissing = true; + else + FrameIds = Iter->second; + + for (FrameId Id : FrameIds) { + Frame F(0, 0, 0, false); + auto Iter = FrameIdMap.find(Id); + if (Iter == FrameIdMap.end()) + FrameIdMissing = true; + else + F = Iter->second; + CallStack.push_back(F); + } + + return CallStack; + }; + + MemProfRecord Record = IndexedRecord.toMemProfRecord(Callback); + + // Make sure that all lookups are successful. + ASSERT_FALSE(CSIdMissing); + ASSERT_FALSE(FrameIdMissing); + + // Verify the contents of Record. 
+ ASSERT_THAT(Record.AllocSites, SizeIs(2)); + ASSERT_THAT(Record.AllocSites[0].CallStack, SizeIs(2)); + EXPECT_EQ(Record.AllocSites[0].CallStack[0].hash(), F1.hash()); + EXPECT_EQ(Record.AllocSites[0].CallStack[1].hash(), F2.hash()); + ASSERT_THAT(Record.AllocSites[1].CallStack, SizeIs(2)); + EXPECT_EQ(Record.AllocSites[1].CallStack[0].hash(), F1.hash()); + EXPECT_EQ(Record.AllocSites[1].CallStack[1].hash(), F3.hash()); + ASSERT_THAT(Record.CallSites, SizeIs(2)); + ASSERT_THAT(Record.CallSites[0], SizeIs(2)); + EXPECT_EQ(Record.CallSites[0][0].hash(), F2.hash()); + EXPECT_EQ(Record.CallSites[0][1].hash(), F3.hash()); + ASSERT_THAT(Record.CallSites[1], SizeIs(2)); + EXPECT_EQ(Record.CallSites[1][0].hash(), F2.hash()); + EXPECT_EQ(Record.CallSites[1][1].hash(), F4.hash()); +} } // namespace diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index 36f8fa14653938..b3a05e081f6375 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -1181,9 +1181,15 @@ void InstrInfoEmitter::emitRecord( // Each logical operand can be multiple MI operands. MinOperands = Inst.Operands.back().MIOperandNo + Inst.Operands.back().MINumOperands; + // Even the logical output operand may be multiple MI operands. 
+ int DefOperands = 0; + if (Inst.Operands.NumDefs) { + auto &Opnd = Inst.Operands[Inst.Operands.NumDefs - 1]; + DefOperands = Opnd.MIOperandNo + Opnd.MINumOperands; + } OS << " { "; - OS << Num << ",\t" << MinOperands << ",\t" << Inst.Operands.NumDefs << ",\t" + OS << Num << ",\t" << MinOperands << ",\t" << DefOperands << ",\t" << Inst.TheDef->getValueAsInt("Size") << ",\t" << SchedModels.getSchedClassIdx(Inst) << ",\t"; diff --git a/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp b/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp index 7a6439cb94910e..e57bc6fb507e32 100644 --- a/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp +++ b/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp @@ -60,11 +60,19 @@ static void EmitRISCVTargetDef(RecordKeeper &RK, raw_ostream &OS) { if (MArch.empty()) MArch = getMArch(*Rec); - const bool FastUnalignedAccess = + bool FastScalarUnalignedAccess = any_of(Rec->getValueAsListOfDefs("Features"), [&](auto &Feature) { - return Feature->getValueAsString("Name") == "fast-unaligned-access"; + return Feature->getValueAsString("Name") == "unaligned-scalar-mem"; }); + bool FastVectorUnalignedAccess = + any_of(Rec->getValueAsListOfDefs("Features"), [&](auto &Feature) { + return Feature->getValueAsString("Name") == "unaligned-vector-mem"; + }); + + bool FastUnalignedAccess = + FastScalarUnalignedAccess && FastVectorUnalignedAccess; + OS << "PROC(" << Rec->getName() << ", " << "{\"" << Rec->getValueAsString("Name") << "\"}, " << "{\"" << MArch << "\"}, " << FastUnalignedAccess << ")\n"; diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index ecb19d233a8d1a..eed36a0cdd73fd 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -980,10 +980,6 @@ def __init__( def is_local_def_ir_value(self): return self.ir_prefix == "%" - # Return true if this kind of IR value is "global", basically if it matches '#{{.*}}'. 
- def is_global_scope_ir_value_match(self, match): - return self.global_ir_rhs_regexp is not None - # Return the IR prefix and check prefix we use for this kind or IR value, # e.g., (%, TMP) for locals. If the IR prefix is a regex, return the prefix # used in the IR output diff --git a/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn index 04f20211b3c710..22433459a78786 100644 --- a/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn @@ -23,6 +23,7 @@ static_library("FlowSensitive") { target_gen_dir, ] sources = [ + "ASTOps.cpp", "AdornedCFG.cpp", "Arena.cpp", "DataflowAnalysisContext.cpp", diff --git a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn index 1f6879358f22bc..955854c7a134bc 100644 --- a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn @@ -125,6 +125,7 @@ cxx_sources = [ "condition_variable_destructor.cpp", "error_category.cpp", "exception.cpp", + "expected.cpp", "fstream.cpp", "functional.cpp", "future.cpp", diff --git a/llvm/utils/gn/secondary/lldb/test/BUILD.gn b/llvm/utils/gn/secondary/lldb/test/BUILD.gn index 414ea4933c519d..c8245739842d9e 100644 --- a/llvm/utils/gn/secondary/lldb/test/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/test/BUILD.gn @@ -118,6 +118,7 @@ write_lit_cfg("lit_shell_site_cfg") { "LLDB_TOOLS_DIR=" + rebase_path("$root_out_dir/bin"), "LLDB_USE_SYSTEM_DEBUGSERVER=1", # XXX port //lldb/tools/debugserver (?) 
"LLVM_HOST_TRIPLE=$llvm_current_triple", + "LLVM_USE_SANITIZER=", ] if (llvm_enable_zlib) { diff --git a/mlir/include/mlir/Dialect/Polynomial/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/Polynomial/IR/CMakeLists.txt index d8039deb5ee217..79e739953d7cf4 100644 --- a/mlir/include/mlir/Dialect/Polynomial/IR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Polynomial/IR/CMakeLists.txt @@ -1,8 +1,8 @@ add_mlir_dialect(Polynomial polynomial) -add_mlir_doc(PolynomialDialect PolynomialDialect Polynomial/ -gen-dialect-doc) -add_mlir_doc(PolynomialOps PolynomialOps Polynomial/ -gen-op-doc) -add_mlir_doc(PolynomialAttributes PolynomialAttributes Dialects/ -gen-attrdef-doc) -add_mlir_doc(PolynomialTypes PolynomialTypes Dialects/ -gen-typedef-doc) +add_mlir_doc(Polynomial PolynomialDialect Polynomial/ -gen-dialect-doc -dialect=polynomial) +add_mlir_doc(Polynomial PolynomialOps Polynomial/ -gen-op-doc) +add_mlir_doc(Polynomial PolynomialAttributes Dialects/ -gen-attrdef-doc) +add_mlir_doc(Polynomial PolynomialTypes Dialects/ -gen-typedef-doc) set(LLVM_TARGET_DEFINITIONS Polynomial.td) mlir_tablegen(PolynomialAttributes.cpp.inc -gen-attrdef-defs -attrdefs-dialect=polynomial) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td index 0cfc64f9988a0a..4e4441c640ed95 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -27,9 +27,7 @@ class SparseTensor_Op traits = []> // Sparse Tensor Operations. 
//===----------------------------------------------------------------------===// -def SparseTensor_NewOp : SparseTensor_Op<"new", [Pure]>, - Arguments<(ins AnyType:$source)>, - Results<(outs AnySparseTensor:$result)> { +def SparseTensor_NewOp : SparseTensor_Op<"new", [Pure]> { string summary = "Materializes a new sparse tensor from given source"; string description = [{ Materializes a sparse tensor with contents taken from an opaque pointer @@ -51,15 +49,14 @@ def SparseTensor_NewOp : SparseTensor_Op<"new", [Pure]>, sparse_tensor.new %source : !Source to tensor<1024x1024xf64, #CSR> ``` }]; + + let arguments = (ins AnyType:$source); + let results = (outs AnySparseTensor:$result); let assemblyFormat = "$source attr-dict `:` type($source) `to` type($result)"; } -def SparseTensor_AssembleOp : SparseTensor_Op<"assemble", [Pure]>, - Arguments<(ins Variadic>:$levels, - TensorOf<[AnyType]>:$values)>, - Results<(outs AnySparseTensor: $result)> { +def SparseTensor_AssembleOp : SparseTensor_Op<"assemble", [Pure]> { let summary = "Returns a sparse tensor assembled from the given levels and values"; - let description = [{ Assembles the per-level position and coordinate arrays together with the values arrays into a sparse tensor. 
The order and types of the @@ -93,6 +90,9 @@ def SparseTensor_AssembleOp : SparseTensor_Op<"assemble", [Pure]>, ``` }]; + let arguments = (ins Variadic>:$levels, + TensorOf<[AnyType]>:$values); + let results = (outs AnySparseTensor: $result); let assemblyFormat = "` ` `(` $levels `)` `,` $values attr-dict `:`" " `(` type($levels) `)` `,` type($values) `to` type($result)"; @@ -100,16 +100,8 @@ def SparseTensor_AssembleOp : SparseTensor_Op<"assemble", [Pure]>, let hasVerifier = 1; } -def SparseTensor_DisassembleOp : SparseTensor_Op<"disassemble", [Pure, SameVariadicResultSize]>, - Arguments<(ins AnySparseTensor:$tensor, - Variadic>:$out_levels, - TensorOf<[AnyType]>:$out_values)>, - Results<(outs Variadic>:$ret_levels, - TensorOf<[AnyType]>:$ret_values, - Variadic:$lvl_lens, - AnyIndexingScalarLike:$val_len)> { +def SparseTensor_DisassembleOp : SparseTensor_Op<"disassemble", [Pure, SameVariadicResultSize]> { let summary = "Copies the levels and values of the given sparse tensor"; - let description = [{ The disassemble operation is the inverse of `sparse_tensor::assemble`. 
It copies the per-level position and coordinate arrays together with @@ -143,6 +135,13 @@ def SparseTensor_DisassembleOp : SparseTensor_Op<"disassemble", [Pure, SameVaria ``` }]; + let arguments = (ins AnySparseTensor:$tensor, + Variadic>:$out_levels, + TensorOf<[AnyType]>:$out_values); + let results = (outs Variadic>:$ret_levels, + TensorOf<[AnyType]>:$ret_values, + Variadic:$lvl_lens, + AnyIndexingScalarLike:$val_len); let assemblyFormat = "$tensor attr-dict `:` type($tensor)" "`out_lvls` `(` $out_levels `:` type($out_levels) `)` " @@ -154,9 +153,7 @@ def SparseTensor_DisassembleOp : SparseTensor_Op<"disassemble", [Pure, SameVaria } def SparseTensor_ConvertOp : SparseTensor_Op<"convert", - [Pure, StageWithSortSparseOpInterface]>, - Arguments<(ins AnyTensor:$source)>, - Results<(outs AnyTensor:$dest)> { + [Pure, StageWithSortSparseOpInterface]> { string summary = "Converts between different tensor types"; string description = [{ Converts one sparse or dense tensor type to another tensor type. The rank @@ -197,20 +194,22 @@ def SparseTensor_ConvertOp : SparseTensor_Op<"convert", }]; + let arguments = (ins AnyTensor:$source); + let results = (outs AnyTensor:$dest); + let assemblyFormat = "$source attr-dict `:` type($source) `to` type($dest)"; + let extraClassDeclaration = [{ // Whether the convert can be done by a single step or it would require // an extra sort. Inherited from StageWithSortSparseOpInterface. 
bool needsExtraSort(); }]; - let assemblyFormat = "$source attr-dict `:` type($source) `to` type($dest)"; let hasFolder = 1; let hasVerifier = 1; } -def SparseTensor_ReinterpretMapOp : SparseTensor_Op<"reinterpret_map", [NoMemoryEffect]>, - Arguments<(ins AnySparseTensor:$source)>, - Results<(outs AnySparseTensor:$dest)> { +def SparseTensor_ReinterpretMapOp : SparseTensor_Op<"reinterpret_map", + [NoMemoryEffect]> { let summary = "Reinterprets the dimension/level maps of the source tensor"; let description = [{ Reinterprets the dimension-to-level and level-to-dimension map specified in @@ -248,19 +247,20 @@ def SparseTensor_ReinterpretMapOp : SparseTensor_Op<"reinterpret_map", [NoMemory ``` }]; + let arguments = (ins AnySparseTensor:$source); + let results = (outs AnySparseTensor:$dest); + let assemblyFormat = "$source attr-dict `:` type($source) `to` type($dest)"; + let builders = [ OpBuilder<(ins "SparseTensorEncodingAttr":$dstEnc, "Value":$source)> ]; - let assemblyFormat = "$source attr-dict `:` type($source) `to` type($dest)"; let hasFolder = 1; let hasVerifier = 1; } def SparseTensor_ToPositionsOp : SparseTensor_Op<"positions", - [Pure, DeclareOpInterfaceMethods]>, - Arguments<(ins AnySparseTensor:$tensor, LevelAttr:$level)>, - Results<(outs AnyNon0RankedMemRef:$result)> { + [Pure, DeclareOpInterfaceMethods]> { let summary = "Extracts the `level`-th positions array of the `tensor`"; let description = [{ Returns the positions array of the tensor's storage at the given @@ -280,14 +280,16 @@ def SparseTensor_ToPositionsOp : SparseTensor_Op<"positions", : tensor<64x64xf64, #CSR> to memref ``` }]; + + let arguments = (ins AnySparseTensor:$tensor, LevelAttr:$level); + let results = (outs AnyNon0RankedMemRef:$result); let assemblyFormat = "$tensor attr-dict `:` type($tensor) `to` type($result)"; + let hasVerifier = 1; } def SparseTensor_ToCoordinatesOp : SparseTensor_Op<"coordinates", - [Pure, DeclareOpInterfaceMethods]>, - Arguments<(ins AnySparseTensor:$tensor, 
LevelAttr:$level)>, - Results<(outs AnyNon0RankedMemRef:$result)> { + [Pure, DeclareOpInterfaceMethods]> { let summary = "Extracts the `level`-th coordinates array of the `tensor`"; let description = [{ Returns the coordinates array of the tensor's storage at the given @@ -307,14 +309,16 @@ def SparseTensor_ToCoordinatesOp : SparseTensor_Op<"coordinates", : tensor<64x64xf64, #CSR> to memref ``` }]; + + let arguments = (ins AnySparseTensor:$tensor, LevelAttr:$level); + let results = (outs AnyNon0RankedMemRef:$result); let assemblyFormat = "$tensor attr-dict `:` type($tensor) `to` type($result)"; + let hasVerifier = 1; } def SparseTensor_ToCoordinatesBufferOp : SparseTensor_Op<"coordinates_buffer", - [Pure, DeclareOpInterfaceMethods]>, - Arguments<(ins AnySparseTensor:$tensor)>, - Results<(outs AnyNon0RankedMemRef:$result)> { + [Pure, DeclareOpInterfaceMethods]> { let summary = "Extracts the linear coordinates array from a tensor"; let description = [{ Returns the linear coordinates array for a sparse tensor with @@ -339,14 +343,16 @@ def SparseTensor_ToCoordinatesBufferOp : SparseTensor_Op<"coordinates_buffer", : tensor<64x64xf64, #COO> to memref ``` }]; + + let arguments = (ins AnySparseTensor:$tensor); + let results = (outs AnyNon0RankedMemRef:$result); let assemblyFormat = "$tensor attr-dict `:` type($tensor) `to` type($result)"; + let hasVerifier = 1; } def SparseTensor_ToValuesOp : SparseTensor_Op<"values", - [Pure, DeclareOpInterfaceMethods]>, - Arguments<(ins AnySparseTensor:$tensor)>, - Results<(outs AnyNon0RankedMemRef:$result)> { + [Pure, DeclareOpInterfaceMethods]> { let summary = "Extracts numerical values array from a tensor"; let description = [{ Returns the values array of the sparse storage format for the given @@ -365,13 +371,15 @@ def SparseTensor_ToValuesOp : SparseTensor_Op<"values", %1 = sparse_tensor.values %0 : tensor<64x64xf64, #CSR> to memref ``` }]; + + let arguments = (ins AnySparseTensor:$tensor); + let results = (outs 
AnyNon0RankedMemRef:$result); let assemblyFormat = "$tensor attr-dict `:` type($tensor) `to` type($result)"; + let hasVerifier = 1; } -def SparseTensor_NumberOfEntriesOp : SparseTensor_Op<"number_of_entries", [Pure]>, - Arguments<(ins AnySparseTensor:$tensor)>, - Results<(outs Index:$result)> { +def SparseTensor_NumberOfEntriesOp : SparseTensor_Op<"number_of_entries", [Pure]> { let summary = "Returns the number of entries that are stored in the tensor."; let description = [{ Returns the number of entries that are stored in the given sparse tensor. @@ -385,14 +393,14 @@ def SparseTensor_NumberOfEntriesOp : SparseTensor_Op<"number_of_entries", [Pure] %noe = sparse_tensor.number_of_entries %tensor : tensor<64x64xf64, #CSR> ``` }]; + + let arguments = (ins AnySparseTensor:$tensor); + let results = (outs Index:$result); let assemblyFormat = "$tensor attr-dict `:` type($tensor)"; } def SparseTensor_ConcatenateOp : SparseTensor_Op<"concatenate", - [Pure, StageWithSortSparseOpInterface]>, - Arguments<(ins Variadic:$inputs, DimensionAttr:$dimension)>, - Results<(outs AnyRankedTensor:$result)> { - + [Pure, StageWithSortSparseOpInterface]> { let summary = "Concatenates a list of tensors into a single tensor."; let description = [{ Concatenates a list input tensors and the output tensor with the same @@ -418,13 +426,14 @@ def SparseTensor_ConcatenateOp : SparseTensor_Op<"concatenate", bool needsExtraSort(); }]; + let arguments = (ins Variadic:$inputs, DimensionAttr:$dimension); + let results = (outs AnyRankedTensor:$result); let assemblyFormat = "$inputs attr-dict `:` type($inputs) `to` type($result)"; + let hasVerifier = 1; } -def SparseTensor_ToSliceOffsetOp : SparseTensor_Op<"slice.offset", [Pure]>, - Arguments<(ins AnySparseTensorSlice:$slice, IndexAttr:$dim)>, - Results<(outs Index:$offset)> { +def SparseTensor_ToSliceOffsetOp : SparseTensor_Op<"slice.offset", [Pure]> { let summary = "Extracts the offset of the sparse tensor slice at the given dimension"; let description 
= [{ Extracts the offset of the sparse tensor slice at the given dimension. @@ -445,13 +454,15 @@ def SparseTensor_ToSliceOffsetOp : SparseTensor_Op<"slice.offset", [Pure]>, // %2 = %v2 ``` }]; + + let arguments = (ins AnySparseTensorSlice:$slice, IndexAttr:$dim); + let results = (outs Index:$offset); let assemblyFormat = "$slice `at` $dim attr-dict `:` type($slice)"; + let hasVerifier = 1; } -def SparseTensor_ToSliceStrideOp : SparseTensor_Op<"slice.stride", [Pure]>, - Arguments<(ins AnySparseTensorSlice:$slice, IndexAttr:$dim)>, - Results<(outs Index:$stride)> { +def SparseTensor_ToSliceStrideOp : SparseTensor_Op<"slice.stride", [Pure]> { let summary = "Extracts the stride of the sparse tensor slice at the given dimension"; let description = [{ Extracts the stride of the sparse tensor slice at the given dimension. @@ -473,7 +484,11 @@ def SparseTensor_ToSliceStrideOp : SparseTensor_Op<"slice.stride", [Pure]>, ``` }]; + + let arguments = (ins AnySparseTensorSlice:$slice, IndexAttr:$dim); + let results = (outs Index:$stride); let assemblyFormat = "$slice `at` $dim attr-dict `:` type($slice)"; + let hasVerifier = 1; } @@ -482,9 +497,7 @@ def SparseTensor_ToSliceStrideOp : SparseTensor_Op<"slice.stride", [Pure]>, //===----------------------------------------------------------------------===// def SparseTensor_StorageSpecifierInitOp : SparseTensor_Op<"storage_specifier.init", - [Pure]>, - Arguments<(ins Optional:$source)>, - Results<(outs SparseTensorStorageSpecifier:$result)> { + [Pure]> { let summary = ""; let description = [{ Returns an initial storage specifier value. A storage specifier @@ -515,6 +528,10 @@ def SparseTensor_StorageSpecifierInitOp : SparseTensor_Op<"storage_specifier.ini ``` }]; + let arguments = (ins Optional:$source); + let results = (outs SparseTensorStorageSpecifier:$result); + let assemblyFormat = "attr-dict (`with` $source^)? `:` (`from` qualified(type($source))^ `to`)?" 
+ " qualified(type($result))"; let builders = [ OpBuilder<(ins "Type":$result), [{ @@ -522,15 +539,10 @@ def SparseTensor_StorageSpecifierInitOp : SparseTensor_Op<"storage_specifier.ini }]> ]; - let assemblyFormat = "attr-dict (`with` $source^)? `:` (`from` qualified(type($source))^ `to`)?" - " qualified(type($result))"; + } -def SparseTensor_GetStorageSpecifierOp : SparseTensor_Op<"storage_specifier.get", [Pure]>, - Arguments<(ins SparseTensorStorageSpecifier:$specifier, - SparseTensorStorageSpecifierKindAttr:$specifierKind, - OptionalAttr:$level)>, - Results<(outs Index:$result)> { +def SparseTensor_GetStorageSpecifierOp : SparseTensor_Op<"storage_specifier.get", [Pure]> { let summary = ""; let description = [{ Returns the requested field of the given storage_specifier. @@ -543,19 +555,19 @@ def SparseTensor_GetStorageSpecifierOp : SparseTensor_Op<"storage_specifier.get" ``` }]; + let arguments = (ins SparseTensorStorageSpecifier:$specifier, + SparseTensorStorageSpecifierKindAttr:$specifierKind, + OptionalAttr:$level); + let results = (outs Index:$result); let assemblyFormat = "$specifier $specifierKind (`at` $level^)? attr-dict" "`:` qualified(type($specifier))"; + let hasVerifier = 1; let hasFolder = 1; } def SparseTensor_SetStorageSpecifierOp : SparseTensor_Op<"storage_specifier.set", - [Pure, AllTypesMatch<["result", "specifier"]>]>, - Arguments<(ins SparseTensorStorageSpecifier:$specifier, - SparseTensorStorageSpecifierKindAttr:$specifierKind, - OptionalAttr:$level, - Index:$value)>, - Results<(outs SparseTensorStorageSpecifier:$result)> { + [Pure, AllTypesMatch<["result", "specifier"]>]> { let summary = ""; let description = [{ Set the field of the storage specifier to the given input value. 
Returns @@ -568,8 +580,15 @@ def SparseTensor_SetStorageSpecifierOp : SparseTensor_Op<"storage_specifier.set" : !sparse_tensor.storage_specifier<#COO> ``` }]; + + let arguments = (ins SparseTensorStorageSpecifier:$specifier, + SparseTensorStorageSpecifierKindAttr:$specifierKind, + OptionalAttr:$level, + Index:$value); + let results = (outs SparseTensorStorageSpecifier:$result); let assemblyFormat = "$specifier $specifierKind (`at` $level^)? `with` $value" " attr-dict `:` qualified(type($result))"; + let hasVerifier = 1; } @@ -577,9 +596,7 @@ def SparseTensor_SetStorageSpecifierOp : SparseTensor_Op<"storage_specifier.set" // Sparse Tensor Coordinate Operations. //===----------------------------------------------------------------------===// -def SparseTensor_LvlOp : SparseTensor_Op<"lvl", [ConditionallySpeculatable, NoMemoryEffect]>, - Arguments<(ins AnySparseTensor:$source, Index:$index)>, - Results<(outs Index:$result)> { +def SparseTensor_LvlOp : SparseTensor_Op<"lvl", [ConditionallySpeculatable, NoMemoryEffect]> { let summary = "level index operation"; let description = [{ The `sparse_tensor.lvl` behaves similar to `tensor.dim` operation. 
@@ -615,9 +632,9 @@ def SparseTensor_LvlOp : SparseTensor_Op<"lvl", [ConditionallySpeculatable, NoMe ``` }]; - let assemblyFormat = [{ - attr-dict $source `,` $index `:` type($source) - }]; + let arguments = (ins AnySparseTensor:$source, Index:$index); + let results = (outs Index:$result); + let assemblyFormat = "attr-dict $source `,` $index `:` type($source) "; let builders = [ OpBuilder<(ins "Value":$source, "int64_t":$index)> @@ -635,11 +652,7 @@ def SparseTensor_LvlOp : SparseTensor_Op<"lvl", [ConditionallySpeculatable, NoMe let hasFolder = 1; } -def SparseTensor_CrdTranslateOp : SparseTensor_Op<"crd_translate", [Pure]>, - Arguments<(ins Variadic:$in_crds, - SparseTensorCrdTransDirectionAttr:$direction, - SparseTensorEncodingAttr:$encoder)>, - Results<(outs Variadic:$out_crds)> { +def SparseTensor_CrdTranslateOp : SparseTensor_Op<"crd_translate", [Pure]> { string summary = "Performs coordinate translation between level and dimension coordinate space."; string description = [{ Performs coordinate translation between level and dimension coordinate space according @@ -652,7 +665,13 @@ def SparseTensor_CrdTranslateOp : SparseTensor_Op<"crd_translate", [Pure]>, : index, index, index, index ``` }]; + + let arguments = (ins Variadic:$in_crds, + SparseTensorCrdTransDirectionAttr:$direction, + SparseTensorEncodingAttr:$encoder); + let results = (outs Variadic:$out_crds); let assemblyFormat = "$direction `[` $in_crds `]` `as` $encoder attr-dict `:` type($out_crds)"; + let hasVerifier = 1; let hasFolder = 1; } @@ -669,13 +688,7 @@ def SparseTensor_PushBackOp : SparseTensor_Op<"push_back", [TypesMatchWith<"value type matches element type of inBuffer", "inBuffer", "value", "::llvm::cast($_self).getElementType()">, - AllTypesMatch<["inBuffer", "outBuffer"]>]>, - Arguments<(ins Index:$curSize, - StridedMemRefRankOf<[AnyType], [1]>:$inBuffer, - AnyType:$value, Optional:$n, - UnitAttr:$inbounds)>, - Results<(outs StridedMemRefRankOf<[AnyType], [1]>:$outBuffer, - 
Index:$newSize)> { + AllTypesMatch<["inBuffer", "outBuffer"]>]> { string summary = "Pushes a value to the back of a given buffer"; string description = [{ Pushes `value` to the end of the given sparse tensor storage buffer @@ -719,6 +732,13 @@ def SparseTensor_PushBackOp : SparseTensor_Op<"push_back", : xindex, memref, f64 ``` }]; + + let arguments = (ins Index:$curSize, + StridedMemRefRankOf<[AnyType], [1]>:$inBuffer, + AnyType:$value, Optional:$n, + UnitAttr:$inbounds); + let results = (outs StridedMemRefRankOf<[AnyType], [1]>:$outBuffer, + Index:$newSize); let assemblyFormat = "(`inbounds` $inbounds^)? $curSize `,` $inBuffer" " `,` $value (`,` $n^ )? attr-dict `:`" " type($curSize) `,` type($inBuffer) `,`" @@ -732,12 +752,7 @@ def SparseTensor_PushBackOp : SparseTensor_Op<"push_back", let hasVerifier = 1; } -def SparseTensor_ExpandOp : SparseTensor_Op<"expand", []>, - Arguments<(ins AnySparseTensor:$tensor)>, - Results<(outs AnyStridedMemRefOfRank<1>:$values, - StridedMemRefRankOf<[I1],[1]>:$filled, - StridedMemRefRankOf<[Index],[1]>:$added, - Index:$count)> { +def SparseTensor_ExpandOp : SparseTensor_Op<"expand", []> { string summary = "Expands an access pattern for insertion"; string description = [{ Performs an access pattern expansion for the innermost levels of the @@ -771,19 +786,19 @@ def SparseTensor_ExpandOp : SparseTensor_Op<"expand", []>, : tensor<4x4xf64, #CSR> to memref, memref, memref ``` }]; + + + let arguments = (ins AnySparseTensor:$tensor); + let results = (outs AnyStridedMemRefOfRank<1>:$values, + StridedMemRefRankOf<[I1],[1]>:$filled, + StridedMemRefRankOf<[Index],[1]>:$added, + Index:$count); let assemblyFormat = "$tensor attr-dict `:` type($tensor) `to` type($values)" " `,` type($filled) `,` type($added)"; } def SparseTensor_CompressOp : SparseTensor_Op<"compress", - [AllTypesMatch<["tensor", "result"]>]>, - Arguments<(ins AnyStridedMemRefOfRank<1>:$values, - StridedMemRefRankOf<[I1],[1]>:$filled, - StridedMemRefRankOf<[Index],[1]>:$added, 
- Index:$count, - AnySparseTensor:$tensor, - Variadic:$lvlCoords)>, - Results<(outs AnySparseTensor:$result)> { + [AllTypesMatch<["tensor", "result"]>]> { string summary = "Compressed an access pattern for insertion"; string description = [{ Finishes a single access pattern expansion by moving inserted elements @@ -807,6 +822,14 @@ def SparseTensor_CompressOp : SparseTensor_Op<"compress", : memref, memref, memref, tensor<4x4xf64, #CSR> ``` }]; + + let arguments = (ins AnyStridedMemRefOfRank<1>:$values, + StridedMemRefRankOf<[I1],[1]>:$filled, + StridedMemRefRankOf<[Index],[1]>:$added, + Index:$count, + AnySparseTensor:$tensor, + Variadic:$lvlCoords); + let results = (outs AnySparseTensor:$result); let assemblyFormat = "$values `,` $filled `,` $added `,` $count" " `into` $tensor `[` $lvlCoords `]` attr-dict" " `:` type($values) `,` type($filled) `,` type($added)" @@ -814,9 +837,7 @@ def SparseTensor_CompressOp : SparseTensor_Op<"compress", let hasVerifier = 1; } -def SparseTensor_LoadOp : SparseTensor_Op<"load", [SameOperandsAndResultType]>, - Arguments<(ins AnySparseTensor:$tensor, UnitAttr:$hasInserts)>, - Results<(outs AnyTensor:$result)> { +def SparseTensor_LoadOp : SparseTensor_Op<"load", [SameOperandsAndResultType]> { let summary = "Rematerializes tensor from underlying sparse storage format"; let description = [{ @@ -845,11 +866,13 @@ def SparseTensor_LoadOp : SparseTensor_Op<"load", [SameOperandsAndResultType]>, %1 = sparse_tensor.load %0 hasInserts : tensor<16x32xf32, #CSR> ``` }]; + + let arguments = (ins AnySparseTensor:$tensor, UnitAttr:$hasInserts); + let results = (outs AnyTensor:$result); let assemblyFormat = "$tensor (`hasInserts` $hasInserts^)? 
attr-dict `:` type($tensor)"; } -def SparseTensor_OutOp : SparseTensor_Op<"out", []>, - Arguments<(ins AnySparseTensor:$tensor, AnyType:$dest)> { +def SparseTensor_OutOp : SparseTensor_Op<"out", []> { string summary = "Outputs a sparse tensor to the given destination"; string description = [{ Outputs the contents of a sparse tensor to the destination defined by an @@ -868,6 +891,8 @@ def SparseTensor_OutOp : SparseTensor_Op<"out", []>, sparse_tensor.out %t, %dest : tensor<1024x1024xf64, #CSR>, !Dest ``` }]; + + let arguments = (ins AnySparseTensor:$tensor, AnyType:$dest); let assemblyFormat = "$tensor `,` $dest attr-dict `:` type($tensor) `,` type($dest)"; } @@ -875,11 +900,7 @@ def SparseTensor_OutOp : SparseTensor_Op<"out", []>, // Sparse Tensor Sorting/Ordering Operations. //===----------------------------------------------------------------------===// -def SparseTensor_SortOp : SparseTensor_Op<"sort">, - Arguments<(ins Index:$n, StridedMemRefRankOf<[AnyInteger, Index], [1]>:$xy, - Variadic>:$ys, - AffineMapAttr:$perm_map, OptionalAttr:$ny, - SparseTensorSortKindAttr:$algorithm)> { +def SparseTensor_SortOp : SparseTensor_Op<"sort"> { let summary = "Sorts the arrays in xs and ys lexicographically on the " "integral values found in the xs list"; let description = [{ @@ -904,16 +925,18 @@ def SparseTensor_SortOp : SparseTensor_Op<"sort">, ``` }]; + let arguments = (ins Index:$n, + StridedMemRefRankOf<[AnyInteger, Index], [1]>:$xy, + Variadic>:$ys, + AffineMapAttr:$perm_map, OptionalAttr:$ny, + SparseTensorSortKindAttr:$algorithm); let assemblyFormat = "$algorithm $n" "`,`$xy (`jointly` $ys^)? 
attr-dict" "`:` type($xy) (`jointly` type($ys)^)?"; let hasVerifier = 1; } -def SparseTensor_ReorderCOOOp : SparseTensor_Op<"reorder_coo", [Pure]>, - Arguments<(ins AnySparseTensor: $input_coo, - SparseTensorSortKindAttr:$algorithm)>, - Results<(outs AnySparseTensor: $result_coo)> { +def SparseTensor_ReorderCOOOp : SparseTensor_Op<"reorder_coo", [Pure]> { let summary = "Reorder the input COO such that it has the the same order as " "the output COO"; let description = [{ @@ -933,6 +956,9 @@ def SparseTensor_ReorderCOOOp : SparseTensor_Op<"reorder_coo", [Pure]>, ``` }]; + let arguments = (ins AnySparseTensor: $input_coo, + SparseTensorSortKindAttr:$algorithm); + let results = (outs AnySparseTensor: $result_coo); let assemblyFormat = "$algorithm $input_coo attr-dict" "`:` type($input_coo) `to` type($result_coo)"; @@ -944,9 +970,7 @@ def SparseTensor_ReorderCOOOp : SparseTensor_Op<"reorder_coo", [Pure]>, // Sparse Tensor Syntax Operations. //===----------------------------------------------------------------------===// -def SparseTensor_BinaryOp : SparseTensor_Op<"binary", [Pure]>, - Arguments<(ins AnyType:$x, AnyType:$y, UnitAttr:$left_identity, UnitAttr:$right_identity)>, - Results<(outs AnyType:$output)> { +def SparseTensor_BinaryOp : SparseTensor_Op<"binary", [Pure]> { let summary = "Binary set operation utilized within linalg.generic"; let description = [{ Defines a computation within a `linalg.generic` operation that takes two @@ -1054,18 +1078,24 @@ def SparseTensor_BinaryOp : SparseTensor_Op<"binary", [Pure]>, }]; let regions = (region AnyRegion:$overlapRegion, AnyRegion:$leftRegion, AnyRegion:$rightRegion); + let arguments = (ins AnyType:$x, AnyType:$y, UnitAttr:$left_identity, UnitAttr:$right_identity); + let results = (outs AnyType:$output); let assemblyFormat = [{ $x `,` $y `:` attr-dict type($x) `,` type($y) `to` type($output) `\n` `overlap` `=` $overlapRegion `\n` `left` `=` (`identity` $left_identity^):($leftRegion)? 
`\n` `right` `=` (`identity` $right_identity^):($rightRegion)? }]; + let hasVerifier = 1; } -def SparseTensor_UnaryOp : SparseTensor_Op<"unary", [Pure]>, - Arguments<(ins AnyType:$x)>, - Results<(outs AnyType:$output)> { +def SparseTensor_UnaryOp : SparseTensor_Op<"unary", [Pure]> { + + let arguments = (ins AnyType:$x); + + let results = (outs AnyType:$output); + let summary = "Unary set operation utilized within linalg.generic"; let description = [{ Defines a computation with a `linalg.generic` operation that takes a single @@ -1162,9 +1192,7 @@ def SparseTensor_UnaryOp : SparseTensor_Op<"unary", [Pure]>, let hasVerifier = 1; } -def SparseTensor_ReduceOp : SparseTensor_Op<"reduce", [Pure, SameOperandsAndResultType]>, - Arguments<(ins AnyType:$x, AnyType:$y, AnyType:$identity)>, - Results<(outs AnyType:$output)> { +def SparseTensor_ReduceOp : SparseTensor_Op<"reduce", [Pure, SameOperandsAndResultType]> { let summary = "Custom reduction operation utilized within linalg.generic"; let description = [{ Defines a computation with a `linalg.generic` operation that takes two @@ -1208,16 +1236,14 @@ def SparseTensor_ReduceOp : SparseTensor_Op<"reduce", [Pure, SameOperandsAndResu }]; let regions = (region SizedRegion<1>:$region); + let arguments = (ins AnyType:$x, AnyType:$y, AnyType:$identity); + let results = (outs AnyType:$output); + let assemblyFormat = "$x `,` $y `,` $identity attr-dict `:` type($output) $region"; - let assemblyFormat = [{ - $x `,` $y `,` $identity attr-dict `:` type($output) $region - }]; let hasVerifier = 1; } -def SparseTensor_SelectOp : SparseTensor_Op<"select", [Pure, SameOperandsAndResultType]>, - Arguments<(ins AnyType:$x)>, - Results<(outs AnyType:$output)> { +def SparseTensor_SelectOp : SparseTensor_Op<"select", [Pure, SameOperandsAndResultType]> { let summary = "Select operation utilized within linalg.generic"; let description = [{ Defines an evaluation within a `linalg.generic` operation that takes a single @@ -1269,16 +1295,16 @@ def 
SparseTensor_SelectOp : SparseTensor_Op<"select", [Pure, SameOperandsAndResu }]; let regions = (region SizedRegion<1>:$region); - let assemblyFormat = [{ - $x attr-dict `:` type($x) $region - }]; + let arguments = (ins AnyType:$x); + let results = (outs AnyType:$output); + let assemblyFormat = "$x attr-dict `:` type($x) $region"; + let hasVerifier = 1; } def SparseTensor_YieldOp : SparseTensor_Op<"yield", [Pure, Terminator, ParentOneOf<["BinaryOp", "UnaryOp", "ReduceOp", "SelectOp", - "ForeachOp"]>]>, - Arguments<(ins Variadic:$results)> { + "ForeachOp"]>]> { let summary = "Yield from sparse_tensor set-like operations"; let description = [{ Yields a value from within a `binary`, `unary`, `reduce`, @@ -1319,17 +1345,12 @@ def SparseTensor_YieldOp : SparseTensor_Op<"yield", [Pure, Terminator, } }]; - let assemblyFormat = [{ - $results attr-dict `:` type($results) - }]; + let arguments = (ins Variadic:$results); + let assemblyFormat = "$results attr-dict `:` type($results)"; } def SparseTensor_ForeachOp : SparseTensor_Op<"foreach", - [SingleBlockImplicitTerminator<"YieldOp">]>, - Arguments<(ins AnyTensor:$tensor, - Variadic:$initArgs, - OptionalAttr:$order)>, - Results<(outs Variadic:$results)> { + [SingleBlockImplicitTerminator<"YieldOp">]> { let summary = "Iterates over elements in a tensor"; let description = [{ Iterates over stored elements in a tensor (which are typically, but not always, @@ -1424,18 +1445,79 @@ def SparseTensor_ForeachOp : SparseTensor_Op<"foreach", ]; let regions = (region SizedRegion<1>:$region); + let arguments = (ins AnyTensor:$tensor, + Variadic:$initArgs, + OptionalAttr:$order); + let results = (outs Variadic:$results); let assemblyFormat = "`in` $tensor (`init``(`$initArgs^`)`)? attr-dict" " `:` type($tensor) (`,` type($initArgs)^)?" " (`->` type($results)^)? `do` $region"; let hasVerifier = 1; } +//===----------------------------------------------------------------------===// +// Sparse Tensor Iteration Operations. 
+//===----------------------------------------------------------------------===// + +def ExtractIterSpaceOp : SparseTensor_Op<"extract_iteration_space", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Extracts an iteration space from a sparse tensor between certain levels"; + let description = [{ + Extracts a `!sparse_tensor.iter_space` from a sparse tensor between + certain (consecutive) levels. For sparse levels, it is usually done by + loading a position range from the underlying sparse tensor storage. + E.g., for a compressed level, the iteration space is extracted by + [pos[i], pos[i+1]) supposing the parent iterator points at `i`. + + `tensor`: the input sparse tensor that defines the iteration space. + `parentIter`: the iterator for the previous level, at which the iteration space + at the current levels will be extracted. + `loLvl`, `hiLvl`: the level range between [loLvl, hiLvl) in the input tensor that + the returned iteration space covers. `hiLvl - loLvl` defines the dimension of the + iteration space. + + The type of the returned value is automatically inferred to + `!sparse_tensor.iter_space<#INPUT_ENCODING, lvls = $loLvl to $hiLvl>`. + The returned iteration space can then be iterated over by + `sparse_tensor.iterate` operations to visit every stored element + (usually nonzeros) in the input sparse tensor. + + Example: + ```mlir + // Extracts a 1-D iteration space from a COO tensor at level 1. 
+ %space = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1 + : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0> + ``` + }]; + + + let extraClassDeclaration = [{ + std::pair getLvlRange() { + return std::make_pair(getLoLvl(), getHiLvl()); + } + unsigned getSpaceDim() { + return getHiLvl() - getLoLvl(); + } + ArrayRef<::mlir::sparse_tensor::LevelType> getSpaceLvlTypes() { + return getResultSpace().getType().getLvlTypes(); + } + }]; + + let arguments = (ins AnySparseTensor:$tensor, + Optional:$parentIter, + LevelAttr:$loLvl, LevelAttr:$hiLvl); + let results = (outs AnySparseIterSpace:$resultSpace); + let assemblyFormat = "$tensor (`at` $parentIter^)? `lvls` `=` custom($loLvl, $hiLvl) " + " attr-dict `:` type($tensor) (`,` type($parentIter)^)?"; + + let hasVerifier = 1; +} + //===----------------------------------------------------------------------===// // Sparse Tensor Debugging and Test-Only Operations. //===----------------------------------------------------------------------===// -def SparseTensor_PrintOp : SparseTensor_Op<"print">, - Arguments<(ins AnySparseTensor:$tensor)> { +def SparseTensor_PrintOp : SparseTensor_Op<"print"> { string summary = "Prints a sparse tensor (for testing and debugging)"; string description = [{ Prints the individual components of a sparse tensors (the positions, @@ -1449,6 +1531,8 @@ def SparseTensor_PrintOp : SparseTensor_Op<"print">, sparse_tensor.print %tensor : tensor<1024x1024xf64, #CSR> ``` }]; + + let arguments = (ins AnySparseTensor:$tensor); let assemblyFormat = "$tensor attr-dict `:` type($tensor)"; } diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td index 185cff46ae25d5..79113d8778743c 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td @@ -72,4 +72,101 @@ def SparseTensorStorageSpecifier : Type($_self)">, "metadata", 
"::mlir::sparse_tensor::StorageSpecifierType">; +//===----------------------------------------------------------------------===// +// Sparse Tensor Iteration Types. +//===----------------------------------------------------------------------===// + +def SparseTensor_IterSpace : SparseTensor_Type<"IterSpace"> { + let mnemonic = "iter_space"; + + let description = [{ + A sparse iteration space that represents an abstract N-D (sparse) iteration space + extracted from a sparse tensor, i.e., a set of (crd_0, crd_1, ..., crd_N) for + every stored element (usually nonzeros) in a sparse tensor between the specified + [$loLvl, $hiLvl) levels. + + Examples: + + ```mlir + // An iteration space extracted from a CSR tensor between levels [0, 2). + !iter_space<#CSR, lvls = 0 to 2> + ``` + }]; + + let parameters = (ins + SparseTensorEncodingAttr : $encoding, + "Level" : $loLvl, + "Level" : $hiLvl + ); + + let extraClassDeclaration = [{ + /// The dimension of the iteration space. + unsigned getSpaceDim() const { + return getHiLvl() - getLoLvl(); + } + + /// Get the level types for the iteration space. + ArrayRef getLvlTypes() const { + return getEncoding().getLvlTypes().slice(getLoLvl(), getSpaceDim()); + } + + /// Whether the iteration space is unique (i.e., no duplicated coordinate). + bool isUnique() { + return !getLvlTypes().back().isa(); + } + + /// Get the corresponding iterator type. + ::mlir::sparse_tensor::IteratorType getIteratorType() const; + }]; + + let assemblyFormat="`<` $encoding `,` `lvls` `=` custom($loLvl, $hiLvl) `>`"; +} + +def SparseTensor_Iterator : SparseTensor_Type<"Iterator"> { + let mnemonic = "iterator"; + + let description = [{ + An iterator that points to the current element in the corresponding iteration space. 
+ + Examples: + + ```mlir + // An iterator that iterates over an iteration space of type `!iter_space<#CSR, lvls = 0 to 2>` + !iterator<#CSR, lvls = 0 to 2> + ``` + }]; + + let parameters = (ins + SparseTensorEncodingAttr : $encoding, + "Level" : $loLvl, + "Level" : $hiLvl + ); + + let extraClassDeclaration = [{ + /// Get the corresponding iteration space type. + ::mlir::sparse_tensor::IterSpaceType getIterSpaceType() const; + + unsigned getSpaceDim() const { return getIterSpaceType().getSpaceDim(); } + ArrayRef getLvlTypes() const { return getIterSpaceType().getLvlTypes(); } + bool isUnique() { return getIterSpaceType().isUnique(); } + }]; + + let assemblyFormat="`<` $encoding `,` `lvls` `=` custom($loLvl, $hiLvl) `>`"; +} + +def IsSparseSparseIterSpaceTypePred + : CPred<"::llvm::isa<::mlir::sparse_tensor::IterSpaceType>($_self)">; + +def IsSparseSparseIteratorTypePred + : CPred<"::llvm::isa<::mlir::sparse_tensor::IteratorType>($_self)">; + +def AnySparseIterSpace + : Type; + +def AnySparseIterator + : Type; + + #endif // SPARSETENSOR_TYPES diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h index 87aabdc015fea5..eca9255ff3974b 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h @@ -12,6 +12,7 @@ #include "mlir/Bytecode/BytecodeOpInterface.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" +#include "mlir/IR/TypeUtilities.h" #include "mlir/Interfaces/ShapedOpInterfaces.h" #include "mlir/Interfaces/SideEffectInterfaces.h" #include "mlir/Interfaces/ViewLikeInterface.h" diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td index cd38549f1ccf43..6579d07ec26215 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -19,17 +19,36 @@ class XeGPUAttr traits = [], } def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> { + let 
summary = [{a composite attribute for `TensorDescType`}]; + let description = [{`TensorDescAttr` (or `tdesc_attr`) is a composite + attribute defined for `TensorDescType` for describing the following + properties of a `TensorDesc`. + 1. `memory_scope`: It describes where the data block described by the + TensorDesc is located, `Global` device memory or shared local memory (`SLM`). + It defaults to `Global`. + 2. `array_length`: It describes how many horizontally consecutive blocks + will be loaded by a hardware load instruction. If the TensorDesc shape + is 8x16 with array_length = 2, the loaded block shape will actually be + 8x32. Its default value is 1. + 3. `boundary_check`: It indicates to the hardware whether to perform + out-of-boundary checks. The default value is true. + 4. `scattered`: It is used to differentiate TensorDescs created from + `create_nd_tdesc` vs from `create_tdesc`. + }]; + + let parameters = (ins OptionalParameter<"MemoryScopeAttr">: $memory_scope, OptionalParameter<"IntegerAttr", "1">: $array_length, - OptionalParameter<"BoolAttr", "true">: $boundary_check + OptionalParameter<"BoolAttr", "true">: $boundary_check, + OptionalParameter<"BoolAttr", "false">: $scattered ); let builders = [ AttrBuilder<(ins CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope, CArg<"int", "1">:$array_length, - CArg<"bool", "true">: $boundary_check + CArg<"bool", "true">: $boundary_check, + CArg<"bool", "false">: $scattered )> ]; @@ -41,15 +60,17 @@ def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> { //===----------------------------------------------------------------------===// def XeGPU_MemoryScopeGlobal: I32EnumAttrCase<"Global", 0, "global">; def XeGPU_MemoryScopeShared: I32EnumAttrCase<"SLM", 1, "slm">; -def XeGPU_MemoryScope: I32EnumAttr<"MemoryScope", - "The address space of the memory the tensor descritor is created for", +def XeGPU_MemoryScope: I32EnumAttr<"MemoryScope", + "The address space of the memory the tensor descritor 
is created for", [XeGPU_MemoryScopeGlobal, XeGPU_MemoryScopeShared]> { let genSpecializedAttr = 0; let cppNamespace = "::mlir::xegpu"; } -def XeGPU_MemoryScopeAttr: +def XeGPU_MemoryScopeAttr: EnumAttr { + let summary = [{Describe the location of data described by a `TensorDesc`: + Global device memory (`Global`) or Shared local memory (`SLM`).}]; let assemblyFormat = "$value"; } @@ -63,19 +84,18 @@ def XeGPU_CachePolicyInvalid: I32EnumAttrCase<"READ_INVALIDATE", 3, "read_ def XeGPU_CachePolicyWriteBack: I32EnumAttrCase<"WRITE_BACK", 4, "write_back">; // valid for write only def XeGPU_CachePolicyWriteThrough: I32EnumAttrCase<"WRITE_THROUGH", 5, "write_through">; // valid for write only -def XeGPU_CachePolicyEnums : I32EnumAttr<"CachePolicy", "Cache policy", - [XeGPU_CachePolicyCached, XeGPU_CachePolicyUncached, +def XeGPU_CachePolicyEnums : I32EnumAttr<"CachePolicy", "Cache policy", + [XeGPU_CachePolicyCached, XeGPU_CachePolicyUncached, XeGPU_CachePolicyStreaming, XeGPU_CachePolicyInvalid, XeGPU_CachePolicyWriteBack, XeGPU_CachePolicyWriteThrough]> { let genSpecializedAttr = 0; let cppNamespace = "::mlir::xegpu"; } -def XeGPU_CacheHintAttr +def XeGPU_CacheHintAttr : EnumAttr { + let summary = [{Describe the cache settings for prefetch/load/store operators}]; let assemblyFormat = "`<` $value `>`"; } - - -#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD +#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD \ No newline at end of file diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index b8ebd1a40c6073..c6f7f83441b96c 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -47,36 +47,35 @@ class XeGPU_Op traits = []>: } -def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface, +def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface, AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface]> { let summary = 
"Create nd-tensor descriptor operation"; let description = [{ The "create_nd_tdesc" operation creates a TensorDescType which represents a sub-view of a 2D memory region (It can be extended to support n-D memory - region if needed in future). Elements in the subview continuous in each - dimention. It encodes the following important information for supporting + region if needed in future). Elements in the subview continuous in each + dimension. It encodes the following important information for supporting Intel hardware features: - * source: an object representing (starting address/pointer of) a 2D memory region. + * source: an object representing (starting address/pointer of) a 2D memory region. It can be either a 2D memref object, or simply a pointer represented by uint64_t type. - for the later case, the shape and layout information of the 2D memory region should - be explicitly passed via `dynamic_shape` and `dynamic_strides` parameters. - * offsets: two index values represents offsets from the "source" at the each dimension + for the later case, the shape and layout information of the 2D memory region should + be explicitly passed via `shape` and `strides` parameters. + * offsets: two index values represents offsets from the "source" at the each dimension at which the subview of the target memory will be created. It is encoded via two - variables, including "dynamic_offsets" and "static_offsets", such that it can - accept various forms, such as, operands (e.g., [%c0, %c]) and attributes (e.g., [2, 4])). - * shape: the shape information of the memory region pointed by the "source". It is - typically encoded via the MemRefType of the source, e.g., memref<4096x4096xf16>. - But if "source" is simply a pointer represented as uint64_t type, or a memref - type without shape information e.g., memref, the shape information has - to be explicitly passed via the "dynamic_shape" argument. 
Currently "dynamic_shape" - only accepts operands(e.g., [%c4096, %c4096]), not attributes(e.g., [4096, 4096]). - * strides: the strides of the memory region pointed by the "source". Similar to shape, - it is typically encoded via the MemRefType of the source too. But if "source" is - simply a pointer represented as uint64_t type, or a memref type without shape - information e.g., memref, the strides information has to be explicitly - passed via the "dynamic_strides" argument. And it currently only accepts operands two. + variables, including "offsets" and "const_offsets", such that it can + accept various forms, such as, operands (e.g., [%c0, %c]) and attributes (e.g., [2, 4]). + * shape: the shape information of the memory region pointed by the "source". It is + typically encoded via the MemRefType of the source, e.g., memref<4096x4096xf16>. + But if "source" is simply a pointer represented as uint64_t type, or a memref + type without shape information e.g., memref, the shape information has + to be explicitly passed via the "shape" and "const_shape" arguments. + * strides: the strides of the memory region pointed by the "source". Similar to shape, + it is typically encoded via the MemRefType of the source too. But if "source" is + simply a pointer represented as uint64_t type, or a memref type without shape + information e.g., memref, the strides information has to be explicitly + passed via the "strides" and "const_strides" argument. 
Example 1 (suppose the tensor shape inferred by the compiler is 8x16): %0 = memref.alloc() : memref<1024x1024xf32> @@ -97,10 +96,10 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface %1 = xegpu.create_nd_tdesc %0[%c0, %c0], [%h, %w], [%w, %c1]: ui64 -> TensorDesc<8x16xf32> }]; - let arguments = (ins - XeGPU_BaseAddrType: $source, - Variadic: $offsets, - Variadic: $shape, + let arguments = (ins + XeGPU_BaseAddrType: $source, + Variadic: $offsets, + Variadic: $shape, Variadic: $strides, DenseI64ArrayAttr: $const_offsets, OptionalAttr: $const_shape, @@ -119,12 +118,12 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface let hasVerifier = 1; let builders = [ - OpBuilder<(ins "Type": $tdesc, "TypedValue": $source, + OpBuilder<(ins "Type": $tdesc, "TypedValue": $source, "llvm::ArrayRef": $offsets)>, - OpBuilder<(ins "Type": $tdesc, "TypedValue ": $source, + OpBuilder<(ins "Type": $tdesc, "TypedValue ": $source, "llvm::ArrayRef": $offsets, - "llvm::ArrayRef": $shape, + "llvm::ArrayRef": $shape, "llvm::ArrayRef": $strides)> ]; @@ -159,41 +158,41 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface } /// wrapper for matching with OffsetSizeAndStrideOpInterface - /// If source is IntegerType or `const_shape` is filled, + /// If source is IntegerType or `const_shape` is filled, /// it will return `const_shape`, such that mixes of `shape` - /// and `const_shape` will be used to represent the shape of + /// and `const_shape` will be used to represent the shape of /// source operand. They overide static shape from source memref type. 
ArrayRef getStaticSizes() { auto attr = getConstShapeAttr(); if (getSourceType().isa() || attr) return attr; - + auto memrefType = getSourceType().dyn_cast(); assert(memrefType && "Incorrect use of getStaticSizes"); return memrefType.getShape(); } /// wrapper for matching with OffsetSizeAndStrideOpInterface - /// If source is IntegerType or `const_strides` is filled, it + /// If source is IntegerType or `const_strides` is filled, it /// will return `const_strides`, such that mixes of `strides` - /// and `const_strides` will be used to represent the strides of + /// and `const_strides` will be used to represent the strides of /// source operand. They overide static strides from source memref type. ArrayRef getStaticStrides() { auto attr = getConstStridesAttr(); if (getSourceType().isa() || attr) return attr; - + auto memrefType = getSourceType().dyn_cast(); assert(memrefType && "Incorrect use of getStaticStrides"); auto [strides, offset] = getStridesAndOffset(memrefType); - // reuse the storage of ConstStridesAttr since strides from + // reuse the storage of ConstStridesAttr since strides from // memref is not persistant setConstStrides(strides); attr = getConstStridesAttr(); return attr; } - /// Return the expected rank of each of the`static_offsets`, + /// Return the expected rank of each of the`static_offsets`, /// `static_shape` and `static_strides` attributes. std::array getArrayAttrMaxRanks() { unsigned rank; @@ -204,8 +203,8 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface } return {rank, rank, rank}; } - - /// Return the number of leading operands before the `offsets`, + + /// Return the number of leading operands before the `offsets`, /// `shape` and `strides` operands. 
static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; } @@ -214,15 +213,15 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface } def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> { - let summary = "prefetches a nD block to cache"; + let summary = "prefetches a n-D block to cache"; let description = [{ - It issues an instruction to prefetch the data from memory to each - level of the cache based on their cache policy. + It issues an instruction to prefetch a block of data from continuous + memory regions to each level of the cache based on their cache policy. Example: ``` - xegpu.prefetch_nd %tdesc {l1_hint = #xegpu.cache_hint, - l2_hint = #xegpu.cache_hint, + xegpu.prefetch_nd %tdesc {l1_hint = #xegpu.cache_hint, + l2_hint = #xegpu.cache_hint, l3_hint = #xegpu.cache_hint} : !xegpu.tensor_desc<8x16xf16> ``` @@ -233,34 +232,41 @@ def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> { OptionalAttr: $l1_hint, OptionalAttr: $l2_hint, OptionalAttr: $l3_hint); - - let extraClassDeclaration = extraBaseClassDeclaration; + + let extraClassDeclaration = extraBaseClassDeclaration # [{ + xegpu::TensorDescType getTensorDescType() { + return getTensorDesc().getType(); + } + }]; let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc))"; + + let hasVerifier = 1; } -def XeGPU_LoadNdOp : XeGPU_Op<"load_nd"> { - let summary = "loads a n-D block from memory (represented by TensorDesc)" +def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [AllElementTypesMatch<["value", "TensorDesc"]>, + AllElementCountsMatch<["value", "TensorDesc"]>]> { + let summary = "loads a n-D block from memory (represented by TensorDesc)" "to registers (represented by vector)"; let description = [{ - LoadNdOp essentially mimics the hardware block read instruction to read - a block of data from memory to register. It takes a set of optional cache - hints for each level of cache, L1, L2 and L3. 
If hardware does not have a + LoadNdOp essentially mimics the hardware block read instruction to read + a block of data from memory to register. It takes a set of optional cache + hints for each level of cache, L1, L2 and L3. If hardware does not have a correspoding cache, Corresponding cache hint attribute will be masked. - vnni transform is an hardware feature for Intel GPU, which is used to - do data packing during the load for B operand of matrix operation, if - the bit width of the data type is less then 32 bits, e.g., fp16. And + vnni transform is an hardware feature for Intel GPU, which is used to + do data packing during the load for B operand of matrix operation, if + the bit width of the data type is less then 32 bits, e.g., fp16. And transpose is another Intel hardware feature, which will do transpose - operation when loading the data if the bit width of the data type is - fp32 or fp64. It implies that vnni and transpose cannot exit at the + operation when loading the data if the bit width of the data type is + fp32 or fp64. It implies that vnni and transpose cannot exit at the same time. Example: ``` xegpu.load_nd %1 {transpose = [1, 0], - l1_hint = #xegpu.cache_hint, - l2_hint = #xegpu.cache_hint, + l1_hint = #xegpu.cache_hint, + l2_hint = #xegpu.cache_hint, l3_hint = #xegpu.cache_hint} : !xegpu.tensor_desc<8x16xf32> -> vector<16x8xf32> ``` @@ -291,20 +297,21 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd"> { let hasVerifier = 1; } -def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", []> { +def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [AllShapesMatch<["value", "TensorDesc"]>, + AllElementTypesMatch<["value", "TensorDesc"]>]> { let summary = "stores a n-D block register region back to memory, currently only supports 2D"; let description = [{ StoreNdOp essentially mimics the hardware block write instruction io - write a block of data from register into the memory region as described - by the TensorDesc. 
It takes a set of optional cache hints for each level - of cache, L1, L2 and L3. If hardware does not have a correspoding cache, + write a block of data from register into the memory region as described + by the TensorDesc. It takes a set of optional cache hints for each level + of cache, L1, L2 and L3. If hardware does not have a correspoding cache, Corresponding cache hint attribute will be masked. Example: ``` xegpu.store_nd %3, %2 {l1_hint = #xegpu.cache_hint, - l2_hint = #xegpu.cache_hint, + l2_hint = #xegpu.cache_hint, l3_hint = #xegpu.cache_hint} : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16> ``` @@ -318,11 +325,342 @@ def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", []> { OptionalAttr: $l2_hint, OptionalAttr: $l3_hint); - let extraClassDeclaration = extraBaseClassDeclaration; + let extraClassDeclaration = extraBaseClassDeclaration # [{ + VectorType getValueType() { + return llvm::dyn_cast(getValue().getType()); + } - let assemblyFormat = [{$value `,` $TensorDesc prop-dict attr-dict + xegpu::TensorDescType getTensorDescType() { + return getTensorDesc().getType(); + } + }]; + + let assemblyFormat = [{$value `,` $TensorDesc prop-dict attr-dict `:` type($value) `,` qualified(type($TensorDesc))}]; let hasVerifier = 1; } +def XeGPU_UpdateNdOffsetOp : XeGPU_Op<"update_nd_offset", + [AllTypesMatch<["TensorDesc", "result"]>]> { + let summary = "It updates the offsets for the TensorDesc."; + let description = [{The op updates the offset of the given TensorDesc. + The offsets are relative offset to the current position in the number + of elements. It will result in a same type TensorDesc as the input. 
+ + example: + ``` + %2 = xegpu.update_nd_offset %1, [0, 16]: !xegpu.tensor_desc<8x16xf32> + ``` + }]; + + let arguments = (ins + XeGPU_TensorDesc: $TensorDesc, + Variadic: $offsets, + DenseI64ArrayAttr: $const_offsets); + + let results = (outs XeGPU_TensorDesc: $result); + + let extraClassDeclaration = extraBaseClassDeclaration # [{ + xegpu::TensorDescType getTensorDescType() { + return getTensorDesc().getType(); + } + + SmallVector getMixedOffsets() { + Builder b(getContext()); + return getMixedValues(getConstOffsets(), getOffsets(), b); + } + + size_t getNumOffsets() { + return getMixedOffsets().size(); + } + + OpFoldResult getOffset(unsigned idx) { + assert(idx < getNumOffsets() && "Invalid out of bound access."); + return getMixedOffsets()[idx]; + } + }]; + + let assemblyFormat = [{ + $TensorDesc `,` + custom($offsets, $const_offsets) + attr-dict `:` qualified(type($result)) + }]; + + let hasVerifier = 1; +} + +def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> { + let summary = "create scattered tensor descriptors (TensorDesc)."; + let description = [{ + "create_tdesc" is similar to "create_nd_tdesc" in that it creates + a Tensor Descriptor (TensorDescType) for a memory region. While "create_nd_tdesc" + is for creating continuous subviews, "create_tdesc" is for creating non-continuous + (scattered) subviews, allowing each work-item in a subgroup to specify its own offset. + It accepts the following parameters: + + * source: a 1D memref or pointer (uint64_t) representing the flattened memory object. + * offsets: an array containing the offsets of each access point. Its size + is fixed to the hardware-supported subgroup size, e.g., 16 on PVC, + implying each element in the array corresponds to a work-item (SIMT lane) + in the subgroup. + * chunk_size: [optional attribute] indicates the number of contiguous + elements accessed for each offset; the default is 1. + + Example 1.
It assumes subgroup size is 4, and accesses a[0], a[16], a[32], a[64] + ``` + %a = memref.alloc() : memref<1024xf32> + %1 = xegpu.create_tdesc %a[0, 16, 32, 64]: memref<1024xf32> -> TensorDesc<4xf32> + ``` + + Example 2. It assumes subgroup size is 4, and each workitem access 8 elements. + It will access totally 32 data elements: a[0:7], a[16:23], a[32:39], a[64:71] + ``` + %0 = memref.alloc() : memref<1024xf32> + %1 = xegpu.create_tdesc %0[0, 16, 32, 64] {chunk_size = 8}: memref<1024xf32> -> TensorDesc<4x8xf32> + ``` + + Example 3. It is similar to Example 2, but there is some overlaps among workitems. + It accesses: a[0:7], a[4:11], a[8:15], a[12:19] + ``` + %0 = memref.alloc() : memref<1024xf32> + %1 = xegpu.create_tdesc %0[0, 4, 8, 12] {chunk_size = 8}: memref<1024xf32> -> TensorDesc<4x8xf32> + ``` + + + + + }]; + + let arguments = (ins XeGPU_BaseAddrType: $source, + Variadic: $offsets, + DenseI64ArrayAttr: $const_offsets, + DefaultValuedAttr: $chunk_size); + let results = (outs XeGPU_TensorDesc:$TensorDesc); + + let builders = [ + OpBuilder<(ins "xegpu::TensorDescType": $TensorDesc, "Value": $source, + "llvm::ArrayRef": $offsets, + CArg<"uint32_t", "1"> : $chunk_size)>, + ]; + + let assemblyFormat = [{ + $source + custom($offsets, $const_offsets) + attr-dict `:` type($source) `->` qualified(type($TensorDesc)) + }]; + + let extraClassDeclaration = extraBaseClassDeclaration # [{ + xegpu::TensorDescType getTensorDescType() { + return getTensorDesc().getType(); + } + + SmallVector getMixedOffsets() { + Builder b(getContext()); + return getMixedValues(getConstOffsets(), getOffsets(), b); + } + + size_t getNumOffsets() { + return getMixedOffsets().size(); + } + + mlir::Value getViewSource() { return getSource(); } + + OpFoldResult getOffset(unsigned idx) { + assert(idx < getNumOffsets() && "Invalid out of bound access."); + return getMixedOffsets()[idx]; + } + }]; + + let hasVerifier = 1; +} + +def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", []> { + let summary = 
"prefetches a set of scattered data points to cache"; + + let description = [{ + It issues instructions to prefetch a set of scattered data points + from memory to each level of the cache based on their cache policy. + As compared to prefetch_nd, which works on non-scattered TensorDesc, + it works on scattered TensorDesc instead. + + Example: + ``` + xegpu.prefetch %tdesc {l1_hint = #xegpu.cache_hint, + l2_hint = #xegpu.cache_hint, + l3_hint = #xegpu.cache_hint} + : !xegpu.tensor_desc<16xf16> + ``` + + }]; + + let arguments = (ins XeGPU_TensorDesc: $TensorDesc, + OptionalAttr: $l1_hint, + OptionalAttr: $l2_hint, + OptionalAttr: $l3_hint); + + let extraClassDeclaration = extraBaseClassDeclaration # [{ + xegpu::TensorDescType getTensorDescType() { + return getTensorDesc().getType(); + } + }]; + + let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc))"; + + let hasVerifier = 1; +} + +def XeGPU_LoadGatherOp : XeGPU_Op<"load", [AllRanksMatch<["value", "TensorDesc"]>, + AllElementTypesMatch<["value", "TensorDesc"]>, + AllElementCountsMatch<["value", "TensorDesc"]>]> { + let summary = "load a set of scattered data points from memory."; + + let description = [{ It (aka. load) load data per each work-item. The output + describes the data being loaded at the subgroup level, so its size is + consistent with the number of work-items in a subgroup. When `chunk_size_per_lane` + attribute is larger than 1 in TensorDesc, the output vector will be 2D vector, + with dim-1 correspoding to the chunk size. + + The mask operand masks out memory access so that it is safe to pass out-of-boundary + addresses/offsets as long as they are masked. It applies to slots of SIMD lanes. 
+ + Example: + ``` + %2 = xegpu.load %1, %0 {transpose = [1, 0], + l1_hint = #xegpu.cache_hint, + l2_hint = #xegpu.cache_hint, + l3_hint = #xegpu.cache_hint} + : !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr>, vector<16xi1> + -> vector<16xf32> + ``` + + }]; + + let arguments = (ins XeGPU_TensorDesc: $TensorDesc, + XeGPU_MaskType: $mask, + OptionalAttr: $transpose, + OptionalAttr: $l1_hint, + OptionalAttr: $l2_hint, + OptionalAttr: $l3_hint); + let results = (outs XeGPU_ValueType: $value); + + let extraClassDeclaration = extraBaseClassDeclaration # [{ + xegpu::TensorDescType getTensorDescType() { + return getTensorDesc().getType(); + } + + mlir::Type getElementType() { + auto type = getValue().getType(); + return getElementTypeOrSelf(type); + } + + Type getValueType() { + return getValue().getType(); + } + + Type getMaskType() { + return getMask().getType(); + } + + }]; + + let assemblyFormat = [{$TensorDesc `,` $mask prop-dict attr-dict + `:` qualified(type($TensorDesc)) `,` type($mask) `->` type($value)}]; + + let hasVerifier = 1; +} + +def XeGPU_StoreScatterOp : XeGPU_Op<"store", [AllShapesMatch<["value", "TensorDesc"]>, + AllElementTypesMatch<["value", "TensorDesc"]>]> { + let summary = "store data to scattered memory locations."; + let description = [{ It (aka. store) stores data to scattered memory locations. + It has similar semantic to `load_gather`. 
+ + Example: + ``` + xegpu.store %0, %1, %2 {l1_hint = #xegpu.cache_hint, + l2_hint = #xegpu.cache_hint, + l3_hint = #xegpu.cache_hint} + : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr>, vector<16xi1> + ``` + }]; + + let arguments = (ins + XeGPU_ValueType: $value, + XeGPU_TensorDesc: $TensorDesc, + XeGPU_MaskType: $mask, + OptionalAttr: $l1_hint, + OptionalAttr: $l2_hint, + OptionalAttr: $l3_hint); + + let extraClassDeclaration = extraBaseClassDeclaration # [{ + xegpu::TensorDescType getTensorDescType() { + return getTensorDesc().getType(); + } + + Type getValueType() { + return getValue().getType(); + } + + Type getMaskType() { + return getMask().getType(); + } + }]; + + let assemblyFormat = [{$value `,` $TensorDesc `,` $mask prop-dict attr-dict + `:` type($value) `,` qualified(type($TensorDesc)) `,` type($mask)}]; + + let hasVerifier = 1; +} + +def XeGPU_UpdateOffsetOp: XeGPU_Op<"update_offset", + [AllTypesMatch<["TensorDesc", "result"]>]> { + let summary = "It updates the offsets for the given tensor descriptor"; + + let description = [{It behaves similarly to `update_nd_offset` in that + it updates the offsets of a TensorDesc, and the offsets are relative to + the current position, in number of elements. However, `update_nd_offset` + updates the start point of a 2D block, so its offsets contain two + elements representing the shift in each dimension. `update_offset` + updates the offset per work-item, so its offsets contain values representing + shifts for each work-item.
+ + Example: + ``` + %2 = xegpu.update_offset %1, [32, 32, 32, 32] + : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + ``` + }]; + + let arguments = (ins XeGPU_TensorDesc: $TensorDesc, + Variadic: $offsets, + DenseI64ArrayAttr: $const_offsets); + let results = (outs XeGPU_TensorDesc: $result); + + let extraClassDeclaration = extraBaseClassDeclaration # [{ + xegpu::TensorDescType getTensorDescType() { + return getTensorDesc().getType(); + } + + SmallVector getMixedOffsets() { + Builder b(getContext()); + return getMixedValues(getConstOffsets(), getOffsets(), b); + } + + size_t getNumOffsets() { + return getMixedOffsets().size(); + } + + OpFoldResult getOffset(unsigned idx) { + assert(idx < getNumOffsets() && "Invalid out of bound access."); + return getMixedOffsets()[idx]; + } + }]; + + let assemblyFormat = [{ + $TensorDesc `,` + custom($offsets, $const_offsets) + attr-dict `:` qualified(type($TensorDesc)) + }]; +} + #endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td index 19ac1693712dd8..4cd4e5411653c1 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td @@ -34,10 +34,10 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", [ShapedTypeInterface], "::mlir::TensorType"> { let summary = "TensorDesc describing regions of interested data."; let description = [{ - TensorDesc is a type designed to describe regions of the interested data as well as some - features that are unique to Intel hardware. Different with the builtin tensor type in MLIR, - it essentially only contains the meta data, and doesn't hold the data by itself. It is designed - to mainly support 2D block load/store and DPAS (matrix multiplication instruction) on Intel GPU. + TensorDesc is a type designed to describe regions of the interested data as well as some + features that are unique to Intel hardware. 
Different with the builtin tensor type in MLIR, + it essentially only contains the meta data, and doesn't hold the data by itself. It is designed + to mainly support 2D block load/store and DPAS (matrix multiplication instruction) on Intel GPU. It encodes the following information: * shape: the sizes/shape of the intereted data block, e.g., 8x16 means 8 rows @@ -46,15 +46,15 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", is set or not. * element_type: the data type of the data element, e.g., f16, f32. - Similar to the builtin tensor, it also provides an optinal attribute to encoding + Similar to the builtin tensor, it also provides an optinal attribute to encoding the following information via the TensorDescAttr object: - * memory_scope (xegpu::MemoryScope): [optional] where the data is located, + * memory_scope (xegpu::MemoryScope): [optional] where the data is located, global memory or shared memory. It is default to Global. * array_length (int): [optional] The number of contiguous blocks with size as `shape`, that will be loaded by block load at a time. It is default to 1. - * boundary_check (bool): [optional] indicates whether the operation detects the boundary + * boundary_check (bool): [optional] indicates whether the operation detects the boundary and pads with zero for out-of-boundary access. It is default to do boundary check. - + Syntax: @@ -63,7 +63,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", element-type ::= float-type | integer-type | index-type dim-list := (static-dim-list `x`)? static-dim-list ::= decimal-literal `x` decimal-literal - attr-list = (, memory_scope = value)? (, arr_len = value)? (, boundary_check = value)? + attr-list = (, memory_scope = value)? (, arr_len = value)? (, boundary_check = value)? (, scattered = value)? 
``` Examples: @@ -84,6 +84,17 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", "mlir::Type": $elementType, OptionalParameter<"mlir::Attribute">: $encoding); + let builders = [ + TypeBuilderWithInferredContext<(ins + "llvm::ArrayRef": $shape, + "mlir::Type": $elementType, + CArg<"bool", "false">: $scattered, + CArg<"int", "1">: $array_length, + CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope, + CArg<"bool", "true">: $boundary_check + )> + ]; + let extraClassDeclaration = [{ using TensorType::clone; using mlir::ShapedType::Trait::getElementTypeBitWidth; @@ -116,7 +127,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", if (attr && attr.getArrayLength()) return attr.getArrayLength().getInt(); // return default value - return 1; + return 1; } bool getBoundaryCheck() { @@ -126,10 +137,18 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", // return default value return true; } + + bool getScattered() { + auto attr = getEncodingAsTensorDescAttr(); + if (attr && attr.getScattered()) + return attr.getScattered().getValue(); + // return default value + return false; + } }]; let hasCustomAssemblyFormat = true; - + } #endif // MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp index 03e578136e5901..4a15976d40c763 100644 --- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp +++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp @@ -1289,13 +1289,14 @@ struct AngleOpConversion : public OpConversionPattern { ConversionPatternRewriter &rewriter) const override { auto loc = op.getLoc(); auto type = op.getType(); + arith::FastMathFlagsAttr fmf = op.getFastMathFlagsAttr(); Value real = rewriter.create(loc, type, adaptor.getComplex()); Value imag = rewriter.create(loc, type, adaptor.getComplex()); - rewriter.replaceOpWithNewOp(op, imag, real); + 
rewriter.replaceOpWithNewOp(op, imag, real, fmf); return success(); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index 25785653a71675..df61381432921b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -1412,10 +1412,11 @@ static SmallVector getTiledPackShape(tensor::PackOp packOp, /// Create a TransferReadOp from `source` with static shape `readShape`. If the /// vector type for the read is not the same as the type of `source`, then a -/// mask is created on the read. +/// mask is created on the read. If `doMasking` parameter is set to false we +/// update the `inBounds` attribute instead of masking. static Value createReadOrMaskedRead(OpBuilder &builder, Location loc, Value source, ArrayRef readShape, - Value padValue) { + Value padValue, bool doMasking = true) { assert(llvm::none_of(readShape, [](int64_t s) { return s == ShapedType::kDynamic; })); auto sourceShape = dyn_cast(source.getType()).getShape(); @@ -1424,14 +1425,21 @@ static Value createReadOrMaskedRead(OpBuilder &builder, Location loc, auto vectorType = VectorType::get(readShape, padValue.getType()); int64_t readRank = readShape.size(); auto zero = builder.create(loc, 0); + SmallVector inBoundsVal(readRank, true); + if (!doMasking) { + // Update the inBounds attribute. 
+ for (unsigned i = 0; i < readRank; i++) + inBoundsVal[i] = sourceShape[i] == readShape[i]; + } auto transferReadOp = builder.create( loc, /*vectorType=*/vectorType, /*source=*/source, /*indices=*/SmallVector(readRank, zero), /*padding=*/padValue, - /*inBounds=*/SmallVector(readRank, true)); - if (llvm::equal(readShape, sourceShape)) { + /*inBounds=*/inBoundsVal); + + if (llvm::equal(readShape, sourceShape) || !doMasking) { return transferReadOp; } SmallVector mixedSourceDims = @@ -1482,11 +1490,10 @@ static Operation *createWriteOrMaskedWrite(OpBuilder &builder, Location loc, return write; } -/// Vectorize tensor::PackOp with (1) static innerTiles and (2) constant -/// padding value into: +/// Vectorize tensor::PackOp with (1) static innerTiles (2) constant +/// padding value and (3) input vector sizes into: /// masked_transfer_read->shape_cast->transpose->transfer_write_in_bounds /// As in the following example: -/// /// %pack = tensor.pack %src inner_dims_pos = [2, 1] inner_tiles = [16, 2] /// into %dst : tensor<32x8x16xf32> -> tensor<32x4x1x16x2xf32> /// @@ -1505,6 +1512,10 @@ static Operation *createWriteOrMaskedWrite(OpBuilder &builder, Location loc, /// %empty[%c0_0, %c0_0, %c0_0, %c0_0, %c0_0] /// {in_bounds = [true, true, true, true, true]} /// : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32> +/// +/// If the (3) input vector sizes are not provided, the vector sizes are +/// determined by the result tensor shape. Also, we update the inBounds +/// attribute instead of masking. static LogicalResult vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, ArrayRef inputVectorSizes, @@ -1525,6 +1536,16 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, (void)status; // prevent unused variable warning on non-assert builds. assert(succeeded(status) && "failed to reify result shapes"); + // If the input vector sizes are not provided, then the vector sizes are + // determined by the result tensor shape. 
In case the vector sizes aren't + // provided, we update the inBounds attribute instead of masking. + bool doMasking = true; + if (inputVectorSizes.empty()) { + ArrayRef resultTensorShape = packOp.getDestType().getShape(); + inputVectorSizes = resultTensorShape.take_front(packOp.getSourceRank()); + doMasking = false; + } + // Create masked TransferReadOp. SmallVector inputShape(inputVectorSizes); auto innerTiles = packOp.getStaticInnerTiles(); @@ -1536,7 +1557,7 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, for (auto [idx, size] : enumerate(innerTiles)) inputShape[innerDimsPos[idx]] *= size; auto maskedRead = createReadOrMaskedRead(rewriter, loc, packOp.getSource(), - inputShape, padValue); + inputShape, padValue, doMasking); // Create ShapeCastOp. SmallVector destShape(inputVectorSizes); @@ -1763,7 +1784,7 @@ vectorizeDynamicLinalgOpPrecondition(linalg::LinalgOp op, /// Returns success if `inputVectorSizes` is a valid masking configuraion for /// given `shape`, i.e., it meets: /// 1. The numbers of elements in both array are equal. -/// 2. `inputVectorSizes` does nos have dynamic dimensions. +/// 2. `inputVectorSizes` does not have dynamic dimensions. /// 3. All the values in `inputVectorSizes` are greater than or equal to /// static sizes in `shape`. static LogicalResult @@ -1881,18 +1902,25 @@ static LogicalResult vectorizeLinalgOpPrecondition( return success(); } -/// TODO: Use a matcher to check for a constant padding value. 
static LogicalResult vectorizePackOpPrecondition(tensor::PackOp packOp, ArrayRef inputVectorSizes) { auto padValue = packOp.getPaddingValue(); - if (padValue && !padValue.getDefiningOp()) { + Attribute cstAttr; + if (padValue && !matchPattern(padValue, m_Constant(&cstAttr))) { LDBG("pad value is not constant: " << packOp << "\n"); return failure(); } - ArrayRef resultTensorShape = packOp.getDestType().getShape(); - if (failed(isValidMaskedInputVector( + bool satisfyEmptyCond = true; + if (inputVectorSizes.empty()) { + if (!packOp.getDestType().hasStaticShape() || + !packOp.getSourceType().hasStaticShape()) + satisfyEmptyCond = false; + } + + if (!satisfyEmptyCond && + failed(isValidMaskedInputVector( resultTensorShape.take_front(packOp.getSourceRank()), inputVectorSizes))) return failure(); diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index e9058394d33da5..516b0943bdcfac 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -30,6 +30,14 @@ #include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.cpp.inc" #include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrEnums.cpp.inc" +// Forward declarations, following custom print/parsing methods are referenced +// by the generated code for SparseTensorTypes.td. +static mlir::ParseResult parseLevelRange(mlir::AsmParser &, + mlir::sparse_tensor::Level &, + mlir::sparse_tensor::Level &); +static void printLevelRange(mlir::AsmPrinter &, mlir::sparse_tensor::Level, + mlir::sparse_tensor::Level); + #define GET_TYPEDEF_CLASSES #include "mlir/Dialect/SparseTensor/IR/SparseTensorTypes.cpp.inc" @@ -1953,6 +1961,108 @@ LogicalResult SortOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// Sparse Tensor Iteration Operations. 
+//===----------------------------------------------------------------------===// + +IterSpaceType IteratorType::getIterSpaceType() const { + return IterSpaceType::get(getContext(), getEncoding(), getLoLvl(), + getHiLvl()); +} + +IteratorType IterSpaceType::getIteratorType() const { + return IteratorType::get(getContext(), getEncoding(), getLoLvl(), getHiLvl()); +} + +/// Parses a level range in the form "$lo `to` $hi" +/// or simply "$lo" if $hi - $lo = 1 +static ParseResult parseLevelRange(AsmParser &parser, Level &lvlLo, + Level &lvlHi) { + if (parser.parseInteger(lvlLo)) + return failure(); + + if (succeeded(parser.parseOptionalKeyword("to"))) { + if (parser.parseInteger(lvlHi)) + return failure(); + } else { + lvlHi = lvlLo + 1; + } + + if (lvlHi <= lvlLo) + parser.emitError(parser.getNameLoc(), + "expect larger level upper bound than lower bound"); + + return success(); +} + +/// Parses a level range in the form "$lo `to` $hi" +/// or simply "$lo" if $hi - $lo = 1 +static ParseResult parseLevelRange(OpAsmParser &parser, IntegerAttr &lvlLoAttr, + IntegerAttr &lvlHiAttr) { + Level lvlLo, lvlHi; + if (parseLevelRange(parser, lvlLo, lvlHi)) + return failure(); + + lvlLoAttr = IntegerAttr::get(parser.getBuilder().getIndexType(), lvlLo); + lvlHiAttr = IntegerAttr::get(parser.getBuilder().getIndexType(), lvlHi); + return success(); +} + +/// Prints a level range in the form "$lo `to` $hi" +/// or simply "$lo" if $hi - $lo = 1 +static void printLevelRange(AsmPrinter &p, Level lo, Level hi) { + + if (lo + 1 == hi) + p << lo; + else + p << lo << " to " << hi; +} + +/// Prints a level range in the form "$lo `to` $hi" +/// or simply "$lo" if $hi - $lo = 1 +static void printLevelRange(OpAsmPrinter &p, Operation *, IntegerAttr lvlLo, + IntegerAttr lvlHi) { + unsigned lo = lvlLo.getValue().getZExtValue(); + unsigned hi = lvlHi.getValue().getZExtValue(); + printLevelRange(p, lo, hi); +} + +LogicalResult ExtractIterSpaceOp::inferReturnTypes( + MLIRContext *ctx, std::optional 
loc, ValueRange ops, + DictionaryAttr attr, OpaqueProperties prop, RegionRange region, + SmallVectorImpl &ret) { + + ExtractIterSpaceOp::Adaptor adaptor(ops, attr, prop, region); + SparseTensorType stt = getSparseTensorType(adaptor.getTensor()); + ret.push_back(IterSpaceType::get(ctx, stt.getEncoding(), adaptor.getLoLvl(), + adaptor.getHiLvl())); + return success(); +} + +LogicalResult ExtractIterSpaceOp::verify() { + if (getLoLvl() >= getHiLvl()) + return emitOpError("expected smaller level low than level high"); + + TypedValue pIter = getParentIter(); + if ((pIter && getLoLvl() == 0) || (!pIter && getLoLvl() != 0)) { + return emitOpError( + "parent iterator should be specified iff level lower bound equals 0"); + } + + if (pIter) { + IterSpaceType spaceTp = getResultSpace().getType(); + if (pIter.getType().getEncoding() != spaceTp.getEncoding()) + return emitOpError( + "mismatch in parent iterator encoding and iteration space encoding."); + + if (spaceTp.getLoLvl() != pIter.getType().getHiLvl()) + return emitOpError("parent iterator should be used to extract an " + "iteration space from a consecutive level."); + } + + return success(); +} + /// Materialize a single constant operation from a given attribute value with /// the desired resultant type. 
Operation *SparseTensorDialect::materializeConstant(OpBuilder &builder, diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index 0b3f4b9c9dbeae..24719fe748fe4f 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -32,6 +32,17 @@ void XeGPUDialect::initialize() { //===----------------------------------------------------------------------===// // XeGPU_TensorDescAttr //===----------------------------------------------------------------------===// +TensorDescAttr TensorDescAttr::get(mlir::MLIRContext *context, + xegpu::MemoryScope memory_scope, + int array_length, bool boundary_check, + bool scattered) { + auto scopeAttr = MemoryScopeAttr::get(context, memory_scope); + auto lengthAttr = + IntegerAttr::get(IntegerType::get(context, 64), array_length); + auto boundaryAttr = BoolAttr::get(context, boundary_check); + auto scatteredAttr = BoolAttr::get(context, scattered); + return Base::get(context, scopeAttr, lengthAttr, boundaryAttr, scatteredAttr); +} //===----------------------------------------------------------------------===// // XeGPU_TensorDescType @@ -96,6 +107,16 @@ void TensorDescType::print(::mlir::AsmPrinter &printer) const { printer << ">"; } +TensorDescType TensorDescType::get(llvm::ArrayRef shape, + mlir::Type elementType, bool scattered, + int array_length, MemoryScope memory_scope, + bool boundary_check) { + auto context = elementType.getContext(); + auto attr = TensorDescAttr::get(context, memory_scope, array_length, + boundary_check, scattered); + return Base::get(context, shape, elementType, attr); +} + } // namespace xegpu } // namespace mlir diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index 02106f221f3233..530c50ef74f7a0 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -9,6 +9,9 @@ #include "mlir/Dialect/Utils/StaticValueUtils.h" #include 
"mlir/Dialect/XeGPU/IR/XeGPU.h" #include "mlir/IR/Builders.h" +#include "mlir/IR/TypeUtilities.h" + +#include "llvm/Support/Debug.h" #define DEBUG_TYPE "xegpu" @@ -16,8 +19,8 @@ namespace mlir { namespace xegpu { static void transpose(llvm::ArrayRef trans, - std::vector &shape) { - std::vector old = shape; + SmallVector &shape) { + SmallVector old = shape; for (size_t i = 0; i < trans.size(); i++) shape[i] = old[trans[i]]; } @@ -38,6 +41,38 @@ static std::string makeString(T array, bool breakline = false) { return buf; } +static SmallVector getShapeOf(Type type) { + SmallVector shape; + if (auto ty = llvm::dyn_cast(type)) + shape = SmallVector(ty.getShape()); + else + shape.push_back(1); + return shape; +} + +static int64_t getRankOf(Value val) { + auto type = val.getType(); + if (auto ty = llvm::dyn_cast(type)) + return ty.getRank(); + return 0; +} + +static bool isReadHintOrNone(const CachePolicyAttr &attr) { + if (!attr) + return true; + auto kind = attr.getValue(); + return kind == CachePolicy::CACHED || kind == CachePolicy::UNCACHED || + kind == CachePolicy::STREAMING || kind == CachePolicy::READ_INVALIDATE; +} + +static bool isWriteHintOrNone(const CachePolicyAttr &attr) { + if (!attr) + return true; + auto kind = attr.getValue(); + return kind == CachePolicy::CACHED || kind == CachePolicy::UNCACHED || + kind == CachePolicy::WRITE_BACK || kind == CachePolicy::WRITE_THROUGH; +} + //===----------------------------------------------------------------------===// // XeGPU_CreateNdDescOp //===----------------------------------------------------------------------===// @@ -114,6 +149,29 @@ LogicalResult CreateNdDescOp::verify() { return emitOpError("TensorDesc should have the same element " "type with the source if it is a memref.\n"); + if (getType().getScattered()) + return emitOpError("Expects a non-scattered TensorDesc.\n"); + + return success(); +} + +//===----------------------------------------------------------------------===// +// XeGPU_PrefetchNdOp 
+//===----------------------------------------------------------------------===// +LogicalResult PrefetchNdOp::verify() { + auto tdescTy = getTensorDescType(); + if (tdescTy.getScattered()) + return emitOpError("Expects a non-scattered TensorDesc.\n"); + + if (!isReadHintOrNone(getL1HintAttr())) + return emitOpError("invlid l1_hint: ") << getL1HintAttr(); + + if (!isReadHintOrNone(getL2HintAttr())) + return emitOpError("invlid l2_hint: ") << getL2HintAttr(); + + if (!isReadHintOrNone(getL3HintAttr())) + return emitOpError("invlid l3_hint: ") << getL3HintAttr(); + return success(); } @@ -125,22 +183,26 @@ LogicalResult LoadNdOp::verify() { auto valueTy = getType(); if (tdescTy.getRank() != 2) - return emitOpError( - "The TensorDesc for LoadNdOp should be a 2D TensorDesc."); + return emitOpError("Expecting a 2D TensorDesc.\n"); + + if (tdescTy.getScattered()) + return emitOpError("Expects a non-scattered TensorDesc.\n"); if (!valueTy) return emitOpError("Invalid result, it should be a VectorType.\n"); - auto tdescElemTy = tdescTy.getElementType(); - auto valueElemTy = valueTy.getElementType(); + if (!isReadHintOrNone(getL1HintAttr())) + return emitOpError("invlid l1_hint: ") << getL1HintAttr(); - if (tdescElemTy != valueElemTy) - return emitOpError( - "Value should have the same element type as TensorDesc."); + if (!isReadHintOrNone(getL2HintAttr())) + return emitOpError("invlid l2_hint: ") << getL2HintAttr(); + + if (!isReadHintOrNone(getL3HintAttr())) + return emitOpError("invlid l3_hint: ") << getL3HintAttr(); auto array_len = tdescTy.getArrayLength(); - auto tdescShape = tdescTy.getShape().vec(); - auto valueShape = valueTy.getShape().vec(); + auto tdescShape = getShapeOf(tdescTy); + auto valueShape = getShapeOf(valueTy); if (getTranspose()) { auto trans = getTranspose().value(); @@ -174,26 +236,174 @@ LogicalResult LoadNdOp::verify() { // XeGPU_StoreNdOp //===----------------------------------------------------------------------===// LogicalResult 
StoreNdOp::verify() { - auto dstTy = getTensorDesc().getType(); // Tile - auto valTy = getValue().getType().cast(); // Vector + auto dstTy = getTensorDescType(); // Tile + auto valTy = getValueType(); // Vector if (dstTy.getRank() != 2) - return emitOpError("Expecting a 2D TensorDesc shape.\n"); + return emitOpError("Expecting a 2D TensorDesc.\n"); + + if (dstTy.getScattered()) + return emitOpError("Expects a non-scattered TensorDesc.\n"); if (!valTy) return emitOpError("Exepcting a VectorType result.\n"); - auto dstElemTy = dstTy.getElementType(); - auto valElemTy = valTy.getElementType(); + if (!isWriteHintOrNone(getL1HintAttr())) + return emitOpError("invlid l1_hint: ") << getL1HintAttr(); + + if (!isWriteHintOrNone(getL2HintAttr())) + return emitOpError("invlid l2_hint: ") << getL2HintAttr(); + + if (!isWriteHintOrNone(getL3HintAttr())) + return emitOpError("invlid l3_hint: ") << getL3HintAttr(); + + return success(); +} - if (dstElemTy != valElemTy) { - return emitOpError() << "The element type of the value should " - "match the elementtype of the TensorDesc.\n"; +//===----------------------------------------------------------------------===// +// XeGPU_UpdateNDOffsetOp +//===----------------------------------------------------------------------===// +LogicalResult UpdateNdOffsetOp::verify() { + auto ty = getTensorDescType(); + if (ty.getScattered()) + return emitOpError("Expects a non-scattered TensorDesc.\n"); + + // number of offsets specified must match the rank of the tensor descriptor + if (ty.getRank() != (int64_t)getNumOffsets()) { + return emitOpError("Invalid number of offsets."); } + return success(); +} + +//===----------------------------------------------------------------------===// +// XeGPU_CreateDescOp +//===----------------------------------------------------------------------===// +void CreateDescOp::build(OpBuilder &builder, OperationState &state, + TensorDescType TensorDesc, Value source, + llvm::ArrayRef offsets, + uint32_t chunk_size) { 
+ llvm::SmallVector staticOffsets; + llvm::SmallVector dynamicOffsets; + dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets); + build(builder, state, TensorDesc, source, dynamicOffsets, staticOffsets, + chunk_size); +} + +LogicalResult CreateDescOp::verify() { + auto tdescTy = getTensorDescType(); + auto chunkSize = getChunkSize(); + + if (getRankOf(getSource()) > 1) + return emitOpError( + "Expecting the source is a 1D memref or pointer (uint64_t)."); + + if (!tdescTy.getScattered()) + return emitOpError("Expects a scattered TensorDesc.\n"); + + SmallVector shape({(int64_t)getNumOffsets()}); + if (chunkSize != 1) + shape.push_back(chunkSize); + + auto tdescShape = getShapeOf(tdescTy); + if (shape != tdescShape) + return emitOpError("Incorrect TensorDesc shape. ") + << "Expected is " << makeString(shape) << "\n"; + + return success(); +} + +//===----------------------------------------------------------------------===// +// XeGPU_PrefetchOp +//===----------------------------------------------------------------------===// +LogicalResult PrefetchOp::verify() { + auto tdescTy = getTensorDescType(); + if (!tdescTy.getScattered()) + return emitOpError("Expects a scattered TensorDesc.\n"); + + if (!isReadHintOrNone(getL1HintAttr())) + return emitOpError("invlid l1_hint: ") << getL1HintAttr(); + + if (!isReadHintOrNone(getL2HintAttr())) + return emitOpError("invlid l2_hint: ") << getL2HintAttr(); + + if (!isReadHintOrNone(getL3HintAttr())) + return emitOpError("invlid l3_hint: ") << getL3HintAttr(); + + return success(); +} + +//===----------------------------------------------------------------------===// +// XeGPU_LoadGatherOp +//===----------------------------------------------------------------------===// +LogicalResult LoadGatherOp::verify() { + auto tdescTy = getTensorDescType(); + auto maskTy = getMaskType(); + auto valueTy = getValueType(); + + if (!tdescTy.getScattered()) + return emitOpError("Expects a scattered TensorDesc.\n"); + + if 
(!isReadHintOrNone(getL1HintAttr())) + return emitOpError("invlid l1_hint: ") << getL1HintAttr(); + + if (!isReadHintOrNone(getL2HintAttr())) + return emitOpError("invlid l2_hint: ") << getL2HintAttr(); + + if (!isReadHintOrNone(getL3HintAttr())) + return emitOpError("invlid l3_hint: ") << getL3HintAttr(); + + auto tdescElemTy = tdescTy.getElementType(); + auto valueElemTy = getElementType(); + if (tdescElemTy != valueElemTy) + return emitOpError( + "Value should have the same element type as TensorDesc."); + + auto maskShape = getShapeOf(maskTy); + auto valueShape = getShapeOf(valueTy); + auto tdescShape = getShapeOf(tdescTy); + + if (tdescShape[0] != maskShape[0]) + return emitOpError("dim-0 of the Mask and TensorDesc should be the same."); + + if (getTransposeAttr()) { + auto trans = getTranspose().value(); + if (tdescShape.size() < trans.size()) + emitWarning("Invalid transpose attr. It is ignored."); + else + transpose(trans, tdescShape); + } + + if (valueShape != tdescShape) + return emitOpError("Unexpected result shape") + << "(Expected shape: " << makeString(tdescShape) + << ", Given shape: " << makeString(valueShape) << ").\n"; + + return success(); +} + +//===----------------------------------------------------------------------===// +// XeGPU_StoreScatterOp +//===----------------------------------------------------------------------===// +LogicalResult StoreScatterOp::verify() { + auto tdescTy = getTensorDescType(); + if (!tdescTy.getScattered()) + return emitOpError("Expects a scattered TensorDesc.\n"); + + if (!isWriteHintOrNone(getL1HintAttr())) + return emitOpError("invlid l1_hint: ") << getL1HintAttr(); + + if (!isWriteHintOrNone(getL2HintAttr())) + return emitOpError("invlid l2_hint: ") << getL2HintAttr(); + + if (!isWriteHintOrNone(getL3HintAttr())) + return emitOpError("invlid l3_hint: ") << getL3HintAttr(); + + auto maskTy = getMaskType(); + auto maskShape = getShapeOf(maskTy); + auto tdescShape = getShapeOf(tdescTy); + if (tdescShape[0] != 
maskShape[0]) + return emitOpError("dim-0 of the Mask and TensorDesc should be the same."); - if (dstTy.getShape() != valTy.getShape()) - return emitOpError() - << "The result shape should match the TensorDesc shape.\n"; return success(); } diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir index fa1d564d6ad355..827ae940165c7e 100644 --- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir +++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir @@ -2187,3 +2187,16 @@ func.func @complex_tanh_nnan_ninf(%arg: complex) -> complex { // CHECK-COUNT-1: arith.select // CHECK-NOT: arith.select + +// ----- + +// CHECK-LABEL: func.func @complex_angle_with_fmf +// CHECK-SAME: %[[ARG:.*]]: complex +func.func @complex_angle_with_fmf(%arg: complex) -> f32 { + %angle = complex.angle %arg fastmath : complex + return %angle : f32 +} +// CHECK: %[[REAL:.*]] = complex.re %[[ARG]] : complex +// CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex +// CHECK: %[[RESULT:.*]] = math.atan2 %[[IMAG]], %[[REAL]] fastmath : f32 +// CHECK: return %[[RESULT]] : f32 \ No newline at end of file diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir index 9127eac5da9510..5d3c07c8e23c1e 100644 --- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir +++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir @@ -109,3 +109,20 @@ module attributes {transform.with_named_sequence} { transform.yield } } + + // ----- + +func.func @test_pack_no_vectorize_dynamic_shape(%arg0: tensor, %arg1: tensor<4x16xf32>) -> tensor<4x16xf32> { + %pad = arith.constant 0.000000e+00 : f32 + // expected-error @+1 {{Attempted to vectorize, but failed}} + %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [0] inner_tiles = [16] into %arg1 : tensor -> tensor<4x16xf32> + return %pack : tensor<4x16xf32> +} + +module 
attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + transform.structured.vectorize %0 : !transform.any_op + transform.yield + } +} diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir index fd7d3b4767eb22..80a5a4c6702ac1 100644 --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -930,3 +930,58 @@ func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest: transform.yield } } + + // ----- + +// CHECK-LABEL: test_vectorize_pack_no_vector_sizes +func.func @test_vectorize_pack_no_vector_sizes(%arg0: tensor<64x4xf32>, %arg1: tensor<2x4x16x2xf32>) -> tensor<2x4x16x2xf32> { + %pack = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %arg1 : tensor<64x4xf32> -> tensor<2x4x16x2xf32> + return %pack : tensor<2x4x16x2xf32> +} +// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index +// CHECK: %[[read:.*]] = vector.transfer_read %{{.*}}[%[[c0]], %[[c0]]], %[[cst]] +// CHECK-SAME: {in_bounds = [true, true]} : tensor<64x4xf32>, vector<64x4xf32> +// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[read]] : vector<64x4xf32> to vector<4x16x2x2xf32> +// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [2, 0, 1, 3] : vector<4x16x2x2xf32> to vector<2x4x16x2xf32> +// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<2x4x16x2xf32> +// CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]] +// CHECK-SAME: {in_bounds = [true, true, true, true]} : vector<2x4x16x2xf32>, tensor<2x4x16x2xf32> +// CHECK: return %[[write]] : tensor<2x4x16x2xf32> + +module 
attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + transform.structured.vectorize %0 : !transform.any_op + transform.yield + } +} + + // ----- + +// CHECK-LABEL: test_vectorize_padded_pack_no_vector_sizes +func.func @test_vectorize_padded_pack_no_vector_sizes(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> { + %pad = arith.constant 0.000000e+00 : f32 + %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32> + return %pack : tensor<32x4x1x16x2xf32> +} +// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index +// CHECK: %[[transfer_read:.*]] = vector.transfer_read %{{.*}}[%[[c0]], %[[c0]], %[[c0]]], %[[cst]] +// CHECK-SAME: {in_bounds = [true, false, false]} : tensor<32x7x15xf32>, vector<32x8x16xf32> +// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[transfer_read]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32> +// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32> +// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<32x4x1x16x2xf32> +// CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]] +// CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32> +// CHECK: return %[[write]] : tensor<32x4x1x16x2xf32> + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : 
(!transform.any_op) -> !transform.any_op + transform.structured.vectorize %0 : !transform.any_op + transform.yield + } +} diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir index 7f5c05190fc9a2..3fa696e1600a93 100644 --- a/mlir/test/Dialect/SparseTensor/invalid.mlir +++ b/mlir/test/Dialect/SparseTensor/invalid.mlir @@ -1012,3 +1012,85 @@ func.func @sparse_print(%arg0: tensor<10x10xf64>) { sparse_tensor.print %arg0 : tensor<10x10xf64> return } + +// ----- + +#COO = #sparse_tensor.encoding<{ + map = (i, j) -> ( + i : compressed(nonunique), + j : singleton(soa) + ) +}> + +func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 2>) { + // expected-error@+1 {{'sparse_tensor.extract_iteration_space' expect larger level upper bound than lower bound}} + %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 2 to 0 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 2> + return +} + +// ----- + +#COO = #sparse_tensor.encoding<{ + map = (i, j) -> ( + i : compressed(nonunique), + j : singleton(soa) + ) +}> + +func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 0>) { + // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op parent iterator should be specified iff level lower bound equals 0}} + %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 0 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0> + return +} + +// ----- + +#COO = #sparse_tensor.encoding<{ + map = (i, j) -> ( + i : compressed(nonunique), + j : singleton(soa) + ) +}> + +func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>) { + // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op parent iterator should be specified iff level lower bound equals 0}} + %l1 = sparse_tensor.extract_iteration_space %sp lvls = 1 : tensor<4x8xf32, #COO> + return +} + +// ----- + +#COO = 
#sparse_tensor.encoding<{ + map = (i, j) -> ( + i : compressed(nonunique), + j : singleton(soa) + ) +}> + +#CSR = #sparse_tensor.encoding<{ + map = (i, j) -> ( + i : dense, + j : compressed + ) +}> + +func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#CSR, lvls = 0>) { + // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op mismatch in parent iterator encoding and iteration space encoding.}} + %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#CSR, lvls = 0> + return +} + +// ----- + +#COO = #sparse_tensor.encoding<{ + map = (i, j) -> ( + i : compressed(nonunique), + j : singleton(soa) + ) +}> + +func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 0>) { + // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op parent iterator should be used to extract an iteration space from a consecutive level.}} + %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 2 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0> + return +} diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir index 12f69c1d37b9cd..d34071279e5129 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir @@ -738,3 +738,28 @@ func.func @sparse_has_runtime() -> i1 { %has_runtime = sparse_tensor.has_runtime_library return %has_runtime : i1 } + +// ----- + +#COO = #sparse_tensor.encoding<{ + map = (i, j) -> ( + i : compressed(nonunique), + j : singleton(soa) + ) +}> + +// CHECK-LABEL: func.func @sparse_extract_iter_space( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<4x8xf32, #sparse{{[0-9]*}}>, +// CHECK-SAME: %[[VAL_1:.*]]: !sparse_tensor.iterator<#sparse{{[0-9]*}}, lvls = 0>) +// CHECK: %[[VAL_2:.*]] = sparse_tensor.extract_iteration_space %[[VAL_0]] lvls = 0 +// CHECK: %[[VAL_3:.*]] = 
sparse_tensor.extract_iteration_space %[[VAL_0]] at %[[VAL_1]] lvls = 1 +// CHECK: return %[[VAL_2]], %[[VAL_3]] : !sparse_tensor.iter_space<#sparse{{[0-9]*}}, lvls = 0>, !sparse_tensor.iter_space<#sparse{{[0-9]*}}, lvls = 1> +// CHECK: } +func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 0>) + -> (!sparse_tensor.iter_space<#COO, lvls = 0>, !sparse_tensor.iter_space<#COO, lvls = 1>) { + // Extracting the iteration space for the first level needs no parent iterator. + %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO> + // Extracting the iteration space for the second level needs a parent iterator. + %l2 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0> + return %l1, %l2 : !sparse_tensor.iter_space<#COO, lvls = 0>, !sparse_tensor.iter_space<#COO, lvls = 1> +} diff --git a/mlir/test/Dialect/XeGPU/XeGPUOps.mlir b/mlir/test/Dialect/XeGPU/XeGPUOps.mlir index 039346adbb851c..f0945c79a94ac3 100644 --- a/mlir/test/Dialect/XeGPU/XeGPUOps.mlir +++ b/mlir/test/Dialect/XeGPU/XeGPUOps.mlir @@ -59,4 +59,66 @@ gpu.func @test_store_nd_vc(%dst: memref<24x32xf16>) { gpu.return } +// CHECK: gpu.func @test_create_update_nd_tdesc_vc(%[[arg0:.*]]: memref<24x32xf32>) { +gpu.func @test_create_update_nd_tdesc_vc(%src: memref<24x32xf32>) { + // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32> + %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK: %[[R1:.*]] = xegpu.update_nd_offset %[[REG]], [0, 16] : !xegpu.tensor_desc<8x16xf32> + %2 = xegpu.update_nd_offset %1, [0, 16]: !xegpu.tensor_desc<8x16xf32> + gpu.return +} + +// CHECK: gpu.func @test_create_tdesc_vc(%[[arg0:.*]]: ui64) { +gpu.func @test_create_tdesc_vc(%src: ui64) { + //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> 
!xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + gpu.return +} + +// CHECK: gpu.func @test_prefetch_vc(%[[arg0:.*]]: ui64) { +gpu.func @test_prefetch_vc(%src: ui64) { + //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + // CHECK: xegpu.prefetch %[[R0]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}> : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + xegpu.prefetch %1 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}>: !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + gpu.return +} + +// CHECK: gpu.func @test_load_gather_vc(%[[arg0:.*]]: ui64) { +gpu.func @test_load_gather_vc(%src: ui64) { + //CHECK: %[[cst:.*]] = arith.constant dense : vector<4xi1> + %0 = arith.constant dense<1>: vector<4xi1> + //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + //CHECK: %[[R1:.*]] = xegpu.load %[[R0]], %[[cst]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}> + //CHECK-SAME: !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1> -> vector<4x2xf32> + %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}> + : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1> -> vector<4x2xf32> + gpu.return +} + +// CHECK: gpu.func @test_store_scatter_vc(%[[arg0:.*]]: ui64) { +gpu.func @test_store_scatter_vc(%src: ui64) { + //CHECK: %[[c0:.*]] = arith.constant dense : vector<4xi1> + %0 = arith.constant dense<1>: vector<4xi1> + //CHECK: %[[c1:.*]] = arith.constant dense<2.900000e+00> : 
vector<4x2xf32> + %1 = arith.constant dense<2.9>: vector<4x2xf32> + //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + %2 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + //CHECK: xegpu.store %[[c1]], %[[R0]], %[[c0]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}> + //CHECK-SAME: vector<4x2xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1> + xegpu.store %1, %2, %0 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}> + : vector<4x2xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1> + gpu.return +} + +// CHECK: gpu.func @test_create_update_tdesc_vc(%[[arg0:.*]]: ui64) { +gpu.func @test_create_update_tdesc_vc(%src: ui64) { + //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + //CHECK: %[[R1:.*]] = xegpu.update_offset %[[R0]], [32, 32, 32, 32] : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + %2 = xegpu.update_offset %1, [32, 32, 32, 32] : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + gpu.return +} + } \ No newline at end of file diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir new file mode 100644 index 00000000000000..5e29361ec69087 --- /dev/null +++ b/mlir/test/Dialect/XeGPU/invalid.mlir @@ -0,0 +1,159 @@ +// RUN: mlir-opt %s -split-input-file -verify-diagnostics + +// ----- +func.func @test_create_nd_tdesc_vc_1(%src: memref<24xf32>) { + // expected-error@+1 {{Expecting the rank of shape, strides, offsets, source memref type (if source is a memref) and TensorDesc should match with each other. 
They currenlty are 2D.}} + %1 = xegpu.create_nd_tdesc %src[0] : memref<24xf32> -> !xegpu.tensor_desc<8x16xf32> + return +} + +// ----- + +func.func @test_create_nd_tdesc_vc_2(%src: memref<24x32xf32>) { + // expected-error@+1 {{TensorDesc should have the same element type with the source if it is a memref}} + %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf16> + return +} + +// ----- +func.func @test_prefetch_nd_vc_1(%src: memref<24x32xf16>) { + %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> + // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint}} + xegpu.prefetch_nd %1 <{l1_hint = #xegpu.cache_hint}>: !xegpu.tensor_desc<8x16xf16> + return +} + +// ----- +func.func @test_prefetch_nd_vc_2(%src: memref<24xf16>) { + %1 = xegpu.create_tdesc %src[0, 1, 2, 3, 4, 5, 6, 7] + : memref<24xf16> -> !xegpu.tensor_desc<8xf16, #xegpu.tdesc_attr> + // expected-error@+1 {{Expects a non-scattered TensorDesc}} + xegpu.prefetch_nd %1 <{l1_hint = #xegpu.cache_hint}> + : !xegpu.tensor_desc<8xf16, #xegpu.tdesc_attr> + return +} + +// ----- +func.func @test_load_nd_vc_1(%src: memref<8x16xf16>) { + %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16> + // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint}} + %2 = xegpu.load_nd %1 <{l1_hint = #xegpu.cache_hint}> + : !xegpu.tensor_desc<8x16xf16> -> vector<4x16x2xf16> + return +} + +// ----- +func.func @test_load_nd_vc_2(%src: memref<16xf16>) { + %1 = xegpu.create_tdesc %src[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2} + : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr> + // expected-error@+1 {{Expects a non-scattered TensorDesc.}} + %2 = xegpu.load_nd %1 <{l1_hint = #xegpu.cache_hint}> + : !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr> -> vector<8x2xf16> + return +} + +// ----- +func.func @test_store_nd_vc_1(%dst: memref<24x32xf16>) { + %1 = arith.constant dense<1.0>: vector<24x32xf16> + %2 = 
xegpu.create_nd_tdesc %dst[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<24x32xf16> + // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint}} + xegpu.store_nd %1, %2 <{l1_hint = #xegpu.cache_hint}>: vector<24x32xf16>, !xegpu.tensor_desc<24x32xf16> + return +} + +// ----- +func.func @test_store_nd_vc_2(%dst: memref<16xf16>) { + %1 = arith.constant dense<1.0>: vector<8x2xf16> + %2 = xegpu.create_tdesc %dst[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2} + : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr> + // expected-error@+1 {{Expects a non-scattered TensorDesc}} + xegpu.store_nd %1, %2 <{l1_hint = #xegpu.cache_hint}> + : vector<8x2xf16>, !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr> + return +} + +// ----- +func.func @test_update_nd_offset_1(%dst: memref<16xf16>) { + %1 = xegpu.create_tdesc %dst[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2} + : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr> + // expected-error@+1 {{Expects a non-scattered TensorDesc}} + xegpu.update_nd_offset %1, [0, 2] : !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr> + return +} + +// ----- +func.func @test_create_tdesc_vc_1(%src: ui64) { + // expected-error@+1 {{Expects a scattered TensorDesc}} + %1 = xegpu.create_tdesc %src[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2} + : ui64 -> !xegpu.tensor_desc<8x2xf16> + return +} + +// ----- +func.func @test_create_tdesc_vc_2(%src: ui64) { + // expected-error@+1 {{Incorrect TensorDesc shape}} + %1 = xegpu.create_tdesc %src[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2} + : ui64 -> !xegpu.tensor_desc<8x4xf16, #xegpu.tdesc_attr> + return +} + +// ----- +func.func @test_prefetch_vc_1(%src: memref<24x32xf16>) { + %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<24x32xf16> + // expected-error@+1 {{Expects a scattered TensorDesc}} + xegpu.prefetch %1 <{l1_hint = #xegpu.cache_hint}>: !xegpu.tensor_desc<24x32xf16> + return +} + +// ----- +func.func @test_prefetch_vc_2(%src: ui64) { + %1 = 
xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint}} + xegpu.prefetch %1 <{l1_hint = #xegpu.cache_hint}>: !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + return +} + +// ----- +func.func @test_load_gather_vc_1(%src: memref<24x32xf16>) { + %0 = arith.constant dense<1>: vector<4xi1> + %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<4x2xf16> + // expected-error@+1 {{Expects a scattered TensorDesc}} + %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint}> + : !xegpu.tensor_desc<4x2xf16>, vector<4xi1> -> vector<4x2xf16> + return +} + +// ----- +func.func @test_load_gather_vc_2(%src: ui64) { + %0 = arith.constant dense<1>: vector<4xi1> + %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 + -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint}} + %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint}> + : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1> + -> vector<4x2xf32> + return +} + +// ----- +func.func @test_store_scatter_vc_1(%src: memref<24x32xf32>) { + %0 = arith.constant dense<1>: vector<4xi1> + %1 = arith.constant dense<2.9>: vector<4x2xf32> + %2 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<4x2xf32> + // expected-error@+1 {{Expects a scattered TensorDesc}} + xegpu.store %1, %2, %0 <{l1_hint = #xegpu.cache_hint}> + : vector<4x2xf32>, !xegpu.tensor_desc<4x2xf32>, vector<4xi1> + return +} + +// ----- +func.func @test_store_scatter_vc_2(%src: ui64) { + %0 = arith.constant dense<1>: vector<4xi1> + %1 = arith.constant dense<2.9>: vector<4x2xf32> + %2 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} + : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint}} + xegpu.store %1, %2, %0 <{l1_hint = #xegpu.cache_hint}> : vector<4x2xf32>, + 
!xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1> + return +} \ No newline at end of file diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h index 79e8464bfda5c1..7f05464f36c1f3 100644 --- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h +++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h @@ -45,6 +45,8 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" +struct RecordReplayTy; + namespace llvm { namespace omp { namespace target { @@ -1031,6 +1033,12 @@ struct GenericPluginTy { return *RPCServer; } + /// Get a reference to the R&R interface for this plugin. + RecordReplayTy &getRecordAndReplay() const { + assert(RecordReplay && "R&R not initialized"); + return *RecordReplay; + } + /// Get the OpenMP requires flags set for this plugin. int64_t getRequiresFlags() const { return RequiresFlags; } @@ -1220,6 +1228,9 @@ struct GenericPluginTy { /// The interface between the plugin and the GPU for host services. RPCServerTy *RPCServer; + + /// The interface into the record-and-replay functionality. + RecordReplayTy *RecordReplay; }; namespace Plugin { diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp index b5f3c45c835fdb..6df9798f12e3d0 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp @@ -362,8 +362,6 @@ struct RecordReplayTy { } }; -static RecordReplayTy RecordReplay; - // Extract the mapping of host function pointers to device function pointers // from the entry table. 
Functions marked as 'indirect' in OpenMP will have // offloading entries generated for them which map the host's function pointer @@ -473,7 +471,8 @@ GenericKernelTy::getKernelLaunchEnvironment( // Ctor/Dtor have no arguments, replaying uses the original kernel launch // environment. Older versions of the compiler do not generate a kernel // launch environment. - if (isCtorOrDtor() || RecordReplay.isReplaying() || + if (isCtorOrDtor() || + GenericDevice.Plugin.getRecordAndReplay().isReplaying() || Version < OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR) return nullptr; @@ -562,6 +561,7 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs, // Record the kernel description after we modified the argument count and num // blocks/threads. + RecordReplayTy &RecordReplay = GenericDevice.Plugin.getRecordAndReplay(); if (RecordReplay.isRecording()) { RecordReplay.saveImage(getName(), getImage()); RecordReplay.saveKernelInput(getName(), getImage()); @@ -839,9 +839,6 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { delete MemoryManager; MemoryManager = nullptr; - if (RecordReplay.isRecordingOrReplaying()) - RecordReplay.deinit(); - if (RPCServer) if (auto Err = RPCServer->deinitDevice(*this)) return Err; @@ -858,6 +855,7 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { return deinitImpl(); } + Expected GenericDeviceTy::loadBinary(GenericPluginTy &Plugin, const __tgt_device_image *InputTgtImage) { @@ -892,7 +890,8 @@ GenericDeviceTy::loadBinary(GenericPluginTy &Plugin, return std::move(Err); // Setup the global device memory pool if needed. 
- if (!RecordReplay.isReplaying() && shouldSetupDeviceMemoryPool()) { + if (!Plugin.getRecordAndReplay().isReplaying() && + shouldSetupDeviceMemoryPool()) { uint64_t HeapSize; auto SizeOrErr = getDeviceHeapSize(HeapSize); if (SizeOrErr) { @@ -1307,8 +1306,8 @@ Expected GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr, TargetAllocTy Kind) { void *Alloc = nullptr; - if (RecordReplay.isRecordingOrReplaying()) - return RecordReplay.alloc(Size); + if (Plugin.getRecordAndReplay().isRecordingOrReplaying()) + return Plugin.getRecordAndReplay().alloc(Size); switch (Kind) { case TARGET_ALLOC_DEFAULT: @@ -1344,7 +1343,7 @@ Expected GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr, Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) { // Free is a noop when recording or replaying. - if (RecordReplay.isRecordingOrReplaying()) + if (Plugin.getRecordAndReplay().isRecordingOrReplaying()) return Plugin::success(); int Res; @@ -1396,6 +1395,7 @@ Error GenericDeviceTy::launchKernel(void *EntryPtr, void **ArgPtrs, ptrdiff_t *ArgOffsets, KernelArgsTy &KernelArgs, __tgt_async_info *AsyncInfo) { + RecordReplayTy &RecordReplay = Plugin.getRecordAndReplay(); AsyncInfoWrapperTy AsyncInfoWrapper( *this, RecordReplay.isRecordingOrReplaying() ? nullptr : AsyncInfo); @@ -1495,6 +1495,9 @@ Error GenericPluginTy::init() { RPCServer = new RPCServerTy(*this); assert(RPCServer && "Invalid RPC server"); + RecordReplay = new RecordReplayTy(); + assert(RecordReplay && "Invalid Record and Replay handler"); + return Plugin::success(); } @@ -1508,6 +1511,9 @@ Error GenericPluginTy::deinit() { assert(!Devices[DeviceId] && "Device was not deinitialized"); } + if (RecordReplay && RecordReplay->isRecordingOrReplaying()) + RecordReplay->deinit(); + // There is no global handler if no device is available. 
if (GlobalHandler) delete GlobalHandler; @@ -1515,6 +1521,9 @@ Error GenericPluginTy::deinit() { if (RPCServer) delete RPCServer; + if (RecordReplay) + delete RecordReplay; + // Perform last deinitializations on the plugin. return deinitImpl(); } @@ -1630,12 +1639,12 @@ int32_t GenericPluginTy::initialize_record_replay(int32_t DeviceId, isRecord ? RecordReplayTy::RRStatusTy::RRRecording : RecordReplayTy::RRStatusTy::RRReplaying; - if (auto Err = RecordReplay.init(&Device, MemorySize, VAddr, Status, - SaveOutput, ReqPtrArgOffset)) { + if (auto Err = RecordReplay->init(&Device, MemorySize, VAddr, Status, + SaveOutput, ReqPtrArgOffset)) { REPORT("WARNING RR did not intialize RR-properly with %lu bytes" "(Error: %s)\n", MemorySize, toString(std::move(Err)).data()); - RecordReplay.setStatus(RecordReplayTy::RRStatusTy::RRDeactivated); + RecordReplay->setStatus(RecordReplayTy::RRStatusTy::RRDeactivated); if (!isRecord) { return OFFLOAD_FAIL; @@ -1984,8 +1993,8 @@ int32_t GenericPluginTy::get_global(__tgt_device_binary Binary, uint64_t Size, assert(DevicePtr && "Invalid device global's address"); // Save the loaded globals if we are recording. - if (RecordReplay.isRecording()) - RecordReplay.addEntry(Name, Size, *DevicePtr); + if (getRecordAndReplay().isRecording()) + getRecordAndReplay().addEntry(Name, Size, *DevicePtr); return OFFLOAD_SUCCESS; } diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp index ac85b2b3f2fcd4..fc333765118179 100644 --- a/openmp/runtime/src/kmp_dispatch.cpp +++ b/openmp/runtime/src/kmp_dispatch.cpp @@ -2397,6 +2397,8 @@ static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last, sh->u.s.ordered_iteration = 0; } + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ + sh->buffer_index += __kmp_dispatch_num_buffers; KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n", gtid, sh->buffer_index)); diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp index 3f2ceef0c4add4..36825dbebafb51 100644 --- a/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp +++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp @@ -1,7 +1,4 @@ // RUN: %libomp-cxx-compile-and-run -// -// AIX runs out of resource in 32-bit with 4*omp_get_max_threads() threads. -// XFAIL: aix && ppc #include @@ -11,6 +8,12 @@ #include #include +// AIX runs out of resource in 32-bit if 4*omp_get_max_threads() is more +// than 64 threads with the default stack size. +#if defined(_AIX) && !__LP64__ +#define MAX_THREADS 64 +#endif + void dummy_root() { // omp_get_max_threads() will do middle initialization int nthreads = omp_get_max_threads(); @@ -18,9 +21,14 @@ void dummy_root() { } int main(int argc, char *argv[]) { - const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()), - 4 * omp_get_num_procs()), - std::numeric_limits::max()); + int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()), + 4 * omp_get_num_procs()), + std::numeric_limits::max()); + +#if defined(_AIX) && !__LP64__ + if (N > MAX_THREADS) + N = MAX_THREADS; +#endif std::vector data(N); diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp index f7405d00255cb9..1cceee95e704b8 100644 --- a/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp +++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp @@ -1,7 +1,4 @@ // RUN: %libomp-cxx-compile-and-run -// -// AIX runs out of resource in 32-bit with 4*omp_get_max_threads() threads. 
-// XFAIL: aix && ppc #include @@ -10,10 +7,21 @@ #include #include +// AIX runs out of resource in 32-bit if 4*omp_get_max_threads() is more +// than 64 threads with the default stacksize. +#if defined(_AIX) && !__LP64__ +#define MAX_THREADS 64 +#endif + int main(int argc, char *argv[]) { - const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()), - 4 * omp_get_num_procs()), - std::numeric_limits::max()); + int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()), + 4 * omp_get_num_procs()), + std::numeric_limits::max()); + +#if defined(_AIX) && !__LP64__ + if (N > MAX_THREADS) + N = MAX_THREADS; +#endif std::vector data(N); diff --git a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel index 6dfe8085b92857..1f2b5b476bcc11 100644 --- a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel @@ -702,6 +702,9 @@ cc_library( "//lldb/source/Plugins:PluginSymbolLocatorDebugSymbols", "//lldb/source/Plugins:PluginSymbolVendorMacOSX", ], + "@platforms//os:linux": [ + "//lldb/source/Plugins:PluginProcessLinux", + ], "//conditions:default": [], }), ) @@ -752,7 +755,13 @@ cc_binary( data = [ ":lldb-argdumper", ] + select({ - "@platforms//os:macos": [":debugserver"], + "@platforms//os:macos": [ + ":debugserver", + ":lldb-server", + ], + "@platforms//os:linux": [ + ":lldb-server", + ], "//conditions:default": [], }), deps = [ @@ -799,8 +808,8 @@ cc_library( ["tools/debugserver/source/**/*.cpp"], exclude = ["tools/debugserver/source/debugserver.cpp"], ), - tags = ["nobuildkite"], local_defines = ["LLDB_USE_OS_LOG"], + tags = ["nobuildkite"], deps = [ ":DebugServerCommonHeaders", ":DebugServerCommonMacOSXHeaders", @@ -852,3 +861,63 @@ cc_binary( srcs = glob(["tools/argdumper/*.cpp"]), deps = ["//llvm:Support"], ) + +gentbl_cc_library( + name = "lldb_server_opts_gen", + strip_include_prefix = ".", + tbl_outs = [( + ["-gen-opt-parser-defs"], + 
"LLGSOptions.inc", + )], + tblgen = "//llvm:llvm-tblgen", + td_file = "tools/lldb-server/LLGSOptions.td", + deps = ["//llvm:OptParserTdFiles"], +) + +cc_binary( + name = "lldb-server", + srcs = glob([ + "tools/lldb-server/*.cpp", + "tools/lldb-server/*.h", + ]), + target_compatible_with = select({ + "@platforms//os:linux": [], + "@platforms//os:macos": [], + # TODO: This can theoretically support more platforms, but it hasn't been tested yet + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [ + ":Host", + ":Initialization", + ":Utility", + ":Version", + ":lldb_server_opts_gen", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb/source/Plugins:PluginCPlusPlusLanguage", + "//lldb/source/Plugins:PluginExpressionParserClang", + "//lldb/source/Plugins:PluginInstructionARM", + "//lldb/source/Plugins:PluginInstructionARM64", + "//lldb/source/Plugins:PluginInstructionLoongArch", + "//lldb/source/Plugins:PluginInstructionMIPS", + "//lldb/source/Plugins:PluginInstructionMIPS64", + "//lldb/source/Plugins:PluginInstructionRISCV", + "//lldb/source/Plugins:PluginObjCLanguage", + "//lldb/source/Plugins:PluginProcessGDBRemote", + "//lldb/source/Plugins:PluginSymbolFileDWARF", + "//lldb/source/Plugins:PluginSymbolFileNativePDB", + "//lldb/source/Plugins:PluginSymbolFilePDB", + "//lldb/source/Plugins:PluginTypeSystemClang", + "//llvm:Option", + "//llvm:Support", + ] + select({ + "@platforms//os:linux": [ + "//lldb/source/Plugins:PluginObjectFileELF", + "//lldb/source/Plugins:PluginProcessLinux", + ], + "@platforms//os:macos": [ + "//lldb/source/Plugins:PluginObjectFileMachO", + ], + "//conditions:default": [], + }), +) diff --git a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel index bbc523f54a190d..b5f5bed1698a6b 100644 --- a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel @@ -2100,6 
+2100,25 @@ cc_library( ], ) +cc_library( + name = "PluginProcessLinux", + srcs = glob(["Process/Linux/*.cpp"]), + hdrs = glob(["Process/Linux/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessPOSIX", + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + cc_library( name = "PluginScriptedProcess", srcs = glob(["Process/scripted/*.cpp"]), diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 16b8a8023c932c..320f74efb0d149 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -11632,6 +11632,7 @@ cc_library( ":DialectUtils", ":FuncDialect", ":IR", + ":InferTypeOpInterface", ":LinalgDialect", ":MathDialect", ":Pass",