diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index fc5a07e69e9d62..5db345ecc63f09 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -41,6 +41,7 @@ add_clang_library(clangDaemon ClangdServer.cpp CodeComplete.cpp CodeCompletionStrings.cpp + CollectMacros.cpp CompileCommands.cpp Compiler.cpp Context.cpp diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 55e63c71b23eb7..9d93b8592fdcd8 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -18,6 +18,7 @@ #include "Trace.h" #include "URI.h" #include "refactor/Tweak.h" +#include "clang/Basic/Version.h" #include "clang/Tooling/Core/Replacement.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" @@ -546,7 +547,10 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, CodeAction::INFO_KIND}}}; llvm::json::Object Result{ - {{"capabilities", + {{"serverInfo", + llvm::json::Object{{"name", "clangd"}, + {"version", getClangToolFullVersion("clangd")}}}, + {"capabilities", llvm::json::Object{ {"textDocumentSync", (int)TextDocumentSyncKind::Incremental}, {"documentFormattingProvider", true}, @@ -600,6 +604,8 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, Reply(std::move(Result)); } +void ClangdLSPServer::onInitialized(const InitializedParams &Params) {} + void ClangdLSPServer::onShutdown(const ShutdownParams &Params, Callback Reply) { // Do essentially nothing, just say we're ready to exit. @@ -808,7 +814,9 @@ void ClangdLSPServer::onDocumentDidClose( // VSCode). Note that this cannot race with actual diagnostics responses // because removeDocument() guarantees no diagnostic callbacks will be // executed after it returns. 
- publishDiagnostics(URIForFile::canonicalize(File, /*TUPath=*/File), {}); + PublishDiagnosticsParams Notification; + Notification.uri = URIForFile::canonicalize(File, /*TUPath=*/File); + publishDiagnostics(Notification); } void ClangdLSPServer::onDocumentOnTypeFormatting( @@ -1145,18 +1153,13 @@ void ClangdLSPServer::applyConfiguration( } void ClangdLSPServer::publishSemanticHighlighting( - SemanticHighlightingParams Params) { + const SemanticHighlightingParams &Params) { notify("textDocument/semanticHighlighting", Params); } void ClangdLSPServer::publishDiagnostics( - const URIForFile &File, std::vector Diagnostics) { - // Publish diagnostics. - notify("textDocument/publishDiagnostics", - llvm::json::Object{ - {"uri", File}, - {"diagnostics", std::move(Diagnostics)}, - }); + const PublishDiagnosticsParams &Params) { + notify("textDocument/publishDiagnostics", Params); } // FIXME: This function needs to be properly tested. @@ -1243,6 +1246,7 @@ ClangdLSPServer::ClangdLSPServer( NegotiatedOffsetEncoding(ForcedOffsetEncoding) { // clang-format off MsgHandler->bind("initialize", &ClangdLSPServer::onInitialize); + MsgHandler->bind("initialized", &ClangdLSPServer::onInitialized); MsgHandler->bind("shutdown", &ClangdLSPServer::onShutdown); MsgHandler->bind("sync", &ClangdLSPServer::onSync); MsgHandler->bind("textDocument/rangeFormatting", &ClangdLSPServer::onDocumentRangeFormatting); @@ -1361,15 +1365,15 @@ void ClangdLSPServer::onHighlightingsReady( void ClangdLSPServer::onDiagnosticsReady(PathRef File, std::vector Diagnostics) { - auto URI = URIForFile::canonicalize(File, /*TUPath=*/File); - std::vector LSPDiagnostics; + PublishDiagnosticsParams Notification; + Notification.uri = URIForFile::canonicalize(File, /*TUPath=*/File); DiagnosticToReplacementMap LocalFixIts; // Temporary storage for (auto &Diag : Diagnostics) { - toLSPDiags(Diag, URI, DiagOpts, + toLSPDiags(Diag, Notification.uri, DiagOpts, [&](clangd::Diagnostic Diag, llvm::ArrayRef Fixes) { auto 
&FixItsForDiagnostic = LocalFixIts[Diag]; llvm::copy(Fixes, std::back_inserter(FixItsForDiagnostic)); - LSPDiagnostics.push_back(std::move(Diag)); + Notification.diagnostics.push_back(std::move(Diag)); }); } @@ -1380,7 +1384,7 @@ void ClangdLSPServer::onDiagnosticsReady(PathRef File, } // Send a notification to the LSP client. - publishDiagnostics(URI, std::move(LSPDiagnostics)); + publishDiagnostics(Notification); } void ClangdLSPServer::onBackgroundIndexProgress( diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h index f30fbf6b51492a..4ab0354ead72a3 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.h +++ b/clang-tools-extra/clangd/ClangdLSPServer.h @@ -67,6 +67,7 @@ class ClangdLSPServer : private ClangdServer::Callbacks { // LSP methods. Notifications have signature void(const Params&). // Calls have signature void(const Params&, Callback). void onInitialize(const InitializeParams &, Callback); + void onInitialized(const InitializedParams &); void onShutdown(const ShutdownParams &, Callback); void onSync(const NoParams &, Callback); void onDocumentDidOpen(const DidOpenTextDocumentParams &); @@ -132,11 +133,10 @@ class ClangdLSPServer : private ClangdServer::Callbacks { void applyConfiguration(const ConfigurationSettings &Settings); /// Sends a "publishSemanticHighlighting" notification to the LSP client. - void publishSemanticHighlighting(SemanticHighlightingParams Params); + void publishSemanticHighlighting(const SemanticHighlightingParams &); /// Sends a "publishDiagnostics" notification to the LSP client. 
- void publishDiagnostics(const URIForFile &File, - std::vector Diagnostics); + void publishDiagnostics(const PublishDiagnosticsParams &); /// Since initialization of CDBs and ClangdServer is done lazily, the /// following context captures the one used while creating ClangdLSPServer and diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index f1a88902c8c0e5..5dd00322a5abf8 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -194,10 +194,6 @@ void ClangdServer::addDocument(PathRef File, llvm::StringRef Contents, void ClangdServer::removeDocument(PathRef File) { WorkScheduler.remove(File); } -llvm::StringRef ClangdServer::getDocument(PathRef File) const { - return WorkScheduler.getContents(File); -} - void ClangdServer::codeComplete(PathRef File, Position Pos, const clangd::CodeCompleteOptions &Opts, Callback CB) { diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index e9f2c30b174955..d098f6242f72c2 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -175,9 +175,6 @@ class ClangdServer { WantDiagnostics WD = WantDiagnostics::Auto, bool ForceRebuild = false); - /// Get the contents of \p File, which should have been added. - llvm::StringRef getDocument(PathRef File) const; - /// Remove \p File from list of tracked files, schedule a request to free /// resources associated with it. Pending diagnostics for closed files may not /// be delivered, even if requested with WantDiags::Auto or WantDiags::Yes. 
diff --git a/clang-tools-extra/clangd/CollectMacros.cpp b/clang-tools-extra/clangd/CollectMacros.cpp new file mode 100644 index 00000000000000..ea7dd18ee13036 --- /dev/null +++ b/clang-tools-extra/clangd/CollectMacros.cpp @@ -0,0 +1,34 @@ +//===--- CollectMacros.cpp ---------------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CollectMacros.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Lex/Lexer.h" + +namespace clang { +namespace clangd { + +void CollectMainFileMacros::add(const Token &MacroNameTok, + const MacroInfo *MI) { + if (!InMainFile) + return; + auto Loc = MacroNameTok.getLocation(); + if (Loc.isInvalid() || Loc.isMacroID()) + return; + + auto Name = MacroNameTok.getIdentifierInfo()->getName(); + Out.Names.insert(Name); + auto Range = halfOpenToRange( + SM, CharSourceRange::getCharRange(Loc, MacroNameTok.getEndLoc())); + if (auto SID = getSymbolID(Name, MI, SM)) + Out.MacroRefs[*SID].push_back(Range); + else + Out.UnknownMacros.push_back(Range); +} +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/CollectMacros.h b/clang-tools-extra/clangd/CollectMacros.h index 5c3fca10ad4a5c..eecea0455be270 100644 --- a/clang-tools-extra/clangd/CollectMacros.h +++ b/clang-tools-extra/clangd/CollectMacros.h @@ -40,10 +40,8 @@ struct MainFileMacros { /// - collect macros after the preamble of the main file (in ParsedAST.cpp) class CollectMainFileMacros : public PPCallbacks { public: - explicit CollectMainFileMacros(const SourceManager &SM, - const LangOptions &LangOpts, - MainFileMacros &Out) - : SM(SM), LangOpts(LangOpts), Out(Out) {} + explicit CollectMainFileMacros(const SourceManager &SM, MainFileMacros &Out) + : SM(SM), Out(Out) 
{} void FileChanged(SourceLocation Loc, FileChangeReason, SrcMgr::CharacteristicKind, FileID) override { @@ -89,24 +87,8 @@ class CollectMainFileMacros : public PPCallbacks { } private: - void add(const Token &MacroNameTok, const MacroInfo *MI) { - if (!InMainFile) - return; - auto Loc = MacroNameTok.getLocation(); - if (Loc.isMacroID()) - return; - - if (auto Range = getTokenRange(SM, LangOpts, Loc)) { - auto Name = MacroNameTok.getIdentifierInfo()->getName(); - Out.Names.insert(Name); - if (auto SID = getSymbolID(Name, MI, SM)) - Out.MacroRefs[*SID].push_back(*Range); - else - Out.UnknownMacros.push_back(*Range); - } - } + void add(const Token &MacroNameTok, const MacroInfo *MI); const SourceManager &SM; - const LangOptions &LangOpts; bool InMainFile = true; MainFileMacros &Out; }; diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp index 5796657a5800be..5c1288c14b5860 100644 --- a/clang-tools-extra/clangd/Hover.cpp +++ b/clang-tools-extra/clangd/Hover.cpp @@ -26,6 +26,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/PrettyPrinter.h" #include "clang/AST/Type.h" +#include "clang/Basic/SourceLocation.h" #include "clang/Basic/Specifiers.h" #include "clang/Basic/TokenKinds.h" #include "clang/Index/IndexSymbol.h" @@ -530,32 +531,33 @@ llvm::Optional getHover(ParsedAST &AST, Position Pos, llvm::consumeError(CurLoc.takeError()); return llvm::None; } - auto TokensTouchingCursor = - syntax::spelledTokensTouching(*CurLoc, AST.getTokens()); + const auto &TB = AST.getTokens(); + auto TokensTouchingCursor = syntax::spelledTokensTouching(*CurLoc, TB); // Early exit if there were no tokens around the cursor. if (TokensTouchingCursor.empty()) return llvm::None; - // To be used as a backup for highlighting the selected token. 
- SourceLocation IdentLoc; + // To be used as a backup for highlighting the selected token, we use back as + // it aligns better with biases elsewhere (editors tend to send the position + // for the left of the hovered token). + CharSourceRange HighlightRange = + TokensTouchingCursor.back().range(SM).toCharRange(SM); llvm::Optional HI; // Macros and deducedtype only works on identifiers and auto/decltype keywords // respectively. Therefore they are only trggered on whichever works for them, // similar to SelectionTree::create(). for (const auto &Tok : TokensTouchingCursor) { if (Tok.kind() == tok::identifier) { - IdentLoc = Tok.location(); + // Prefer the identifier token as a fallback highlighting range. + HighlightRange = Tok.range(SM).toCharRange(SM); if (auto M = locateMacroAt(Tok, AST.getPreprocessor())) { HI = getHoverContents(*M, AST); - HI->SymRange = getTokenRange(AST.getSourceManager(), AST.getLangOpts(), - Tok.location()); break; } } else if (Tok.kind() == tok::kw_auto || Tok.kind() == tok::kw_decltype) { if (auto Deduced = getDeducedType(AST.getASTContext(), Tok.location())) { HI = getHoverContents(*Deduced, AST.getASTContext(), Index); - HI->SymRange = getTokenRange(AST.getSourceManager(), AST.getLangOpts(), - Tok.location()); + HighlightRange = Tok.range(SM).toCharRange(SM); break; } } @@ -566,10 +568,11 @@ llvm::Optional getHover(ParsedAST &AST, Position Pos, auto Offset = SM.getFileOffset(*CurLoc); // Editors send the position on the left of the hovered character. // So our selection tree should be biased right. (Tested with VSCode). - SelectionTree ST = SelectionTree::createRight( - AST.getASTContext(), AST.getTokens(), Offset, Offset); + SelectionTree ST = + SelectionTree::createRight(AST.getASTContext(), TB, Offset, Offset); std::vector Result; if (const SelectionTree::Node *N = ST.commonAncestor()) { + // FIXME: Fill in HighlightRange with range coming from N->ASTNode. 
auto Decls = explicitReferenceTargets(N->ASTNode, DeclRelation::Alias); if (!Decls.empty()) { HI = getHoverContents(Decls.front(), Index); @@ -592,14 +595,7 @@ llvm::Optional getHover(ParsedAST &AST, Position Pos, if (auto Formatted = tooling::applyAllReplacements(HI->Definition, Replacements)) HI->Definition = *Formatted; - // FIXME: We should rather fill this with info coming from SelectionTree node. - if (!HI->SymRange) { - SourceLocation ToHighlight = TokensTouchingCursor.front().location(); - if (IdentLoc.isValid()) - ToHighlight = IdentLoc; - HI->SymRange = - getTokenRange(AST.getSourceManager(), AST.getLangOpts(), ToHighlight); - } + HI->SymRange = halfOpenToRange(SM, HighlightRange); return HI; } diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp index 36a9c47f7a9d2e..e43c2ce662616c 100644 --- a/clang-tools-extra/clangd/ParsedAST.cpp +++ b/clang-tools-extra/clangd/ParsedAST.cpp @@ -350,7 +350,7 @@ ParsedAST::build(std::unique_ptr CI, Macros = Preamble->Macros; Clang->getPreprocessor().addPPCallbacks( std::make_unique(Clang->getSourceManager(), - Clang->getLangOpts(), Macros)); + Macros)); // Copy over the includes from the preamble, then combine with the // non-preamble includes below. 
diff --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp index eca545fd09e4b3..f2b6b017f10f58 100644 --- a/clang-tools-extra/clangd/Preamble.cpp +++ b/clang-tools-extra/clangd/Preamble.cpp @@ -54,7 +54,7 @@ class CppFilePreambleCallbacks : public PreambleCallbacks { return std::make_unique( collectIncludeStructureCallback(*SourceMgr, &Includes), - std::make_unique(*SourceMgr, *LangOpts, Macros)); + std::make_unique(*SourceMgr, Macros)); } CommentHandler *getCommentHandler() override { diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp index 8e89c1f45f3a53..5a867c52c1ed26 100644 --- a/clang-tools-extra/clangd/Protocol.cpp +++ b/clang-tools-extra/clangd/Protocol.cpp @@ -531,6 +531,13 @@ bool fromJSON(const llvm::json::Value &Params, Diagnostic &R) { return true; } +llvm::json::Value toJSON(const PublishDiagnosticsParams &PDP) { + return llvm::json::Object{ + {"uri", PDP.uri}, + {"diagnostics", PDP.diagnostics}, + }; +} + bool fromJSON(const llvm::json::Value &Params, CodeActionContext &R) { llvm::json::ObjectMapper O(Params); return O && O.map("diagnostics", R.diagnostics); diff --git a/clang-tools-extra/clangd/Protocol.h b/clang-tools-extra/clangd/Protocol.h index a376e5f39e795c..596c7e9004e79d 100644 --- a/clang-tools-extra/clangd/Protocol.h +++ b/clang-tools-extra/clangd/Protocol.h @@ -239,6 +239,7 @@ bool fromJSON(const llvm::json::Value &E, TraceLevel &Out); struct NoParams {}; inline bool fromJSON(const llvm::json::Value &, NoParams &) { return true; } +using InitializedParams = NoParams; using ShutdownParams = NoParams; using ExitParams = NoParams; @@ -791,6 +792,14 @@ struct LSPDiagnosticCompare { bool fromJSON(const llvm::json::Value &, Diagnostic &); llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Diagnostic &); +struct PublishDiagnosticsParams { + /// The URI for which diagnostic information is reported. + URIForFile uri; + /// An array of diagnostic information items. 
+ std::vector diagnostics; +}; +llvm::json::Value toJSON(const PublishDiagnosticsParams &); + struct CodeActionContext { /// An array of diagnostics. std::vector diagnostics; diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp index e7b1618fd2d4fb..d5c51ebff5e1e1 100644 --- a/clang-tools-extra/clangd/SemanticHighlighting.cpp +++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp @@ -23,6 +23,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" +#include "clang/Tooling/Syntax/Tokens.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" @@ -128,40 +129,39 @@ llvm::Optional kindForReference(const ReferenceLoc &R) { return Result; } +// For a macro usage `DUMP(foo)`, we want: +// - DUMP --> "macro" +// - foo --> "variable". +SourceLocation getHighlightableSpellingToken(SourceLocation L, + const SourceManager &SM) { + if (L.isFileID()) + return SM.isWrittenInMainFile(L) ? L : SourceLocation{}; + // Tokens expanded from the macro body contribute no highlightings. + if (!SM.isMacroArgExpansion(L)) + return {}; + // Tokens expanded from macro args are potentially highlightable. + return getHighlightableSpellingToken(SM.getImmediateSpellingLoc(L), SM); +} + /// Consumes source locations and maps them to text ranges for highlightings. 
class HighlightingsBuilder { public: - HighlightingsBuilder(const SourceManager &SourceMgr, - const LangOptions &LangOpts) - : SourceMgr(SourceMgr), LangOpts(LangOpts) {} + HighlightingsBuilder(const ParsedAST &AST) + : TB(AST.getTokens()), SourceMgr(AST.getSourceManager()), + LangOpts(AST.getLangOpts()) {} void addToken(HighlightingToken T) { Tokens.push_back(T); } void addToken(SourceLocation Loc, HighlightingKind Kind) { + Loc = getHighlightableSpellingToken(Loc, SourceMgr); if (Loc.isInvalid()) return; - if (Loc.isMacroID()) { - // Only intereseted in highlighting arguments in macros (DEF_X(arg)). - if (!SourceMgr.isMacroArgExpansion(Loc)) - return; - Loc = SourceMgr.getSpellingLoc(Loc); - } - - // Non top level decls that are included from a header are not filtered by - // topLevelDecls. (example: method declarations being included from - // another file for a class from another file). - // There are also cases with macros where the spelling loc will not be in - // the main file and the highlighting would be incorrect. - if (!isInsideMainFile(Loc, SourceMgr)) - return; + const auto *Tok = TB.spelledTokenAt(Loc); + assert(Tok); - auto Range = getTokenRange(SourceMgr, LangOpts, Loc); - if (!Range) { - // R should always have a value, if it doesn't something is very wrong. - elog("Tried to add semantic token with an invalid range"); - return; - } - Tokens.push_back(HighlightingToken{Kind, *Range}); + auto Range = halfOpenToRange(SourceMgr, + Tok->range(SourceMgr).toCharRange(SourceMgr)); + Tokens.push_back(HighlightingToken{Kind, std::move(Range)}); } std::vector collect(ParsedAST &AST) && { @@ -211,6 +211,7 @@ class HighlightingsBuilder { } private: + const syntax::TokenBuffer &TB; const SourceManager &SourceMgr; const LangOptions &LangOpts; std::vector Tokens; @@ -311,7 +312,7 @@ takeLine(ArrayRef AllTokens, std::vector getSemanticHighlightings(ParsedAST &AST) { auto &C = AST.getASTContext(); // Add highlightings for AST nodes. 
- HighlightingsBuilder Builder(AST.getSourceManager(), C.getLangOpts()); + HighlightingsBuilder Builder(AST); // Highlight 'decltype' and 'auto' as their underlying types. CollectExtraHighlightings(Builder).TraverseAST(C); // Highlight all decls and references coming from the AST. diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp index 79d027def4bc35..d18daa910d18ee 100644 --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -225,17 +225,6 @@ bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM) { return true; } -llvm::Optional getTokenRange(const SourceManager &SM, - const LangOptions &LangOpts, - SourceLocation TokLoc) { - if (!TokLoc.isValid()) - return llvm::None; - SourceLocation End = Lexer::getLocForEndOfToken(TokLoc, 0, SM, LangOpts); - if (!End.isValid()) - return llvm::None; - return halfOpenToRange(SM, CharSourceRange::getCharRange(TokLoc, End)); -} - bool isValidFileRange(const SourceManager &Mgr, SourceRange R) { if (!R.getBegin().isValid() || !R.getEnd().isValid()) return false; @@ -645,8 +634,7 @@ std::vector collectIdentifierRanges(llvm::StringRef Identifier, [&](const syntax::Token &Tok, const SourceManager &SM) { if (Tok.kind() != tok::identifier || Tok.text(SM) != Identifier) return; - if (auto Range = getTokenRange(SM, LangOpts, Tok.location())) - Ranges.push_back(*Range); + Ranges.push_back(halfOpenToRange(SM, Tok.range(SM).toCharRange(SM))); }); return Ranges; } diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h index c601cc89df2809..383c57371b0059 100644 --- a/clang-tools-extra/clangd/SourceCode.h +++ b/clang-tools-extra/clangd/SourceCode.h @@ -69,11 +69,6 @@ Position offsetToPosition(llvm::StringRef Code, size_t Offset); /// FIXME: This should return an error if the location is invalid. Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc); -/// Returns the taken range at \p TokLoc. 
-llvm::Optional getTokenRange(const SourceManager &SM, - const LangOptions &LangOpts, - SourceLocation TokLoc); - /// Return the file location, corresponding to \p P. Note that one should take /// care to avoid comparing the result with expansion locations. llvm::Expected sourceLocationInMainFile(const SourceManager &SM, diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp index f59c19e8031ee9..3f3162a33c303a 100644 --- a/clang-tools-extra/clangd/TUScheduler.cpp +++ b/clang-tools-extra/clangd/TUScheduler.cpp @@ -921,15 +921,6 @@ void TUScheduler::remove(PathRef File) { File); } -llvm::StringRef TUScheduler::getContents(PathRef File) const { - auto It = Files.find(File); - if (It == Files.end()) { - elog("getContents() for untracked file: {0}", File); - return ""; - } - return It->second->Contents; -} - llvm::StringMap TUScheduler::getAllFileContents() const { llvm::StringMap Results; for (auto &It : Files) diff --git a/clang-tools-extra/clangd/TUScheduler.h b/clang-tools-extra/clangd/TUScheduler.h index 5082612b0ccc36..948fde7ed1099c 100644 --- a/clang-tools-extra/clangd/TUScheduler.h +++ b/clang-tools-extra/clangd/TUScheduler.h @@ -213,10 +213,6 @@ class TUScheduler { /// if requested with WantDiags::Auto or WantDiags::Yes. void remove(PathRef File); - /// Returns the current contents of the buffer for File, per last update(). - /// The returned StringRef may be invalidated by any write to TUScheduler. - llvm::StringRef getContents(PathRef File) const; - /// Returns a snapshot of all file buffer contents, per last update(). 
llvm::StringMap getAllFileContents() const; diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp index 29c2338f5bb5a6..67f7bda6a5e65b 100644 --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -29,6 +29,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/Type.h" #include "clang/Basic/LLVM.h" +#include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Index/IndexDataConsumer.h" @@ -149,108 +150,78 @@ std::vector getDeclAtPosition(ParsedAST &AST, return Result; } -llvm::Optional makeLocation(ASTContext &AST, SourceLocation TokLoc, +// Expects Loc to be a SpellingLocation, will bail out otherwise as it can't +// figure out a filename. +llvm::Optional makeLocation(const ASTContext &AST, SourceLocation Loc, llvm::StringRef TUPath) { - const SourceManager &SourceMgr = AST.getSourceManager(); - const FileEntry *F = SourceMgr.getFileEntryForID(SourceMgr.getFileID(TokLoc)); + const auto &SM = AST.getSourceManager(); + const FileEntry *F = SM.getFileEntryForID(SM.getFileID(Loc)); if (!F) return None; - auto FilePath = getCanonicalPath(F, SourceMgr); + auto FilePath = getCanonicalPath(F, SM); if (!FilePath) { log("failed to get path!"); return None; } - if (auto Range = - getTokenRange(AST.getSourceManager(), AST.getLangOpts(), TokLoc)) { - Location L; - L.uri = URIForFile::canonicalize(*FilePath, TUPath); - L.range = *Range; - return L; - } - return None; + Location L; + L.uri = URIForFile::canonicalize(*FilePath, TUPath); + // We call MeasureTokenLength here as TokenBuffer doesn't store spelled tokens + // outside the main file. 
+ auto TokLen = Lexer::MeasureTokenLength(Loc, SM, AST.getLangOpts()); + L.range = halfOpenToRange( + SM, CharSourceRange::getCharRange(Loc, Loc.getLocWithOffset(TokLen))); + return L; } } // namespace -std::vector getDocumentLinks(ParsedAST &AST) { - const auto &SM = AST.getSourceManager(); - auto MainFilePath = - getCanonicalPath(SM.getFileEntryForID(SM.getMainFileID()), SM); - if (!MainFilePath) { - elog("Failed to get a path for the main file, so no links"); - return {}; - } - - std::vector Result; - for (auto &Inc : AST.getIncludeStructure().MainFileIncludes) { - if (!Inc.Resolved.empty()) { - Result.push_back(DocumentLink( - {Inc.R, URIForFile::canonicalize(Inc.Resolved, *MainFilePath)})); - } - } - - return Result; -} - -std::vector locateSymbolAt(ParsedAST &AST, Position Pos, - const SymbolIndex *Index) { - const auto &SM = AST.getSourceManager(); - auto MainFilePath = - getCanonicalPath(SM.getFileEntryForID(SM.getMainFileID()), SM); - if (!MainFilePath) { - elog("Failed to get a path for the main file, so no references"); - return {}; - } - - // Treat #included files as symbols, to enable go-to-definition on them. +// Treat #included files as symbols, to enable go-to-definition on them. +static llvm::Optional +locateFileReferent(const Position &Pos, ParsedAST &AST, + llvm::StringRef MainFilePath) { for (auto &Inc : AST.getIncludeStructure().MainFileIncludes) { if (!Inc.Resolved.empty() && Inc.R.start.line == Pos.line) { LocatedSymbol File; File.Name = std::string(llvm::sys::path::filename(Inc.Resolved)); File.PreferredDeclaration = { - URIForFile::canonicalize(Inc.Resolved, *MainFilePath), Range{}}; + URIForFile::canonicalize(Inc.Resolved, MainFilePath), Range{}}; File.Definition = File.PreferredDeclaration; // We're not going to find any further symbols on #include lines. 
- return {std::move(File)}; + return File; } } + return llvm::None; +} - auto CurLoc = sourceLocationInMainFile(SM, Pos); - if (!CurLoc) { - elog("locateSymbolAt failed to convert position to source location: {0}", - CurLoc.takeError()); - return {}; - } - - // Macros are simple: there's no declaration/definition distinction. - // As a consequence, there's no need to look them up in the index either. - std::vector Result; - const auto *TouchedIdentifier = - syntax::spelledIdentifierTouching(*CurLoc, AST.getTokens()); - if (TouchedIdentifier) { - if (auto M = locateMacroAt(*TouchedIdentifier, AST.getPreprocessor())) { - if (auto Loc = makeLocation(AST.getASTContext(), - M->Info->getDefinitionLoc(), *MainFilePath)) { - LocatedSymbol Macro; - Macro.Name = std::string(M->Name); - Macro.PreferredDeclaration = *Loc; - Macro.Definition = Loc; - Result.push_back(std::move(Macro)); - - // Don't look at the AST or index if we have a macro result. - // (We'd just return declarations referenced from the macro's - // expansion.) - return Result; - } +// Macros are simple: there's no declaration/definition distinction. +// As a consequence, there's no need to look them up in the index either. +static llvm::Optional +locateMacroReferent(const syntax::Token &TouchedIdentifier, ParsedAST &AST, + llvm::StringRef MainFilePath) { + if (auto M = locateMacroAt(TouchedIdentifier, AST.getPreprocessor())) { + if (auto Loc = makeLocation(AST.getASTContext(), + M->Info->getDefinitionLoc(), MainFilePath)) { + LocatedSymbol Macro; + Macro.Name = std::string(M->Name); + Macro.PreferredDeclaration = *Loc; + Macro.Definition = Loc; + return Macro; } } + return llvm::None; +} - // Decls are more complicated. - // The AST contains at least a declaration, maybe a definition. - // These are up-to-date, and so generally preferred over index results. - // We perform a single batch index lookup to find additional definitions. - +// Decls are more complicated. 
+// The AST contains at least a declaration, maybe a definition. +// These are up-to-date, and so generally preferred over index results. +// We perform a single batch index lookup to find additional definitions. +static std::vector +locateASTReferent(SourceLocation CurLoc, const syntax::Token *TouchedIdentifier, + ParsedAST &AST, llvm::StringRef MainFilePath, + const SymbolIndex *Index) { + const SourceManager &SM = AST.getSourceManager(); // Results follow the order of Symbols.Decls. + std::vector Result; // Keep track of SymbolID -> index mapping, to fill in index data later. llvm::DenseMap ResultIndex; @@ -259,7 +230,7 @@ std::vector locateSymbolAt(ParsedAST &AST, Position Pos, const NamedDecl *Preferred = Def ? Def : D; auto Loc = makeLocation(AST.getASTContext(), nameLocation(*Preferred, SM), - *MainFilePath); + MainFilePath); if (!Loc) return; @@ -278,7 +249,7 @@ std::vector locateSymbolAt(ParsedAST &AST, Position Pos, // Emit all symbol locations (declaration or definition) from AST. DeclRelationSet Relations = DeclRelation::TemplatePattern | DeclRelation::Alias; - for (const NamedDecl *D : getDeclAtPosition(AST, *CurLoc, Relations)) { + for (const NamedDecl *D : getDeclAtPosition(AST, CurLoc, Relations)) { // Special case: void foo() ^override: jump to the overridden method. if (const auto *CMD = llvm::dyn_cast(D)) { const InheritableAttr *Attr = D->getAttr(); @@ -320,23 +291,23 @@ std::vector locateSymbolAt(ParsedAST &AST, Position Pos, if (R.Definition) { // from AST // Special case: if the AST yielded a definition, then it may not be // the right *declaration*. Prefer the one from the index. - if (auto Loc = toLSPLocation(Sym.CanonicalDeclaration, *MainFilePath)) + if (auto Loc = toLSPLocation(Sym.CanonicalDeclaration, MainFilePath)) R.PreferredDeclaration = *Loc; // We might still prefer the definition from the index, e.g. for // generated symbols. 
if (auto Loc = toLSPLocation( getPreferredLocation(*R.Definition, Sym.Definition, Scratch), - *MainFilePath)) + MainFilePath)) R.Definition = *Loc; } else { - R.Definition = toLSPLocation(Sym.Definition, *MainFilePath); + R.Definition = toLSPLocation(Sym.Definition, MainFilePath); // Use merge logic to choose AST or index declaration. if (auto Loc = toLSPLocation( getPreferredLocation(R.PreferredDeclaration, Sym.CanonicalDeclaration, Scratch), - *MainFilePath)) + MainFilePath)) R.PreferredDeclaration = *Loc; } }); @@ -345,17 +316,75 @@ std::vector locateSymbolAt(ParsedAST &AST, Position Pos, return Result; } +std::vector locateSymbolAt(ParsedAST &AST, Position Pos, + const SymbolIndex *Index) { + const auto &SM = AST.getSourceManager(); + auto MainFilePath = + getCanonicalPath(SM.getFileEntryForID(SM.getMainFileID()), SM); + if (!MainFilePath) { + elog("Failed to get a path for the main file, so no references"); + return {}; + } + + if (auto File = locateFileReferent(Pos, AST, *MainFilePath)) + return {std::move(*File)}; + + auto CurLoc = sourceLocationInMainFile(SM, Pos); + if (!CurLoc) { + elog("locateSymbolAt failed to convert position to source location: {0}", + CurLoc.takeError()); + return {}; + } + + const syntax::Token *TouchedIdentifier = + syntax::spelledIdentifierTouching(*CurLoc, AST.getTokens()); + if (TouchedIdentifier) + if (auto Macro = + locateMacroReferent(*TouchedIdentifier, AST, *MainFilePath)) + // Don't look at the AST or index if we have a macro result. + // (We'd just return declarations referenced from the macro's + // expansion.) 
+ return {*std::move(Macro)}; + + return locateASTReferent(*CurLoc, TouchedIdentifier, AST, *MainFilePath, + Index); +} + +std::vector getDocumentLinks(ParsedAST &AST) { + const auto &SM = AST.getSourceManager(); + auto MainFilePath = + getCanonicalPath(SM.getFileEntryForID(SM.getMainFileID()), SM); + if (!MainFilePath) { + elog("Failed to get a path for the main file, so no links"); + return {}; + } + + std::vector Result; + for (auto &Inc : AST.getIncludeStructure().MainFileIncludes) { + if (!Inc.Resolved.empty()) { + Result.push_back(DocumentLink( + {Inc.R, URIForFile::canonicalize(Inc.Resolved, *MainFilePath)})); + } + } + + return Result; +} + namespace { /// Collects references to symbols within the main file. class ReferenceFinder : public index::IndexDataConsumer { public: struct Reference { - SourceLocation Loc; + syntax::Token SpelledTok; index::SymbolRoleSet Role; + + Range range(const SourceManager &SM) const { + return halfOpenToRange(SM, SpelledTok.range(SM).toCharRange(SM)); + } }; - ReferenceFinder(ASTContext &AST, Preprocessor &PP, + ReferenceFinder(const ParsedAST &AST, const std::vector &TargetDecls) : AST(AST) { for (const NamedDecl *D : TargetDecls) @@ -364,13 +393,17 @@ class ReferenceFinder : public index::IndexDataConsumer { std::vector take() && { llvm::sort(References, [](const Reference &L, const Reference &R) { - return std::tie(L.Loc, L.Role) < std::tie(R.Loc, R.Role); + auto LTok = L.SpelledTok.location(); + auto RTok = R.SpelledTok.location(); + return std::tie(LTok, L.Role) < std::tie(RTok, R.Role); }); // We sometimes see duplicates when parts of the AST get traversed twice. 
References.erase(std::unique(References.begin(), References.end(), [](const Reference &L, const Reference &R) { - return std::tie(L.Loc, L.Role) == - std::tie(R.Loc, R.Role); + auto LTok = L.SpelledTok.location(); + auto RTok = R.SpelledTok.location(); + return std::tie(LTok, L.Role) == + std::tie(RTok, R.Role); }), References.end()); return std::move(References); @@ -382,22 +415,27 @@ class ReferenceFinder : public index::IndexDataConsumer { SourceLocation Loc, index::IndexDataConsumer::ASTNodeInfo ASTNode) override { assert(D->isCanonicalDecl() && "expect D to be a canonical declaration"); + if (!CanonicalTargets.count(D)) + return true; + const auto &TB = AST.getTokens(); const SourceManager &SM = AST.getSourceManager(); Loc = SM.getFileLoc(Loc); - if (isInsideMainFile(Loc, SM) && CanonicalTargets.count(D)) - References.push_back({Loc, Roles}); + // We are only traversing decls *inside* the main file, so this should hold. + assert(isInsideMainFile(Loc, SM)); + if (const auto *Tok = TB.spelledTokenAt(Loc)) + References.push_back({*Tok, Roles}); return true; } private: llvm::SmallSet CanonicalTargets; std::vector References; - const ASTContext &AST; + const ParsedAST &AST; }; std::vector findRefs(const std::vector &Decls, ParsedAST &AST) { - ReferenceFinder RefFinder(AST.getASTContext(), AST.getPreprocessor(), Decls); + ReferenceFinder RefFinder(AST, Decls); index::IndexingOptions IndexOpts; IndexOpts.SystemSymbolFilter = index::IndexingOptions::SystemSymbolFilterKind::All; @@ -428,18 +466,15 @@ std::vector findDocumentHighlights(ParsedAST &AST, // different kinds, deduplicate them. 
std::vector Result; for (const auto &Ref : References) { - if (auto Range = - getTokenRange(AST.getSourceManager(), AST.getLangOpts(), Ref.Loc)) { - DocumentHighlight DH; - DH.range = *Range; - if (Ref.Role & index::SymbolRoleSet(index::SymbolRole::Write)) - DH.kind = DocumentHighlightKind::Write; - else if (Ref.Role & index::SymbolRoleSet(index::SymbolRole::Read)) - DH.kind = DocumentHighlightKind::Read; - else - DH.kind = DocumentHighlightKind::Text; - Result.push_back(std::move(DH)); - } + DocumentHighlight DH; + DH.range = Ref.range(SM); + if (Ref.Role & index::SymbolRoleSet(index::SymbolRole::Write)) + DH.kind = DocumentHighlightKind::Write; + else if (Ref.Role & index::SymbolRoleSet(index::SymbolRole::Read)) + DH.kind = DocumentHighlightKind::Read; + else + DH.kind = DocumentHighlightKind::Text; + Result.push_back(std::move(DH)); } return Result; } @@ -502,16 +537,15 @@ ReferencesResult findReferences(ParsedAST &AST, Position Pos, uint32_t Limit, MainFileRefs.erase(std::unique(MainFileRefs.begin(), MainFileRefs.end(), [](const ReferenceFinder::Reference &L, const ReferenceFinder::Reference &R) { - return L.Loc == R.Loc; + return L.SpelledTok.location() == + R.SpelledTok.location(); }), MainFileRefs.end()); for (const auto &Ref : MainFileRefs) { - if (auto Range = getTokenRange(SM, AST.getLangOpts(), Ref.Loc)) { - Location Result; - Result.range = *Range; - Result.uri = URIMainFile; - Results.References.push_back(std::move(Result)); - } + Location Result; + Result.range = Ref.range(SM); + Result.uri = URIMainFile; + Results.References.push_back(std::move(Result)); } if (Index && Results.References.size() <= Limit) { for (const Decl *D : Decls) { diff --git a/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp b/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp index bdfd0e4743d64f..398b6f29dba85d 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp @@ -16,6 
+16,7 @@ #include "SourceCode.h" #include "refactor/Tweak.h" #include "clang/AST/ASTTypeTraits.h" +#include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" #include "clang/AST/DeclCXX.h" @@ -156,7 +157,7 @@ getFunctionSourceCode(const FunctionDecl *FD, llvm::StringRef TargetNamespace, "define outline: couldn't find a context for target"); llvm::Error Errors = llvm::Error::success(); - tooling::Replacements QualifierInsertions; + tooling::Replacements DeclarationCleanups; // Finds the first unqualified name in function return type and name, then // qualifies those to be valid in TargetContext. @@ -181,7 +182,7 @@ getFunctionSourceCode(const FunctionDecl *FD, llvm::StringRef TargetNamespace, const NamedDecl *ND = Ref.Targets.front(); const std::string Qualifier = getQualification( AST, *TargetContext, SM.getLocForStartOfFile(SM.getMainFileID()), ND); - if (auto Err = QualifierInsertions.add( + if (auto Err = DeclarationCleanups.add( tooling::Replacement(SM, Ref.NameLoc, 0, Qualifier))) Errors = llvm::joinErrors(std::move(Errors), std::move(Err)); }); @@ -206,14 +207,72 @@ getFunctionSourceCode(const FunctionDecl *FD, llvm::StringRef TargetNamespace, assert(Tok != Tokens.rend()); DelRange.setBegin(Tok->location()); if (auto Err = - QualifierInsertions.add(tooling::Replacement(SM, DelRange, ""))) + DeclarationCleanups.add(tooling::Replacement(SM, DelRange, ""))) Errors = llvm::joinErrors(std::move(Errors), std::move(Err)); } } + auto DelAttr = [&](const Attr *A) { + if (!A) + return; + auto AttrTokens = + TokBuf.spelledForExpanded(TokBuf.expandedTokens(A->getRange())); + assert(A->getLocation().isValid()); + if (!AttrTokens || AttrTokens->empty()) { + Errors = llvm::joinErrors( + std::move(Errors), + llvm::createStringError( + llvm::inconvertibleErrorCode(), + llvm::StringRef("define outline: Can't move out of line as " + "function has a macro `") + + A->getSpelling() + "` specifier.")); + return; + } + CharSourceRange DelRange = + 
syntax::Token::range(SM, AttrTokens->front(), AttrTokens->back()) + .toCharRange(SM); + if (auto Err = + DeclarationCleanups.add(tooling::Replacement(SM, DelRange, ""))) + Errors = llvm::joinErrors(std::move(Errors), std::move(Err)); + }; + + DelAttr(FD->getAttr()); + DelAttr(FD->getAttr()); + + if (FD->isVirtualAsWritten()) { + SourceRange SpecRange{FD->getBeginLoc(), FD->getLocation()}; + bool HasErrors = true; + + // Clang allows duplicating virtual specifiers so check for multiple + // occurances. + for (const auto &Tok : TokBuf.expandedTokens(SpecRange)) { + if (Tok.kind() != tok::kw_virtual) + continue; + auto Spelling = TokBuf.spelledForExpanded(llvm::makeArrayRef(Tok)); + if (!Spelling) { + HasErrors = true; + break; + } + HasErrors = false; + CharSourceRange DelRange = + syntax::Token::range(SM, Spelling->front(), Spelling->back()) + .toCharRange(SM); + if (auto Err = + DeclarationCleanups.add(tooling::Replacement(SM, DelRange, ""))) + Errors = llvm::joinErrors(std::move(Errors), std::move(Err)); + } + if (HasErrors) { + Errors = llvm::joinErrors( + std::move(Errors), + llvm::createStringError(llvm::inconvertibleErrorCode(), + "define outline: Can't move out of line as " + "function has a macro `virtual` specifier.")); + } + } + if (Errors) return std::move(Errors); - return getFunctionSourceAfterReplacements(FD, QualifierInsertions); + return getFunctionSourceAfterReplacements(FD, DeclarationCleanups); } struct InsertionPoint { diff --git a/clang-tools-extra/clangd/test/initialize-params.test b/clang-tools-extra/clangd/test/initialize-params.test index 68e3ebc24a2145..2b5c02fc8ce287 100644 --- a/clang-tools-extra/clangd/test/initialize-params.test +++ b/clang-tools-extra/clangd/test/initialize-params.test @@ -47,6 +47,10 @@ # CHECK-NEXT: "textDocumentSync": 2, # CHECK-NEXT: "typeHierarchyProvider": true # CHECK-NEXT: "workspaceSymbolProvider": true +# CHECK-NEXT: }, +# CHECK-NEXT: "serverInfo": { +# CHECK-NEXT: "name": "clangd", +# CHECK-NEXT: "version": 
"{{.*}}clangd version {{.*}}" # CHECK-NEXT: } # CHECK-NEXT: } --- diff --git a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp index 2140997679904b..9e5952fe2cb53b 100644 --- a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp +++ b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp @@ -138,9 +138,7 @@ TEST_F(TUSchedulerTests, MissingFiles) { auto Missing = testPath("missing.cpp"); Files[Missing] = ""; - EXPECT_EQ(S.getContents(Added), ""); S.update(Added, getInputs(Added, "x"), WantDiagnostics::No); - EXPECT_EQ(S.getContents(Added), "x"); // Assert each operation for missing file is an error (even if it's // available in VFS). @@ -159,9 +157,7 @@ TEST_F(TUSchedulerTests, MissingFiles) { [&](Expected Preamble) { EXPECT_TRUE(bool(Preamble)); }); - EXPECT_EQ(S.getContents(Added), "x"); S.remove(Added); - EXPECT_EQ(S.getContents(Added), ""); // Assert that all operations fail after removing the file. S.runWithAST("", Added, diff --git a/clang-tools-extra/clangd/unittests/TweakTests.cpp b/clang-tools-extra/clangd/unittests/TweakTests.cpp index 24210aaa101d42..cae922ffcb9558 100644 --- a/clang-tools-extra/clangd/unittests/TweakTests.cpp +++ b/clang-tools-extra/clangd/unittests/TweakTests.cpp @@ -2068,6 +2068,80 @@ TEST_F(DefineOutlineTest, ApplyTest) { };)cpp", "Foo::Foo(int z) __attribute__((weak)) : bar(2){}\n", }, + // Virt specifiers. 
+ { + R"cpp( + struct A { + virtual void f^oo() {} + };)cpp", + R"cpp( + struct A { + virtual void foo() ; + };)cpp", + " void A::foo() {}\n", + }, + { + R"cpp( + struct A { + virtual virtual void virtual f^oo() {} + };)cpp", + R"cpp( + struct A { + virtual virtual void virtual foo() ; + };)cpp", + " void A::foo() {}\n", + }, + { + R"cpp( + struct A { + virtual void foo() = 0; + }; + struct B : A { + void fo^o() override {} + };)cpp", + R"cpp( + struct A { + virtual void foo() = 0; + }; + struct B : A { + void foo() override ; + };)cpp", + "void B::foo() {}\n", + }, + { + R"cpp( + struct A { + virtual void foo() = 0; + }; + struct B : A { + void fo^o() final {} + };)cpp", + R"cpp( + struct A { + virtual void foo() = 0; + }; + struct B : A { + void foo() final ; + };)cpp", + "void B::foo() {}\n", + }, + { + R"cpp( + struct A { + virtual void foo() = 0; + }; + struct B : A { + void fo^o() final override {} + };)cpp", + R"cpp( + struct A { + virtual void foo() = 0; + }; + struct B : A { + void foo() final override ; + };)cpp", + "void B::foo() {}\n", + }, }; for (const auto &Case : Cases) { SCOPED_TRACE(Case.Test); @@ -2081,6 +2155,8 @@ TEST_F(DefineOutlineTest, HandleMacros) { llvm::StringMap EditedFiles; ExtraFiles["Test.cpp"] = ""; FileName = "Test.hpp"; + ExtraArgs.push_back("-DVIRTUAL=virtual"); + ExtraArgs.push_back("-DOVER=override"); struct { llvm::StringRef Test; @@ -2118,6 +2194,48 @@ TEST_F(DefineOutlineTest, HandleMacros) { #define TARGET foo void TARGET();)cpp", "void TARGET(){ return; }"}, + {R"cpp(#define VIRT virtual + struct A { + VIRT void f^oo() {} + };)cpp", + R"cpp(#define VIRT virtual + struct A { + VIRT void foo() ; + };)cpp", + " void A::foo() {}\n"}, + {R"cpp( + struct A { + VIRTUAL void f^oo() {} + };)cpp", + R"cpp( + struct A { + VIRTUAL void foo() ; + };)cpp", + " void A::foo() {}\n"}, + {R"cpp( + struct A { + virtual void foo() = 0; + }; + struct B : A { + void fo^o() OVER {} + };)cpp", + R"cpp( + struct A { + virtual void foo() = 0; + }; 
+ struct B : A { + void foo() OVER ; + };)cpp", + "void B::foo() {}\n"}, + {R"cpp(#define STUPID_MACRO(X) virtual + struct A { + STUPID_MACRO(sizeof sizeof int) void f^oo() {} + };)cpp", + R"cpp(#define STUPID_MACRO(X) virtual + struct A { + STUPID_MACRO(sizeof sizeof int) void foo() ; + };)cpp", + " void A::foo() {}\n"}, }; for (const auto &Case : Cases) { SCOPED_TRACE(Case.Test); @@ -2229,6 +2347,49 @@ TEST_F(DefineOutlineTest, QualifyFunctionName) { << Case.TestHeader; } } + +TEST_F(DefineOutlineTest, FailsMacroSpecifier) { + FileName = "Test.hpp"; + ExtraFiles["Test.cpp"] = ""; + ExtraArgs.push_back("-DFINALOVER=final override"); + + std::pair Cases[] = { + { + R"cpp( + #define VIRT virtual void + struct A { + VIRT fo^o() {} + };)cpp", + "fail: define outline: Can't move out of line as function has a " + "macro `virtual` specifier."}, + { + R"cpp( + #define OVERFINAL final override + struct A { + virtual void foo() {} + }; + struct B : A { + void fo^o() OVERFINAL {} + };)cpp", + "fail: define outline: Can't move out of line as function has a " + "macro `override` specifier.\ndefine outline: Can't move out of line " + "as function has a macro `final` specifier."}, + { + R"cpp( + struct A { + virtual void foo() {} + }; + struct B : A { + void fo^o() FINALOVER {} + };)cpp", + "fail: define outline: Can't move out of line as function has a " + "macro `override` specifier.\ndefine outline: Can't move out of line " + "as function has a macro `final` specifier."}, + }; + for (const auto &Case : Cases) { + EXPECT_EQ(apply(Case.first), Case.second); + } +} } // namespace } // namespace clangd } // namespace clang diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 831ef32c6b8a4c..ce121ebe6055bb 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -101,12 +101,10 @@ Windows Support C Language Changes in Clang --------------------------- -- ... 
- -C11 Feature Support -^^^^^^^^^^^^^^^^^^^ +- The default C language standard used when `-std=` is not specified has been + upgraded from gnu11 to gnu17. -... +- ... C++ Language Changes in Clang ----------------------------- diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 856d5e34bbcc26..f50f8888f477ed 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -80,7 +80,7 @@ Basic Usage Intro to how to use a C compiler for newbies. compile + link compile then link debug info enabling optimizations -picking a language to use, defaults to C11 by default. Autosenses based +picking a language to use, defaults to C17 by default. Autosenses based on extension. using a makefile Command Line Options @@ -2399,10 +2399,10 @@ See :doc:`LanguageExtensions`. Differences between various standard modes ------------------------------------------ -clang supports the -std option, which changes what language mode clang -uses. The supported modes for C are c89, gnu89, c99, gnu99, c11, gnu11, -c17, gnu17, and various aliases for those modes. If no -std option is -specified, clang defaults to gnu11 mode. Many C99 and C11 features are +clang supports the -std option, which changes what language mode clang uses. +The supported modes for C are c89, gnu89, c99, gnu99, c11, gnu11, c17, gnu17, +c2x, gnu2x, and various aliases for those modes. If no -std option is +specified, clang defaults to gnu17 mode. Many C99 and C11 features are supported in earlier modes as a conforming extension, with a warning. Use ``-pedantic-errors`` to request an error if a feature from a later standard revision is used in an earlier mode. 
diff --git a/clang/docs/analyzer/developer-docs/DebugChecks.rst b/clang/docs/analyzer/developer-docs/DebugChecks.rst index 3f9bed78604f0d..05b3e2480d3b7e 100644 --- a/clang/docs/analyzer/developer-docs/DebugChecks.rst +++ b/clang/docs/analyzer/developer-docs/DebugChecks.rst @@ -275,6 +275,28 @@ ExprInspection checks See clang_analyzer_denote(). +- ``void clang_analyzer_isTainted(a single argument of any type);`` + + Queries the analyzer whether the expression used as argument is tainted or not. + This is useful in tests, where we don't want to issue warning for all tainted + expressions but only check for certain expressions. + This would help to reduce the *noise* that the `TaintTest` debug checker would + introduce and let you focus on the `expected-warning`s that you really care + about. + + Example usage:: + + int read_integer() { + int n; + clang_analyzer_isTainted(n); // expected-warning{{NO}} + scanf("%d", &n); + clang_analyzer_isTainted(n); // expected-warning{{YES}} + clang_analyzer_isTainted(n + 2); // expected-warning{{YES}} + clang_analyzer_isTainted(n > 0); // expected-warning{{YES}} + int next_tainted_value = n; // no-warning + return n; + } + Statistics ========== diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index efb96f3cc5b6bf..9d4930a3887a74 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2574,7 +2574,11 @@ enum CXCursorKind { */ CXCursor_OMPParallelMasterDirective = 285, - CXCursor_LastStmt = CXCursor_OMPParallelMasterDirective, + /** OpenMP depobj directive. + */ + CXCursor_OMPDepobjDirective = 286, + + CXCursor_LastStmt = CXCursor_OMPDepobjDirective, /** * Cursor that represents the translation unit itself. 
diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 453c068bbeb074..fa727837a80298 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -4108,6 +4108,92 @@ class OMPFlushClause final } }; +/// This represents implicit clause 'depobj' for the '#pragma omp depobj' +/// directive. +/// This clause does not exist by itself, it can be only as a part of 'omp +/// depobj' directive. This clause is introduced to keep the original structure +/// of \a OMPExecutableDirective class and its derivatives and to use the +/// existing infrastructure of clauses with the list of variables. +/// +/// \code +/// #pragma omp depobj(a) destroy +/// \endcode +/// In this example directive '#pragma omp depobj' has implicit clause 'depobj' +/// with the depobj 'a'. +class OMPDepobjClause final : public OMPClause { + friend class OMPClauseReader; + + /// Location of '('. + SourceLocation LParenLoc; + + /// Chunk size. + Expr *Depobj = nullptr; + + /// Build clause with number of variables \a N. + /// + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param EndLoc Ending location of the clause. + OMPDepobjClause(SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation EndLoc) + : OMPClause(OMPC_depobj, StartLoc, EndLoc), LParenLoc(LParenLoc) {} + + /// Build an empty clause. + /// + explicit OMPDepobjClause() + : OMPClause(OMPC_depobj, SourceLocation(), SourceLocation()) {} + + void setDepobj(Expr *E) { Depobj = E; } + + /// Sets the location of '('. + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + +public: + /// Creates clause. + /// + /// \param C AST context. + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param EndLoc Ending location of the clause. + /// \param Depobj depobj expression associated with the 'depobj' directive. 
+ static OMPDepobjClause *Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc, Expr *Depobj); + + /// Creates an empty clause. + /// + /// \param C AST context. + static OMPDepobjClause *CreateEmpty(const ASTContext &C); + + /// Returns depobj expression associated with the clause. + Expr *getDepobj() { return Depobj; } + const Expr *getDepobj() const { return Depobj; } + + /// Returns the location of '('. + SourceLocation getLParenLoc() const { return LParenLoc; } + + child_range children() { + return child_range(reinterpret_cast(&Depobj), + reinterpret_cast(&Depobj) + 1); + } + + const_child_range children() const { + auto Children = const_cast(this)->children(); + return const_child_range(Children.begin(), Children.end()); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == OMPC_depobj; + } +}; + /// This represents implicit clause 'depend' for the '#pragma omp task' /// directive. /// @@ -6607,6 +6693,46 @@ class OMPOrderClause final : public OMPClause { } }; +/// This represents 'destroy' clause in the '#pragma omp depobj' +/// directive. +/// +/// \code +/// #pragma omp depobj(a) destroy +/// \endcode +/// In this example directive '#pragma omp depobj' has 'destroy' clause. +class OMPDestroyClause final : public OMPClause { +public: + /// Build 'destroy' clause. + /// + /// \param StartLoc Starting location of the clause. + /// \param EndLoc Ending location of the clause. + OMPDestroyClause(SourceLocation StartLoc, SourceLocation EndLoc) + : OMPClause(OMPC_destroy, StartLoc, EndLoc) {} + + /// Build an empty clause. 
+ OMPDestroyClause() + : OMPClause(OMPC_destroy, SourceLocation(), SourceLocation()) {} + + child_range children() { + return child_range(child_iterator(), child_iterator()); + } + + const_child_range children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == OMPC_destroy; + } +}; + /// This class implements a simple visitor for OMPClause /// subclasses. template class Ptr, typename RetTy> diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 29b2c354100209..3dc9af4b804209 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -2842,6 +2842,9 @@ DEF_TRAVERSE_STMT(OMPCancelDirective, DEF_TRAVERSE_STMT(OMPFlushDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPDepobjDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + DEF_TRAVERSE_STMT(OMPOrderedDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) @@ -3156,6 +3159,11 @@ bool RecursiveASTVisitor::VisitOMPNogroupClause(OMPNogroupClause *) { return true; } +template +bool RecursiveASTVisitor::VisitOMPDestroyClause(OMPDestroyClause *) { + return true; +} + template template bool RecursiveASTVisitor::VisitOMPClauseList(T *Node) { @@ -3347,6 +3355,12 @@ bool RecursiveASTVisitor::VisitOMPFlushClause(OMPFlushClause *C) { return true; } +template +bool RecursiveASTVisitor::VisitOMPDepobjClause(OMPDepobjClause *C) { + TRY_TO(TraverseStmt(C->getDepobj())); + return true; +} + template bool RecursiveASTVisitor::VisitOMPDependClause(OMPDependClause *C) { TRY_TO(VisitOMPClauseList(C)); diff --git a/clang/include/clang/AST/StmtOpenMP.h 
b/clang/include/clang/AST/StmtOpenMP.h index 55649079bd2b17..5f7589acdb9e34 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -2314,6 +2314,64 @@ class OMPFlushDirective : public OMPExecutableDirective { } }; +/// This represents '#pragma omp depobj' directive. +/// +/// \code +/// #pragma omp depobj(a) depend(in:x,y) +/// \endcode +/// In this example directive '#pragma omp depobj' initializes a depobj object +/// 'a' with dependence type 'in' and a list with 'x' and 'y' locators. +class OMPDepobjDirective final : public OMPExecutableDirective { + friend class ASTStmtReader; + + /// Build directive with the given start and end location. + /// + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending location of the directive. + /// \param NumClauses Number of clauses. + /// + OMPDepobjDirective(SourceLocation StartLoc, SourceLocation EndLoc, + unsigned NumClauses) + : OMPExecutableDirective(this, OMPDepobjDirectiveClass, + llvm::omp::OMPD_depobj, StartLoc, EndLoc, + NumClauses, 0) {} + + /// Build an empty directive. + /// + /// \param NumClauses Number of clauses. + /// + explicit OMPDepobjDirective(unsigned NumClauses) + : OMPExecutableDirective(this, OMPDepobjDirectiveClass, + llvm::omp::OMPD_depobj, SourceLocation(), + SourceLocation(), NumClauses, 0) {} + +public: + /// Creates directive with a list of \a Clauses. + /// + /// \param C AST context. + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending Location of the directive. + /// \param Clauses List of clauses. + /// + static OMPDepobjDirective *Create(const ASTContext &C, + SourceLocation StartLoc, + SourceLocation EndLoc, + ArrayRef Clauses); + + /// Creates an empty directive with the place for \a NumClauses + /// clauses. + /// + /// \param C AST context. + /// \param NumClauses Number of clauses. 
+ /// + static OMPDepobjDirective *CreateEmpty(const ASTContext &C, + unsigned NumClauses, EmptyShell); + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPDepobjDirectiveClass; + } +}; + /// This represents '#pragma omp ordered' directive. /// /// \code diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 2d6978f3f41343..e6155d5d0e1018 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10031,8 +10031,14 @@ def note_omp_invalid_subscript_on_this_ptr_map : Note < "expected 'this' subscript expression on map clause to be 'this[0]'">; def err_omp_invalid_map_this_expr : Error < "invalid 'this' expression on 'map' clause">; -def err_implied_omp_allocator_handle_t_not_found : Error< - "omp_allocator_handle_t type not found; include ">; +def err_omp_implied_type_not_found : Error< + "'%0' type not found; include ">; +def err_omp_expected_omp_depend_t_lvalue : Error< + "expected lvalue expression%select{ of 'omp_depend_t' type, not %1|}0">; +def err_omp_depobj_expected : Error< + "expected depobj expression">; +def err_omp_depobj_single_clause_expected : Error< + "exactly one of 'depend', 'destroy', or 'update' clauses is expected">; def err_omp_expected_predefined_allocator : Error< "expected one of the predefined allocators for the variables with the static " "storage: 'omp_default_mem_alloc', 'omp_large_cap_mem_alloc', " diff --git a/clang/include/clang/Basic/OpenMPKinds.def b/clang/include/clang/Basic/OpenMPKinds.def index f2913fe8e9bbdf..388204c3c1934e 100644 --- a/clang/include/clang/Basic/OpenMPKinds.def +++ b/clang/include/clang/Basic/OpenMPKinds.def @@ -209,6 +209,9 @@ #ifndef OPENMP_FLUSH_CLAUSE #define OPENMP_FLUSH_CLAUSE(Name) #endif +#ifndef OPENMP_DEPOBJ_CLAUSE +#define OPENMP_DEPOBJ_CLAUSE(Name) +#endif // OpenMP clauses. 
OPENMP_CLAUSE(allocator, OMPAllocatorClause) @@ -272,6 +275,8 @@ OPENMP_CLAUSE(atomic_default_mem_order, OMPAtomicDefaultMemOrderClause) OPENMP_CLAUSE(allocate, OMPAllocateClause) OPENMP_CLAUSE(nontemporal, OMPNontemporalClause) OPENMP_CLAUSE(order, OMPOrderClause) +OPENMP_CLAUSE(depobj, OMPDepobjClause) +OPENMP_CLAUSE(destroy, OMPDestroyClause) // Clauses allowed for OpenMP directive 'parallel'. OPENMP_PARALLEL_CLAUSE(if) @@ -1078,6 +1083,11 @@ OPENMP_FLUSH_CLAUSE(acq_rel) OPENMP_FLUSH_CLAUSE(acquire) OPENMP_FLUSH_CLAUSE(release) +// Clauses allowed for OpenMP directive 'depobj'. +OPENMP_DEPOBJ_CLAUSE(depend) +OPENMP_DEPOBJ_CLAUSE(destroy) + +#undef OPENMP_DEPOBJ_CLAUSE #undef OPENMP_FLUSH_CLAUSE #undef OPENMP_ORDER_KIND #undef OPENMP_LASTPRIVATE_KIND diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index 2333202968198e..41c6dbdb42e958 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -232,6 +232,7 @@ def OMPBarrierDirective : StmtNode; def OMPTaskwaitDirective : StmtNode; def OMPTaskgroupDirective : StmtNode; def OMPFlushDirective : StmtNode; +def OMPDepobjDirective : StmtNode; def OMPOrderedDirective : StmtNode; def OMPAtomicDirective : StmtNode; def OMPTargetDirective : StmtNode; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index f1dfe411983a6e..9a3fc9585c98e7 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -9994,6 +9994,10 @@ class Sema final { StmtResult ActOnOpenMPFlushDirective(ArrayRef Clauses, SourceLocation StartLoc, SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp depobj'. + StmtResult ActOnOpenMPDepobjDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc); /// Called on well-formed '\#pragma omp ordered' after parsing of the /// associated statement. 
StmtResult ActOnOpenMPOrderedDirective(ArrayRef Clauses, @@ -10340,6 +10344,9 @@ class Sema final { /// Called on well-formed 'relaxed' clause. OMPClause *ActOnOpenMPRelaxedClause(SourceLocation StartLoc, SourceLocation EndLoc); + /// Called on well-formed 'destroy' clause. + OMPClause *ActOnOpenMPDestroyClause(SourceLocation StartLoc, + SourceLocation EndLoc); /// Called on well-formed 'threads' clause. OMPClause *ActOnOpenMPThreadsClause(SourceLocation StartLoc, SourceLocation EndLoc); @@ -10452,6 +10459,10 @@ class Sema final { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); + /// Called on well-formed 'depobj' pseudo clause. + OMPClause *ActOnOpenMPDepobjClause(Expr *Depobj, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); /// Called on well-formed 'depend' clause. OMPClause * ActOnOpenMPDependClause(OpenMPDependClauseKind DepKind, SourceLocation DepLoc, diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 83af4d15e27b59..0767fb8e22c18a 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1825,6 +1825,7 @@ namespace serialization { STMT_OMP_BARRIER_DIRECTIVE, STMT_OMP_TASKWAIT_DIRECTIVE, STMT_OMP_FLUSH_DIRECTIVE, + STMT_OMP_DEPOBJ_DIRECTIVE, STMT_OMP_ORDERED_DIRECTIVE, STMT_OMP_ATOMIC_DIRECTIVE, STMT_OMP_TARGET_DIRECTIVE, diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h index 19d120ebbc9f81..2ee84007481082 100644 --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -245,6 +245,10 @@ class TokenBuffer { /// "DECL", "(", "a", ")", ";"} llvm::ArrayRef spelledTokens(FileID FID) const; + /// Returns the spelled Token starting at Loc, if there are no such tokens + /// returns nullptr. 
+ const syntax::Token *spelledTokenAt(SourceLocation Loc) const; + /// Get all tokens that expand a macro in \p FID. For the following input /// #define FOO B /// #define FOO2(X) int X diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index bc4f2b491e11cc..93a8aab7c06855 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -474,10 +474,20 @@ void ASTContext::attachCommentsToJustParsedDecls(ArrayRef Decls, if (Comments.empty() || Decls.empty()) return; - // See if there are any new comments that are not attached to a decl. - // The location doesn't have to be precise - we care only about the file. - const FileID File = - SourceMgr.getDecomposedLoc((*Decls.begin())->getLocation()).first; + FileID File; + for (Decl *D : Decls) { + SourceLocation Loc = D->getLocation(); + if (Loc.isValid()) { + // See if there are any new comments that are not attached to a decl. + // The location doesn't have to be precise - we care only about the file. + File = SourceMgr.getDecomposedLoc(Loc).first; + break; + } + } + + if (File.isInvalid()) + return; + auto CommentsInThisFile = Comments.getCommentsInFile(File); if (!CommentsInThisFile || CommentsInThisFile->empty() || CommentsInThisFile->rbegin()->second->isAttached()) diff --git a/clang/lib/AST/CommentCommandTraits.cpp b/clang/lib/AST/CommentCommandTraits.cpp index b306fcbb154f3c..bdc0dd47fb7d2f 100644 --- a/clang/lib/AST/CommentCommandTraits.cpp +++ b/clang/lib/AST/CommentCommandTraits.cpp @@ -8,6 +8,7 @@ #include "clang/AST/CommentCommandTraits.h" #include "llvm/ADT/STLExtras.h" +#include namespace clang { namespace comments { diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index c5c9bc72c294ea..2bd02a0cda4f7c 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -111,6 +111,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { case OMPC_mergeable: case OMPC_threadprivate: case OMPC_flush: + case 
OMPC_depobj: case OMPC_read: case OMPC_write: case OMPC_update: @@ -142,6 +143,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { case OMPC_match: case OMPC_nontemporal: case OMPC_order: + case OMPC_destroy: break; } @@ -189,6 +191,7 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C) case OMPC_mergeable: case OMPC_threadprivate: case OMPC_flush: + case OMPC_depobj: case OMPC_read: case OMPC_write: case OMPC_update: @@ -226,6 +229,7 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C) case OMPC_match: case OMPC_nontemporal: case OMPC_order: + case OMPC_destroy: break; } @@ -835,6 +839,20 @@ OMPFlushClause *OMPFlushClause::CreateEmpty(const ASTContext &C, unsigned N) { return new (Mem) OMPFlushClause(N); } +OMPDepobjClause *OMPDepobjClause::Create(const ASTContext &C, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation RParenLoc, + Expr *Depobj) { + auto *Clause = new (C) OMPDepobjClause(StartLoc, LParenLoc, RParenLoc); + Clause->setDepobj(Depobj); + return Clause; +} + +OMPDepobjClause *OMPDepobjClause::CreateEmpty(const ASTContext &C) { + return new (C) OMPDepobjClause(); +} + OMPDependClause * OMPDependClause::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc, @@ -1407,6 +1425,10 @@ void OMPClausePrinter::VisitOMPHintClause(OMPHintClause *Node) { OS << ")"; } +void OMPClausePrinter::VisitOMPDestroyClause(OMPDestroyClause *) { + OS << "destroy"; +} + template void OMPClausePrinter::VisitOMPClauseList(T *Node, char StartSym) { for (typename T::varlist_iterator I = Node->varlist_begin(), @@ -1597,6 +1619,12 @@ void OMPClausePrinter::VisitOMPFlushClause(OMPFlushClause *Node) { } } +void OMPClausePrinter::VisitOMPDepobjClause(OMPDepobjClause *Node) { + OS << "("; + Node->getDepobj()->printPretty(OS, nullptr, Policy, 0); + OS << ")"; +} + void OMPClausePrinter::VisitOMPDependClause(OMPDependClause *Node) { OS 
<< "depend("; OS << getOpenMPSimpleClauseTypeName(Node->getClauseKind(), diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 15bedb9791df9f..153d492598d3fd 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -10,9 +10,8 @@ // //===----------------------------------------------------------------------===// -#include "clang/AST/StmtOpenMP.h" - #include "clang/AST/ASTContext.h" +#include "clang/AST/StmtOpenMP.h" using namespace clang; using namespace llvm::omp; @@ -759,6 +758,29 @@ OMPFlushDirective *OMPFlushDirective::CreateEmpty(const ASTContext &C, return new (Mem) OMPFlushDirective(NumClauses); } +OMPDepobjDirective *OMPDepobjDirective::Create(const ASTContext &C, + SourceLocation StartLoc, + SourceLocation EndLoc, + ArrayRef Clauses) { + unsigned Size = + llvm::alignTo(sizeof(OMPDepobjDirective), alignof(OMPClause *)); + void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size(), + alignof(OMPDepobjDirective)); + auto *Dir = new (Mem) OMPDepobjDirective(StartLoc, EndLoc, Clauses.size()); + Dir->setClauses(Clauses); + return Dir; +} + +OMPDepobjDirective *OMPDepobjDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, + EmptyShell) { + unsigned Size = + llvm::alignTo(sizeof(OMPDepobjDirective), alignof(OMPClause *)); + void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses, + alignof(OMPDepobjDirective)); + return new (Mem) OMPDepobjDirective(NumClauses); +} + OMPOrderedDirective *OMPOrderedDirective::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 76295ae13b6004..f7a97c2743c190 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -753,6 +753,11 @@ void StmtPrinter::VisitOMPFlushDirective(OMPFlushDirective *Node) { PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPDepobjDirective(OMPDepobjDirective *Node) { + Indent() << "#pragma omp 
depobj"; + PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPOrderedDirective(OMPOrderedDirective *Node) { Indent() << "#pragma omp ordered"; PrintOMPExecutableDirective(Node, Node->hasClausesOfKind()); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 76c5fe2e540298..9f119837023599 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -532,6 +532,8 @@ void OMPClauseProfiler::VisitOMPSIMDClause(const OMPSIMDClause *) {} void OMPClauseProfiler::VisitOMPNogroupClause(const OMPNogroupClause *) {} +void OMPClauseProfiler::VisitOMPDestroyClause(const OMPDestroyClause *) {} + template void OMPClauseProfiler::VisitOMPClauseList(T *Node) { for (auto *E : Node->varlists()) { @@ -719,6 +721,10 @@ OMPClauseProfiler::VisitOMPCopyprivateClause(const OMPCopyprivateClause *C) { void OMPClauseProfiler::VisitOMPFlushClause(const OMPFlushClause *C) { VisitOMPClauseList(C); } +void OMPClauseProfiler::VisitOMPDepobjClause(const OMPDepobjClause *C) { + if (const Expr *Depobj = C->getDepobj()) + Profiler->VisitStmt(Depobj); +} void OMPClauseProfiler::VisitOMPDependClause(const OMPDependClause *C) { VisitOMPClauseList(C); } @@ -885,6 +891,10 @@ void StmtProfiler::VisitOMPFlushDirective(const OMPFlushDirective *S) { VisitOMPExecutableDirective(S); } +void StmtProfiler::VisitOMPDepobjDirective(const OMPDepobjDirective *S) { + VisitOMPExecutableDirective(S); +} + void StmtProfiler::VisitOMPOrderedDirective(const OMPOrderedDirective *S) { VisitOMPExecutableDirective(S); } diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index e7c3a8a8021ed0..8de233c191350f 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -25,7 +25,14 @@ OpenMPClauseKind clang::getOpenMPClauseKind(StringRef Str) { // clause for 'flush' directive. If the 'flush' clause is explicitly specified // the Parser should generate a warning about extra tokens at the end of the // directive. 
- if (Str == "flush") + // 'depobj' clause cannot be specified explicitly, because this is an implicit + // clause for 'depobj' directive. If the 'depobj' clause is explicitly + // specified the Parser should generate a warning about extra tokens at the + // end of the directive. + if (llvm::StringSwitch(Str) + .Case("flush", true) + .Case("depobj", true) + .Default(false)) return OMPC_unknown; return llvm::StringSwitch(Str) #define OPENMP_CLAUSE(Name, Class) .Case(#Name, OMPC_##Name) @@ -166,6 +173,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, case OMPC_untied: case OMPC_mergeable: case OMPC_flush: + case OMPC_depobj: case OMPC_read: case OMPC_write: case OMPC_update: @@ -194,6 +202,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, case OMPC_dynamic_allocators: case OMPC_match: case OMPC_nontemporal: + case OMPC_destroy: break; } llvm_unreachable("Invalid OpenMP simple clause kind"); @@ -380,6 +389,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, case OMPC_untied: case OMPC_mergeable: case OMPC_flush: + case OMPC_depobj: case OMPC_read: case OMPC_write: case OMPC_update: @@ -408,6 +418,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, case OMPC_dynamic_allocators: case OMPC_match: case OMPC_nontemporal: + case OMPC_destroy: break; } llvm_unreachable("Invalid OpenMP simple clause kind"); @@ -553,6 +564,20 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind, break; } break; + case OMPD_depobj: + if (OpenMPVersion < 50) + return false; + switch (CKind) { +#define OPENMP_DEPOBJ_CLAUSE(Name) \ + case OMPC_##Name: \ + return true; +#include "clang/Basic/OpenMPKinds.def" + case OMPC_depobj: + return true; + default: + break; + } + break; case OMPD_atomic: if (OpenMPVersion < 50 && (CKind == OMPC_acq_rel || CKind == OMPC_acquire || @@ -1195,6 +1220,7 @@ void clang::getOpenMPCaptureRegions( case OMPD_cancellation_point: case OMPD_cancel: case OMPD_flush: + 
case OMPD_depobj: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_declare_simd: diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 79c2f1b107842e..9fe03069a44e1c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6703,6 +6703,7 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: case OMPD_teams: case OMPD_target_data: case OMPD_target_exit_data: @@ -7014,6 +7015,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: case OMPD_teams: case OMPD_target_data: case OMPD_target_exit_data: @@ -8798,6 +8800,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: case OMPD_teams: case OMPD_target_data: case OMPD_target_exit_data: @@ -9561,6 +9564,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: case OMPD_teams: case OMPD_target_data: case OMPD_target_exit_data: @@ -10201,6 +10205,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: case OMPD_teams: case OMPD_target_data: case OMPD_distribute: diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 867bfb0727367b..b139529d8eb349 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -786,6 +786,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: case OMPD_teams: case OMPD_target_data: case OMPD_target_exit_data: @@ -862,6 +863,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case 
OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: case OMPD_teams: case OMPD_target_data: case OMPD_target_exit_data: @@ -1031,6 +1033,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: case OMPD_teams: case OMPD_target_data: case OMPD_target_exit_data: @@ -1113,6 +1116,7 @@ static bool supportsLightweightRuntime(ASTContext &Ctx, case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: case OMPD_teams: case OMPD_target_data: case OMPD_target_exit_data: diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 238e04999499a4..a334bab06783ee 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -247,6 +247,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { case Stmt::OMPFlushDirectiveClass: EmitOMPFlushDirective(cast(*S)); break; + case Stmt::OMPDepobjDirectiveClass: + EmitOMPDepobjDirective(cast(*S)); + break; case Stmt::OMPOrderedDirectiveClass: EmitOMPOrderedDirective(cast(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index bcd2d0635caf16..bab7c6d0dcde08 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -3800,6 +3800,8 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { S.getBeginLoc(), AO); } +void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {} + void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr) { @@ -4543,6 +4545,7 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_copyin: case OMPC_copyprivate: case OMPC_flush: + case OMPC_depobj: case OMPC_proc_bind: case OMPC_schedule: case OMPC_ordered: @@ -4578,6 +4581,7 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_match: case OMPC_nontemporal: 
case OMPC_order: + case OMPC_destroy: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 14111713ccac18..907b4d744b07a3 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3262,6 +3262,7 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S); void EmitOMPTaskgroupDirective(const OMPTaskgroupDirective &S); void EmitOMPFlushDirective(const OMPFlushDirective &S); + void EmitOMPDepobjDirective(const OMPDepobjDirective &S); void EmitOMPOrderedDirective(const OMPOrderedDirective &S); void EmitOMPAtomicDirective(const OMPAtomicDirective &S); void EmitOMPTargetDirective(const OMPTargetDirective &S); diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index c356657541fa3e..d20d6298758919 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -309,7 +309,7 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { } } -static bool getPIE(const ArgList &Args, const toolchains::Linux &ToolChain) { +static bool getPIE(const ArgList &Args, const ToolChain &TC) { if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_static) || Args.hasArg(options::OPT_r) || Args.hasArg(options::OPT_static_pie)) return false; @@ -317,17 +317,16 @@ static bool getPIE(const ArgList &Args, const toolchains::Linux &ToolChain) { Arg *A = Args.getLastArg(options::OPT_pie, options::OPT_no_pie, options::OPT_nopie); if (!A) - return ToolChain.isPIEDefault(); + return TC.isPIEDefault(); return A->getOption().matches(options::OPT_pie); } -static bool getStaticPIE(const ArgList &Args, - const toolchains::Linux &ToolChain) { +static bool getStaticPIE(const ArgList &Args, const ToolChain &TC) { bool HasStaticPIE = Args.hasArg(options::OPT_static_pie); // -no-pie is an alias for -nopie. 
So, handling -nopie takes care of // -no-pie as well. if (HasStaticPIE && Args.hasArg(options::OPT_nopie)) { - const Driver &D = ToolChain.getDriver(); + const Driver &D = TC.getDriver(); const llvm::opt::OptTable &Opts = D.getOpts(); const char *StaticPIEName = Opts.getOptionName(options::OPT_static_pie); const char *NoPIEName = Opts.getOptionName(options::OPT_nopie); @@ -346,8 +345,12 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { - const toolchains::Linux &ToolChain = - static_cast(getToolChain()); + // FIXME: The Linker class constructor takes a ToolChain and not a + // Generic_ELF, so the static_cast might return a reference to a invalid + // instance (see PR45061). Ideally, the Linker constructor needs to take a + // Generic_ELF instead. + const toolchains::Generic_ELF &ToolChain = + static_cast(getToolChain()); const Driver &D = ToolChain.getDriver(); const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); @@ -418,8 +421,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (isAndroid) CmdArgs.push_back("--warn-shared-textrel"); - for (const auto &Opt : ToolChain.ExtraOpts) - CmdArgs.push_back(Opt.c_str()); + ToolChain.addExtraOpts(CmdArgs); CmdArgs.push_back("--eh-frame-hdr"); diff --git a/clang/lib/Driver/ToolChains/Gnu.h b/clang/lib/Driver/ToolChains/Gnu.h index 083f74c0547744..fa50b56bf95412 100644 --- a/clang/lib/Driver/ToolChains/Gnu.h +++ b/clang/lib/Driver/ToolChains/Gnu.h @@ -356,6 +356,12 @@ class LLVM_LIBRARY_VISIBILITY Generic_ELF : public Generic_GCC { void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadKind) const override; + + virtual std::string getDynamicLinker(const llvm::opt::ArgList &Args) const { + return {}; + } + + virtual void addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const {} }; } // 
end namespace toolchains diff --git a/clang/lib/Driver/ToolChains/Hurd.cpp b/clang/lib/Driver/ToolChains/Hurd.cpp index 72286bd09f1350..ce1806c4043b15 100644 --- a/clang/lib/Driver/ToolChains/Hurd.cpp +++ b/clang/lib/Driver/ToolChains/Hurd.cpp @@ -61,8 +61,7 @@ static StringRef getOSLibDir(const llvm::Triple &Triple, const ArgList &Args) { return Triple.isArch32Bit() ? "lib" : "lib64"; } -Hurd::Hurd(const Driver &D, const llvm::Triple &Triple, - const ArgList &Args) +Hurd::Hurd(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) : Generic_ELF(D, Triple, Args) { std::string SysRoot = computeSysRoot(); path_list &Paths = getFilePaths(); @@ -170,3 +169,8 @@ void Hurd::AddClangSystemIncludeArgs(const ArgList &DriverArgs, addExternCSystemInclude(DriverArgs, CC1Args, SysRoot + "/usr/include"); } + +void Hurd::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const { + for (const auto &Opt : ExtraOpts) + CmdArgs.push_back(Opt.c_str()); +} diff --git a/clang/lib/Driver/ToolChains/Hurd.h b/clang/lib/Driver/ToolChains/Hurd.h index 86c6c3f734dd91..8f88d7e8e58e4e 100644 --- a/clang/lib/Driver/ToolChains/Hurd.h +++ b/clang/lib/Driver/ToolChains/Hurd.h @@ -27,9 +27,11 @@ class LLVM_LIBRARY_VISIBILITY Hurd : public Generic_ELF { AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; - virtual std::string computeSysRoot() const; + std::string computeSysRoot() const; - virtual std::string getDynamicLinker(const llvm::opt::ArgList &Args) const; + std::string getDynamicLinker(const llvm::opt::ArgList &Args) const override; + + void addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const override; std::vector ExtraOpts; diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index d8d8a8da8fca2f..3d76e680114990 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -1007,3 +1007,8 @@ llvm::DenormalMode Linux::getDefaultDenormalModeForType( 
return llvm::DenormalMode::getIEEE(); } } + +void Linux::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const { + for (const auto &Opt : ExtraOpts) + CmdArgs.push_back(Opt.c_str()); +} diff --git a/clang/lib/Driver/ToolChains/Linux.h b/clang/lib/Driver/ToolChains/Linux.h index e3c0103ac3e5d8..999f991b636074 100644 --- a/clang/lib/Driver/ToolChains/Linux.h +++ b/clang/lib/Driver/ToolChains/Linux.h @@ -42,7 +42,9 @@ class LLVM_LIBRARY_VISIBILITY Linux : public Generic_ELF { llvm::opt::ArgStringList &CmdArgs) const override; virtual std::string computeSysRoot() const; - virtual std::string getDynamicLinker(const llvm::opt::ArgList &Args) const; + std::string getDynamicLinker(const llvm::opt::ArgList &Args) const override; + + void addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const override; std::vector ExtraOpts; diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index ac117840ea3358..d0d08e470e6c19 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -106,7 +106,7 @@ namespace format { TYPE(CSharpNullable) \ TYPE(CSharpNullCoalescing) \ TYPE(CSharpNullConditional) \ - TYPE(CSharpNullConditionalSq) \ + TYPE(CSharpNullConditionalLSquare) \ TYPE(Unknown) enum TokenType { diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index da73361ee3d5b8..8fa78b773e5eb8 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -345,7 +345,7 @@ bool FormatTokenLexer::tryMergeCSharpNullConditional() { if (PeriodOrLSquare->is(tok::l_square)) { Question->Tok.setKind(tok::question); // no '?[' in clang tokens. - Question->Type = TT_CSharpNullConditionalSq; + Question->Type = TT_CSharpNullConditionalLSquare; } else { Question->Tok.setKind(tok::question); // no '?.' in clang tokens. 
Question->Type = TT_CSharpNullConditional; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index e1e08686ac44e8..35e0b423cfc492 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -972,7 +972,7 @@ class AnnotatingParser { } break; case tok::question: - if (Tok->is(TT_CSharpNullConditionalSq)) { + if (Tok->is(TT_CSharpNullConditionalLSquare)) { if (!parseSquare()) return false; break; @@ -1456,7 +1456,7 @@ class AnnotatingParser { return; } if (CurrentToken->TokenText == "?[") { - Current.Type = TT_CSharpNullConditionalSq; + Current.Type = TT_CSharpNullConditionalLSquare; return; } } @@ -2947,11 +2947,11 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return true; // No space before '?['. - if (Right.is(TT_CSharpNullConditionalSq)) + if (Right.is(TT_CSharpNullConditionalLSquare)) return false; // Possible space inside `?[ 0 ]`. - if (Left.is(TT_CSharpNullConditionalSq)) + if (Left.is(TT_CSharpNullConditionalLSquare)) return Style.SpacesInSquareBrackets; // space between keywords and paren e.g. 
"using (" diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 9cc41c9d96f898..8638d4300b218d 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2263,7 +2263,7 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK, if (T.isPS4()) LangStd = LangStandard::lang_gnu99; else - LangStd = LangStandard::lang_gnu11; + LangStd = LangStandard::lang_gnu17; #endif break; case Language::ObjC: diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h index 3210f93cc85127..6ac9f92d23a265 100644 --- a/clang/lib/Headers/opencl-c.h +++ b/clang/lib/Headers/opencl-c.h @@ -13432,18 +13432,12 @@ int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, m uint __ovld atomic_fetch_min(volatile atomic_uint *object, uint operand); uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order); uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); -uint __ovld atomic_fetch_min(volatile atomic_uint *object, int operand); -uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order); -uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope); int __ovld atomic_fetch_max(volatile atomic_int *object, int operand); int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order); int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); uint __ovld atomic_fetch_max(volatile atomic_uint *object, uint operand); uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order); uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); 
-uint __ovld atomic_fetch_max(volatile atomic_uint *object, int operand); -uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order); -uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) long __ovld atomic_fetch_add(volatile atomic_long *object, long operand); @@ -13482,18 +13476,12 @@ long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, ulong operand); ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); -ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, long operand); -ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order); -ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope); long __ovld atomic_fetch_max(volatile atomic_long *object, long operand); long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order); long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, ulong operand); ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); -ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, long operand); -ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order); -ulong __ovld 
atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) // OpenCL v2.0 s6.13.11.7.5: diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 8c822ec1d0e9a7..56e88d15f8faaf 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -1842,6 +1842,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( case OMPD_taskwait: case OMPD_taskgroup: case OMPD_flush: + case OMPD_depobj: case OMPD_for: case OMPD_for_simd: case OMPD_sections: @@ -2064,6 +2065,7 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(ParsedStmtContext StmtCtx) { break; } case OMPD_flush: + case OMPD_depobj: case OMPD_taskyield: case OMPD_barrier: case OMPD_taskwait: @@ -2123,10 +2125,13 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(ParsedStmtContext StmtCtx) { case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: case OMPD_target_teams_distribute_simd: { - // Special processing for flush clause. - Token FlushTok; - if (DKind == OMPD_flush) - FlushTok = Tok; + // Special processing for flush and depobj clauses. + Token ImplicitTok; + bool ImplicitClauseAllowed = false; + if (DKind == OMPD_flush || DKind == OMPD_depobj) { + ImplicitTok = Tok; + ImplicitClauseAllowed = true; + } ConsumeToken(); // Parse directive name of the 'critical' directive if any. 
if (DKind == OMPD_critical) { @@ -2156,22 +2161,32 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(ParsedStmtContext StmtCtx) { Actions.StartOpenMPDSABlock(DKind, DirName, Actions.getCurScope(), Loc); while (Tok.isNot(tok::annot_pragma_openmp_end)) { - bool FlushHasClause = false; - if (DKind == OMPD_flush && Tok.is(tok::l_paren)) { - FlushHasClause = true; + bool HasImplicitClause = false; + if (ImplicitClauseAllowed && Tok.is(tok::l_paren)) { + HasImplicitClause = true; // Push copy of the current token back to stream to properly parse - // pseudo-clause OMPFlushClause. + // pseudo-clause OMPFlushClause or OMPDepobjClause. PP.EnterToken(Tok, /*IsReinject*/ true); - PP.EnterToken(FlushTok, /*IsReinject*/ true); + PP.EnterToken(ImplicitTok, /*IsReinject*/ true); ConsumeAnyToken(); } - OpenMPClauseKind CKind = - Tok.isAnnotation() - ? OMPC_unknown - : FlushHasClause ? OMPC_flush - : getOpenMPClauseKind(PP.getSpelling(Tok)); + OpenMPClauseKind CKind = Tok.isAnnotation() + ? OMPC_unknown + : getOpenMPClauseKind(PP.getSpelling(Tok)); + if (HasImplicitClause) { + assert(CKind == OMPC_unknown && "Must be unknown implicit clause."); + if (DKind == OMPD_flush) { + CKind = OMPC_flush; + } else { + assert(DKind == OMPD_depobj && + "Expected flush or depobj directives."); + CKind = OMPC_depobj; + } + } + // No more implicit clauses allowed. 
+ ImplicitClauseAllowed = false; Actions.StartOpenMPClause(CKind); - FlushHasClause = false; + HasImplicitClause = false; OMPClause *Clause = ParseOpenMPClause(DKind, CKind, !FirstClauses[CKind].getInt()); FirstClauses[CKind].setInt(true); @@ -2324,7 +2339,8 @@ bool Parser::ParseOpenMPSimpleVarList( /// nogroup-clause | num_tasks-clause | hint-clause | to-clause | /// from-clause | is_device_ptr-clause | task_reduction-clause | /// in_reduction-clause | allocator-clause | allocate-clause | -/// acq_rel-clause | acquire-clause | release-clause | relaxed-clause +/// acq_rel-clause | acquire-clause | release-clause | relaxed-clause | +/// depobj-clause | destroy-clause /// OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, OpenMPClauseKind CKind, bool FirstClause) { @@ -2355,6 +2371,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, case OMPC_num_tasks: case OMPC_hint: case OMPC_allocator: + case OMPC_depobj: // OpenMP [2.5, Restrictions] // At most one num_threads clause can appear on the directive. // OpenMP [2.8.1, simd construct, Restrictions] @@ -2444,6 +2461,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, case OMPC_unified_shared_memory: case OMPC_reverse_offload: case OMPC_dynamic_allocators: + case OMPC_destroy: // OpenMP [2.7.1, Restrictions, p. 9] // Only one ordered clause can appear on a loop directive. // OpenMP [2.7.1, Restrictions, C/C++, p. 4] diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index a162ff091efd13..04611dadde6614 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -1148,11 +1148,6 @@ namespace { continue; } - if (isFollowedByFallThroughComment(LastStmt)) { - ++AnnotatedCnt; - continue; // Fallthrough comment, good. 
- } - ++UnannotatedCnt; } return !!UnannotatedCnt; @@ -1213,41 +1208,10 @@ namespace { return nullptr; } - bool isFollowedByFallThroughComment(const Stmt *Statement) { - // Try to detect whether the fallthough is marked by a comment like - // /*FALLTHOUGH*/. - bool Invalid; - const char *SourceData = S.getSourceManager().getCharacterData( - Statement->getEndLoc(), &Invalid); - if (Invalid) - return false; - const char *LineStart = SourceData; - for (;;) { - LineStart = strchr(LineStart, '\n'); - if (LineStart == nullptr) - return false; - ++LineStart; // Start of next line. - const char *LineEnd = strchr(LineStart, '\n'); - StringRef Line(LineStart, - LineEnd ? LineEnd - LineStart : strlen(LineStart)); - if (LineStart == LineEnd || - Line.find_first_not_of(" \t\r") == StringRef::npos) - continue; // Whitespace-only line. - if (!FallthroughRegex.isValid()) - FallthroughRegex = - llvm::Regex("(/\\*[ \\t]*fall(s | |-)?thr(ough|u)\\.?[ \\t]*\\*/)" - "|(//[ \\t]*fall(s | |-)?thr(ough|u)\\.?[ \\t]*)", - llvm::Regex::IgnoreCase); - assert(FallthroughRegex.isValid()); - return FallthroughRegex.match(Line); - } - } - bool FoundSwitchStatements; AttrStmts FallthroughStmts; Sema &S; llvm::SmallPtrSet ReachableBlocks; - llvm::Regex FallthroughRegex; }; } // anonymous namespace diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 2a66303d6d9a32..cda6910364e58f 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -11587,7 +11587,16 @@ static void AnalyzeImplicitConversions(Sema &S, Expr *OrigE, SourceLocation CC, if (E->isTypeDependent() || E->isValueDependent()) return; - if (const auto *UO = dyn_cast(E)) + Expr *SourceExpr = E; + // Examine, but don't traverse into the source expression of an + // OpaqueValueExpr, since it may have multiple parents and we don't want to + // emit duplicate diagnostics. 
Its fine to examine the form or attempt to + // evaluate it in the context of checking the specific conversion to T though. + if (auto *OVE = dyn_cast(E)) + if (auto *Src = OVE->getSourceExpr()) + SourceExpr = Src; + + if (const auto *UO = dyn_cast(SourceExpr)) if (UO->getOpcode() == UO_Not && UO->getSubExpr()->isKnownToHaveBooleanValue()) S.Diag(UO->getBeginLoc(), diag::warn_bitwise_negation_bool) @@ -11596,21 +11605,20 @@ static void AnalyzeImplicitConversions(Sema &S, Expr *OrigE, SourceLocation CC, // For conditional operators, we analyze the arguments as if they // were being fed directly into the output. - if (isa(E)) { - ConditionalOperator *CO = cast(E); + if (auto *CO = dyn_cast(SourceExpr)) { CheckConditionalOperator(S, CO, CC, T); return; } // Check implicit argument conversions for function calls. - if (CallExpr *Call = dyn_cast(E)) + if (CallExpr *Call = dyn_cast(SourceExpr)) CheckImplicitArgumentConversions(S, Call, CC); // Go ahead and check any implicit conversions we might have skipped. // The non-canonical typecheck is just an optimization; // CheckImplicitConversion will filter out dead implicit conversions. - if (E->getType() != T) - CheckImplicitConversion(S, E, T, CC, nullptr, IsListInit); + if (SourceExpr->getType() != T) + CheckImplicitConversion(S, SourceExpr, T, CC, nullptr, IsListInit); // Now continue drilling into this expression. 
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 193eaa3e01f936..1e892aa622df76 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1430,6 +1430,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::OMPDistributeParallelForSimdDirectiveClass: case Stmt::OMPDistributeSimdDirectiveClass: case Stmt::OMPFlushDirectiveClass: + case Stmt::OMPDepobjDirectiveClass: case Stmt::OMPForDirectiveClass: case Stmt::OMPForSimdDirectiveClass: case Stmt::OMPMasterDirectiveClass: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index de732577c81b0c..ecabb3aefd20c4 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -266,6 +266,8 @@ class DSAStackTy { SmallVector RequiresDecls; /// omp_allocator_handle_t type. QualType OMPAllocatorHandleT; + /// omp_depend_t type. + QualType OMPDependT; /// Expression for the predefined allocators. Expr *OMPPredefinedAllocators[OMPAllocateDeclAttr::OMPUserDefinedMemAlloc] = { nullptr}; @@ -289,6 +291,10 @@ class DSAStackTy { Expr *getAllocator(OMPAllocateDeclAttr::AllocatorTypeTy AllocatorKind) const { return OMPPredefinedAllocators[AllocatorKind]; } + /// Sets omp_depend_t type. + void setOMPDependT(QualType Ty) { OMPDependT = Ty; } + /// Gets omp_depend_t type. 
+ QualType getOMPDependT() const { return OMPDependT; } bool isClauseParsingMode() const { return ClauseKindMode != OMPC_unknown; } OpenMPClauseKind getClauseParsingMode() const { @@ -3740,6 +3746,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { case OMPD_cancellation_point: case OMPD_cancel: case OMPD_flush: + case OMPD_depobj: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_declare_simd: @@ -4746,6 +4753,11 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( "No associated statement allowed for 'omp flush' directive"); Res = ActOnOpenMPFlushDirective(ClausesWithImplicit, StartLoc, EndLoc); break; + case OMPD_depobj: + assert(AStmt == nullptr && + "No associated statement allowed for 'omp depobj' directive"); + Res = ActOnOpenMPDepobjDirective(ClausesWithImplicit, StartLoc, EndLoc); + break; case OMPD_ordered: Res = ActOnOpenMPOrderedDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); @@ -5029,9 +5041,11 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( case OMPC_is_device_ptr: case OMPC_nontemporal: case OMPC_order: + case OMPC_destroy: continue; case OMPC_allocator: case OMPC_flush: + case OMPC_depobj: case OMPC_threadprivate: case OMPC_uniform: case OMPC_unknown: @@ -8597,6 +8611,28 @@ StmtResult Sema::ActOnOpenMPFlushDirective(ArrayRef Clauses, return OMPFlushDirective::Create(Context, StartLoc, EndLoc, Clauses); } +StmtResult Sema::ActOnOpenMPDepobjDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc) { + if (Clauses.empty()) { + Diag(StartLoc, diag::err_omp_depobj_expected); + return StmtError(); + } else if (Clauses[0]->getClauseKind() != OMPC_depobj) { + Diag(Clauses[0]->getBeginLoc(), diag::err_omp_depobj_expected); + return StmtError(); + } + // Only depobj expression and another single clause is allowed. 
+ if (Clauses.size() > 2) { + Diag(Clauses[2]->getBeginLoc(), + diag::err_omp_depobj_single_clause_expected); + return StmtError(); + } else if (Clauses.size() < 1) { + Diag(Clauses[0]->getEndLoc(), diag::err_omp_depobj_single_clause_expected); + return StmtError(); + } + return OMPDepobjDirective::Create(Context, StartLoc, EndLoc, Clauses); +} + StmtResult Sema::ActOnOpenMPOrderedDirective(ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, @@ -10890,6 +10926,9 @@ OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, case OMPC_hint: Res = ActOnOpenMPHintClause(Expr, StartLoc, LParenLoc, EndLoc); break; + case OMPC_depobj: + Res = ActOnOpenMPDepobjClause(Expr, StartLoc, LParenLoc, EndLoc); + break; case OMPC_if: case OMPC_default: case OMPC_proc_bind: @@ -10942,6 +10981,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, case OMPC_match: case OMPC_nontemporal: case OMPC_order: + case OMPC_destroy: llvm_unreachable("Clause is not allowed."); } return Res; @@ -11071,6 +11111,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_taskwait: case OMPD_cancellation_point: case OMPD_flush: + case OMPD_depobj: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_declare_simd: @@ -11141,6 +11182,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_taskwait: case OMPD_cancellation_point: case OMPD_flush: + case OMPD_depobj: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_declare_simd: @@ -11216,6 +11258,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_taskwait: case OMPD_cancellation_point: case OMPD_flush: + case OMPD_depobj: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_declare_simd: @@ -11288,6 +11331,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_taskwait: case OMPD_cancellation_point: case OMPD_flush: + case OMPD_depobj: case OMPD_declare_reduction: case 
OMPD_declare_mapper: case OMPD_declare_simd: @@ -11361,6 +11405,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_taskwait: case OMPD_cancellation_point: case OMPD_flush: + case OMPD_depobj: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_declare_simd: @@ -11433,6 +11478,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_taskwait: case OMPD_cancellation_point: case OMPD_flush: + case OMPD_depobj: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_declare_simd: @@ -11504,6 +11550,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_taskwait: case OMPD_cancellation_point: case OMPD_flush: + case OMPD_depobj: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_declare_simd: @@ -11578,6 +11625,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_taskwait: case OMPD_cancellation_point: case OMPD_flush: + case OMPD_depobj: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_declare_simd: @@ -11627,6 +11675,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPC_threadprivate: case OMPC_allocate: case OMPC_flush: + case OMPC_depobj: case OMPC_read: case OMPC_write: case OMPC_update: @@ -11658,6 +11707,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPC_match: case OMPC_nontemporal: case OMPC_order: + case OMPC_destroy: llvm_unreachable("Unexpected OpenMP clause."); } return CaptureRegion; @@ -11933,7 +11983,8 @@ static bool findOMPAllocatorHandleT(Sema &S, SourceLocation Loc, Stack->setAllocator(AllocatorKind, Res.get()); } if (ErrorFound) { - S.Diag(Loc, diag::err_implied_omp_allocator_handle_t_not_found); + S.Diag(Loc, diag::err_omp_implied_type_not_found) + << "omp_allocator_handle_t"; return false; } OMPAllocatorHandleT.addConst(); @@ -12052,6 +12103,7 @@ OMPClause *Sema::ActOnOpenMPSimpleClause( case OMPC_threadprivate: case OMPC_allocate: case OMPC_flush: + case OMPC_depobj: 
case OMPC_read: case OMPC_write: case OMPC_update: @@ -12088,6 +12140,7 @@ OMPClause *Sema::ActOnOpenMPSimpleClause( case OMPC_device_type: case OMPC_match: case OMPC_nontemporal: + case OMPC_destroy: llvm_unreachable("Clause is not allowed."); } return Res; @@ -12248,6 +12301,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause( case OMPC_threadprivate: case OMPC_allocate: case OMPC_flush: + case OMPC_depobj: case OMPC_read: case OMPC_write: case OMPC_update: @@ -12284,6 +12338,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause( case OMPC_match: case OMPC_nontemporal: case OMPC_order: + case OMPC_destroy: llvm_unreachable("Clause is not allowed."); } return Res; @@ -12463,6 +12518,9 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind, case OMPC_dynamic_allocators: Res = ActOnOpenMPDynamicAllocatorsClause(StartLoc, EndLoc); break; + case OMPC_destroy: + Res = ActOnOpenMPDestroyClause(StartLoc, EndLoc); + break; case OMPC_if: case OMPC_final: case OMPC_num_threads: @@ -12487,6 +12545,7 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind, case OMPC_threadprivate: case OMPC_allocate: case OMPC_flush: + case OMPC_depobj: case OMPC_depend: case OMPC_device: case OMPC_map: @@ -12610,6 +12669,11 @@ OMPClause *Sema::ActOnOpenMPDynamicAllocatorsClause(SourceLocation StartLoc, return new (Context) OMPDynamicAllocatorsClause(StartLoc, EndLoc); } +OMPClause *Sema::ActOnOpenMPDestroyClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (Context) OMPDestroyClause(StartLoc, EndLoc); +} + OMPClause *Sema::ActOnOpenMPVarListClause( OpenMPClauseKind Kind, ArrayRef VarList, Expr *TailExpr, const OMPVarListLocTy &Locs, SourceLocation ColonLoc, @@ -12712,6 +12776,7 @@ OMPClause *Sema::ActOnOpenMPVarListClause( Res = ActOnOpenMPNontemporalClause(VarList, StartLoc, LParenLoc, EndLoc); break; case OMPC_if: + case OMPC_depobj: case OMPC_final: case OMPC_num_threads: case OMPC_safelen: @@ -12757,6 +12822,7 @@ OMPClause 
*Sema::ActOnOpenMPVarListClause( case OMPC_device_type: case OMPC_match: case OMPC_order: + case OMPC_destroy: llvm_unreachable("Clause is not allowed."); } return Res; @@ -15120,6 +15186,49 @@ OMPClause *Sema::ActOnOpenMPFlushClause(ArrayRef VarList, return OMPFlushClause::Create(Context, StartLoc, LParenLoc, EndLoc, VarList); } +/// Tries to find the omp_depend_t type. +static bool findOMPDependT(Sema &S, SourceLocation Loc, DSAStackTy *Stack) { + QualType OMPDependT = Stack->getOMPDependT(); + if (!OMPDependT.isNull()) + return true; + IdentifierInfo *II = &S.PP.getIdentifierTable().get("omp_depend_t"); + ParsedType PT = S.getTypeName(*II, Loc, S.getCurScope()); + if (!PT.getAsOpaquePtr() || PT.get().isNull()) { + S.Diag(Loc, diag::err_omp_implied_type_not_found) << "omp_depend_t"; + return false; + } + Stack->setOMPDependT(PT.get()); + return true; +} + +OMPClause *Sema::ActOnOpenMPDepobjClause(Expr *Depobj, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + if (!Depobj) + return nullptr; + + bool OMPDependTFound = findOMPDependT(*this, StartLoc, DSAStack); + + // OpenMP 5.0, 2.17.10.1 depobj Construct + // depobj is an lvalue expression of type omp_depend_t. 
+ if (!Depobj->isTypeDependent() && !Depobj->isValueDependent() && + !Depobj->isInstantiationDependent() && + !Depobj->containsUnexpandedParameterPack() && + (OMPDependTFound && + !Context.typesAreCompatible(DSAStack->getOMPDependT(), Depobj->getType(), + /*CompareUnqualified=*/true))) { + Diag(Depobj->getExprLoc(), diag::err_omp_expected_omp_depend_t_lvalue) + << 0 << Depobj->getType() << Depobj->getSourceRange(); + } + + if (!Depobj->isLValue()) { + Diag(Depobj->getExprLoc(), diag::err_omp_expected_omp_depend_t_lvalue) + << 1 << Depobj->getSourceRange(); + } + + return OMPDepobjClause::Create(Context, StartLoc, LParenLoc, EndLoc, Depobj); +} + OMPClause * Sema::ActOnOpenMPDependClause(OpenMPDependClauseKind DepKind, SourceLocation DepLoc, SourceLocation ColonLoc, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 6ad1c61217589c..002b73c3a1dd72 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1811,6 +1811,17 @@ class TreeTransform { EndLoc); } + /// Build a new OpenMP 'depobj' pseudo clause. + /// + /// By default, performs semantic analysis to build the new OpenMP clause. + /// Subclasses may override this routine to provide different behavior. + OMPClause *RebuildOMPDepobjClause(Expr *Depobj, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + return getSema().ActOnOpenMPDepobjClause(Depobj, StartLoc, LParenLoc, + EndLoc); + } + /// Build a new OpenMP 'depend' pseudo clause. /// /// By default, performs semantic analysis to build the new OpenMP clause. 
@@ -8263,6 +8274,17 @@ TreeTransform::TransformOMPFlushDirective(OMPFlushDirective *D) { return Res; } +template +StmtResult +TreeTransform::TransformOMPDepobjDirective(OMPDepobjDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().StartOpenMPDSABlock(OMPD_depobj, DirName, nullptr, + D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().EndOpenMPDSABlock(Res.get()); + return Res; +} + template StmtResult TreeTransform::TransformOMPOrderedDirective(OMPOrderedDirective *D) { @@ -8851,6 +8873,13 @@ TreeTransform::TransformOMPNogroupClause(OMPNogroupClause *C) { return C; } +template +OMPClause * +TreeTransform::TransformOMPDestroyClause(OMPDestroyClause *C) { + // No need to rebuild this clause, no template-dependent parameters. + return C; +} + template OMPClause *TreeTransform::TransformOMPUnifiedAddressClause( OMPUnifiedAddressClause *C) { @@ -9164,6 +9193,16 @@ OMPClause *TreeTransform::TransformOMPFlushClause(OMPFlushClause *C) { C->getLParenLoc(), C->getEndLoc()); } +template +OMPClause * +TreeTransform::TransformOMPDepobjClause(OMPDepobjClause *C) { + ExprResult E = getDerived().TransformExpr(C->getDepobj()); + if (E.isInvalid()) + return nullptr; + return getDerived().RebuildOMPDepobjClause(E.get(), C->getBeginLoc(), + C->getLParenLoc(), C->getEndLoc()); +} + template OMPClause * TreeTransform::TransformOMPDependClause(OMPDependClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a74ccc9ed17959..865a666ce8f445 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11737,6 +11737,9 @@ OMPClause *OMPClauseReader::readClause() { case OMPC_flush: C = OMPFlushClause::CreateEmpty(Context, Record.readInt()); break; + case OMPC_depobj: + C = OMPDepobjClause::CreateEmpty(Context); + break; case OMPC_depend: { unsigned NumVars = Record.readInt(); unsigned NumLoops = Record.readInt(); @@ -11824,6 
+11827,9 @@ OMPClause *OMPClauseReader::readClause() { case OMPC_order: C = new (Context) OMPOrderClause(); break; + case OMPC_destroy: + C = new (Context) OMPDestroyClause(); + break; } assert(C && "Unknown OMPClause type"); @@ -11952,6 +11958,8 @@ void OMPClauseReader::VisitOMPSIMDClause(OMPSIMDClause *) {} void OMPClauseReader::VisitOMPNogroupClause(OMPNogroupClause *) {} +void OMPClauseReader::VisitOMPDestroyClause(OMPDestroyClause *) {} + void OMPClauseReader::VisitOMPUnifiedAddressClause(OMPUnifiedAddressClause *) {} void OMPClauseReader::VisitOMPUnifiedSharedMemoryClause( @@ -12249,6 +12257,11 @@ void OMPClauseReader::VisitOMPFlushClause(OMPFlushClause *C) { C->setVarRefs(Vars); } +void OMPClauseReader::VisitOMPDepobjClause(OMPDepobjClause *C) { + C->setDepobj(Record.readSubExpr()); + C->setLParenLoc(Record.readSourceLocation()); +} + void OMPClauseReader::VisitOMPDependClause(OMPDependClause *C) { C->setLParenLoc(Record.readSourceLocation()); C->setDependencyKind( diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 3da7a71e7f703a..fc83dc42d4d150 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2343,6 +2343,13 @@ void ASTStmtReader::VisitOMPFlushDirective(OMPFlushDirective *D) { VisitOMPExecutableDirective(D); } +void ASTStmtReader::VisitOMPDepobjDirective(OMPDepobjDirective *D) { + VisitStmt(D); + // The NumClauses field was read in ReadStmtFromStream. + Record.skipInts(1); + VisitOMPExecutableDirective(D); +} + void ASTStmtReader::VisitOMPOrderedDirective(OMPOrderedDirective *D) { VisitStmt(D); // The NumClauses field was read in ReadStmtFromStream. 
@@ -3174,6 +3181,11 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { Context, Record[ASTStmtReader::NumStmtFields], Empty); break; + case STMT_OMP_DEPOBJ_DIRECTIVE: + S = OMPDepobjDirective::CreateEmpty( + Context, Record[ASTStmtReader::NumStmtFields], Empty); + break; + case STMT_OMP_ORDERED_DIRECTIVE: S = OMPOrderedDirective::CreateEmpty( Context, Record[ASTStmtReader::NumStmtFields], Empty); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 6f77d4f5d1156b..bf59bca29e8cd3 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6161,6 +6161,8 @@ void OMPClauseWriter::VisitOMPSIMDClause(OMPSIMDClause *) {} void OMPClauseWriter::VisitOMPNogroupClause(OMPNogroupClause *) {} +void OMPClauseWriter::VisitOMPDestroyClause(OMPDestroyClause *) {} + void OMPClauseWriter::VisitOMPPrivateClause(OMPPrivateClause *C) { Record.push_back(C->varlist_size()); Record.AddSourceLocation(C->getLParenLoc()); @@ -6342,6 +6344,11 @@ void OMPClauseWriter::VisitOMPFlushClause(OMPFlushClause *C) { Record.AddStmt(VE); } +void OMPClauseWriter::VisitOMPDepobjClause(OMPDepobjClause *C) { + Record.AddStmt(C->getDepobj()); + Record.AddSourceLocation(C->getLParenLoc()); +} + void OMPClauseWriter::VisitOMPDependClause(OMPDependClause *C) { Record.push_back(C->varlist_size()); Record.push_back(C->getNumLoops()); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 9b6e869e1c3488..d2b1fc2becf108 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2314,6 +2314,13 @@ void ASTStmtWriter::VisitOMPFlushDirective(OMPFlushDirective *D) { Code = serialization::STMT_OMP_FLUSH_DIRECTIVE; } +void ASTStmtWriter::VisitOMPDepobjDirective(OMPDepobjDirective *D) { + VisitStmt(D); + Record.push_back(D->getNumClauses()); + VisitOMPExecutableDirective(D); + Code = serialization::STMT_OMP_DEPOBJ_DIRECTIVE; +} + void 
ASTStmtWriter::VisitOMPOrderedDirective(OMPOrderedDirective *D) { VisitStmt(D); Record.push_back(D->getNumClauses()); diff --git a/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp index 54b364f38f812e..10b27831d89f80 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "Taint.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Checkers/SValExplainer.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" @@ -46,6 +47,7 @@ class ExprInspectionChecker : public Checker(C.getCalleeName(CE)) - .Case("clang_analyzer_eval", &ExprInspectionChecker::analyzerEval) - .Case("clang_analyzer_checkInlined", - &ExprInspectionChecker::analyzerCheckInlined) - .Case("clang_analyzer_crash", &ExprInspectionChecker::analyzerCrash) - .Case("clang_analyzer_warnIfReached", - &ExprInspectionChecker::analyzerWarnIfReached) - .Case("clang_analyzer_warnOnDeadSymbol", - &ExprInspectionChecker::analyzerWarnOnDeadSymbol) - .StartsWith("clang_analyzer_explain", &ExprInspectionChecker::analyzerExplain) - .StartsWith("clang_analyzer_dump", &ExprInspectionChecker::analyzerDump) - .Case("clang_analyzer_getExtent", &ExprInspectionChecker::analyzerGetExtent) - .Case("clang_analyzer_printState", - &ExprInspectionChecker::analyzerPrintState) - .Case("clang_analyzer_numTimesReached", - &ExprInspectionChecker::analyzerNumTimesReached) - .Case("clang_analyzer_hashDump", &ExprInspectionChecker::analyzerHashDump) - .Case("clang_analyzer_denote", &ExprInspectionChecker::analyzerDenote) - .Case("clang_analyzer_express", &ExprInspectionChecker::analyzerExpress) - .Default(nullptr); + FnCheck Handler = + llvm::StringSwitch(C.getCalleeName(CE)) + .Case("clang_analyzer_eval", 
&ExprInspectionChecker::analyzerEval) + .Case("clang_analyzer_checkInlined", + &ExprInspectionChecker::analyzerCheckInlined) + .Case("clang_analyzer_crash", &ExprInspectionChecker::analyzerCrash) + .Case("clang_analyzer_warnIfReached", + &ExprInspectionChecker::analyzerWarnIfReached) + .Case("clang_analyzer_warnOnDeadSymbol", + &ExprInspectionChecker::analyzerWarnOnDeadSymbol) + .StartsWith("clang_analyzer_explain", + &ExprInspectionChecker::analyzerExplain) + .StartsWith("clang_analyzer_dump", + &ExprInspectionChecker::analyzerDump) + .Case("clang_analyzer_getExtent", + &ExprInspectionChecker::analyzerGetExtent) + .Case("clang_analyzer_printState", + &ExprInspectionChecker::analyzerPrintState) + .Case("clang_analyzer_numTimesReached", + &ExprInspectionChecker::analyzerNumTimesReached) + .Case("clang_analyzer_hashDump", + &ExprInspectionChecker::analyzerHashDump) + .Case("clang_analyzer_denote", &ExprInspectionChecker::analyzerDenote) + .Case("clang_analyzer_express", + &ExprInspectionChecker::analyzerExpress) + .StartsWith("clang_analyzer_isTainted", + &ExprInspectionChecker::analyzerIsTainted) + .Default(nullptr); if (!Handler) return false; @@ -412,6 +422,17 @@ void ExprInspectionChecker::analyzerExpress(const CallExpr *CE, reportBug(*Str, C); } +void ExprInspectionChecker::analyzerIsTainted(const CallExpr *CE, + CheckerContext &C) const { + if (CE->getNumArgs() != 1) { + reportBug("clang_analyzer_isTainted() requires exactly one argument", C); + return; + } + const bool IsTainted = + taint::isTainted(C.getState(), CE->getArg(0), C.getLocationContext()); + reportBug(IsTainted ? 
"YES" : "NO", C); +} + void ento::registerExprInspectionChecker(CheckerManager &Mgr) { Mgr.registerChecker(); } diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 801b30a9ab6c6c..1b13c49713ba1d 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1257,6 +1257,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPTaskwaitDirectiveClass: case Stmt::OMPTaskgroupDirectiveClass: case Stmt::OMPFlushDirectiveClass: + case Stmt::OMPDepobjDirectiveClass: case Stmt::OMPOrderedDirectiveClass: case Stmt::OMPAtomicDirectiveClass: case Stmt::OMPTargetDirectiveClass: diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp index ae5bc687553b7f..9e12d8b603bfe4 100644 --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -183,6 +183,16 @@ llvm::ArrayRef TokenBuffer::spelledTokens(FileID FID) const { return It->second.SpelledTokens; } +const syntax::Token *TokenBuffer::spelledTokenAt(SourceLocation Loc) const { + assert(Loc.isFileID()); + const auto *Tok = llvm::partition_point( + spelledTokens(SourceMgr->getFileID(Loc)), + [&](const syntax::Token &Tok) { return Tok.location() < Loc; }); + if (!Tok || Tok->location() != Loc) + return nullptr; + return Tok; +} + std::string TokenBuffer::Mapping::str() const { return std::string( llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})", diff --git a/clang/test/Analysis/debug-exprinspection-istainted.c b/clang/test/Analysis/debug-exprinspection-istainted.c new file mode 100644 index 00000000000000..e2f6821e4aa9ab --- /dev/null +++ b/clang/test/Analysis/debug-exprinspection-istainted.c @@ -0,0 +1,27 @@ +// RUN: %clang_analyze_cc1 -verify %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=debug.ExprInspection \ +// RUN: -analyzer-checker=alpha.security.taint + +int scanf(const char *restrict format, ...); 
+void clang_analyzer_isTainted(char); +void clang_analyzer_isTainted_any_suffix(char); +void clang_analyzer_isTainted_many_arguments(char, int, int); + +void foo() { + char buf[32] = ""; + clang_analyzer_isTainted(buf[0]); // expected-warning {{NO}} + clang_analyzer_isTainted_any_suffix(buf[0]); // expected-warning {{NO}} + scanf("%s", buf); + clang_analyzer_isTainted(buf[0]); // expected-warning {{YES}} + clang_analyzer_isTainted_any_suffix(buf[0]); // expected-warning {{YES}} + + int tainted_value = buf[0]; // no-warning +} + +void exactly_one_argument_required() { + char buf[32] = ""; + scanf("%s", buf); + clang_analyzer_isTainted_many_arguments(buf[0], 42, 42); + // expected-warning@-1 {{clang_analyzer_isTainted() requires exactly one argument}} +} diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt index 2c6487e8c26082..7fdc7d0be79f33 100644 --- a/clang/test/CMakeLists.txt +++ b/clang/test/CMakeLists.txt @@ -9,6 +9,15 @@ endif () string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} CLANG_TOOLS_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR}) +if(CLANG_BUILT_STANDALONE) + # Set HAVE_LIBZ according to recorded LLVM_ENABLE_ZLIB value. This + # value is forced to 0 if zlib was not found, so it is fine to use it + # instead of HAVE_LIBZ (not recorded). 
+ if(LLVM_ENABLE_ZLIB) + set(HAVE_LIBZ 1) + endif() +endif() + llvm_canonicalize_cmake_booleans( CLANG_BUILD_EXAMPLES CLANG_ENABLE_ARCMT @@ -16,7 +25,7 @@ llvm_canonicalize_cmake_booleans( CLANG_SPAWN_CC1 ENABLE_BACKTRACES ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER - LLVM_ENABLE_ZLIB + HAVE_LIBZ LLVM_ENABLE_PER_TARGET_RUNTIME_DIR LLVM_ENABLE_PLUGINS LLVM_ENABLE_THREADS) diff --git a/clang/test/CodeGenCXX/debug-info-template-parameter.cpp b/clang/test/CodeGenCXX/debug-info-template-parameter.cpp new file mode 100644 index 00000000000000..95e7a187fe1026 --- /dev/null +++ b/clang/test/CodeGenCXX/debug-info-template-parameter.cpp @@ -0,0 +1,29 @@ +// Test for DebugInfo for Defaulted parameters for C++ templates +// Supported: -O0, standalone DI + +// RUN: %clang_cc1 -dwarf-version=5 -emit-llvm -triple x86_64-linux-gnu %s -o - \ +// RUN: -O0 -disable-llvm-passes \ +// RUN: -debug-info-kind=standalone \ +// RUN: | FileCheck %s + +// CHECK: DILocalVariable(name: "f1", {{.*}}, type: ![[TEMPLATE_TYPE:[0-9]+]] +// CHECK: [[TEMPLATE_TYPE]] = {{.*}}!DICompositeType({{.*}}, templateParams: ![[F1_TYPE:[0-9]+]] +// CHECK: [[F1_TYPE]] = !{![[FIRST:[0-9]+]], ![[SECOND:[0-9]+]]} +// CHECK: [[FIRST]] = !DITemplateTypeParameter(name: "T", type: !{{[0-9]*}}) +// CHECK: [[SECOND]] = !DITemplateValueParameter(name: "i", type: !{{[0-9]*}}, value: i32 6) + +// CHECK: DILocalVariable(name: "f2", {{.*}}, type: ![[TEMPLATE_TYPE:[0-9]+]] +// CHECK: [[TEMPLATE_TYPE]] = {{.*}}!DICompositeType({{.*}}, templateParams: ![[F2_TYPE:[0-9]+]] +// CHECK: [[F2_TYPE]] = !{![[FIRST:[0-9]+]], ![[SECOND:[0-9]+]]} +// CHECK: [[FIRST]] = !DITemplateTypeParameter(name: "T", type: !{{[0-9]*}}, defaulted: true) +// CHECK: [[SECOND]] = !DITemplateValueParameter(name: "i", type: !{{[0-9]*}}, defaulted: true, value: i32 3) + +template +class foo { +}; + +int main() { + foo f1; + foo<> f2; + return 0; +} diff --git a/clang/test/OpenMP/allocate_allocator_messages.cpp b/clang/test/OpenMP/allocate_allocator_messages.cpp index 
0c4d36fc5f5695..3ab735acedb918 100644 --- a/clang/test/OpenMP/allocate_allocator_messages.cpp +++ b/clang/test/OpenMP/allocate_allocator_messages.cpp @@ -10,10 +10,10 @@ int sss; #pragma omp allocate(sss) allocat // expected-warning {{extra tokens at the end of '#pragma omp allocate' are ignored}} #pragma omp allocate(sss) allocate(sss) // expected-error {{unexpected OpenMP clause 'allocate' in directive '#pragma omp allocate'}} #pragma omp allocate(sss) allocator // expected-error {{expected '(' after 'allocator'}} -#pragma omp allocate(sss) allocator(0, // expected-error {{expected ')'}} expected-error {{omp_allocator_handle_t type not found; include }} expected-note {{to match this '('}} -#pragma omp allocate(sss) allocator(0,sss // expected-error {{expected ')'}} expected-error {{omp_allocator_handle_t type not found; include }} expected-note {{to match this '('}} -#pragma omp allocate(sss) allocator(0,sss) // expected-error {{expected ')'}} expected-error {{omp_allocator_handle_t type not found; include }} expected-note {{to match this '('}} -#pragma omp allocate(sss) allocator(sss) // expected-error {{omp_allocator_handle_t type not found; include }} +#pragma omp allocate(sss) allocator(0, // expected-error {{expected ')'}} expected-error {{'omp_allocator_handle_t' type not found; include }} expected-note {{to match this '('}} +#pragma omp allocate(sss) allocator(0,sss // expected-error {{expected ')'}} expected-error {{'omp_allocator_handle_t' type not found; include }} expected-note {{to match this '('}} +#pragma omp allocate(sss) allocator(0,sss) // expected-error {{expected ')'}} expected-error {{'omp_allocator_handle_t' type not found; include }} expected-note {{to match this '('}} +#pragma omp allocate(sss) allocator(sss) // expected-error {{'omp_allocator_handle_t' type not found; include }} typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; diff --git a/clang/test/OpenMP/depobj_ast_print.cpp 
b/clang/test/OpenMP/depobj_ast_print.cpp new file mode 100644 index 00000000000000..9d1d408c058c0e --- /dev/null +++ b/clang/test/OpenMP/depobj_ast_print.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +typedef void *omp_depend_t; + +void foo() {} + +template +T tmain(T argc) { + static T a; +#pragma omp depobj(a) depend(in:argc) +#pragma omp depobj(argc) destroy + return argc; +} +// CHECK: static T a; +// CHECK-NEXT: #pragma omp depobj (a) depend(in : argc){{$}} +// CHECK-NEXT: #pragma omp depobj (argc) destroy{{$}} +// CHECK: static void *a; +// CHECK-NEXT: #pragma omp depobj (a) depend(in : argc){{$}} +// CHECK-NEXT: #pragma omp depobj (argc) destroy{{$}} + +int main(int argc, char **argv) { + static omp_depend_t a; + omp_depend_t b; +// CHECK: static omp_depend_t a; +// CHECK-NEXT: omp_depend_t b; +#pragma omp depobj(a) depend(out:argc, argv) +#pragma omp depobj(b) destroy +// CHECK-NEXT: #pragma omp depobj (a) depend(out : argc,argv) +// CHECK-NEXT: #pragma omp depobj (b) destroy + (void)tmain(a), tmain(b); + return 0; +} + +#endif diff --git a/clang/test/OpenMP/depobj_messages.cpp b/clang/test/OpenMP/depobj_messages.cpp new file mode 100644 index 00000000000000..b820a0eb517d6d --- /dev/null +++ b/clang/test/OpenMP/depobj_messages.cpp @@ -0,0 +1,156 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 
-ferror-limit 100 %s -Wuninitialized + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ferror-limit 100 %s -Wuninitialized + +struct S1 { // expected-note 2 {{declared here}} + int a; +} s; + +#pragma omp depobj(0) depend(in:s) // expected-error {{unexpected OpenMP directive '#pragma omp depobj'}} +void foo() { +#pragma omp depobj(0) depend(in:s) // expected-error {{'omp_depend_t' type not found; include }} expected-error {{expected lvalue expression}}} +} + +typedef void *omp_depend_t; + +template +T tmain(T argc) { + omp_depend_t x; +#pragma omp depobj() allocate(argc) // expected-error {{expected expression}} expected-error {{expected depobj expression}} expected-error {{unexpected OpenMP clause 'allocate' in directive '#pragma omp depobj'}} + ; +#pragma omp depobj(x) untied // expected-error {{unexpected OpenMP clause 'untied' in directive '#pragma omp depobj'}} +#pragma omp depobj(x) unknown // expected-warning {{extra tokens at the end of '#pragma omp depobj' are ignored}} + if (argc) +#pragma omp depobj(x) destroy // expected-error {{'#pragma omp depobj' cannot be an immediate substatement}} + if (argc) { +#pragma omp depobj(x) depend(in:s) + } + while (argc) +#pragma omp depobj(x) depend(in:s) // expected-error {{'#pragma omp depobj' cannot be an immediate substatement}} + while (argc) { +#pragma omp depobj(x) depend(in:s) + } + do +#pragma omp depobj(x) depend(in:s) // expected-error {{'#pragma omp depobj' cannot be an immediate substatement}} + while (argc) + ; + do { +#pragma omp depobj(x) depend(in:s) + } while (argc); + switch (argc) +#pragma omp depobj(x) depend(in:s) // expected-error {{'#pragma omp depobj' cannot be an immediate substatement}} + switch (argc) + case 1: +#pragma omp depobj(x) depend(in:s) // expected-error {{'#pragma omp depobj' cannot be an immediate substatement}} + switch (argc) + case 1: { +#pragma omp depobj(x) depend(in:s) + } + switch (argc) { +#pragma omp depobj(x) depend(in:s) + case 1: +#pragma omp depobj(x) 
depend(in:s) + break; + default: { +#pragma omp depobj(x) depend(in:s) + } break; + } + for (;;) +#pragma omp depobj(x) depend(in:s) // expected-error {{'#pragma omp depobj' cannot be an immediate substatement}} + for (;;) { +#pragma omp depobj(x) depend(in:s) + } +label: +#pragma omp depobj(x) depend(in:s) +label1 : { +#pragma omp depobj(x) depend(in:s) +} + +#pragma omp depobj // expected-error {{expected depobj expression}} +#pragma omp depobj( // expected-error {{expected expression}} expected-error {{expected depobj expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} +#pragma omp depobj() // expected-error {{expected expression}} expected-error {{expected depobj expression}} +#pragma omp depobj(argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{expected lvalue expression of 'omp_depend_t' type, not 'int'}}} +#pragma omp depobj(argc, // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{expected lvalue expression of 'omp_depend_t' type, not 'int'}} +#pragma omp depobj(argc) // expected-error {{expected lvalue expression of 'omp_depend_t' type, not 'int'}} +#pragma omp depobj(S1) // expected-error {{'S1' does not refer to a value}} expected-error {{expected depobj expression}} +#pragma omp depobj(argc) depobj(argc) // expected-warning {{extra tokens at the end of '#pragma omp depobj' are ignored}} expected-error {{expected lvalue expression of 'omp_depend_t' type, not 'int'}}} +#pragma omp parallel depobj(argc) // expected-warning {{extra tokens at the end of '#pragma omp parallel' are ignored}} + ; + return T(); +} + +int main(int argc, char **argv) { +omp_depend_t x; +#pragma omp depobj(x) depend(in:s) + ; +#pragma omp depobj(x) untied // expected-error {{unexpected OpenMP clause 'untied' in directive '#pragma omp depobj'}} +#pragma omp depobj(x) unknown // expected-warning {{extra tokens at the end of '#pragma omp depobj' are ignored}} + if (argc) +#pragma 
omp depobj(x) depend(in:s) // expected-error {{'#pragma omp depobj' cannot be an immediate substatement}} + if (argc) { +#pragma omp depobj(x) depend(in:s) + } + while (argc) +#pragma omp depobj(x) depend(in:s) // expected-error {{'#pragma omp depobj' cannot be an immediate substatement}} + while (argc) { +#pragma omp depobj(x) depend(in:s) + } + do +#pragma omp depobj(x) depend(in:s) // expected-error {{'#pragma omp depobj' cannot be an immediate substatement}} + while (argc) + ; + do { +#pragma omp depobj(x) depend(in:s) + } while (argc); + switch (argc) +#pragma omp depobj(x) depend(in:s) // expected-error {{'#pragma omp depobj' cannot be an immediate substatement}} + switch (argc) + case 1: +#pragma omp depobj(x) depend(in:s) // expected-error {{'#pragma omp depobj' cannot be an immediate substatement}} + switch (argc) + case 1: { +#pragma omp depobj(x) depend(in:s) + } + switch (argc) { +#pragma omp depobj(x) depend(in:s) + case 1: +#pragma omp depobj(x) depend(in:s) + break; + default: { +#pragma omp depobj(x) depend(in:s) + } break; + } + for (;;) +#pragma omp depobj(x) depend(in:s) // expected-error {{'#pragma omp depobj' cannot be an immediate substatement}} + for (;;) { +#pragma omp depobj(x) depend(in:s) + } +label: +#pragma omp depobj(x) depend(in:s) +label1 : { +#pragma omp depobj(x) depend(in:s) +} + +#pragma omp depobj // expected-error {{expected depobj expression}} +#pragma omp depobj( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{expected depobj expression}} +#pragma omp depobj() // expected-error {{expected expression}} expected-error {{expected depobj expression}} +#pragma omp depobj(argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{expected lvalue expression of 'omp_depend_t' type, not 'int'}} +#pragma omp depobj(argc, // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{expected lvalue 
expression of 'omp_depend_t' type, not 'int'}} +#pragma omp depobj(argc) // expected-error {{expected lvalue expression of 'omp_depend_t' type, not 'int'}} +#pragma omp depobj(S1) // expected-error {{'S1' does not refer to a value}} expected-error {{expected depobj expression}} +#pragma omp depobj(argc) depobj(argc) // expected-warning {{extra tokens at the end of '#pragma omp depobj' are ignored}} expected-error {{expected lvalue expression of 'omp_depend_t' type, not 'int'}} +#pragma omp parallel depobj(argc) // expected-warning {{extra tokens at the end of '#pragma omp parallel' are ignored}} + ; +#pragma omp depobj(x) seq_cst // expected-error {{unexpected OpenMP clause 'seq_cst' in directive '#pragma omp depobj'}} +#pragma omp depobj(x) depend(in: x) +#pragma omp depobj(x) destroy destroy // expected-error {{directive '#pragma omp depobj' cannot contain more than one 'destroy' clause}} +#pragma omp depobj(x) depend(in: x) destroy // expected-error {{exactly one of 'depend', 'destroy', or 'update' clauses is expected}} +#pragma omp depobj(x) destroy depend(in: x) // expected-error {{exactly one of 'depend', 'destroy', or 'update' clauses is expected}} +#pragma omp depobj(x) (x) depend(in: x) // expected-warning {{extra tokens at the end of '#pragma omp depobj' are ignored}} +#pragma omp depobj(x) depend(in: x) depend(out:x) // expected-error {{exactly one of 'depend', 'destroy', or 'update' clauses is expected}} +#pragma omp depend(out:x) depobj(x) // expected-error {{expected an OpenMP directive}} +#pragma omp destroy depobj(x) // expected-error {{expected an OpenMP directive}} +#pragma omp depobj depend(in:x) (x) // expected-error {{expected depobj expression}} expected-warning {{extra tokens at the end of '#pragma omp depobj' are ignored}} +#pragma omp depobj destroy (x) // expected-error {{expected depobj expression}} expected-warning {{extra tokens at the end of '#pragma omp depobj' are ignored}} + return tmain(argc); // expected-note {{in instantiation of 
function template specialization 'tmain' requested here}} +} diff --git a/clang/test/OpenMP/flush_messages.cpp b/clang/test/OpenMP/flush_messages.cpp index 51497249a8f386..7d20e385bfafac 100644 --- a/clang/test/OpenMP/flush_messages.cpp +++ b/clang/test/OpenMP/flush_messages.cpp @@ -142,7 +142,7 @@ label1 : { #pragma omp flush seq_cst // expected-error {{unexpected OpenMP clause 'seq_cst' in directive '#pragma omp flush'}} #pragma omp flush acq_rel acquire // omp45-error {{unexpected OpenMP clause 'acq_rel' in directive '#pragma omp flush'}} omp45-error {{unexpected OpenMP clause 'acquire' in directive '#pragma omp flush'}} omp50-error {{directive '#pragma omp flush' cannot contain more than one 'acq_rel', 'acquire' or 'release' clause}} omp50-note {{'acq_rel' clause used here}} #pragma omp flush release acquire // omp45-error {{unexpected OpenMP clause 'release' in directive '#pragma omp flush'}} omp45-error {{unexpected OpenMP clause 'acquire' in directive '#pragma omp flush'}} omp50-error {{directive '#pragma omp flush' cannot contain more than one 'acq_rel', 'acquire' or 'release' clause}} omp50-note {{'release' clause used here}} -#pragma omp flush acq_rel (argc) // omp45-error {{unexpected OpenMP clause 'acq_rel' in directive '#pragma omp flush'}} omp50-error {{'flush' directive with memory order clause 'acq_rel' cannot have the list}} omp50-note {{memory order clause 'acq_rel' is specified here}} +#pragma omp flush acq_rel (argc) // omp45-error {{unexpected OpenMP clause 'acq_rel' in directive '#pragma omp flush'}} expected-warning {{extra tokens at the end of '#pragma omp flush' are ignored}} #pragma omp flush(argc) acq_rel // omp45-error {{unexpected OpenMP clause 'acq_rel' in directive '#pragma omp flush'}} omp50-error {{'flush' directive with memory order clause 'acq_rel' cannot have the list}} omp50-note {{memory order clause 'acq_rel' is specified here}} return tmain(argc); } diff --git a/clang/test/Preprocessor/init-aarch64.c 
b/clang/test/Preprocessor/init-aarch64.c index 380e5e2d726181..df2a6128989bde 100644 --- a/clang/test/Preprocessor/init-aarch64.c +++ b/clang/test/Preprocessor/init-aarch64.c @@ -236,7 +236,7 @@ // AARCH64-NEXT: #define __STDC_HOSTED__ 1 // AARCH64-NEXT: #define __STDC_UTF_16__ 1 // AARCH64-NEXT: #define __STDC_UTF_32__ 1 -// AARCH64_C: #define __STDC_VERSION__ 201112L +// AARCH64_C: #define __STDC_VERSION__ 201710L // AARCH64-NEXT: #define __STDC__ 1 // AARCH64-NEXT: #define __UINT16_C_SUFFIX__ // AARCH64-NEXT: #define __UINT16_FMTX__ "hX" @@ -646,7 +646,7 @@ // AARCH64-MSVC: #define __STDC_HOSTED__ 0 // AARCH64-MSVC: #define __STDC_UTF_16__ 1 // AARCH64-MSVC: #define __STDC_UTF_32__ 1 -// AARCH64-MSVC: #define __STDC_VERSION__ 201112L +// AARCH64-MSVC: #define __STDC_VERSION__ 201710L // AARCH64-MSVC: #define __STDC__ 1 // AARCH64-MSVC: #define __UINT16_C_SUFFIX__ // AARCH64-MSVC: #define __UINT16_MAX__ 65535 diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c index f38f87ddef8e63..e987a3b3b93d9f 100644 --- a/clang/test/Preprocessor/init.c +++ b/clang/test/Preprocessor/init.c @@ -117,7 +117,7 @@ // RUN: %clang_cc1 -E -dM -triple=x86_64-apple-darwin < /dev/null | FileCheck -match-full-lines -check-prefix C-DEFAULT %s // RUN: %clang_cc1 -E -dM -triple=armv7a-apple-darwin < /dev/null | FileCheck -match-full-lines -check-prefix C-DEFAULT %s // -// C-DEFAULT:#define __STDC_VERSION__ 201112L +// C-DEFAULT:#define __STDC_VERSION__ 201710L // // RUN: %clang_cc1 -ffreestanding -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix FREESTANDING %s // FREESTANDING:#define __STDC_HOSTED__ 0 @@ -2098,7 +2098,7 @@ // MIPS32BE:#define __SIZE_WIDTH__ 32 // MIPS32BE-CXX:#define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 8U // MIPS32BE:#define __STDC_HOSTED__ 0 -// MIPS32BE-C:#define __STDC_VERSION__ 201112L +// MIPS32BE-C:#define __STDC_VERSION__ 201710L // MIPS32BE:#define __STDC__ 1 // MIPS32BE:#define __UINT16_C_SUFFIX__ // MIPS32BE:#define 
__UINT16_MAX__ 65535 @@ -2557,7 +2557,7 @@ // MIPSN32BE: #define __STDC_HOSTED__ 0 // MIPSN32BE: #define __STDC_UTF_16__ 1 // MIPSN32BE: #define __STDC_UTF_32__ 1 -// MIPSN32BE-C: #define __STDC_VERSION__ 201112L +// MIPSN32BE-C: #define __STDC_VERSION__ 201710L // MIPSN32BE: #define __STDC__ 1 // MIPSN32BE: #define __UINT16_C_SUFFIX__ // MIPSN32BE: #define __UINT16_FMTX__ "hX" @@ -2864,7 +2864,7 @@ // MIPSN32EL: #define __STDC_HOSTED__ 0 // MIPSN32EL: #define __STDC_UTF_16__ 1 // MIPSN32EL: #define __STDC_UTF_32__ 1 -// MIPSN32EL: #define __STDC_VERSION__ 201112L +// MIPSN32EL: #define __STDC_VERSION__ 201710L // MIPSN32EL: #define __STDC__ 1 // MIPSN32EL: #define __UINT16_C_SUFFIX__ // MIPSN32EL: #define __UINT16_FMTX__ "hX" @@ -5390,7 +5390,7 @@ // PPC-DARWIN:#define __SIZE_TYPE__ long unsigned int // PPC-DARWIN:#define __SIZE_WIDTH__ 32 // PPC-DARWIN:#define __STDC_HOSTED__ 0 -// PPC-DARWIN:#define __STDC_VERSION__ 201112L +// PPC-DARWIN:#define __STDC_VERSION__ 201710L // PPC-DARWIN:#define __STDC__ 1 // PPC-DARWIN:#define __UINT16_C_SUFFIX__ // PPC-DARWIN:#define __UINT16_MAX__ 65535 @@ -6602,7 +6602,7 @@ // X86_64-CLOUDABI:#define __STDC_ISO_10646__ 201206L // X86_64-CLOUDABI:#define __STDC_UTF_16__ 1 // X86_64-CLOUDABI:#define __STDC_UTF_32__ 1 -// X86_64-CLOUDABI:#define __STDC_VERSION__ 201112L +// X86_64-CLOUDABI:#define __STDC_VERSION__ 201710L // X86_64-CLOUDABI:#define __STDC__ 1 // X86_64-CLOUDABI:#define __UINT16_C_SUFFIX__ // X86_64-CLOUDABI:#define __UINT16_FMTX__ "hX" @@ -7601,7 +7601,7 @@ // WEBASSEMBLY-NOT:#define __STDC_NO_THREADS__ // WEBASSEMBLY-NEXT:#define __STDC_UTF_16__ 1 // WEBASSEMBLY-NEXT:#define __STDC_UTF_32__ 1 -// WEBASSEMBLY-NEXT:#define __STDC_VERSION__ 201112L +// WEBASSEMBLY-NEXT:#define __STDC_VERSION__ 201710L // WEBASSEMBLY-NEXT:#define __STDC__ 1 // WEBASSEMBLY-NEXT:#define __UINT16_C_SUFFIX__ // WEBASSEMBLY-NEXT:#define __UINT16_FMTX__ "hX" @@ -8166,7 +8166,7 @@ // RISCV32: #define __STDC_HOSTED__ 0 // RISCV32: #define 
__STDC_UTF_16__ 1 // RISCV32: #define __STDC_UTF_32__ 1 -// RISCV32: #define __STDC_VERSION__ 201112L +// RISCV32: #define __STDC_VERSION__ 201710L // RISCV32: #define __STDC__ 1 // RISCV32: #define __UINT16_C_SUFFIX__ // RISCV32: #define __UINT16_MAX__ 65535 @@ -8373,7 +8373,7 @@ // RISCV64: #define __STDC_HOSTED__ 0 // RISCV64: #define __STDC_UTF_16__ 1 // RISCV64: #define __STDC_UTF_32__ 1 -// RISCV64: #define __STDC_VERSION__ 201112L +// RISCV64: #define __STDC_VERSION__ 201710L // RISCV64: #define __STDC__ 1 // RISCV64: #define __UINT16_C_SUFFIX__ // RISCV64: #define __UINT16_MAX__ 65535 diff --git a/clang/test/Sema/fallthrough-comment.c b/clang/test/Sema/fallthrough-comment.c deleted file mode 100644 index 85d1257932f668..00000000000000 --- a/clang/test/Sema/fallthrough-comment.c +++ /dev/null @@ -1,20 +0,0 @@ -// RUN: %clang_cc1 -fsyntax-only -std=c11 -verify -Wimplicit-fallthrough %s - -int fallthrough_comment(int n) { - switch (n) { - case 0: - n++; - // FALLTHROUGH - case 1: - n++; - - /*fall-through.*/ - - case 2: - n++; - case 3: // expected-warning{{unannotated fall-through between switch labels}} expected-note{{insert '__attribute__((fallthrough));' to silence this warning}} expected-note{{insert 'break;' to avoid fall-through}} - n++; - break; - } - return n; -} diff --git a/clang/test/Sema/warn-documentation.m b/clang/test/Sema/warn-documentation.m index c713d5b07f85e9..5d60a52ae6fed7 100644 --- a/clang/test/Sema/warn-documentation.m +++ b/clang/test/Sema/warn-documentation.m @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -fsyntax-only -fblocks -Wno-objc-root-class -Wdocumentation -Wdocumentation-pedantic -verify %s +// RUN: %clang_cc1 -xobjective-c++ -fsyntax-only -fblocks -Wno-objc-root-class -Wdocumentation -Wdocumentation-pedantic -verify %s @class NSString; @@ -318,3 +319,10 @@ @interface CheckFunctionBlockPointerVars { // expected-warning@-1 {{'\return' command used in a comment that is not attached to a function or method declaration}} VoidBlockTypeCall 
^e; ///< \return none // expected-warning@-1 {{'\return' command used in a comment that is not attached to a function or method declaration}} + +#ifdef __cplusplus +@interface HasAnonNamespace @end +@implementation HasAnonNamespace +namespace {} +@end +#endif diff --git a/clang/test/SemaObjC/signed-char-bool-conversion.m b/clang/test/SemaObjC/signed-char-bool-conversion.m index 6945d86fc26d17..183f60fafcd5ad 100644 --- a/clang/test/SemaObjC/signed-char-bool-conversion.m +++ b/clang/test/SemaObjC/signed-char-bool-conversion.m @@ -69,6 +69,11 @@ void t3(struct has_bf *bf) { b = local.nested->unsigned_bf2; // expected-warning{{implicit conversion from integral type 'unsigned int' to 'BOOL'}} } +void t4(BoolProp *bp) { + BOOL local = YES; + bp.p = 1 ? local : NO; // no warning +} + __attribute__((objc_root_class)) @interface BFIvar { struct has_bf bf; diff --git a/clang/test/lit.site.cfg.py.in b/clang/test/lit.site.cfg.py.in index 39c8b47adf926e..62616d9a2b959b 100644 --- a/clang/test/lit.site.cfg.py.in +++ b/clang/test/lit.site.cfg.py.in @@ -16,7 +16,7 @@ config.host_triple = "@LLVM_HOST_TRIPLE@" config.target_triple = "@TARGET_TRIPLE@" config.host_cxx = "@CMAKE_CXX_COMPILER@" config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" -config.have_zlib = @LLVM_ENABLE_ZLIB@ +config.have_zlib = @HAVE_LIBZ@ config.clang_arcmt = @CLANG_ENABLE_ARCMT@ config.clang_default_cxx_stdlib = "@CLANG_DEFAULT_CXX_STDLIB@" config.clang_staticanalyzer = @CLANG_ENABLE_STATIC_ANALYZER@ diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 6f32240fe6e48b..62dc0e2b8f9265 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2047,6 +2047,7 @@ class EnqueueVisitor : public ConstStmtVisitor { VisitOMPCancellationPointDirective(const OMPCancellationPointDirective *D); void VisitOMPCancelDirective(const OMPCancelDirective *D); void VisitOMPFlushDirective(const OMPFlushDirective *D); + void VisitOMPDepobjDirective(const OMPDepobjDirective 
*D); void VisitOMPOrderedDirective(const OMPOrderedDirective *D); void VisitOMPAtomicDirective(const OMPAtomicDirective *D); void VisitOMPTargetDirective(const OMPTargetDirective *D); @@ -2249,6 +2250,8 @@ void OMPClauseEnqueue::VisitOMPSIMDClause(const OMPSIMDClause *) {} void OMPClauseEnqueue::VisitOMPNogroupClause(const OMPNogroupClause *) {} +void OMPClauseEnqueue::VisitOMPDestroyClause(const OMPDestroyClause *) {} + void OMPClauseEnqueue::VisitOMPUnifiedAddressClause( const OMPUnifiedAddressClause *) {} @@ -2444,6 +2447,9 @@ OMPClauseEnqueue::VisitOMPCopyprivateClause(const OMPCopyprivateClause *C) { void OMPClauseEnqueue::VisitOMPFlushClause(const OMPFlushClause *C) { VisitOMPClauseList(C); } +void OMPClauseEnqueue::VisitOMPDepobjClause(const OMPDepobjClause *C) { + Visitor->AddStmt(C->getDepobj()); +} void OMPClauseEnqueue::VisitOMPDependClause(const OMPDependClause *C) { VisitOMPClauseList(C); } @@ -2871,6 +2877,10 @@ void EnqueueVisitor::VisitOMPFlushDirective(const OMPFlushDirective *D) { VisitOMPExecutableDirective(D); } +void EnqueueVisitor::VisitOMPDepobjDirective(const OMPDepobjDirective *D) { + VisitOMPExecutableDirective(D); +} + void EnqueueVisitor::VisitOMPOrderedDirective(const OMPOrderedDirective *D) { VisitOMPExecutableDirective(D); } @@ -2883,8 +2893,8 @@ void EnqueueVisitor::VisitOMPTargetDirective(const OMPTargetDirective *D) { VisitOMPExecutableDirective(D); } -void EnqueueVisitor::VisitOMPTargetDataDirective(const - OMPTargetDataDirective *D) { +void EnqueueVisitor::VisitOMPTargetDataDirective( + const OMPTargetDataDirective *D) { VisitOMPExecutableDirective(D); } @@ -5503,6 +5513,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("OMPTaskgroupDirective"); case CXCursor_OMPFlushDirective: return cxstring::createRef("OMPFlushDirective"); + case CXCursor_OMPDepobjDirective: + return cxstring::createRef("OMPDepobjDirective"); case CXCursor_OMPOrderedDirective: return 
cxstring::createRef("OMPOrderedDirective"); case CXCursor_OMPAtomicDirective: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index 04b713c68b8079..e10c742c65eae7 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -635,6 +635,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::OMPFlushDirectiveClass: K = CXCursor_OMPFlushDirective; break; + case Stmt::OMPDepobjDirectiveClass: + K = CXCursor_OMPDepobjDirective; + break; case Stmt::OMPOrderedDirectiveClass: K = CXCursor_OMPOrderedDirective; break; diff --git a/clang/unittests/Format/FormatTestCSharp.cpp b/clang/unittests/Format/FormatTestCSharp.cpp index 0bc49856375b00..d22e0da82321ec 100644 --- a/clang/unittests/Format/FormatTestCSharp.cpp +++ b/clang/unittests/Format/FormatTestCSharp.cpp @@ -607,6 +607,7 @@ TEST_F(FormatTestCSharp, CSharpSpaces) { Style.SpacesInSquareBrackets = true; verifyFormat(R"(private float[ , ] Values;)", Style); + verifyFormat(R"(string dirPath = args?[ 0 ];)", Style); } TEST_F(FormatTestCSharp, CSharpNullableTypes) { diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp index ad0293bc3e072d..d4b015393286bd 100644 --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -59,6 +59,7 @@ using ::testing::ElementsAre; using ::testing::Field; using ::testing::Matcher; using ::testing::Not; +using ::testing::Pointee; using ::testing::StartsWith; namespace { @@ -363,6 +364,12 @@ TEST_F(TokenCollectorTest, Locations) { AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))))); + + auto StartLoc = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()); + for (auto &R : Code.ranges()) { + EXPECT_THAT(Buffer.spelledTokenAt(StartLoc.getLocWithOffset(R.Begin)), + Pointee(RangeIs(R))); + } 
} TEST_F(TokenCollectorTest, MacroDirectives) { diff --git a/clang/www/compatibility.html b/clang/www/compatibility.html index 9f8ee4bdc01233..a593155951dae4 100755 --- a/clang/www/compatibility.html +++ b/clang/www/compatibility.html @@ -83,7 +83,7 @@

C compatibility

C99 inline functions

-

By default, Clang builds C code in GNU C11 mode, so it uses standard C99 +

By default, Clang builds C code in GNU C17 mode, so it uses standard C99 semantics for the inline keyword. These semantics are different from those in GNU C89 mode, which is the default mode in versions of GCC prior to 5.0. For example, consider the following code:

diff --git a/compiler-rt/lib/fuzzer/FuzzerUtil.cpp b/compiler-rt/lib/fuzzer/FuzzerUtil.cpp index 87180d1ea85d52..7eecb68d0729da 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtil.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerUtil.cpp @@ -161,20 +161,21 @@ std::string Base64(const Unit &U) { size_t i = 0, j = 0; for (size_t n = U.size() / 3 * 3; i < n; i += 3, j += 4) { - uint32_t x = (U[i] << 16) | (U[i + 1] << 8) | U[i + 2]; + uint32_t x = ((unsigned char)U[i] << 16) | ((unsigned char)U[i + 1] << 8) | + (unsigned char)U[i + 2]; Buffer[j + 0] = Table[(x >> 18) & 63]; Buffer[j + 1] = Table[(x >> 12) & 63]; Buffer[j + 2] = Table[(x >> 6) & 63]; Buffer[j + 3] = Table[x & 63]; } if (i + 1 == U.size()) { - uint32_t x = (U[i] << 16); + uint32_t x = ((unsigned char)U[i] << 16); Buffer[j + 0] = Table[(x >> 18) & 63]; Buffer[j + 1] = Table[(x >> 12) & 63]; Buffer[j + 2] = '='; Buffer[j + 3] = '='; } else if (i + 2 == U.size()) { - uint32_t x = (U[i] << 16) | (U[i + 1] << 8); + uint32_t x = ((unsigned char)U[i] << 16) | ((unsigned char)U[i + 1] << 8); Buffer[j + 0] = Table[(x >> 18) & 63]; Buffer[j + 1] = Table[(x >> 12) & 63]; Buffer[j + 2] = Table[(x >> 6) & 63]; diff --git a/compiler-rt/test/lit.common.configured.in b/compiler-rt/test/lit.common.configured.in index 4de8d030070f3c..60464bcdaa877c 100644 --- a/compiler-rt/test/lit.common.configured.in +++ b/compiler-rt/test/lit.common.configured.in @@ -51,7 +51,7 @@ if config.enable_per_target_runtime_dir: else: set_default("target_suffix", "-%s" % config.target_arch) -set_default("have_zlib", "@LLVM_ENABLE_ZLIB@") +set_default("have_zlib", "@HAVE_LIBZ@") set_default("libcxx_used", "@LLVM_LIBCXX_USED@") # LLVM tools dir can be passed in lit parameters, so try to diff --git a/compiler-rt/test/profile/instrprof-merging.cpp b/compiler-rt/test/profile/instrprof-merging.cpp index 26c191a715372b..692b049ec45c31 100644 --- a/compiler-rt/test/profile/instrprof-merging.cpp +++ b/compiler-rt/test/profile/instrprof-merging.cpp @@ -1,4 +1,3 @@ 
-// UNSUPPORTED: powerpc64 // 1) Compile shared code into different object files and into an executable. // RUN: %clangxx_profgen -std=c++14 -fcoverage-mapping %s -c -o %t.v1.o \ diff --git a/compiler-rt/test/ubsan/TestCases/Misc/nullability.c b/compiler-rt/test/ubsan/TestCases/Misc/nullability.c index 849d7ee203c62a..50295fe503f9ed 100644 --- a/compiler-rt/test/ubsan/TestCases/Misc/nullability.c +++ b/compiler-rt/test/ubsan/TestCases/Misc/nullability.c @@ -1,3 +1,4 @@ +// UNSUPPORTED: android // RUN: %clang -w -fsanitize=nullability-arg,nullability-assign,nullability-return %s -O3 -o %t // RUN: %run %t foo 2>&1 | count 0 // RUN: %run %t 2>&1 | FileCheck %s @@ -5,11 +6,7 @@ // RUN: echo "nullability-arg:nullability.c" > %t.supp // RUN: echo "nullability-return:nullability.c" >> %t.supp // RUN: echo "nullability-assign:nullability.c" >> %t.supp -// RUN: UBSAN_OPTIONS=suppressions=%t.supp %run %t -// -// XXX: This test is failing on the sanitizer-x86_64-linux-android, but not -// in a way that provides debuggable output. Relax the check so we can debug. 
-// 2>&1 | FileCheck -allow-empty -check-prefix=SUPPRESS %s +// RUN: UBSAN_OPTIONS=suppressions=%t.supp %run %t 2>&1 | FileCheck -allow-empty -check-prefix=SUPPRESS %s // SUPPRESS-NOT: runtime error // CHECK: nullability.c:[[@LINE+2]]:41: runtime error: null pointer returned from function declared to never return null diff --git a/libc/src/signal/linux/CMakeLists.txt b/libc/src/signal/linux/CMakeLists.txt index 53bf5fc0f56b27..022f41b5a0ebb5 100644 --- a/libc/src/signal/linux/CMakeLists.txt +++ b/libc/src/signal/linux/CMakeLists.txt @@ -35,6 +35,7 @@ add_entrypoint_object( ../sigemptyset.h DEPENDS __errno_location + errno_h signal_h ) @@ -47,5 +48,6 @@ add_entrypoint_object( ../sigaddset.h DEPENDS __errno_location + errno_h signal_h ) diff --git a/libcxx/include/__config b/libcxx/include/__config index b14cc84eadeaa5..9bd7fc9932c858 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -102,6 +102,9 @@ # define _LIBCPP_ABI_OPTIMIZED_FUNCTION // All the regex constants must be distinct and nonzero. # define _LIBCPP_ABI_REGEX_CONSTANTS_NONZERO +// Re-worked external template instantiations for std::string with a focus on +// performance and fast-path inlining. 
+# define _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION #elif _LIBCPP_ABI_VERSION == 1 # if !defined(_LIBCPP_OBJECT_FORMAT_COFF) // Enable compiling copies of now inline methods into the dylib to support diff --git a/libcxx/include/string b/libcxx/include/string index 7688d3ff29ec96..c2a4220e276a89 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -4381,7 +4381,7 @@ basic_string<_CharT, _Traits, _Allocator>::__subscriptable(const const_iterator* #endif // _LIBCPP_DEBUG_LEVEL >= 2 -#if defined(_LIBCPP_ABI_UNSTABLE) || _LIBCPP_ABI_VERSION >= 2 +#ifdef _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION _LIBCPP_STRING_UNSTABLE_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, char) _LIBCPP_STRING_UNSTABLE_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, wchar_t) #else diff --git a/libcxx/src/string.cpp b/libcxx/src/string.cpp index 0345170a70ee8a..5105594cf38b8d 100644 --- a/libcxx/src/string.cpp +++ b/libcxx/src/string.cpp @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __basic_string_common; -#if defined(_LIBCPP_ABI_UNSTABLE) || _LIBCPP_ABI_VERSION >= 2 +#ifdef _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION _LIBCPP_STRING_UNSTABLE_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE_DEFINE, char) _LIBCPP_STRING_UNSTABLE_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE_DEFINE, wchar_t) #else diff --git a/libcxx/utils/merge_archives.py b/libcxx/utils/merge_archives.py index 4c31854d2b7e35..cc96cb2aa50ce5 100755 --- a/libcxx/utils/merge_archives.py +++ b/libcxx/utils/merge_archives.py @@ -143,7 +143,7 @@ def main(): if args.use_libtool: files = [f for f in files if not f.startswith('__.SYMDEF')] - execute_command_verbose([libtool_exe, '-static', '-o', args.output] + files, + execute_command_verbose([libtool_exe, '-static', '-o', args.output, '-s'] + files, cwd=temp_directory_root, verbose=args.verbose) else: execute_command_verbose([ar_exe, 'rcs', args.output] + files, diff --git a/lld/docs/WebAssembly.rst 
b/lld/docs/WebAssembly.rst index 13ed0aeb94d4cc..b23f2cd462b4be 100644 --- a/lld/docs/WebAssembly.rst +++ b/lld/docs/WebAssembly.rst @@ -137,7 +137,7 @@ By default no undefined symbols are allowed in the final binary. The flag ``--allow-undefined`` results in a WebAssembly import being defined for each undefined symbol. It is then up to the runtime to provide such symbols. -Alternativly symbols can be marked in the source code as with the +Alternatively symbols can be marked in the source code as with the ``import_name`` and/or ``import_module`` clang attributes which signals that they are expected to be undefined at static link time. diff --git a/lld/test/CMakeLists.txt b/lld/test/CMakeLists.txt index dc8cedf2ea095e..8be42c46dd8ad4 100644 --- a/lld/test/CMakeLists.txt +++ b/lld/test/CMakeLists.txt @@ -4,8 +4,17 @@ set(LLVM_BUILD_MODE "%(build_mode)s") set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/%(build_config)s") set(LLVM_LIBS_DIR "${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/%(build_config)s") +if(LLD_BUILT_STANDALONE) + # Set HAVE_LIBZ according to recorded LLVM_ENABLE_ZLIB value. This + # value is forced to 0 if zlib was not found, so it is fine to use it + # instead of HAVE_LIBZ (not recorded). 
+ if(LLVM_ENABLE_ZLIB) + set(HAVE_LIBZ 1) + endif() +endif() + llvm_canonicalize_cmake_booleans( - LLVM_ENABLE_ZLIB + HAVE_LIBZ LLVM_LIBXML2_ENABLED ) diff --git a/lld/test/ELF/lto/resolution-err.ll b/lld/test/ELF/lto/resolution-err.ll new file mode 100644 index 00000000000000..00cdd94059ac87 --- /dev/null +++ b/lld/test/ELF/lto/resolution-err.ll @@ -0,0 +1,16 @@ +; UNSUPPORTED: system-windows +; REQUIRES: shell +; RUN: llvm-as %s -o %t.bc +; RUN: touch %t.resolution.txt +; RUN: chmod -w %t.resolution.txt +; RUN: not ld.lld -save-temps %t.bc -o %t 2>&1 | FileCheck %s +; RUN: rm -f %t.resolution.txt + +; CHECK: error: {{[Pp]}}ermission denied{{$}} + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @_start() { + ret void +} diff --git a/lld/test/lit.site.cfg.py.in b/lld/test/lit.site.cfg.py.in index 531fce15839d52..02840f8d6a3036 100644 --- a/lld/test/lit.site.cfg.py.in +++ b/lld/test/lit.site.cfg.py.in @@ -14,7 +14,7 @@ config.lld_libs_dir = "@LLVM_LIBRARY_OUTPUT_INTDIR@" config.lld_tools_dir = "@LLVM_RUNTIME_OUTPUT_INTDIR@" config.target_triple = "@TARGET_TRIPLE@" config.python_executable = "@PYTHON_EXECUTABLE@" -config.have_zlib = @LLVM_ENABLE_ZLIB@ +config.have_zlib = @HAVE_LIBZ@ config.sizeof_void_p = @CMAKE_SIZEOF_VOID_P@ # Support substitution of the tools and libs dirs with user parameters. This is diff --git a/lldb/docs/conf.py b/lldb/docs/conf.py index bd95cbe6cd98cd..ca1d6f79092179 100644 --- a/lldb/docs/conf.py +++ b/lldb/docs/conf.py @@ -46,12 +46,14 @@ # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the -# built documents. +# built documents. These are currently set to zero because we don't use them. +# Should somebody consider in the future to change them, they need to be updated +# everytime a new release comes out. # # The short version. 
-version = '8' +#version = '0' # The full version, including alpha/beta/rc tags. -release = '8' +#release = '0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/lldb/docs/index.rst b/lldb/docs/index.rst index f1e1eda7609aa4..29f63b32838116 100644 --- a/lldb/docs/index.rst +++ b/lldb/docs/index.rst @@ -3,7 +3,7 @@ The LLDB Debugger ================= -Welcome to the LLDB version |release| documentation! +Welcome to the LLDB documentation! LLDB is a next generation, high-performance debugger. It is built as a set of reusable components which highly leverage existing libraries in the larger LLVM diff --git a/lldb/include/lldb/Core/Disassembler.h b/lldb/include/lldb/Core/Disassembler.h index 98f34f3e0cfa2e..521c8be2bbf8cc 100644 --- a/lldb/include/lldb/Core/Disassembler.h +++ b/lldb/include/lldb/Core/Disassembler.h @@ -446,13 +446,11 @@ class Disassembler : public std::enable_shared_from_this, uint32_t num_mixed_context_lines, uint32_t options, Stream &strm); - size_t ParseInstructions(const ExecutionContext *exe_ctx, - const AddressRange &range, Stream *error_strm_ptr, - bool prefer_file_cache); + size_t ParseInstructions(Target &target, const AddressRange &range, + Stream *error_strm_ptr, bool prefer_file_cache); - size_t ParseInstructions(const ExecutionContext *exe_ctx, - const Address &range, uint32_t num_instructions, - bool prefer_file_cache); + size_t ParseInstructions(Target &target, const Address &range, + uint32_t num_instructions, bool prefer_file_cache); virtual size_t DecodeInstructions(const Address &base_addr, const DataExtractor &data, diff --git a/lldb/include/lldb/Utility/RangeMap.h b/lldb/include/lldb/Utility/RangeMap.h index 53fb691323a68b..fb24c5a434792f 100644 --- a/lldb/include/lldb/Utility/RangeMap.h +++ b/lldb/include/lldb/Utility/RangeMap.h @@ -394,19 +394,31 @@ struct RangeData : public Range { RangeData(B base, S size, DataType d) : Range(base, size), data(d) {} }; 
+// We can treat the vector as a flattened Binary Search Tree, augmenting it +// with upper bounds (max of range endpoints) for every index allows us to +// query for range containment quicker. +template +struct AugmentedRangeData : public RangeData { + B upper_bound; + + AugmentedRangeData(const RangeData &rd) + : RangeData(rd), upper_bound() {} +}; + template > class RangeDataVector { public: typedef lldb_private::Range Range; typedef RangeData Entry; - typedef llvm::SmallVector Collection; + typedef AugmentedRangeData AugmentedEntry; + typedef llvm::SmallVector Collection; RangeDataVector(Compare compare = Compare()) : m_compare(compare) {} ~RangeDataVector() = default; - void Append(const Entry &entry) { m_entries.push_back(entry); } + void Append(const Entry &entry) { m_entries.emplace_back(entry); } void Sort() { if (m_entries.size() > 1) @@ -418,13 +430,13 @@ class RangeDataVector { return a.size < b.size; return compare(a.data, b.data); }); + if (!m_entries.empty()) + ComputeUpperBounds(0, m_entries.size()); } #ifdef ASSERT_RANGEMAP_ARE_SORTED bool IsSorted() const { typename Collection::const_iterator pos, end, prev; - // First we determine if we can combine any of the Entry objects so we - // don't end up allocating and making a new collection for no reason for (pos = m_entries.begin(), end = m_entries.end(), prev = end; pos != end; prev = pos++) { if (prev != end && *pos < *prev) @@ -494,26 +506,20 @@ class RangeDataVector { } uint32_t FindEntryIndexThatContains(B addr) const { - const Entry *entry = FindEntryThatContains(addr); + const AugmentedEntry *entry = + static_cast(FindEntryThatContains(addr)); if (entry) return std::distance(m_entries.begin(), entry); return UINT32_MAX; } - uint32_t FindEntryIndexesThatContain(B addr, - std::vector &indexes) const { + uint32_t FindEntryIndexesThatContain(B addr, std::vector &indexes) { #ifdef ASSERT_RANGEMAP_ARE_SORTED assert(IsSorted()); #endif - // Search the entries until the first entry that has a larger 
base address - // than `addr`. As m_entries is sorted by their base address, all following - // entries can't contain `addr` as their base address is already larger. - for (const auto &entry : m_entries) { - if (entry.Contains(addr)) - indexes.push_back(entry.data); - else if (entry.GetRangeBase() > addr) - break; - } + if (!m_entries.empty()) + FindEntryIndexesThatContain(addr, 0, m_entries.size(), indexes); + return indexes.size(); } @@ -599,6 +605,54 @@ class RangeDataVector { protected: Collection m_entries; Compare m_compare; + +private: + // Compute extra information needed for search + B ComputeUpperBounds(size_t lo, size_t hi) { + size_t mid = (lo + hi) / 2; + AugmentedEntry &entry = m_entries[mid]; + + entry.upper_bound = entry.base + entry.size; + + if (lo < mid) + entry.upper_bound = + std::max(entry.upper_bound, ComputeUpperBounds(lo, mid)); + + if (mid + 1 < hi) + entry.upper_bound = + std::max(entry.upper_bound, ComputeUpperBounds(mid + 1, hi)); + + return entry.upper_bound; + } + + // This is based on the augmented tree implementation found at + // https://en.wikipedia.org/wiki/Interval_tree#Augmented_tree + void FindEntryIndexesThatContain(B addr, size_t lo, size_t hi, + std::vector &indexes) { + size_t mid = (lo + hi) / 2; + const AugmentedEntry &entry = m_entries[mid]; + + // addr is greater than the rightmost point of any interval below mid + // so there cannot be any matches. + if (addr > entry.upper_bound) + return; + + // Recursively search left subtree + if (lo < mid) + FindEntryIndexesThatContain(addr, lo, mid, indexes); + + // If addr is smaller than the start of the current interval it + // cannot contain it nor can any of its right subtree. 
+ if (addr < entry.base) + return; + + if (entry.Contains(addr)) + indexes.push_back(entry.data); + + // Recursively search right subtree + if (mid + 1 < hi) + FindEntryIndexesThatContain(addr, mid + 1, hi, indexes); + } }; // A simple range with data class where you get to define the type of diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp index ad509b81d2bf7d..c7786534076e3f 100644 --- a/lldb/source/API/SBThread.cpp +++ b/lldb/source/API/SBThread.cpp @@ -40,7 +40,6 @@ #include "lldb/Target/ThreadPlanStepInstruction.h" #include "lldb/Target/ThreadPlanStepOut.h" #include "lldb/Target/ThreadPlanStepRange.h" -#include "lldb/Target/UnixSignals.h" #include "lldb/Utility/State.h" #include "lldb/Utility/Stream.h" #include "lldb/Utility/StructuredData.h" @@ -319,97 +318,26 @@ size_t SBThread::GetStopDescription(char *dst, size_t dst_len) { std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); - if (exe_ctx.HasThreadScope()) { - Process::StopLocker stop_locker; - if (stop_locker.TryLock(&exe_ctx.GetProcessPtr()->GetRunLock())) { + if (dst) + *dst = 0; - StopInfoSP stop_info_sp = exe_ctx.GetThreadPtr()->GetStopInfo(); - if (stop_info_sp) { - std::string thread_stop_desc = - exe_ctx.GetThreadPtr()->GetStopDescription(); - const char *stop_desc = thread_stop_desc.c_str(); - - if (stop_desc[0] != '\0') { - if (dst) - return ::snprintf(dst, dst_len, "%s", stop_desc); - else { - // NULL dst passed in, return the length needed to contain the - // description - return ::strlen(stop_desc) + 1; // Include the NULL byte for size - } - } else { - size_t stop_desc_len = 0; - switch (stop_info_sp->GetStopReason()) { - case eStopReasonTrace: - case eStopReasonPlanComplete: { - static char trace_desc[] = "step"; - stop_desc = trace_desc; - stop_desc_len = - sizeof(trace_desc); // Include the NULL byte for size - } break; - - case eStopReasonBreakpoint: { - static char bp_desc[] = "breakpoint hit"; - stop_desc = bp_desc; - stop_desc_len = 
sizeof(bp_desc); // Include the NULL byte for size - } break; - - case eStopReasonWatchpoint: { - static char wp_desc[] = "watchpoint hit"; - stop_desc = wp_desc; - stop_desc_len = sizeof(wp_desc); // Include the NULL byte for size - } break; - - case eStopReasonSignal: { - stop_desc = - exe_ctx.GetProcessPtr()->GetUnixSignals()->GetSignalAsCString( - stop_info_sp->GetValue()); - if (stop_desc == nullptr || stop_desc[0] == '\0') { - static char signal_desc[] = "signal"; - stop_desc = signal_desc; - stop_desc_len = - sizeof(signal_desc); // Include the NULL byte for size - } - } break; - - case eStopReasonException: { - char exc_desc[] = "exception"; - stop_desc = exc_desc; - stop_desc_len = sizeof(exc_desc); // Include the NULL byte for size - } break; - - case eStopReasonExec: { - char exc_desc[] = "exec"; - stop_desc = exc_desc; - stop_desc_len = sizeof(exc_desc); // Include the NULL byte for size - } break; - - case eStopReasonThreadExiting: { - char limbo_desc[] = "thread exiting"; - stop_desc = limbo_desc; - stop_desc_len = sizeof(limbo_desc); - } break; - default: - break; - } + if (!exe_ctx.HasThreadScope()) + return 0; - if (stop_desc && stop_desc[0]) { - if (dst) - return ::snprintf(dst, dst_len, "%s", stop_desc) + - 1; // Include the NULL byte + Process::StopLocker stop_locker; + if (!stop_locker.TryLock(&exe_ctx.GetProcessPtr()->GetRunLock())) + return 0; - if (stop_desc_len == 0) - stop_desc_len = ::strlen(stop_desc) + 1; // Include the NULL byte + std::string thread_stop_desc = exe_ctx.GetThreadPtr()->GetStopDescription(); + if (thread_stop_desc.empty()) + return 0; - return stop_desc_len; - } - } - } - } - } if (dst) - *dst = 0; - return 0; + return ::snprintf(dst, dst_len, "%s", thread_stop_desc.c_str()) + 1; + + // NULL dst passed in, return the length needed to contain the + // description. 
+ return thread_stop_desc.size() + 1; // Include the NULL byte for size } SBValue SBThread::GetStopReturnValue() { diff --git a/lldb/source/Core/Disassembler.cpp b/lldb/source/Core/Disassembler.cpp index 60247cfdd99e01..268e25fb6697ba 100644 --- a/lldb/source/Core/Disassembler.cpp +++ b/lldb/source/Core/Disassembler.cpp @@ -193,7 +193,7 @@ lldb::DisassemblerSP Disassembler::DisassembleRange( const ArchSpec &arch, const char *plugin_name, const char *flavor, const ExecutionContext &exe_ctx, const AddressRange &range, bool prefer_file_cache) { - if (range.GetByteSize() <= 0) + if (range.GetByteSize() <= 0 || !exe_ctx.GetTargetPtr()) return {}; if (!range.GetBaseAddress().IsValid()) @@ -205,8 +205,8 @@ lldb::DisassemblerSP Disassembler::DisassembleRange( if (!disasm_sp) return {}; - const size_t bytes_disassembled = - disasm_sp->ParseInstructions(&exe_ctx, range, nullptr, prefer_file_cache); + const size_t bytes_disassembled = disasm_sp->ParseInstructions( + exe_ctx.GetTargetRef(), range, nullptr, prefer_file_cache); if (bytes_disassembled == 0) return {}; @@ -243,7 +243,7 @@ bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch, bool mixed_source_and_assembly, uint32_t num_mixed_context_lines, uint32_t options, Stream &strm) { - if (!disasm_range.GetByteSize()) + if (!disasm_range.GetByteSize() || !exe_ctx.GetTargetPtr()) return false; lldb::DisassemblerSP disasm_sp(Disassembler::FindPluginForTarget( @@ -257,8 +257,8 @@ bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch, range.GetBaseAddress()); range.SetByteSize(disasm_range.GetByteSize()); const bool prefer_file_cache = false; - size_t bytes_disassembled = - disasm_sp->ParseInstructions(&exe_ctx, range, &strm, prefer_file_cache); + size_t bytes_disassembled = disasm_sp->ParseInstructions( + exe_ctx.GetTargetRef(), range, &strm, prefer_file_cache); if (bytes_disassembled == 0) return false; @@ -275,7 +275,7 @@ bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec 
&arch, bool mixed_source_and_assembly, uint32_t num_mixed_context_lines, uint32_t options, Stream &strm) { - if (num_instructions == 0) + if (num_instructions == 0 || !exe_ctx.GetTargetPtr()) return false; lldb::DisassemblerSP disasm_sp(Disassembler::FindPluginForTarget( @@ -288,7 +288,7 @@ bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch, const bool prefer_file_cache = false; size_t bytes_disassembled = disasm_sp->ParseInstructions( - &exe_ctx, addr, num_instructions, prefer_file_cache); + exe_ctx.GetTargetRef(), addr, num_instructions, prefer_file_cache); if (bytes_disassembled == 0) return false; @@ -1182,59 +1182,51 @@ InstructionList::GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr, return GetIndexOfInstructionAtAddress(address); } -size_t Disassembler::ParseInstructions(const ExecutionContext *exe_ctx, +size_t Disassembler::ParseInstructions(Target &target, const AddressRange &range, Stream *error_strm_ptr, bool prefer_file_cache) { - if (exe_ctx) { - Target *target = exe_ctx->GetTargetPtr(); - const addr_t byte_size = range.GetByteSize(); - if (target == nullptr || byte_size == 0 || - !range.GetBaseAddress().IsValid()) - return 0; - - auto data_sp = std::make_shared(byte_size, '\0'); - - Status error; - lldb::addr_t load_addr = LLDB_INVALID_ADDRESS; - const size_t bytes_read = target->ReadMemory( - range.GetBaseAddress(), prefer_file_cache, data_sp->GetBytes(), - data_sp->GetByteSize(), error, &load_addr); - - if (bytes_read > 0) { - if (bytes_read != data_sp->GetByteSize()) - data_sp->SetByteSize(bytes_read); - DataExtractor data(data_sp, m_arch.GetByteOrder(), - m_arch.GetAddressByteSize()); - const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS; - return DecodeInstructions(range.GetBaseAddress(), data, 0, UINT32_MAX, - false, data_from_file); - } else if (error_strm_ptr) { - const char *error_cstr = error.AsCString(); - if (error_cstr) { - error_strm_ptr->Printf("error: %s\n", error_cstr); - } - } + const addr_t 
byte_size = range.GetByteSize(); + if (byte_size == 0 || !range.GetBaseAddress().IsValid()) + return 0; + + auto data_sp = std::make_shared(byte_size, '\0'); + + Status error; + lldb::addr_t load_addr = LLDB_INVALID_ADDRESS; + const size_t bytes_read = target.ReadMemory( + range.GetBaseAddress(), prefer_file_cache, data_sp->GetBytes(), + data_sp->GetByteSize(), error, &load_addr); + + if (bytes_read > 0) { + if (bytes_read != data_sp->GetByteSize()) + data_sp->SetByteSize(bytes_read); + DataExtractor data(data_sp, m_arch.GetByteOrder(), + m_arch.GetAddressByteSize()); + const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS; + return DecodeInstructions(range.GetBaseAddress(), data, 0, UINT32_MAX, + false, data_from_file); } else if (error_strm_ptr) { - error_strm_ptr->PutCString("error: invalid execution context\n"); + const char *error_cstr = error.AsCString(); + if (error_cstr) { + error_strm_ptr->Printf("error: %s\n", error_cstr); + } } return 0; } -size_t Disassembler::ParseInstructions(const ExecutionContext *exe_ctx, - const Address &start, +size_t Disassembler::ParseInstructions(Target &target, const Address &start, uint32_t num_instructions, bool prefer_file_cache) { m_instruction_list.Clear(); - if (exe_ctx == nullptr || num_instructions == 0 || !start.IsValid()) + if (num_instructions == 0 || !start.IsValid()) return 0; - Target *target = exe_ctx->GetTargetPtr(); // Calculate the max buffer size we will need in order to disassemble const addr_t byte_size = num_instructions * m_arch.GetMaximumOpcodeByteSize(); - if (target == nullptr || byte_size == 0) + if (byte_size == 0) return 0; DataBufferHeap *heap_buffer = new DataBufferHeap(byte_size, '\0'); @@ -1243,8 +1235,8 @@ size_t Disassembler::ParseInstructions(const ExecutionContext *exe_ctx, Status error; lldb::addr_t load_addr = LLDB_INVALID_ADDRESS; const size_t bytes_read = - target->ReadMemory(start, prefer_file_cache, heap_buffer->GetBytes(), - byte_size, error, &load_addr); + 
target.ReadMemory(start, prefer_file_cache, heap_buffer->GetBytes(), + byte_size, error, &load_addr); const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS; diff --git a/lldb/source/Core/IOHandler.cpp b/lldb/source/Core/IOHandler.cpp index 939c87c1b15ec1..933da6bfbf75f2 100644 --- a/lldb/source/Core/IOHandler.cpp +++ b/lldb/source/Core/IOHandler.cpp @@ -125,6 +125,8 @@ void IOHandlerStack::PrintAsync(Stream *stream, const char *s, size_t len) { std::lock_guard guard(m_mutex); if (m_top) m_top->PrintAsync(stream, s, len); + else + stream->Write(s, len); } } diff --git a/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.cpp b/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.cpp index f426ac63e4b535..e3d1aa3b11dd03 100644 --- a/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.cpp +++ b/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.cpp @@ -120,9 +120,7 @@ lldb::addr_t ArchitectureMips::GetBreakableLoadAddress(lldb::addr_t addr, if (current_offset == 0) return addr; - ExecutionContext ctx; - target.CalculateExecutionContext(ctx); - auto insn = GetInstructionAtAddress(ctx, current_offset, addr); + auto insn = GetInstructionAtAddress(target, current_offset, addr); if (nullptr == insn || !insn->HasDelaySlot()) return addr; @@ -138,8 +136,7 @@ lldb::addr_t ArchitectureMips::GetBreakableLoadAddress(lldb::addr_t addr, } Instruction *ArchitectureMips::GetInstructionAtAddress( - const ExecutionContext &exe_ctx, const Address &resolved_addr, - addr_t symbol_offset) const { + Target &target, const Address &resolved_addr, addr_t symbol_offset) const { auto loop_count = symbol_offset / 2; @@ -174,7 +171,7 @@ Instruction *ArchitectureMips::GetInstructionAtAddress( AddressRange range(addr, i * 2); uint32_t insn_size = 0; - disasm_sp->ParseInstructions(&exe_ctx, range, nullptr, prefer_file_cache); + disasm_sp->ParseInstructions(target, range, nullptr, prefer_file_cache); uint32_t num_insns = disasm_sp->GetInstructionList().GetSize(); if (num_insns) { 
diff --git a/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.h b/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.h index 40bcc23fd8cd4d..71ee60184b6955 100644 --- a/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.h +++ b/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.h @@ -35,11 +35,10 @@ class ArchitectureMips : public Architecture { AddressClass addr_class) const override; private: - Instruction *GetInstructionAtAddress(const ExecutionContext &exe_ctx, + Instruction *GetInstructionAtAddress(Target &target, const Address &resolved_addr, lldb::addr_t symbol_offset) const; - static std::unique_ptr Create(const ArchSpec &arch); ArchitectureMips(const ArchSpec &arch) : m_arch(arch) {} diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp index 5440367fd71770..1ed3e693d8d21d 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp @@ -50,7 +50,7 @@ #include #endif -#if LLVM_ENABLE_ZLIB +#if defined(HAVE_LIBZ) #include #endif @@ -582,7 +582,7 @@ bool GDBRemoteCommunication::DecompressPacket() { } #endif -#if LLVM_ENABLE_ZLIB +#if defined(HAVE_LIBZ) if (decompressed_bytes == 0 && decompressed_bufsize != ULONG_MAX && decompressed_buffer != nullptr && m_compression_type == CompressionType::ZlibDeflate) { diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index 67e5d59d199ecb..6021c2664b0678 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -1053,7 +1053,7 @@ void GDBRemoteCommunicationClient::MaybeEnableCompression( } #endif -#if LLVM_ENABLE_ZLIB +#if defined(HAVE_LIBZ) if (avail_type == CompressionType::None) { for (auto compression : 
supported_compressions) { if (compression == "zlib-deflate") { diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 156f6f7f4fc95d..72907a95f3ab3a 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -3123,7 +3123,7 @@ Status ProcessGDBRemote::EnableBreakpointSite(BreakpointSite *bp_site) { if (m_gdb_comm.SupportsGDBStoppointPacket(eBreakpointSoftware)) { if (error_no != UINT8_MAX) error.SetErrorStringWithFormat( - "error: %d sending the breakpoint request", errno); + "error: %d sending the breakpoint request", error_no); else error.SetErrorString("error sending the breakpoint request"); return error; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index c89ccb5bf96056..c27b5c4c349541 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -324,10 +324,8 @@ void SymbolFileDWARF::GetTypes(const DWARFDIE &die, dw_offset_t min_die_offset, if (add_type) { const bool assert_not_being_parsed = true; Type *type = ResolveTypeUID(die, assert_not_being_parsed); - if (type) { - if (type_set.find(type) == type_set.end()) - type_set.insert(type); - } + if (type) + type_set.insert(type); } } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index a3928c8c3dd490..479235c0d86f9a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -12,11 +12,11 @@ #include #include #include -#include #include #include #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Support/Threading.h" #include "lldb/Core/UniqueCStringMap.h" @@ -439,7 +439,7 @@ class SymbolFileDWARF : public 
lldb_private::SymbolFile, bool FixupAddress(lldb_private::Address &addr); - typedef std::set TypeSet; + typedef llvm::SetVector TypeSet; void GetTypes(const DWARFDIE &die, dw_offset_t min_die_offset, dw_offset_t max_die_offset, uint32_t type_mask, diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 845318471fa15f..ffcab238d09de1 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -84,12 +84,14 @@ using llvm::StringSwitch; LLDB_PLUGIN_DEFINE(TypeSystemClang) namespace { -#ifdef LLDB_CONFIGURATION_DEBUG static void VerifyDecl(clang::Decl *decl) { assert(decl && "VerifyDecl called with nullptr?"); +#ifndef NDEBUG + // We don't care about the actual access value here but only want to trigger + // that Clang calls its internal Decl::AccessDeclContextSanity check. decl->getAccess(); -} #endif +} static inline bool TypeSystemClangSupportsLanguage(lldb::LanguageType language) { @@ -1415,9 +1417,7 @@ ClassTemplateDecl *TypeSystemClang::CreateClassTemplateDecl( decl_ctx->addDecl(class_template_decl); -#ifdef LLDB_CONFIGURATION_DEBUG VerifyDecl(class_template_decl); -#endif } return class_template_decl; @@ -1687,9 +1687,7 @@ NamespaceDecl *TypeSystemClang::GetUniqueNamespaceDeclaration( } } } -#ifdef LLDB_CONFIGURATION_DEBUG VerifyDecl(namespace_decl); -#endif return namespace_decl; } @@ -1892,9 +1890,7 @@ FunctionDecl *TypeSystemClang::CreateFunctionDeclaration( if (func_decl) decl_ctx->addDecl(func_decl); -#ifdef LLDB_CONFIGURATION_DEBUG VerifyDecl(func_decl); -#endif return func_decl; } @@ -6937,9 +6933,7 @@ clang::FieldDecl *TypeSystemClang::AddFieldToRecordType( record_decl->addDecl(field); -#ifdef LLDB_CONFIGURATION_DEBUG VerifyDecl(field); -#endif } } else { clang::ObjCInterfaceDecl *class_interface_decl = @@ -6962,9 +6956,7 @@ clang::FieldDecl *TypeSystemClang::AddFieldToRecordType( if (field) { 
class_interface_decl->addDecl(field); -#ifdef LLDB_CONFIGURATION_DEBUG VerifyDecl(field); -#endif } } } @@ -7128,9 +7120,7 @@ clang::VarDecl *TypeSystemClang::AddVariableToRecordType( TypeSystemClang::ConvertAccessTypeToAccessSpecifier(access)); record_decl->addDecl(var_decl); -#ifdef LLDB_CONFIGURATION_DEBUG VerifyDecl(var_decl); -#endif return var_decl; } @@ -7310,9 +7300,7 @@ clang::CXXMethodDecl *TypeSystemClang::AddMethodToCXXRecordType( } } -#ifdef LLDB_CONFIGURATION_DEBUG VerifyDecl(cxx_method_decl); -#endif return cxx_method_decl; } @@ -7704,9 +7692,7 @@ clang::ObjCMethodDecl *TypeSystemClang::AddMethodToObjCObjectType( class_interface_decl->addDecl(objc_method_decl); -#ifdef LLDB_CONFIGURATION_DEBUG VerifyDecl(objc_method_decl); -#endif return objc_method_decl; } @@ -7904,10 +7890,7 @@ clang::EnumConstantDecl *TypeSystemClang::AddEnumerationValueToEnumerationType( enutype->getDecl()->addDecl(enumerator_decl); -#ifdef LLDB_CONFIGURATION_DEBUG VerifyDecl(enumerator_decl); -#endif - return enumerator_decl; } diff --git a/lldb/source/Target/CMakeLists.txt b/lldb/source/Target/CMakeLists.txt index 893065442e8064..2d9274ec52cacb 100644 --- a/lldb/source/Target/CMakeLists.txt +++ b/lldb/source/Target/CMakeLists.txt @@ -79,6 +79,7 @@ add_lldb_library(lldbTarget LINK_COMPONENTS Support + MC ) add_dependencies(lldbTarget diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp index 00f8b5ae276e54..60d5617053ec9b 100644 --- a/lldb/source/Target/Thread.cpp +++ b/lldb/source/Target/Thread.cpp @@ -596,8 +596,12 @@ std::string Thread::GetStopDescription() { std::string Thread::GetStopDescriptionRaw() { StopInfoSP stop_info_sp = GetStopInfo(); std::string raw_stop_description; - if (stop_info_sp && stop_info_sp->IsValid()) + if (stop_info_sp && stop_info_sp->IsValid()) { raw_stop_description = stop_info_sp->GetDescription(); + assert((!raw_stop_description.empty() || + stop_info_sp->GetStopReason() == eStopReasonNone) && + "StopInfo returned an empty 
description."); + } return raw_stop_description; } diff --git a/lldb/source/Utility/Broadcaster.cpp b/lldb/source/Utility/Broadcaster.cpp index 90f91b4f89cfc8..342548c0b0e616 100644 --- a/lldb/source/Utility/Broadcaster.cpp +++ b/lldb/source/Utility/Broadcaster.cpp @@ -373,8 +373,8 @@ bool BroadcasterManager::UnregisterListenerForEvents( if (event_bits_to_remove != iter_event_bits) { uint32_t new_event_bits = iter_event_bits & ~event_bits_to_remove; - to_be_readded.push_back( - BroadcastEventSpec(event_spec.GetBroadcasterClass(), new_event_bits)); + to_be_readded.emplace_back(event_spec.GetBroadcasterClass(), + new_event_bits); } m_event_map.erase(iter); } diff --git a/lldb/test/Shell/Commands/command-thread-select.test b/lldb/test/Shell/Commands/command-thread-select.test new file mode 100644 index 00000000000000..3b48452eea8264 --- /dev/null +++ b/lldb/test/Shell/Commands/command-thread-select.test @@ -0,0 +1,17 @@ +# RUN: %clang_host -g %S/Inputs/main.c -o %t +# RUN: %lldb %t -s %s -o exit | FileCheck %s + +b main +# CHECK-LABEL: b main +# CHECK: Breakpoint 1: where = {{.*}}`main + +run +# CHECK-LABEL: run +# CHECK: Process {{.*}} stopped +# CHECK: stop reason = breakpoint 1 +# CHECK: frame #0: {{.*}}`main at main.c + +thread select 1 +# CHECK-LABEL: thread select 1 +# CHECK: stop reason = breakpoint 1 +# CHECK: frame #0: {{.*}}`main at main.c diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 781098d389167b..02c4bddf21af89 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -347,13 +347,7 @@ option(LLVM_ENABLE_LIBPFM "Use libpfm for performance counters if available." ON option(LLVM_ENABLE_THREADS "Use threads if available." ON) -if(CMAKE_SYSTEM_NAME STREQUAL Windows) - set(zlib_DEFAULT "OFF") -else() - set(zlib_DEFAULT "ON") -endif() - -set(LLVM_ENABLE_ZLIB "${zlib_DEFAULT}" CACHE STRING "Use zlib for compression/decompression if available. 
Can be ON, OFF, or FORCE_ON") +option(LLVM_ENABLE_ZLIB "Use zlib for compression/decompression if available." ON) set(LLVM_Z3_INSTALL_DIR "" CACHE STRING "Install directory of the Z3 solver.") diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index fc66dbfcbe7a70..f758366bc79d4a 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -56,6 +56,7 @@ check_include_file(sys/types.h HAVE_SYS_TYPES_H) check_include_file(termios.h HAVE_TERMIOS_H) check_include_file(unistd.h HAVE_UNISTD_H) check_include_file(valgrind/valgrind.h HAVE_VALGRIND_VALGRIND_H) +check_include_file(zlib.h HAVE_ZLIB_H) check_include_file(fenv.h HAVE_FENV_H) check_symbol_exists(FE_ALL_EXCEPT "fenv.h" HAVE_DECL_FE_ALL_EXCEPT) check_symbol_exists(FE_INEXACT "fenv.h" HAVE_DECL_FE_INEXACT) @@ -117,6 +118,19 @@ endif() # Don't look for these libraries if we're using MSan, since uninstrumented third # party code may call MSan interceptors like strlen, leading to false positives. if(NOT LLVM_USE_SANITIZER MATCHES "Memory.*") + set(HAVE_LIBZ 0) + if(LLVM_ENABLE_ZLIB) + foreach(library z zlib_static zlib) + string(TOUPPER ${library} library_suffix) + check_library_exists(${library} compress2 "" HAVE_LIBZ_${library_suffix}) + if(HAVE_LIBZ_${library_suffix}) + set(HAVE_LIBZ 1) + set(ZLIB_LIBRARIES "${library}") + break() + endif() + endforeach() + endif() + # Don't look for these libraries on Windows. if (NOT PURE_WINDOWS) # Skip libedit if using ASan as it contains memory leaks. @@ -501,21 +515,10 @@ else( LLVM_ENABLE_THREADS ) message(STATUS "Threads disabled.") endif() -if(LLVM_ENABLE_ZLIB) - if(LLVM_ENABLE_ZLIB STREQUAL FORCE_ON) - find_package(ZLIB REQUIRED) - else() - find_package(ZLIB) - endif() - - if(ZLIB_FOUND) - set(LLVM_ENABLE_ZLIB "YES" CACHE STRING - "Use zlib for compression/decompression if available. Can be ON, OFF, or FORCE_ON" - FORCE) - else() - set(LLVM_ENABLE_ZLIB "NO" CACHE STRING - "Use zlib for compression/decompression if available. 
Can be ON, OFF, or FORCE_ON" - FORCE) +if (LLVM_ENABLE_ZLIB ) + # Check if zlib is available in the system. + if ( NOT HAVE_ZLIB_H OR NOT HAVE_LIBZ ) + set(LLVM_ENABLE_ZLIB 0) endif() endif() diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h new file mode 100644 index 00000000000000..5b2ec3a265364d --- /dev/null +++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -0,0 +1,162 @@ +//===- llvm/Analysis/LoopNestAnalysis.h -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the interface for the loop nest analysis. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LOOPNESTANALYSIS_H +#define LLVM_ANALYSIS_LOOPNESTANALYSIS_H + +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" + +namespace llvm { + +using LoopVectorTy = SmallVector; + +/// This class represents a loop nest and can be used to query its properties. +class LoopNest { +public: + /// Construct a loop nest rooted by loop \p Root. + LoopNest(Loop &Root, ScalarEvolution &SE); + + LoopNest() = delete; + LoopNest &operator=(const LoopNest &) = delete; + + /// Construct a LoopNest object. + static std::unique_ptr getLoopNest(Loop &Root, ScalarEvolution &SE); + + /// Return true if the given loops \p OuterLoop and \p InnerLoop are + /// perfectly nested with respect to each other, and false otherwise. + /// Example: + /// \code + /// for(i) + /// for(j) + /// for(k) + /// \endcode + /// arePerfectlyNested(loop_i, loop_j, SE) would return true. 
+ /// arePerfectlyNested(loop_j, loop_k, SE) would return true. + /// arePerfectlyNested(loop_i, loop_k, SE) would return false. + static bool arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop, + ScalarEvolution &SE); + + /// Return the maximum nesting depth of the loop nest rooted by loop \p Root. + /// For example given the loop nest: + /// \code + /// for(i) // loop at level 1 and Root of the nest + /// for(j) // loop at level 2 + /// + /// for(k) // loop at level 3 + /// \endcode + /// getMaxPerfectDepth(Loop_i) would return 2. + static unsigned getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE); + + /// Return the outermost loop in the loop nest. + Loop &getOutermostLoop() const { return *Loops.front(); } + + /// Return the innermost loop in the loop nest if the nest has only one + /// innermost loop, and a nullptr otherwise. + /// Note: the innermost loop returned is not necessarily perfectly nested. + Loop *getInnermostLoop() const { + if (Loops.size() == 1) + return Loops.back(); + + // The loops in the 'Loops' vector have been collected in breadth first + // order, therefore if the last 2 loops in it have the same nesting depth + // there isn't a unique innermost loop in the nest. + Loop *LastLoop = Loops.back(); + auto SecondLastLoopIter = ++Loops.rbegin(); + return (LastLoop->getLoopDepth() == (*SecondLastLoopIter)->getLoopDepth()) + ? nullptr + : LastLoop; + } + + /// Return the loop at the given \p Index. + Loop *getLoop(unsigned Index) const { + assert(Index < Loops.size() && "Index is out of bounds"); + return Loops[Index]; + } + + /// Return the number of loops in the nest. + size_t getNumLoops() const { return Loops.size(); } + + /// Get the loops in the nest. + ArrayRef getLoops() const { return Loops; } + + /// Retrieve a vector of perfect loop nests contained in the current loop + /// nest. For example, given the following nest containing 4 loops, this + /// member function would return {{L1,L2},{L3,L4}}. 
+ /// \code + /// for(i) // L1 + /// for(j) // L2 + /// + /// for(k) // L3 + /// for(l) // L4 + /// \endcode + SmallVector getPerfectLoops(ScalarEvolution &SE) const; + + /// Return the loop nest depth (i.e. the loop depth of the 'deepest' loop) + /// For example given the loop nest: + /// \code + /// for(i) // loop at level 1 and Root of the nest + /// for(j1) // loop at level 2 + /// for(k) // loop at level 3 + /// for(j2) // loop at level 2 + /// \endcode + /// getNestDepth() would return 3. + unsigned getNestDepth() const { + int NestDepth = + Loops.back()->getLoopDepth() - Loops.front()->getLoopDepth() + 1; + assert(NestDepth > 0 && "Expecting NestDepth to be at least 1"); + return NestDepth; + } + + /// Return the maximum perfect nesting depth. + unsigned getMaxPerfectDepth() const { return MaxPerfectDepth; } + + /// Return true if all loops in the loop nest are in simplify form. + bool areAllLoopsSimplifyForm() const { + return llvm::all_of(Loops, + [](const Loop *L) { return L->isLoopSimplifyForm(); }); + } + +protected: + const unsigned MaxPerfectDepth; // maximum perfect nesting depth level. + LoopVectorTy Loops; // the loops in the nest (in breadth first order). +}; + +raw_ostream &operator<<(raw_ostream &, const LoopNest &); + +/// This analysis provides information for a loop nest. The analysis runs on +/// demand and can be initiated via AM.getResult. +class LoopNestAnalysis : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + static AnalysisKey Key; + +public: + using Result = LoopNest; + Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR); +}; + +/// Printer pass for the \c LoopNest results. 
+class LoopNestPrinterPass : public PassInfoMixin { + raw_ostream &OS; + +public: + explicit LoopNestPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_LOOPNESTANALYSIS_H diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h index 6693e40ccf223e..f90dcf604e9b09 100644 --- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h @@ -120,6 +120,11 @@ class ProfileSummaryInfo { bool isFunctionHotInCallGraphNthPercentile(int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI); + /// Returns true if \p F contains cold code with regard to a given cold + /// percentile cutoff value. + bool isFunctionColdInCallGraphNthPercentile(int PercentileCutoff, + const Function *F, + BlockFrequencyInfo &BFI); /// Returns true if count \p C is considered hot. bool isHotCount(uint64_t C); /// Returns true if count \p C is considered cold. @@ -127,6 +132,9 @@ class ProfileSummaryInfo { /// Returns true if count \p C is considered hot with regard to a given /// hot percentile cutoff value. bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C); + /// Returns true if count \p C is considered cold with regard to a given + /// cold percentile cutoff value. + bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C); /// Returns true if BasicBlock \p BB is considered hot. bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI); /// Returns true if BasicBlock \p BB is considered cold. @@ -135,6 +143,10 @@ class ProfileSummaryInfo { /// hot percentile cutoff value. bool isHotBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI); + /// Returns true if BasicBlock \p BB is considered cold with regard to a given + /// cold percentile cutoff value. 
+ bool isColdBlockNthPercentile(int PercentileCutoff, + const BasicBlock *BB, BlockFrequencyInfo *BFI); /// Returns true if CallSite \p CS is considered hot. bool isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI); /// Returns true if Callsite \p CS is considered cold. @@ -153,6 +165,17 @@ class ProfileSummaryInfo { uint64_t getColdCountThreshold() { return ColdCountThreshold ? ColdCountThreshold.getValue() : 0; } + + private: + template + bool isFunctionHotOrColdInCallGraphNthPercentile(int PercentileCutoff, + const Function *F, + BlockFrequencyInfo &BFI); + template + bool isHotOrColdCountNthPercentile(int PercentileCutoff, uint64_t C); + template + bool isHotOrColdBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB, + BlockFrequencyInfo *BFI); }; /// An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo. diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 8a1e720d77f6bb..2968a5f37a4617 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -494,6 +494,8 @@ class TargetTransformInfo { bool UpperBound; /// Allow peeling off loop iterations. bool AllowPeeling; + /// Allow peeling off loop iterations for loop nests. + bool AllowLoopNestsPeeling; /// Allow unrolling of all the iterations of the runtime loop remainder. bool UnrollRemainder; /// Allow unroll and jam. Used to enable unroll and jam for the target. diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 9583e2b718e5ba..0552420c3c3362 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2754,6 +2754,13 @@ class TargetLoweringBase { /// The default implementation just freezes the set of reserved registers. 
virtual void finalizeLowering(MachineFunction &MF) const; + //===----------------------------------------------------------------------===// + // GlobalISel Hooks + //===----------------------------------------------------------------------===// + /// Check whether or not \p MI needs to be moved close to its uses. + virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const; + + private: const TargetMachine &TM; diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake index db170ae5d62d27..1a38bc15ab9d66 100644 --- a/llvm/include/llvm/Config/config.h.cmake +++ b/llvm/include/llvm/Config/config.h.cmake @@ -109,6 +109,9 @@ /* Define to 1 if you have the `pthread_setname_np' function. */ #cmakedefine HAVE_PTHREAD_SETNAME_NP ${HAVE_PTHREAD_SETNAME_NP} +/* Define to 1 if you have the `z' library (-lz). */ +#cmakedefine HAVE_LIBZ ${HAVE_LIBZ} + /* Define to 1 if you have the header file. */ #cmakedefine HAVE_LINK_H ${HAVE_LINK_H} @@ -223,6 +226,9 @@ /* Define to 1 if you have the header file. */ #cmakedefine HAVE_VALGRIND_VALGRIND_H ${HAVE_VALGRIND_VALGRIND_H} +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_ZLIB_H ${HAVE_ZLIB_H} + /* Have host's _alloca */ #cmakedefine HAVE__ALLOCA ${HAVE__ALLOCA} diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h index 7880bcdf688132..e666c82bca0804 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h @@ -39,7 +39,10 @@ class DWARFDebugMacro { }; }; - using MacroList = SmallVector; + struct MacroList { + SmallVector Macros; + uint64_t Offset; + }; /// A list of all the macro entries in the debug_macinfo section. 
std::vector MacroLists; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 84f4ae7599fd0a..20e5b95a827aea 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -91,6 +91,7 @@ __OMP_DIRECTIVE_EXT(parallel_master_taskloop, "parallel master taskloop") __OMP_DIRECTIVE_EXT(master_taskloop_simd, "master taskloop simd") __OMP_DIRECTIVE_EXT(parallel_master_taskloop_simd, "parallel master taskloop simd") +__OMP_DIRECTIVE(depobj) // Has to be the last because Clang implicitly expects it to be. __OMP_DIRECTIVE(unknown) @@ -130,7 +131,7 @@ __OMP_TYPE(Int32Ptr) #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) #endif -#define __OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \ +#define __OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \ OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) __OMP_ARRAY_TYPE(KmpCriticalName, Int32, 8) diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 56874ecf9d1e67..8259df101178da 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -833,8 +833,7 @@ struct TypeTestResolution { Single, ///< Single element (last example in "Short Inline Bit Vectors") AllOnes, ///< All-ones bit vector ("Eliminating Bit Vector Checks for /// All-Ones Bit Vectors") - Unknown, ///< Unknown (analysis not performed, don't lower) - } TheKind = Unknown; + } TheKind = Unsat; /// Range of size-1 expressed as a bit width. For example, if the size is in /// range [1,256], this number will be 8. This helps generate the most compact @@ -1028,7 +1027,7 @@ class ModuleSummaryIndex { // in the way some record are interpreted, like flags for instance. // Note that incrementing this may require changes in both BitcodeReader.cpp // and BitcodeWriter.cpp. 
- static constexpr uint64_t BitcodeSummaryVersion = 9; + static constexpr uint64_t BitcodeSummaryVersion = 8; // Regular LTO module name for ASM writer static constexpr const char *getRegularLTOModuleName() { diff --git a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h index e51ec67b4c914f..7dcb455274f891 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -17,7 +17,6 @@ namespace yaml { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, TypeTestResolution::Kind &value) { - io.enumCase(value, "Unknown", TypeTestResolution::Unknown); io.enumCase(value, "Unsat", TypeTestResolution::Unsat); io.enumCase(value, "ByteArray", TypeTestResolution::ByteArray); io.enumCase(value, "Inline", TypeTestResolution::Inline); diff --git a/llvm/include/llvm/MC/LaneBitmask.h b/llvm/include/llvm/MC/LaneBitmask.h index d5f69287a265b0..b070bea3201cfe 100644 --- a/llvm/include/llvm/MC/LaneBitmask.h +++ b/llvm/include/llvm/MC/LaneBitmask.h @@ -38,9 +38,9 @@ namespace llvm { struct LaneBitmask { // When changing the underlying type, change the format string as well. 
- using Type = unsigned; + using Type = uint64_t; enum : unsigned { BitWidth = 8*sizeof(Type) }; - constexpr static const char *const FormatStr = "%08X"; + constexpr static const char *const FormatStr = "%016lX"; constexpr LaneBitmask() = default; explicit constexpr LaneBitmask(Type V) : Mask(V) {} @@ -76,7 +76,7 @@ namespace llvm { return countPopulation(Mask); } unsigned getHighestLane() const { - return Log2_32(Mask); + return Log2_64(Mask); } static constexpr LaneBitmask getNone() { return LaneBitmask(0); } diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h index caa392a41b2be6..b57439f02ca550 100644 --- a/llvm/include/llvm/MC/MCAssembler.h +++ b/llvm/include/llvm/MC/MCAssembler.h @@ -195,6 +195,7 @@ class MCAssembler { bool relaxFragment(MCAsmLayout &Layout, MCFragment &F); bool relaxInstruction(MCAsmLayout &Layout, MCRelaxableFragment &IF); bool relaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF); + bool relaxBoundaryAlign(MCAsmLayout &Layout, MCBoundaryAlignFragment &BF); bool relaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF); bool relaxDwarfCallFrameFragment(MCAsmLayout &Layout, MCDwarfCallFrameFragment &DF); diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h index 610e924f7846c3..e052098611a999 100644 --- a/llvm/include/llvm/MC/MCFragment.h +++ b/llvm/include/llvm/MC/MCFragment.h @@ -528,6 +528,9 @@ class MCBoundaryAlignFragment : public MCFragment { bool Fused : 1; /// Flag to indicate whether NOPs should be emitted. bool EmitNops : 1; + /// The size of the fragment. The size is lazily set during relaxation, and + /// is not meaningful before that. 
+ uint64_t Size = 0; public: MCBoundaryAlignFragment(Align AlignBoundary = Align(1), bool Fused = false, @@ -535,6 +538,9 @@ class MCBoundaryAlignFragment : public MCFragment { : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary), Fused(Fused), EmitNops(EmitNops) {} + uint64_t getSize() const { return Size; } + void setSize(uint64_t Value) { Size = Value; } + Align getAlignment() const { return AlignBoundary; } void setAlignment(Align Value) { AlignBoundary = Value; } diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h index 99cc52f54ab90e..97f4c32eb035ac 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h @@ -185,11 +185,17 @@ class BinaryCoverageReader : public CoverageMappingReader { std::vector Expressions; std::vector MappingRegions; + // Used to tie the lifetimes of coverage function records to the lifetime of + // this BinaryCoverageReader instance. Needed to support the format change in + // D69471, which can split up function records into multiple sections on ELF. + std::string FuncRecords; + // Used to tie the lifetimes of decompressed strings to the lifetime of this // BinaryCoverageReader instance. 
DecompressedData Decompressed; - BinaryCoverageReader() = default; + BinaryCoverageReader(std::string &&FuncRecords) + : FuncRecords(std::move(FuncRecords)) {} public: BinaryCoverageReader(const BinaryCoverageReader &) = delete; @@ -200,7 +206,7 @@ class BinaryCoverageReader : public CoverageMappingReader { SmallVectorImpl> &ObjectFileBuffers); static Expected> - createCoverageReaderFromBuffer(StringRef Coverage, StringRef FuncRecords, + createCoverageReaderFromBuffer(StringRef Coverage, std::string &&FuncRecords, InstrProfSymtab &&ProfileNames, uint8_t BytesInAddress, support::endianness Endian); diff --git a/llvm/include/llvm/Support/Base64.h b/llvm/include/llvm/Support/Base64.h index 3f6616633e5fbf..62064a35aa3448 100644 --- a/llvm/include/llvm/Support/Base64.h +++ b/llvm/include/llvm/Support/Base64.h @@ -26,20 +26,23 @@ template std::string encodeBase64(InputBytes const &Bytes) { size_t i = 0, j = 0; for (size_t n = Bytes.size() / 3 * 3; i < n; i += 3, j += 4) { - uint32_t x = (Bytes[i] << 16) | (Bytes[i + 1] << 8) | Bytes[i + 2]; + uint32_t x = ((unsigned char)Bytes[i] << 16) | + ((unsigned char)Bytes[i + 1] << 8) | + (unsigned char)Bytes[i + 2]; Buffer[j + 0] = Table[(x >> 18) & 63]; Buffer[j + 1] = Table[(x >> 12) & 63]; Buffer[j + 2] = Table[(x >> 6) & 63]; Buffer[j + 3] = Table[x & 63]; } if (i + 1 == Bytes.size()) { - uint32_t x = (Bytes[i] << 16); + uint32_t x = ((unsigned char)Bytes[i] << 16); Buffer[j + 0] = Table[(x >> 18) & 63]; Buffer[j + 1] = Table[(x >> 12) & 63]; Buffer[j + 2] = '='; Buffer[j + 3] = '='; } else if (i + 2 == Bytes.size()) { - uint32_t x = (Bytes[i] << 16) | (Bytes[i + 1] << 8); + uint32_t x = + ((unsigned char)Bytes[i] << 16) | ((unsigned char)Bytes[i + 1] << 8); Buffer[j + 0] = Table[(x >> 18) & 63]; Buffer[j + 1] = Table[(x >> 12) & 63]; Buffer[j + 2] = Table[(x >> 6) & 63]; diff --git a/llvm/include/llvm/Transforms/Utils/KnowledgeRetention.h b/llvm/include/llvm/Transforms/Utils/KnowledgeRetention.h index 
27d83373e07456..c3baf8a43c0d80 100644 --- a/llvm/include/llvm/Transforms/Utils/KnowledgeRetention.h +++ b/llvm/include/llvm/Transforms/Utils/KnowledgeRetention.h @@ -19,6 +19,7 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/PassManager.h" +#include "llvm/ADT/DenseMap.h" namespace llvm { @@ -58,9 +59,41 @@ inline bool hasAttributeInAssume(CallInst &AssumeCI, Value *IsOn, AssumeCI, IsOn, Attribute::getNameFromAttrKind(Kind), ArgVal, AQR); } -/// TODO: Add an function to create/fill a map from the bundle when users intend -/// to make many different queries on the same bundles. to be used for example -/// in the Attributor. +template<> struct DenseMapInfo { + static constexpr auto MaxValue = std::numeric_limits< + std::underlying_type::type>::max(); + static Attribute::AttrKind getEmptyKey() { + return static_cast(MaxValue); + } + static Attribute::AttrKind getTombstoneKey() { + return static_cast(MaxValue - 1); + } + static unsigned getHashValue(Attribute::AttrKind AK) { + return hash_combine(AK); + } + static bool isEqual(Attribute::AttrKind LHS, Attribute::AttrKind RHS) { + return LHS == RHS; + } +}; + +/// The map Key contains the Value on which the attribute is valid and +/// the Attribute that is valid for that value. +/// If the Attribute is not on any value, the Value is nullptr. +using RetainedKnowledgeKey = std::pair; + +struct MinMax { + unsigned Min; + unsigned Max; +}; + +using RetainedKnowledgeMap = DenseMap; + +/// Insert into the map all the information contained in the operand bundles of +/// the llvm.assume. This should be used instead of hasAttributeInAssume when +/// many queries are going to be made on the same llvm.assume. +/// String attributes are not inserted in the map. +/// If the IR changes the map will be outdated. 
+void fillMapFromAssume(CallInst &AssumeCI, RetainedKnowledgeMap &Result); //===----------------------------------------------------------------------===// // Utilities for testing diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 3de4318cc7b3b9..15a3be5487ebf8 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -24,6 +24,7 @@ #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/IVDescriptors.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" @@ -31,6 +32,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" +#include "llvm/Transforms/Utils/ValueMapper.h" namespace llvm { @@ -426,6 +428,12 @@ void appendReversedLoopsToWorklist(RangeT &&, /// already reversed loops in LI. /// FIXME: Consider changing the order in LoopInfo. void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist &); + +/// Recursively clone the specified loop and all of its children, +/// mapping the blocks with the specified map. 
+Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, + LoopInfo *LI, LPPassManager *LPM); + } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 34140e18677d26..969049f9078217 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -53,6 +53,7 @@ add_llvm_component_library(LLVMAnalysis LoopAccessAnalysis.cpp LoopAnalysisManager.cpp LoopCacheAnalysis.cpp + LoopNestAnalysis.cpp LoopUnrollAnalyzer.cpp LoopInfo.cpp LoopPass.cpp diff --git a/llvm/lib/Analysis/LoopNestAnalysis.cpp b/llvm/lib/Analysis/LoopNestAnalysis.cpp new file mode 100644 index 00000000000000..61e53de93151aa --- /dev/null +++ b/llvm/lib/Analysis/LoopNestAnalysis.cpp @@ -0,0 +1,296 @@ +//===- LoopNestAnalysis.cpp - Loop Nest Analysis --------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// The implementation for the loop nest analysis. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopNestAnalysis.h" +#include "llvm/ADT/BreadthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/ValueTracking.h" + +using namespace llvm; + +#define DEBUG_TYPE "loopnest" +#ifndef NDEBUG +static const char *VerboseDebug = DEBUG_TYPE "-verbose"; +#endif + +/// Determine whether the loops structure violates basic requirements for +/// perfect nesting: +/// - the inner loop should be the outer loop's only child +/// - the outer loop header should 'flow' into the inner loop preheader +/// or jump around the inner loop to the outer loop latch +/// - if the inner loop latch exits the inner loop, it should 'flow' into +/// the outer loop latch. +/// Returns true if the loop structure satisfies the basic requirements and +/// false otherwise. +static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop, + ScalarEvolution &SE); + +//===----------------------------------------------------------------------===// +// LoopNest implementation +// + +LoopNest::LoopNest(Loop &Root, ScalarEvolution &SE) + : MaxPerfectDepth(getMaxPerfectDepth(Root, SE)) { + for (Loop *L : breadth_first(&Root)) + Loops.push_back(L); +} + +std::unique_ptr LoopNest::getLoopNest(Loop &Root, + ScalarEvolution &SE) { + return std::make_unique(Root, SE); +} + +bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop, + ScalarEvolution &SE) { + assert(!OuterLoop.getSubLoops().empty() && "Outer loop should have subloops"); + assert(InnerLoop.getParentLoop() && "Inner loop should have a parent"); + LLVM_DEBUG(dbgs() << "Checking whether loop '" << OuterLoop.getName() + << "' and '" << InnerLoop.getName() + << "' are perfectly nested.\n"); + + // Determine whether the loops structure satisfies the following requirements: + // - the inner loop should be the outer loop's only child + // - the 
outer loop header should 'flow' into the inner loop preheader + // or jump around the inner loop to the outer loop latch + // - if the inner loop latch exits the inner loop, it should 'flow' into + // the outer loop latch. + if (!checkLoopsStructure(OuterLoop, InnerLoop, SE)) { + LLVM_DEBUG(dbgs() << "Not perfectly nested: invalid loop structure.\n"); + return false; + } + + // Bail out if we cannot retrieve the outer loop bounds. + auto OuterLoopLB = OuterLoop.getBounds(SE); + if (OuterLoopLB == None) { + LLVM_DEBUG(dbgs() << "Cannot compute loop bounds of OuterLoop: " + << OuterLoop << "\n";); + return false; + } + + // Identify the outer loop latch comparison instruction. + const BasicBlock *Latch = OuterLoop.getLoopLatch(); + assert(Latch && "Expecting a valid loop latch"); + const BranchInst *BI = dyn_cast(Latch->getTerminator()); + assert(BI && BI->isConditional() && + "Expecting loop latch terminator to be a branch instruction"); + + const CmpInst *OuterLoopLatchCmp = dyn_cast(BI->getCondition()); + DEBUG_WITH_TYPE( + VerboseDebug, if (OuterLoopLatchCmp) { + dbgs() << "Outer loop latch compare instruction: " << *OuterLoopLatchCmp + << "\n"; + }); + + // Identify the inner loop guard instruction. + BranchInst *InnerGuard = InnerLoop.getLoopGuardBranch(); + const CmpInst *InnerLoopGuardCmp = + (InnerGuard) ? 
dyn_cast(InnerGuard->getCondition()) : nullptr; + + DEBUG_WITH_TYPE( + VerboseDebug, if (InnerLoopGuardCmp) { + dbgs() << "Inner loop guard compare instruction: " << *InnerLoopGuardCmp + << "\n"; + }); + + // Determine whether instructions in a basic block are one of: + // - the inner loop guard comparison + // - the outer loop latch comparison + // - the outer loop induction variable increment + // - a phi node, a cast or a branch + auto containsOnlySafeInstructions = [&](const BasicBlock &BB) { + return llvm::all_of(BB, [&](const Instruction &I) { + bool isAllowed = isSafeToSpeculativelyExecute(&I) || isa(I) || + isa(I); + if (!isAllowed) { + DEBUG_WITH_TYPE(VerboseDebug, { + dbgs() << "Instruction: " << I << "\nin basic block: " << BB + << " is considered unsafe.\n"; + }); + return false; + } + + // The only binary instruction allowed is the outer loop step instruction, + // the only comparison instructions allowed are the inner loop guard + // compare instruction and the outer loop latch compare instruction. + if ((isa(I) && &I != &OuterLoopLB->getStepInst()) || + (isa(I) && &I != OuterLoopLatchCmp && + &I != InnerLoopGuardCmp)) { + DEBUG_WITH_TYPE(VerboseDebug, { + dbgs() << "Instruction: " << I << "\nin basic block:" << BB + << "is unsafe.\n"; + }); + return false; + } + return true; + }); + }; + + // Check the code surrounding the inner loop for instructions that are deemed + // unsafe. 
+ const BasicBlock *OuterLoopHeader = OuterLoop.getHeader(); + const BasicBlock *OuterLoopLatch = OuterLoop.getLoopLatch(); + const BasicBlock *InnerLoopPreHeader = InnerLoop.getLoopPreheader(); + + if (!containsOnlySafeInstructions(*OuterLoopHeader) || + !containsOnlySafeInstructions(*OuterLoopLatch) || + (InnerLoopPreHeader != OuterLoopHeader && + !containsOnlySafeInstructions(*InnerLoopPreHeader)) || + !containsOnlySafeInstructions(*InnerLoop.getExitBlock())) { + LLVM_DEBUG(dbgs() << "Not perfectly nested: code surrounding inner loop is " + "unsafe\n";); + return false; + } + + LLVM_DEBUG(dbgs() << "Loop '" << OuterLoop.getName() << "' and '" + << InnerLoop.getName() << "' are perfectly nested.\n"); + + return true; +} + +SmallVector +LoopNest::getPerfectLoops(ScalarEvolution &SE) const { + SmallVector LV; + LoopVectorTy PerfectNest; + + for (Loop *L : depth_first(const_cast(Loops.front()))) { + if (PerfectNest.empty()) + PerfectNest.push_back(L); + + auto &SubLoops = L->getSubLoops(); + if (SubLoops.size() == 1 && arePerfectlyNested(*L, *SubLoops.front(), SE)) { + PerfectNest.push_back(SubLoops.front()); + } else { + LV.push_back(PerfectNest); + PerfectNest.clear(); + } + } + + return LV; +} + +unsigned LoopNest::getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE) { + LLVM_DEBUG(dbgs() << "Get maximum perfect depth of loop nest rooted by loop '" + << Root.getName() << "'\n"); + + const Loop *CurrentLoop = &Root; + const auto *SubLoops = &CurrentLoop->getSubLoops(); + unsigned CurrentDepth = 1; + + while (SubLoops->size() == 1) { + const Loop *InnerLoop = SubLoops->front(); + if (!arePerfectlyNested(*CurrentLoop, *InnerLoop, SE)) { + LLVM_DEBUG({ + dbgs() << "Not a perfect nest: loop '" << CurrentLoop->getName() + << "' is not perfectly nested with loop '" + << InnerLoop->getName() << "'\n"; + }); + break; + } + + CurrentLoop = InnerLoop; + SubLoops = &CurrentLoop->getSubLoops(); + ++CurrentDepth; + } + + return CurrentDepth; +} + +static bool 
checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop, + ScalarEvolution &SE) { + // The inner loop must be the only outer loop's child. + if ((OuterLoop.getSubLoops().size() != 1) || + (InnerLoop.getParentLoop() != &OuterLoop)) + return false; + + // We expect loops in normal form which have a preheader, header, latch... + if (!OuterLoop.isLoopSimplifyForm() || !InnerLoop.isLoopSimplifyForm()) + return false; + + const BasicBlock *OuterLoopHeader = OuterLoop.getHeader(); + const BasicBlock *OuterLoopLatch = OuterLoop.getLoopLatch(); + const BasicBlock *InnerLoopPreHeader = InnerLoop.getLoopPreheader(); + const BasicBlock *InnerLoopLatch = InnerLoop.getLoopLatch(); + const BasicBlock *InnerLoopExit = InnerLoop.getExitBlock(); + + // We expect rotated loops. The inner loop should have a single exit block. + if (OuterLoop.getExitingBlock() != OuterLoopLatch || + InnerLoop.getExitingBlock() != InnerLoopLatch || !InnerLoopExit) + return false; + + // Ensure the only branch that may exist between the loops is the inner loop + // guard. + if (OuterLoopHeader != InnerLoopPreHeader) { + const BranchInst *BI = + dyn_cast(OuterLoopHeader->getTerminator()); + + if (!BI || BI != InnerLoop.getLoopGuardBranch()) + return false; + + // The successors of the inner loop guard should be the inner loop + // preheader and the outer loop latch. + for (const BasicBlock *Succ : BI->successors()) { + if (Succ == InnerLoopPreHeader) + continue; + if (Succ == OuterLoopLatch) + continue; + + DEBUG_WITH_TYPE(VerboseDebug, { + dbgs() << "Inner loop guard successor " << Succ->getName() + << " doesn't lead to inner loop preheader or " + "outer loop latch.\n"; + }); + return false; + } + } + + // Ensure the inner loop exit block leads to the outer loop latch. 
+ if (InnerLoopExit->getSingleSuccessor() != OuterLoopLatch) { + DEBUG_WITH_TYPE( + VerboseDebug, + dbgs() << "Inner loop exit block " << *InnerLoopExit + << " does not directly lead to the outer loop latch.\n";); + return false; + } + + return true; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const LoopNest &LN) { + OS << "IsPerfect="; + if (LN.getMaxPerfectDepth() == LN.getNestDepth()) + OS << "true"; + else + OS << "false"; + OS << ", Depth=" << LN.getNestDepth(); + OS << ", OutermostLoop: " << LN.getOutermostLoop().getName(); + OS << ", Loops: ( "; + for (const Loop *L : LN.getLoops()) + OS << L->getName() << " "; + OS << ")"; + + return OS; +} + +//===----------------------------------------------------------------------===// +// LoopNestPrinterPass implementation +// + +PreservedAnalyses LoopNestPrinterPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + if (auto LN = LoopNest::getLoopNest(L, AR.SE)) + OS << *LN << "\n"; + + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp index 911d39d9a2637b..678d66f632a8ab 100644 --- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -195,15 +195,19 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F, return true; } -// Like isFunctionHotInCallGraph but for a given cutoff. 
-bool ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile( +template +bool ProfileSummaryInfo::isFunctionHotOrColdInCallGraphNthPercentile( int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) { if (!F || !computeSummary()) return false; - if (auto FunctionCount = F->getEntryCount()) - if (isHotCountNthPercentile(PercentileCutoff, FunctionCount.getCount())) + if (auto FunctionCount = F->getEntryCount()) { + if (isHot && + isHotCountNthPercentile(PercentileCutoff, FunctionCount.getCount())) return true; - + if (!isHot && + !isColdCountNthPercentile(PercentileCutoff, FunctionCount.getCount())) + return false; + } if (hasSampleProfile()) { uint64_t TotalCallCount = 0; for (const auto &BB : *F) @@ -211,13 +215,31 @@ bool ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile( if (isa(I) || isa(I)) if (auto CallCount = getProfileCount(&I, nullptr)) TotalCallCount += CallCount.getValue(); - if (isHotCountNthPercentile(PercentileCutoff, TotalCallCount)) + if (isHot && isHotCountNthPercentile(PercentileCutoff, TotalCallCount)) return true; + if (!isHot && !isColdCountNthPercentile(PercentileCutoff, TotalCallCount)) + return false; } - for (const auto &BB : *F) - if (isHotBlockNthPercentile(PercentileCutoff, &BB, &BFI)) + for (const auto &BB : *F) { + if (isHot && isHotBlockNthPercentile(PercentileCutoff, &BB, &BFI)) return true; - return false; + if (!isHot && !isColdBlockNthPercentile(PercentileCutoff, &BB, &BFI)) + return false; + } + return !isHot; +} + +// Like isFunctionHotInCallGraph but for a given cutoff. 
+bool ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile( + int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) { + return isFunctionHotOrColdInCallGraphNthPercentile( + PercentileCutoff, F, BFI); +} + +bool ProfileSummaryInfo::isFunctionColdInCallGraphNthPercentile( + int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) { + return isFunctionHotOrColdInCallGraphNthPercentile( + PercentileCutoff, F, BFI); } /// Returns true if the function's entry is a cold. If it returns false, it @@ -299,9 +321,22 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) { return ColdCountThreshold && C <= ColdCountThreshold.getValue(); } -bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff, uint64_t C) { +template +bool ProfileSummaryInfo::isHotOrColdCountNthPercentile(int PercentileCutoff, + uint64_t C) { auto CountThreshold = computeThreshold(PercentileCutoff); - return CountThreshold && C >= CountThreshold.getValue(); + if (isHot) + return CountThreshold && C >= CountThreshold.getValue(); + else + return CountThreshold && C <= CountThreshold.getValue(); +} + +bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff, uint64_t C) { + return isHotOrColdCountNthPercentile(PercentileCutoff, C); +} + +bool ProfileSummaryInfo::isColdCountNthPercentile(int PercentileCutoff, uint64_t C) { + return isHotOrColdCountNthPercentile(PercentileCutoff, C); } uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() { @@ -327,11 +362,27 @@ bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB, return Count && isColdCount(*Count); } +template +bool ProfileSummaryInfo::isHotOrColdBlockNthPercentile(int PercentileCutoff, + const BasicBlock *BB, + BlockFrequencyInfo *BFI) { + auto Count = BFI->getBlockProfileCount(BB); + if (isHot) + return Count && isHotCountNthPercentile(PercentileCutoff, *Count); + else + return Count && isColdCountNthPercentile(PercentileCutoff, *Count); +} + bool ProfileSummaryInfo::isHotBlockNthPercentile(int 
PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) { - auto Count = BFI->getBlockProfileCount(BB); - return Count && isHotCountNthPercentile(PercentileCutoff, *Count); + return isHotOrColdBlockNthPercentile(PercentileCutoff, BB, BFI); +} + +bool ProfileSummaryInfo::isColdBlockNthPercentile(int PercentileCutoff, + const BasicBlock *BB, + BlockFrequencyInfo *BFI) { + return isHotOrColdBlockNthPercentile(PercentileCutoff, BB, BFI); } bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS, diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index ad74303a784da4..1279e936607ecc 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -7665,9 +7665,6 @@ bool LLParser::ParseTypeTestResolution(TypeTestResolution &TTRes) { return true; switch (Lex.getKind()) { - case lltok::kw_unknown: - TTRes.TheKind = TypeTestResolution::Unknown; - break; case lltok::kw_unsat: TTRes.TheKind = TypeTestResolution::Unsat; break; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index def2dc0e0889ed..134ef74b27047c 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1045,6 +1045,8 @@ void DwarfUnit::constructTemplateTypeParameterDIE( addType(ParamDIE, TP->getType()); if (!TP->getName().empty()) addString(ParamDIE, dwarf::DW_AT_name, TP->getName()); + if (TP->isDefault() && (DD->getDwarfVersion() >= 5)) + addFlag(ParamDIE, dwarf::DW_AT_default_value); } void DwarfUnit::constructTemplateValueParameterDIE( @@ -1057,6 +1059,8 @@ void DwarfUnit::constructTemplateValueParameterDIE( addType(ParamDIE, VP->getType()); if (!VP->getName().empty()) addString(ParamDIE, dwarf::DW_AT_name, VP->getName()); + if (VP->isDefault() && (DD->getDwarfVersion() >= 5)) + addFlag(ParamDIE, dwarf::DW_AT_default_value); if (Metadata *Val = VP->getValue()) { if (ConstantInt *CI = mdconst::dyn_extract(Val)) addConstantValue(ParamDIE, CI, 
VP->getType()); diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp index a1adf4ef9820cd..d0dd538f1f525a 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -76,7 +76,7 @@ class MemCmpExpansion { IRBuilder<> Builder; // Represents the decomposition in blocks of the expansion. For example, // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and - // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {32, 1}. + // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}. struct LoadEntry { LoadEntry(unsigned LoadSize, uint64_t Offset) : LoadSize(LoadSize), Offset(Offset) { diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index 1c4a668e5f316c..9aac47ecb35078 100644 --- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" @@ -40,60 +41,6 @@ void Localizer::init(MachineFunction &MF) { TTI = &getAnalysis().getTTI(MF.getFunction()); } -bool Localizer::shouldLocalize(const MachineInstr &MI) { - // Assuming a spill and reload of a value has a cost of 1 instruction each, - // this helper function computes the maximum number of uses we should consider - // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We - // break even in terms of code size when the original MI has 2 users vs - // choosing to potentially spill. Any more than 2 users we we have a net code - // size increase. This doesn't take into account register pressure though. - auto maxUses = [](unsigned RematCost) { - // A cost of 1 means remats are basically free. 
- if (RematCost == 1) - return UINT_MAX; - if (RematCost == 2) - return 2U; - - // Remat is too expensive, only sink if there's one user. - if (RematCost > 2) - return 1U; - llvm_unreachable("Unexpected remat cost"); - }; - - // Helper to walk through uses and terminate if we've reached a limit. Saves - // us spending time traversing uses if all we want to know is if it's >= min. - auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) { - unsigned NumUses = 0; - auto UI = MRI->use_instr_nodbg_begin(Reg), UE = MRI->use_instr_nodbg_end(); - for (; UI != UE && NumUses < MaxUses; ++UI) { - NumUses++; - } - // If we haven't reached the end yet then there are more than MaxUses users. - return UI == UE; - }; - - switch (MI.getOpcode()) { - default: - return false; - // Constants-like instructions should be close to their users. - // We don't want long live-ranges for them. - case TargetOpcode::G_CONSTANT: - case TargetOpcode::G_FCONSTANT: - case TargetOpcode::G_FRAME_INDEX: - case TargetOpcode::G_INTTOPTR: - return true; - case TargetOpcode::G_GLOBAL_VALUE: { - unsigned RematCost = TTI->getGISelRematGlobalCost(); - Register Reg = MI.getOperand(0).getReg(); - unsigned MaxUses = maxUses(RematCost); - if (MaxUses == UINT_MAX) - return true; // Remats are "free" so always localize. - bool B = isUsesAtMost(Reg, MaxUses); - return B; - } - } -} - void Localizer::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); getSelectionDAGFallbackAnalysisUsage(AU); @@ -119,9 +66,10 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, // we only localize instructions in the entry block here. This might change if // we start doing CSE across blocks. 
auto &MBB = MF.front(); + auto &TL = *MF.getSubtarget().getTargetLowering(); for (auto RI = MBB.rbegin(), RE = MBB.rend(); RI != RE; ++RI) { MachineInstr &MI = *RI; - if (!shouldLocalize(MI)) + if (!TL.shouldLocalize(MI, TTI)) continue; LLVM_DEBUG(dbgs() << "Should localize: " << MI); assert(MI.getDesc().getNumDefs() == 1 && diff --git a/llvm/lib/CodeGen/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues.cpp index ad54378d9edc6d..4d0c2462b7d3ca 100644 --- a/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -138,7 +138,10 @@ struct LocIndex { return (static_cast(Location) << 32) | Index; } - static LocIndex fromRawInteger(uint64_t ID) { + template static LocIndex fromRawInteger(IntT ID) { + static_assert(std::is_unsigned::value && + sizeof(ID) == sizeof(uint64_t), + "Cannot convert raw integer to LocIndex"); return {static_cast(ID >> 32), static_cast(ID)}; } diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 2a220c02613c81..c20c1552377dcc 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -750,10 +750,10 @@ bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) { if (Token.isNot(MIToken::IntegerLiteral) && Token.isNot(MIToken::HexLiteral)) return error("expected a lane mask"); - static_assert(sizeof(LaneBitmask::Type) == sizeof(unsigned), + static_assert(sizeof(LaneBitmask::Type) == sizeof(uint64_t), "Use correct get-function for lane mask"); LaneBitmask::Type V; - if (getUnsigned(V)) + if (getUint64(V)) return error("invalid lane mask value"); Mask = LaneBitmask(V); lex(); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index ae11d7c5dfee9a..eafa0974a7357f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3864,33 +3864,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue 
N1, // the comparison operands is infinity or negative infinity, convert the // condition to a less-awkward <= or >=. if (CFP->getValueAPF().isInfinity()) { - if (CFP->getValueAPF().isNegative()) { - if (Cond == ISD::SETOEQ && - isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE); - if (Cond == ISD::SETUEQ && - isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE); - if (Cond == ISD::SETUNE && - isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT); - if (Cond == ISD::SETONE && - isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT); - } else { - if (Cond == ISD::SETOEQ && - isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE); - if (Cond == ISD::SETUEQ && - isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE); - if (Cond == ISD::SETUNE && - isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT); - if (Cond == ISD::SETONE && - isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT); + bool IsNegInf = CFP->getValueAPF().isNegative(); + ISD::CondCode NewCond = ISD::SETCC_INVALID; + switch (Cond) { + case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break; + case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break; + case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break; + case ISD::SETONE: NewCond = IsNegInf ? 
ISD::SETOGT : ISD::SETOLT; break; + default: break; } + if (NewCond != ISD::SETCC_INVALID && + isCondCodeLegal(NewCond, N0.getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0, N1, NewCond); } } } diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 95c63d09718cfc..436857d6b2150b 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -2072,3 +2073,64 @@ TargetLoweringBase::getAtomicMemOperandFlags(const Instruction &AI, Flags |= getTargetMMOFlags(AI); return Flags; } + +//===----------------------------------------------------------------------===// +// GlobalISel Hooks +//===----------------------------------------------------------------------===// + +bool TargetLoweringBase::shouldLocalize(const MachineInstr &MI, + const TargetTransformInfo *TTI) const { + auto &MF = *MI.getMF(); + auto &MRI = MF.getRegInfo(); + // Assuming a spill and reload of a value has a cost of 1 instruction each, + // this helper function computes the maximum number of uses we should consider + // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We + // break even in terms of code size when the original MI has 2 users vs + // choosing to potentially spill. Any more than 2 users and we have a net code + // size increase. This doesn't take into account register pressure though. + auto maxUses = [](unsigned RematCost) { + // A cost of 1 means remats are basically free. + if (RematCost == 1) + return UINT_MAX; + if (RematCost == 2) + return 2U; + + // Remat is too expensive, only sink if there's one user. 
+ if (RematCost > 2) + return 1U; + llvm_unreachable("Unexpected remat cost"); + }; + + // Helper to walk through uses and terminate if we've reached a limit. Saves + // us spending time traversing uses if all we want to know is if it's >= min. + auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) { + unsigned NumUses = 0; + auto UI = MRI.use_instr_nodbg_begin(Reg), UE = MRI.use_instr_nodbg_end(); + for (; UI != UE && NumUses < MaxUses; ++UI) { + NumUses++; + } + // If we haven't reached the end yet then there are more than MaxUses users. + return UI == UE; + }; + + switch (MI.getOpcode()) { + default: + return false; + // Constants-like instructions should be close to their users. + // We don't want long live-ranges for them. + case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_FRAME_INDEX: + case TargetOpcode::G_INTTOPTR: + return true; + case TargetOpcode::G_GLOBAL_VALUE: { + unsigned RematCost = TTI->getGISelRematGlobalCost(); + Register Reg = MI.getOperand(0).getReg(); + unsigned MaxUses = maxUses(RematCost); + if (MaxUses == UINT_MAX) + return true; // Remats are "free" so always localize. + bool B = isUsesAtMost(Reg, MaxUses); + return B; + } + } +} diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp index 8cb259ebc6222b..2e06e14ee3be30 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp @@ -18,7 +18,8 @@ using namespace dwarf; void DWARFDebugMacro::dump(raw_ostream &OS) const { unsigned IndLevel = 0; for (const auto &Macros : MacroLists) { - for (const Entry &E : Macros) { + OS << format("0x%08" PRIx64 ":\n", Macros.Offset); + for (const Entry &E : Macros.Macros) { // There should not be DW_MACINFO_end_file when IndLevel is Zero. However, // this check handles the case of corrupted ".debug_macinfo" section. 
if (IndLevel > 0) @@ -51,7 +52,6 @@ void DWARFDebugMacro::dump(raw_ostream &OS) const { } OS << "\n"; } - OS << "\n"; } } @@ -62,15 +62,17 @@ void DWARFDebugMacro::parse(DataExtractor data) { if (!M) { MacroLists.emplace_back(); M = &MacroLists.back(); + M->Offset = Offset; } // A macro list entry consists of: - M->emplace_back(); - Entry &E = M->back(); + M->Macros.emplace_back(); + Entry &E = M->Macros.back(); // 1. Macinfo type E.Type = data.getULEB128(&Offset); if (E.Type == 0) { // Reached end of a ".debug_macinfo" section contribution. + M = nullptr; continue; } diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp index 3a84ac41e86cad..091f1af9d11a40 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -29,7 +29,13 @@ uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) { llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); llvm::StringRef filename = llvm::sys::path::filename(Path, Style); - FileEntry FE(insertString(directory), insertString(filename)); + // We must insert the strings first, then call the FileEntry constructor. + // If we inline the insertString() function call into the constructor, the + // call order is undefined due to parameter lists not having any ordering + // requirements. 
+ const uint32_t Dir = insertString(directory); + const uint32_t Base = insertString(filename); + FileEntry FE(Dir, Base); std::lock_guard Guard(Mutex); const auto NextIndex = Files.size(); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 7c0b79fcabd93d..c2added23de80d 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -2780,8 +2780,6 @@ static const char *getWholeProgDevirtResByArgKindName( static const char *getTTResKindName(TypeTestResolution::Kind K) { switch (K) { - case TypeTestResolution::Unknown: - return "unknown"; case TypeTestResolution::Unsat: return "unsat"; case TypeTestResolution::ByteArray: diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index b630a2893b4d93..bd8829e92656e3 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -660,12 +660,7 @@ DISubprogram *DISubprogram::getImpl( bool DISubprogram::describes(const Function *F) const { assert(F && "Invalid function"); - if (F->getSubprogram() == this) - return true; - StringRef Name = getLinkageName(); - if (Name.empty()) - Name = getName(); - return F->getName() == Name; + return F->getSubprogram() == this; } DILexicalBlock *DILexicalBlock::getImpl(LLVMContext &Context, Metadata *Scope, diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index cf0eac90865249..eaabd553e95b48 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2364,8 +2364,7 @@ void Verifier::visitFunction(const Function &F) { if (!HasDebugInfo) return; - // Check that all !dbg attachments lead to back to N (or, at least, another - // subprogram that describes the same function). + // Check that all !dbg attachments lead to back to N. // // FIXME: Check this incrementally while visiting !dbg attachments. // FIXME: Only check when N is the canonical subprogram for F. 
@@ -2394,11 +2393,9 @@ void Verifier::visitFunction(const Function &F) { if (SP && ((Scope != SP) && !Seen.insert(SP).second)) return; - // FIXME: Once N is canonical, check "SP == &N". AssertDI(SP->describes(&F), "!dbg attachment points at wrong subprogram for function", N, &F, &I, DL, Scope, SP); - visitMDNode(*SP); }; for (auto &BB : F) for (auto &I : BB) { diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index ec57744cf4803d..b749909d7871a1 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -61,8 +61,10 @@ Error Config::addSaveTemps(std::string OutputFileName, std::error_code EC; ResolutionFile = std::make_unique( OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::OF_Text); - if (EC) + if (EC) { + ResolutionFile.reset(); return errorCodeToError(EC); + } auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) { // Keep track of the hook provided by the linker, which also needs to run. diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index 8582d5143aa8b6..b32c9b5fdfacbc 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -285,43 +285,6 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, return IsResolved; } -/// Check if the branch crosses the boundary. -/// -/// \param StartAddr start address of the fused/unfused branch. -/// \param Size size of the fused/unfused branch. -/// \param BoundaryAlignment alignment requirement of the branch. -/// \returns true if the branch cross the boundary. -static bool mayCrossBoundary(uint64_t StartAddr, uint64_t Size, - Align BoundaryAlignment) { - uint64_t EndAddr = StartAddr + Size; - return (StartAddr >> Log2(BoundaryAlignment)) != - ((EndAddr - 1) >> Log2(BoundaryAlignment)); -} - -/// Check if the branch is against the boundary. -/// -/// \param StartAddr start address of the fused/unfused branch. -/// \param Size size of the fused/unfused branch. 
-/// \param BoundaryAlignment alignment requirement of the branch. -/// \returns true if the branch is against the boundary. -static bool isAgainstBoundary(uint64_t StartAddr, uint64_t Size, - Align BoundaryAlignment) { - uint64_t EndAddr = StartAddr + Size; - return (EndAddr & (BoundaryAlignment.value() - 1)) == 0; -} - -/// Check if the branch needs padding. -/// -/// \param StartAddr start address of the fused/unfused branch. -/// \param Size size of the fused/unfused branch. -/// \param BoundaryAlignment alignment requirement of the branch. -/// \returns true if the branch needs padding. -static bool needPadding(uint64_t StartAddr, uint64_t Size, - Align BoundaryAlignment) { - return mayCrossBoundary(StartAddr, Size, BoundaryAlignment) || - isAgainstBoundary(StartAddr, Size, BoundaryAlignment); -} - uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, const MCFragment &F) const { assert(getBackendPtr() && "Requires assembler backend"); @@ -351,26 +314,8 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, case MCFragment::FT_LEB: return cast(F).getContents().size(); - case MCFragment::FT_BoundaryAlign: { - const MCBoundaryAlignFragment &BF = cast(F); - // MCBoundaryAlignFragment that doesn't emit NOP should have 0 size. - if (!BF.canEmitNops()) - return 0; - - uint64_t AlignedOffset = Layout.getFragmentOffset(&BF); - uint64_t AlignedSize = 0; - const MCFragment *F = BF.getNextNode(); - // If the branch is unfused, it is emitted into one fragment, otherwise it - // is emitted into two fragments at most, the next - // MCBoundaryAlignFragment(if exists) also marks the end of the branch. - for (int I = 0, N = BF.isFused() ? 2 : 1; - I != N && !isa(F); ++I, F = F->getNextNode()) - AlignedSize += computeFragmentSize(Layout, *F); - Align BoundaryAlignment = BF.getAlignment(); - return needPadding(AlignedOffset, AlignedSize, BoundaryAlignment) - ? 
offsetToAlignment(AlignedOffset, BoundaryAlignment) - : 0U; - } + case MCFragment::FT_BoundaryAlign: + return cast(F).getSize(); case MCFragment::FT_SymbolId: return 4; @@ -1012,6 +957,72 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { return OldSize != LF.getContents().size(); } +/// Check if the branch crosses the boundary. +/// +/// \param StartAddr start address of the fused/unfused branch. +/// \param Size size of the fused/unfused branch. +/// \param BoundaryAlignment alignment requirement of the branch. +/// \returns true if the branch crosses the boundary. +static bool mayCrossBoundary(uint64_t StartAddr, uint64_t Size, + Align BoundaryAlignment) { + uint64_t EndAddr = StartAddr + Size; + return (StartAddr >> Log2(BoundaryAlignment)) != + ((EndAddr - 1) >> Log2(BoundaryAlignment)); +} + +/// Check if the branch is against the boundary. +/// +/// \param StartAddr start address of the fused/unfused branch. +/// \param Size size of the fused/unfused branch. +/// \param BoundaryAlignment alignment requirement of the branch. +/// \returns true if the branch is against the boundary. +static bool isAgainstBoundary(uint64_t StartAddr, uint64_t Size, + Align BoundaryAlignment) { + uint64_t EndAddr = StartAddr + Size; + return (EndAddr & (BoundaryAlignment.value() - 1)) == 0; +} + +/// Check if the branch needs padding. +/// +/// \param StartAddr start address of the fused/unfused branch. +/// \param Size size of the fused/unfused branch. +/// \param BoundaryAlignment alignment requirement of the branch. +/// \returns true if the branch needs padding. +static bool needPadding(uint64_t StartAddr, uint64_t Size, + Align BoundaryAlignment) { + return mayCrossBoundary(StartAddr, Size, BoundaryAlignment) || + isAgainstBoundary(StartAddr, Size, BoundaryAlignment); +} + +bool MCAssembler::relaxBoundaryAlign(MCAsmLayout &Layout, + MCBoundaryAlignFragment &BF) { + // The MCBoundaryAlignFragment that doesn't emit NOP should not be relaxed. 
+ if (!BF.canEmitNops()) + return false; + + uint64_t AlignedOffset = Layout.getFragmentOffset(BF.getNextNode()); + uint64_t AlignedSize = 0; + const MCFragment *F = BF.getNextNode(); + // If the branch is unfused, it is emitted into one fragment, otherwise it is + // emitted into two fragments at most, the next MCBoundaryAlignFragment(if + // exists) also marks the end of the branch. + for (auto i = 0, N = BF.isFused() ? 2 : 1; + i != N && !isa(F); ++i, F = F->getNextNode()) { + AlignedSize += computeFragmentSize(Layout, *F); + } + uint64_t OldSize = BF.getSize(); + AlignedOffset -= OldSize; + Align BoundaryAlignment = BF.getAlignment(); + uint64_t NewSize = needPadding(AlignedOffset, AlignedSize, BoundaryAlignment) + ? offsetToAlignment(AlignedOffset, BoundaryAlignment) + : 0U; + if (NewSize == OldSize) + return false; + BF.setSize(NewSize); + Layout.invalidateFragmentsFrom(&BF); + return true; +} + bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF) { MCContext &Context = Layout.getAssembler().getContext(); @@ -1112,6 +1123,8 @@ bool MCAssembler::relaxFragment(MCAsmLayout &Layout, MCFragment &F) { cast(F)); case MCFragment::FT_LEB: return relaxLEB(Layout, cast(F)); + case MCFragment::FT_BoundaryAlign: + return relaxBoundaryAlign(Layout, cast(F)); case MCFragment::FT_CVInlineLines: return relaxCVInlineLineTable(Layout, cast(F)); case MCFragment::FT_CVDefRange: @@ -1127,11 +1140,11 @@ bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout, MCSection &Sec) { MCFragment *FirstRelaxedFragment = nullptr; // Attempt to relax all the fragments in the section. - for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) { + for (MCFragment &Frag : Sec) { // Check if this is a fragment that needs relaxation. 
- bool RelaxedFrag = relaxFragment(Layout, *I); + bool RelaxedFrag = relaxFragment(Layout, Frag); if (RelaxedFrag && !FirstRelaxedFragment) - FirstRelaxedFragment = &*I; + FirstRelaxedFragment = &Frag; } if (FirstRelaxedFragment) { Layout.invalidateFragmentsFrom(FirstRelaxedFragment); @@ -1144,8 +1157,7 @@ bool MCAssembler::layoutOnce(MCAsmLayout &Layout) { ++stats::RelaxationSteps; bool WasRelaxed = false; - for (iterator it = begin(), ie = end(); it != ie; ++it) { - MCSection &Sec = *it; + for (MCSection &Sec : *this) { while (layoutSectionOnce(Layout, Sec)) WasRelaxed = true; } diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp index 42ba3b40c51f98..a96b8e86aed3c7 100644 --- a/llvm/lib/MC/MCFragment.cpp +++ b/llvm/lib/MC/MCFragment.cpp @@ -431,7 +431,8 @@ LLVM_DUMP_METHOD void MCFragment::dump() const { else OS << " unfused branch)"; OS << "\n "; - OS << " BoundarySize:" << BF->getAlignment().value(); + OS << " BoundarySize:" << BF->getAlignment().value() + << " Size:" << BF->getSize(); break; } case MCFragment::FT_SymbolId: { diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index eb5b3a61fa89c6..e0ef37a9562b92 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -38,6 +38,7 @@ #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopCacheAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopNestAnalysis.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" @@ -761,12 +762,6 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, } MPM.addPass(AttributorPass()); - // Lower type metadata and the type.test intrinsic in the ThinLTO - // post link pipeline after ICP. This is to enable usage of the type - // tests in ICP sequences. 
- if (Phase == ThinLTOPhase::PostLink) - MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); - // Interprocedural constant propagation now that basic cleanup has occurred // and prior to optimizing globals. // FIXME: This position in the pipeline hasn't been carefully considered in @@ -1211,9 +1206,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, // metadata and intrinsics. MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); - // Run a second time to clean up any type tests left behind by WPD for use - // in ICP. - MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); return MPM; } @@ -1280,10 +1272,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, // The LowerTypeTestsPass needs to run to lower type metadata and the // type.test intrinsics. The pass does nothing if CFI is disabled. MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); - // Run a second time to clean up any type tests left behind by WPD for use - // in ICP (which is performed earlier than this in the regular LTO - // pipeline). - MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); return MPM; } @@ -1411,9 +1399,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, // to be run at link time if CFI is enabled. This pass does nothing if // CFI is disabled. MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); - // Run a second time to clean up any type tests left behind by WPD for use - // in ICP (which is performed earlier than this in the regular LTO pipeline). - MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); // Enable splitting late in the FullLTO post-link pipeline. This is done in // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses). 
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 12f4f85548f319..056e8833ab83e4 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -325,6 +325,7 @@ LOOP_PASS("unroll-full", LoopFullUnrollPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) LOOP_PASS("print", DDGAnalysisPrinterPass(dbgs())) LOOP_PASS("print", IVUsersPrinterPass(dbgs())) +LOOP_PASS("print", LoopNestPrinterPass(dbgs())) LOOP_PASS("print", LoopCachePrinterPass(dbgs())) LOOP_PASS("loop-predication", LoopPredicationPass()) LOOP_PASS("guard-widening", GuardWideningPass()) diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp index 227b12bea5c55c..b75738bc360ce3 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -776,33 +776,35 @@ static const char *TestingFormatMagic = "llvmcovmtestdata"; Expected> BinaryCoverageReader::createCoverageReaderFromBuffer( - StringRef Coverage, StringRef FuncRecords, InstrProfSymtab &&ProfileNames, + StringRef Coverage, std::string &&FuncRecords, InstrProfSymtab &&ProfileNames, uint8_t BytesInAddress, support::endianness Endian) { - std::unique_ptr Reader(new BinaryCoverageReader()); + std::unique_ptr Reader( + new BinaryCoverageReader(std::move(FuncRecords))); Reader->ProfileNames = std::move(ProfileNames); + StringRef FuncRecordsRef = Reader->FuncRecords; if (BytesInAddress == 4 && Endian == support::endianness::little) { if (Error E = readCoverageMappingData( - Reader->ProfileNames, Coverage, FuncRecords, + Reader->ProfileNames, Coverage, FuncRecordsRef, Reader->MappingRecords, Reader->Filenames, Reader->Decompressed)) return std::move(E); } else if (BytesInAddress == 4 && Endian == support::endianness::big) { if (Error E = readCoverageMappingData( - Reader->ProfileNames, Coverage, FuncRecords, Reader->MappingRecords, - 
Reader->Filenames, Reader->Decompressed)) + Reader->ProfileNames, Coverage, FuncRecordsRef, + Reader->MappingRecords, Reader->Filenames, Reader->Decompressed)) return std::move(E); } else if (BytesInAddress == 8 && Endian == support::endianness::little) { if (Error E = readCoverageMappingData( - Reader->ProfileNames, Coverage, FuncRecords, + Reader->ProfileNames, Coverage, FuncRecordsRef, Reader->MappingRecords, Reader->Filenames, Reader->Decompressed)) return std::move(E); } else if (BytesInAddress == 8 && Endian == support::endianness::big) { if (Error E = readCoverageMappingData( - Reader->ProfileNames, Coverage, FuncRecords, Reader->MappingRecords, - Reader->Filenames, Reader->Decompressed)) + Reader->ProfileNames, Coverage, FuncRecordsRef, + Reader->MappingRecords, Reader->Filenames, Reader->Decompressed)) return std::move(E); } else return make_error(coveragemap_error::malformed); @@ -846,7 +848,10 @@ loadTestingFormat(StringRef Data) { CoverageMapping, "", std::move(ProfileNames), BytesInAddress, Endian); } -static Expected lookupSection(ObjectFile &OF, StringRef Name) { +/// Find all sections that match \p Name. There may be more than one if comdats +/// are in use, e.g. for the __llvm_covfun section on ELF. +static Expected> lookupSections(ObjectFile &OF, + StringRef Name) { // On COFF, the object file section name may end in "$M". This tells the // linker to sort these sections between "$A" and "$Z". The linker removes the // dollar and everything after it in the final binary. Do the same to match. 
@@ -856,14 +861,17 @@ static Expected lookupSection(ObjectFile &OF, StringRef Name) { }; Name = stripSuffix(Name); + std::vector Sections; for (const auto &Section : OF.sections()) { Expected NameOrErr = Section.getName(); if (!NameOrErr) return NameOrErr.takeError(); if (stripSuffix(*NameOrErr) == Name) - return Section; + Sections.push_back(Section); } - return make_error(coveragemap_error::no_data_found); + if (Sections.empty()) + return make_error(coveragemap_error::no_data_found); + return Sections; } static Expected> @@ -895,41 +903,51 @@ loadBinaryFormat(std::unique_ptr Bin, StringRef Arch) { // Look for the sections that we are interested in. auto ObjFormat = OF->getTripleObjectFormat(); auto NamesSection = - lookupSection(*OF, getInstrProfSectionName(IPSK_name, ObjFormat, + lookupSections(*OF, getInstrProfSectionName(IPSK_name, ObjFormat, /*AddSegmentInfo=*/false)); if (auto E = NamesSection.takeError()) return std::move(E); auto CoverageSection = - lookupSection(*OF, getInstrProfSectionName(IPSK_covmap, ObjFormat, - /*AddSegmentInfo=*/false)); + lookupSections(*OF, getInstrProfSectionName(IPSK_covmap, ObjFormat, + /*AddSegmentInfo=*/false)); if (auto E = CoverageSection.takeError()) return std::move(E); - auto CoverageMappingOrErr = CoverageSection->getContents(); + std::vector CoverageSectionRefs = *CoverageSection; + if (CoverageSectionRefs.size() != 1) + return make_error(coveragemap_error::malformed); + auto CoverageMappingOrErr = CoverageSectionRefs.back().getContents(); if (!CoverageMappingOrErr) return CoverageMappingOrErr.takeError(); StringRef CoverageMapping = CoverageMappingOrErr.get(); InstrProfSymtab ProfileNames; - if (Error E = ProfileNames.create(*NamesSection)) + std::vector NamesSectionRefs = *NamesSection; + if (NamesSectionRefs.size() != 1) + return make_error(coveragemap_error::malformed); + if (Error E = ProfileNames.create(NamesSectionRefs.back())) return std::move(E); // Look for the coverage records section (Version4 only). 
- StringRef FuncRecords; - auto CoverageRecordsSection = - lookupSection(*OF, getInstrProfSectionName(IPSK_covfun, ObjFormat, - /*AddSegmentInfo=*/false)); - if (auto E = CoverageRecordsSection.takeError()) + std::string FuncRecords; + auto CoverageRecordsSections = + lookupSections(*OF, getInstrProfSectionName(IPSK_covfun, ObjFormat, + /*AddSegmentInfo=*/false)); + if (auto E = CoverageRecordsSections.takeError()) consumeError(std::move(E)); else { - auto CoverageRecordsOrErr = CoverageRecordsSection->getContents(); - if (!CoverageRecordsOrErr) - return CoverageRecordsOrErr.takeError(); - FuncRecords = CoverageRecordsOrErr.get(); + for (SectionRef Section : *CoverageRecordsSections) { + auto CoverageRecordsOrErr = Section.getContents(); + if (!CoverageRecordsOrErr) + return CoverageRecordsOrErr.takeError(); + FuncRecords += CoverageRecordsOrErr.get(); + while (FuncRecords.size() % 8 != 0) + FuncRecords += '\0'; + } } return BinaryCoverageReader::createCoverageReaderFromBuffer( - CoverageMapping, FuncRecords, std::move(ProfileNames), BytesInAddress, - Endian); + CoverageMapping, std::move(FuncRecords), std::move(ProfileNames), + BytesInAddress, Endian); } Expected>> diff --git a/llvm/lib/Support/APSInt.cpp b/llvm/lib/Support/APSInt.cpp index 7c48880f96eac5..6805e06df33308 100644 --- a/llvm/lib/Support/APSInt.cpp +++ b/llvm/lib/Support/APSInt.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/APSInt.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/StringRef.h" +#include using namespace llvm; diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 77b507d20a50eb..75a62f45da3669 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -1,7 +1,7 @@ -if(LLVM_ENABLE_ZLIB) - set(imported_libs ZLIB::ZLIB) +set(system_libs) +if ( LLVM_ENABLE_ZLIB AND HAVE_LIBZ ) + set(system_libs ${system_libs} ${ZLIB_LIBRARIES}) endif() - if( MSVC OR MINGW ) # libuuid required for FOLDERID_Profile usage in lib/Support/Windows/Path.inc. 
# advapi32 required for CryptAcquireContextW in lib/Support/Windows/Path.inc. @@ -186,31 +186,10 @@ add_llvm_component_library(LLVMSupport ${LLVM_MAIN_INCLUDE_DIR}/llvm/ADT ${LLVM_MAIN_INCLUDE_DIR}/llvm/Support ${Backtrace_INCLUDE_DIRS} - LINK_LIBS ${system_libs} ${imported_libs} ${delayload_flags} ${Z3_LINK_FILES} + LINK_LIBS ${system_libs} ${delayload_flags} ${Z3_LINK_FILES} ) -set(llvm_system_libs ${system_libs}) - -if(LLVM_ENABLE_ZLIB) - string(TOUPPER ${CMAKE_BUILD_TYPE} build_type) - get_property(zlib_library TARGET ZLIB::ZLIB PROPERTY LOCATION_${build_type}) - if(NOT zlib_library) - get_property(zlib_library TARGET ZLIB::ZLIB PROPERTY LOCATION) - endif() - get_filename_component(zlib_library ${zlib_library} NAME) - if(CMAKE_STATIC_LIBRARY_PREFIX AND - zlib_library MATCHES "^${CMAKE_STATIC_LIBRARY_PREFIX}.*${CMAKE_STATIC_LIBRARY_SUFFIX}$") - STRING(REGEX REPLACE "^${CMAKE_STATIC_LIBRARY_PREFIX}" "" zlib_library ${zlib_library}) - STRING(REGEX REPLACE "${CMAKE_STATIC_LIBRARY_SUFFIX}$" "" zlib_library ${zlib_library}) - elseif(CMAKE_SHARED_LIBRARY_PREFIX AND - zlib_library MATCHES "^${CMAKE_SHARED_LIBRARY_PREFIX}.*${CMAKE_SHARED_LIBRARY_SUFFIX}$") - STRING(REGEX REPLACE "^${CMAKE_SHARED_LIBRARY_PREFIX}" "" zlib_library ${zlib_library}) - STRING(REGEX REPLACE "${CMAKE_SHARED_LIBRARY_SUFFIX}$" "" zlib_library ${zlib_library}) - endif() - set(llvm_system_libs ${llvm_system_libs} "${zlib_library}") -endif() - -set_property(TARGET LLVMSupport PROPERTY LLVM_SYSTEM_LIBS "${llvm_system_libs}") +set_property(TARGET LLVMSupport PROPERTY LLVM_SYSTEM_LIBS "${system_libs}") if(LLVM_WITH_Z3) target_include_directories(LLVMSupport SYSTEM diff --git a/llvm/lib/Support/CRC.cpp b/llvm/lib/Support/CRC.cpp index 2bc668beed3223..7ff09debe3b7c8 100644 --- a/llvm/lib/Support/CRC.cpp +++ b/llvm/lib/Support/CRC.cpp @@ -25,7 +25,7 @@ using namespace llvm; -#if !LLVM_ENABLE_ZLIB +#if LLVM_ENABLE_ZLIB == 0 || !HAVE_ZLIB_H static const uint32_t CRCTable[256] = { 0x00000000, 0x77073096, 
0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp index 4165a2740cd030..97d5ffaadf8273 100644 --- a/llvm/lib/Support/Compression.cpp +++ b/llvm/lib/Support/Compression.cpp @@ -17,13 +17,13 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" -#if LLVM_ENABLE_ZLIB +#if LLVM_ENABLE_ZLIB == 1 && HAVE_ZLIB_H #include #endif using namespace llvm; -#if LLVM_ENABLE_ZLIB +#if LLVM_ENABLE_ZLIB == 1 && HAVE_LIBZ static Error createError(StringRef Err) { return make_error(Err, inconvertibleErrorCode()); } diff --git a/llvm/lib/Support/FileUtilities.cpp b/llvm/lib/Support/FileUtilities.cpp index d11fbb54dc0d85..b95f92c86e996a 100644 --- a/llvm/lib/Support/FileUtilities.cpp +++ b/llvm/lib/Support/FileUtilities.cpp @@ -318,9 +318,8 @@ llvm::Error llvm::writeFileAtomically( atomic_write_error::output_stream_error); } - if (const std::error_code Error = - sys::fs::rename(/*from=*/GeneratedUniqPath.c_str(), - /*to=*/FinalPath.str().c_str())) { + if (sys::fs::rename(/*from=*/GeneratedUniqPath.c_str(), + /*to=*/FinalPath.str().c_str())) { return llvm::make_error( atomic_write_error::failed_to_rename_temp_file); } diff --git a/llvm/lib/Support/FormatVariadic.cpp b/llvm/lib/Support/FormatVariadic.cpp index f9e89f69b528cd..0d61fae223239d 100644 --- a/llvm/lib/Support/FormatVariadic.cpp +++ b/llvm/lib/Support/FormatVariadic.cpp @@ -6,6 +6,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/FormatVariadic.h" +#include using namespace llvm; diff --git a/llvm/lib/Support/IntEqClasses.cpp b/llvm/lib/Support/IntEqClasses.cpp index 4a976dcefc65fc..ebb02e6c01e521 100644 --- a/llvm/lib/Support/IntEqClasses.cpp +++ b/llvm/lib/Support/IntEqClasses.cpp @@ -18,6 +18,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/IntEqClasses.h" +#include using namespace 
llvm; diff --git a/llvm/lib/Support/IntervalMap.cpp b/llvm/lib/Support/IntervalMap.cpp index f15c7c9403c36f..674e0f962fa1b2 100644 --- a/llvm/lib/Support/IntervalMap.cpp +++ b/llvm/lib/Support/IntervalMap.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/IntervalMap.h" +#include namespace llvm { namespace IntervalMapImpl { diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index 8f3f4aa8caeaf2..2f1cff7914bca8 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/KnownBits.h" +#include using namespace llvm; diff --git a/llvm/lib/Support/PrettyStackTrace.cpp b/llvm/lib/Support/PrettyStackTrace.cpp index bfb238cc853919..30a12f65966aeb 100644 --- a/llvm/lib/Support/PrettyStackTrace.cpp +++ b/llvm/lib/Support/PrettyStackTrace.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp index 615e48a5df7e87..f065adadc62bb0 100644 --- a/llvm/lib/Support/Regex.cpp +++ b/llvm/lib/Support/Regex.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include #include // Important this comes last because it defines "_REGEX_H_". 
At least on diff --git a/llvm/lib/Support/StringPool.cpp b/llvm/lib/Support/StringPool.cpp index 82351017b8ccac..2746444453897d 100644 --- a/llvm/lib/Support/StringPool.cpp +++ b/llvm/lib/Support/StringPool.cpp @@ -12,6 +12,7 @@ #include "llvm/Support/StringPool.h" #include "llvm/ADT/StringRef.h" +#include using namespace llvm; diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp index e09abd24eb5b11..79f31efefb787e 100644 --- a/llvm/lib/Support/Triple.cpp +++ b/llvm/lib/Support/Triple.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/Host.h" #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/TargetParser.h" +#include #include using namespace llvm; diff --git a/llvm/lib/Support/VersionTuple.cpp b/llvm/lib/Support/VersionTuple.cpp index 60b59424fbb49a..e8265c0d41bb1e 100644 --- a/llvm/lib/Support/VersionTuple.cpp +++ b/llvm/lib/Support/VersionTuple.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/VersionTuple.h" #include "llvm/Support/raw_ostream.h" +#include using namespace llvm; diff --git a/llvm/lib/TableGen/TableGenBackend.cpp b/llvm/lib/TableGen/TableGenBackend.cpp index e11b28e8cff99c..252f126d2d00ff 100644 --- a/llvm/lib/TableGen/TableGenBackend.cpp +++ b/llvm/lib/TableGen/TableGenBackend.cpp @@ -13,6 +13,7 @@ #include "llvm/TableGen/TableGenBackend.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/raw_ostream.h" +#include using namespace llvm; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 472b5e628e19e6..436b26c36d2d9a 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6158,6 +6158,14 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, if (FuncInfo->getLOHRelated().count(&MI)) return outliner::InstrType::Illegal; + // We can only outline these if we will tail call the outlined function, or + // fix up the CFI 
offsets. For the sake of safety, don't outline CFI + // instructions. + // + // FIXME: If the proper fixups are implemented, this should be possible. + if (MI.isCFIInstruction()) + return outliner::InstrType::Illegal; + // Don't allow debug values to impact outlining type. if (MI.isDebugInstr() || MI.isIndirectDebugValue()) return outliner::InstrType::Invisible; diff --git a/llvm/lib/Target/AArch64/AArch64StackOffset.h b/llvm/lib/Target/AArch64/AArch64StackOffset.h index f95b5dc5246e91..6fa1c744f77e23 100644 --- a/llvm/lib/Target/AArch64/AArch64StackOffset.h +++ b/llvm/lib/Target/AArch64/AArch64StackOffset.h @@ -16,6 +16,7 @@ #include "llvm/Support/MachineValueType.h" #include "llvm/Support/TypeSize.h" +#include namespace llvm { diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index e18f63cce109a8..f36468d56a263c 100644 --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -206,10 +206,6 @@ namespace { /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions. SmallVector T2JumpTables; - /// HasFarJump - True if any far jump instruction has been emitted during - /// the branch fix up pass. 
- bool HasFarJump; - MachineFunction *MF; MachineConstantPool *MCP; const ARMBaseInstrInfo *TII; @@ -270,7 +266,6 @@ namespace { bool fixupImmediateBr(ImmBranch &Br); bool fixupConditionalBr(ImmBranch &Br); bool fixupUnconditionalBr(ImmBranch &Br); - bool undoLRSpillRestore(); bool optimizeThumb2Instructions(); bool optimizeThumb2Branches(); bool reorderThumb2JumpTables(); @@ -363,7 +358,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { isThumb1 = AFI->isThumb1OnlyFunction(); isThumb2 = AFI->isThumb2Function(); - HasFarJump = false; bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB); // Renumber all of the machine basic blocks in the function, guaranteeing that @@ -456,11 +450,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // After a while, this might be made debug-only, but it is not expensive. verify(); - // If LR has been forced spilled and no far jump (i.e. BL) has been issued, - // undo the spill / restore of LR if possible. - if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump()) - MadeChange |= undoLRSpillRestore(); - // Save the mapping between original and cloned constpool entries. for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) { @@ -1633,7 +1622,6 @@ ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) { BBInfoVector &BBInfo = BBUtils->getBBInfo(); BBInfo[MBB->getNumber()].Size += 2; BBUtils->adjustBBOffsetsAfter(MBB); - HasFarJump = true; ++NumUBrFixed; LLVM_DEBUG(dbgs() << " Changed B to long jump " << *MI); @@ -1735,34 +1723,6 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { return true; } -/// undoLRSpillRestore - Remove Thumb push / pop instructions that only spills -/// LR / restores LR to pc. FIXME: This is done here because it's only possible -/// to do this if tBfar is not used. 
-bool ARMConstantIslands::undoLRSpillRestore() { - bool MadeChange = false; - for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) { - MachineInstr *MI = PushPopMIs[i]; - // First two operands are predicates. - if (MI->getOpcode() == ARM::tPOP_RET && - MI->getOperand(2).getReg() == ARM::PC && - MI->getNumExplicitOperands() == 3) { - // Create the new insn and copy the predicate from the old. - BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET)) - .add(MI->getOperand(0)) - .add(MI->getOperand(1)); - MI->eraseFromParent(); - MadeChange = true; - } else if (MI->getOpcode() == ARM::tPUSH && - MI->getOperand(2).getReg() == ARM::LR && - MI->getNumExplicitOperands() == 3) { - // Just remove the push. - MI->eraseFromParent(); - MadeChange = true; - } - } - return MadeChange; -} - bool ARMConstantIslands::optimizeThumb2Instructions() { bool MadeChange = false; diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 8f1bd3ce514512..b8434735451c3a 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -1768,8 +1768,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, if (!LRSpilled && AFI->isThumb1OnlyFunction()) { unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII); // Force LR to be spilled if the Thumb function size is > 2048. This enables - // use of BL to implement far jump. If it turns out that it's not needed - // then the branch fix up path will undo it. + // use of BL to implement far jump. 
if (FnSize >= (1 << 11)) { CanEliminateFrame = false; ForceLRSpill = true; @@ -2120,10 +2119,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, } } - if (ForceLRSpill) { + if (ForceLRSpill) SavedRegs.set(ARM::LR); - AFI->setLRIsSpilledForFarJump(true); - } AFI->setLRIsSpilled(SavedRegs.test(ARM::LR)); } diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index bb136e92329ba1..7adf52e1598fdc 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -58,10 +58,6 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// emitPrologue. bool RestoreSPFromFP = false; - /// LRSpilledForFarJump - True if the LR register has been for spilled to - /// enable far jump. - bool LRSpilledForFarJump = false; - /// LRSpilled - True if the LR register has been for spilled for /// any reason, so it's legal to emit an ARM::tBfar (i.e. "bl"). bool LRSpilled = false; @@ -162,9 +158,6 @@ class ARMFunctionInfo : public MachineFunctionInfo { bool isLRSpilled() const { return LRSpilled; } void setLRIsSpilled(bool s) { LRSpilled = s; } - bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; } - void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; } - unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; } void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; } diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index f466c5c053ad54..1c6a5046456e66 100644 --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -597,8 +597,8 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { // Load low byte. auto MIBLO = buildMI(MBB, MBBI, OpLo) - .addReg(CurDstLoReg, RegState::Define) - .addReg(SrcReg, RegState::Define); + .addReg(CurDstLoReg, RegState::Define) + .addReg(SrcReg); // Push low byte onto stack if necessary. 
if (TmpReg) diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index 880688807702d0..9d14eb9b212a05 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -259,6 +259,8 @@ const char *AVRTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE(ASR); NODE(LSLLOOP); NODE(LSRLOOP); + NODE(ROLLOOP); + NODE(RORLOOP); NODE(ASRLOOP); NODE(BRCOND); NODE(CMP); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 4a53cc489184d4..5ab000df2db08a 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -1565,14 +1565,7 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const { if (BitWidth == 64) return Combines[0]; - // It must be i128. I128 is not a legal type, so this part will be - // executed during type legalization. We need to generate code that - // the default expansion can break up into smaller pieces. 
- SDValue C0 = DAG.getZExtOrTrunc(Combines[0], dl, ResTy); - SDValue C1 = DAG.getNode(ISD::SHL, dl, ResTy, - DAG.getZExtOrTrunc(Combines[1], dl, ResTy), - DAG.getConstant(64, dl, MVT::i32)); - return DAG.getNode(ISD::OR, dl, ResTy, C0, C1); + return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines); } return Op; diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 8c7b8a81889efd..af1451cc470453 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -12,6 +12,7 @@ #include "PPCMCAsmInfo.h" #include "llvm/ADT/Triple.h" +#include using namespace llvm; diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 9eeccc25e1e6a6..0f43ccf630974a 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -315,9 +315,10 @@ void SystemZFrameLowering:: processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo &MFFrame = MF.getFrameInfo(); + bool BackChain = MF.getFunction().hasFnAttribute("backchain"); - if (!usePackedStack(MF)) - // Always create the full incoming register save area. + if (!usePackedStack(MF) || BackChain) + // Create the incoming register save area. getOrCreateFramePointerSaveIndex(MF); // Get the size of our stack frame to be allocated ... diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 51b97d3b6f6a09..739377ed0f952e 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -442,9 +442,9 @@ class SystemZTargetLowering : public TargetLowering { bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const override { - // Using overflow ops for overflow checks only should beneficial on - // SystemZ. 
- return TargetLowering::shouldFormOverflowOp(Opcode, VT, true); + // Form add and sub with overflow intrinsics regardless of any extra + // users of the math result. + return VT == MVT::i32 || VT == MVT::i64; } const char *getTargetNodeName(unsigned Opcode) const override; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 067748fdb1f873..a97f8e95769d0a 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -373,6 +373,27 @@ bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const { return true; } +/// X86 has certain instructions which enable interrupts exactly one +/// instruction *after* the instruction which stores to SS. Return true if the +/// given instruction has such an interrupt delay slot. +static bool hasInterruptDelaySlot(const MCInst &Inst) { + switch (Inst.getOpcode()) { + case X86::POPSS16: + case X86::POPSS32: + case X86::STI: + return true; + + case X86::MOV16sr: + case X86::MOV32sr: + case X86::MOV64sr: + case X86::MOV16sm: + if (Inst.getOperand(0).getReg() == X86::SS) + return true; + break; + } + return false; +} + /// Check if the instruction operand needs to be aligned. Padding is disabled /// before intruction which may be rewritten by linker(e.g. TLSCALL). bool X86AsmBackend::needAlignInst(const MCInst &Inst) const { @@ -401,7 +422,10 @@ void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS, MCFragment *CF = OS.getCurrentFragment(); bool NeedAlignFused = AlignBranchType & X86::AlignBranchFused; - if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) { + if (hasInterruptDelaySlot(PrevInst)) { + // If this instruction follows an interrupt enabling instruction with a one + // instruction delay, inserting a nop would change behavior. 
+ } else if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) { // Macro fusion actually happens and there is no other fragment inserted // after the previous instruction. NOP can be emitted in PF to align fused // jcc. @@ -442,7 +466,7 @@ void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) { if (!needAlign(OS)) return; // If the branch is emitted into a MCRelaxableFragment, we can determine the - // size of the branch easily in during the process of layout. When the + // size of the branch easily in MCAssembler::relaxBoundaryAlign. When the // branch is fused, the fused branch(macro fusion pair) must be emitted into // two fragments. Or when the branch is unfused, the branch must be emitted // into one fragment. The MCRelaxableFragment naturally marks the end of the diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index ed0c050a2b9759..f705f59b4d8f83 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1826,7 +1826,7 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, // There is nothing we can do here unless the mask is removing some bits. // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits. - if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true; + if (AMShiftAmt == 0 || AMShiftAmt > 3) return true; // We also need to ensure that mask is a continuous run of bits. if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true; @@ -1921,7 +1921,7 @@ static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N, // There is nothing we can do here unless the mask is removing some bits. // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits. 
- if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true; + if (AMShiftAmt == 0 || AMShiftAmt > 3) return true; MVT VT = N.getSimpleValueType(); SDLoc DL(N); @@ -4845,23 +4845,23 @@ void X86DAGToDAGISel::Select(SDNode *Node) { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); - unsigned Opc, MOpc; + unsigned ROpc, MOpc; bool isSigned = Opcode == ISD::SDIVREM; if (!isSigned) { switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); - case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break; - case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break; - case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break; - case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break; + case MVT::i8: ROpc = X86::DIV8r; MOpc = X86::DIV8m; break; + case MVT::i16: ROpc = X86::DIV16r; MOpc = X86::DIV16m; break; + case MVT::i32: ROpc = X86::DIV32r; MOpc = X86::DIV32m; break; + case MVT::i64: ROpc = X86::DIV64r; MOpc = X86::DIV64m; break; } } else { switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); - case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break; - case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break; - case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break; - case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break; + case MVT::i8: ROpc = X86::IDIV8r; MOpc = X86::IDIV8m; break; + case MVT::i16: ROpc = X86::IDIV16r; MOpc = X86::IDIV16m; break; + case MVT::i32: ROpc = X86::IDIV32r; MOpc = X86::IDIV32m; break; + case MVT::i64: ROpc = X86::IDIV64r; MOpc = X86::IDIV64m; break; } } @@ -4970,7 +4970,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { CurDAG->setNodeMemRefs(CNode, {cast(N1)->getMemOperand()}); } else { InFlag = - SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0); + SDValue(CurDAG->getMachineNode(ROpc, dl, MVT::Glue, N1, InFlag), 0); } // Prevent use of AH in a REX instruction by explicitly copying it to diff --git a/llvm/lib/TextAPI/MachO/TextStub.cpp 
b/llvm/lib/TextAPI/MachO/TextStub.cpp index cdfe7f47ee61a3..5637639b8ef83e 100644 --- a/llvm/lib/TextAPI/MachO/TextStub.cpp +++ b/llvm/lib/TextAPI/MachO/TextStub.cpp @@ -959,7 +959,8 @@ template <> struct MappingTraits { for (auto &sym : CurrentSection.WeakSymbols) File->addSymbol(SymbolKind::GlobalSymbol, sym, - CurrentSection.Targets); + CurrentSection.Targets, SymbolFlags::WeakDefined); + for (auto &sym : CurrentSection.TlvSymbols) File->addSymbol(SymbolKind::GlobalSymbol, sym, CurrentSection.Targets, diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 7c26f156d4c90b..6f38a3123932cd 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -735,9 +735,6 @@ static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL, /// replace the call with. Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI, const TypeIdLowering &TIL) { - // Delay lowering if the resolution is currently unknown. 
- if (TIL.TheKind == TypeTestResolution::Unknown) - return nullptr; if (TIL.TheKind == TypeTestResolution::Unsat) return ConstantInt::getFalse(M.getContext()); @@ -1046,10 +1043,8 @@ void LowerTypeTestsModule::importTypeTest(CallInst *CI) { TypeIdLowering TIL = importTypeId(TypeIdStr->getString()); Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL); - if (Lowered) { - CI->replaceAllUsesWith(Lowered); - CI->eraseFromParent(); - } + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); } // ThinLTO backend: the function F has a jump table entry; update this module @@ -1172,10 +1167,8 @@ void LowerTypeTestsModule::lowerTypeTestCalls( for (CallInst *CI : TIUI.CallSites) { ++NumTypeTestCallsLowered; Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL); - if (Lowered) { - CI->replaceAllUsesWith(Lowered); - CI->eraseFromParent(); - } + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); } } } diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 5b9cf7296df219..d321aa055a19d5 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -504,7 +504,6 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createBarrierNoopPass()); if (PerformThinLTO) { - MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); // Drop available_externally and unreferenced globals. This is necessary // with ThinLTO in order to avoid leaving undefined references to dead // globals in the object file. @@ -538,11 +537,9 @@ void PassManagerBuilder::populateModulePassManager( // inter-module indirect calls. For that we perform indirect call promotion // earlier in the pass pipeline, here before globalopt. Otherwise imported // available_externally functions look unreferenced and are removed. 
- if (PerformThinLTO) { + if (PerformThinLTO) MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true, !PGOSampleUse.empty())); - MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); - } // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops // as it will change the CFG too much to make the 2nd profile annotation @@ -1063,8 +1060,8 @@ void PassManagerBuilder::populateThinLTOPassManager( PM.add(createVerifierPass()); if (ImportSummary) { - // This pass imports type identifier resolutions for whole-program - // devirtualization and CFI. It must run early because other passes may + // These passes import type identifier resolutions for whole-program + // devirtualization and CFI. They must run early because other passes may // disturb the specific instruction patterns that these passes look for, // creating dependencies on resolutions that may not appear in the summary. // @@ -1112,9 +1109,6 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at // link time if CFI is enabled. The pass does nothing if CFI is disabled. PM.add(createLowerTypeTestsPass(ExportSummary, nullptr)); - // Run a second time to clean up any type tests left behind by WPD for use - // in ICP (which is performed earlier than this in the regular LTO pipeline). 
- PM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); if (OptLevel != 0) addLateLTOOptimizationPasses(PM); diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 26beb54c205c50..bbc1433a22e895 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -510,9 +510,7 @@ struct DevirtModule { bool areRemarksEnabled(); - void - scanTypeTestUsers(Function *TypeTestFunc, - DenseMap> &TypeIdMap); + void scanTypeTestUsers(Function *TypeTestFunc); void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc); void buildTypeIdentifierMap( @@ -1668,9 +1666,7 @@ bool DevirtModule::areRemarksEnabled() { return false; } -void DevirtModule::scanTypeTestUsers( - Function *TypeTestFunc, - DenseMap> &TypeIdMap) { +void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc) { // Find all virtual calls via a virtual table pointer %p under an assumption // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p // points to a member of the type identifier %md. Group calls by (type ID, @@ -1690,10 +1686,10 @@ void DevirtModule::scanTypeTestUsers( auto &DT = LookupDomTree(*CI->getFunction()); findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT); - Metadata *TypeId = - cast(CI->getArgOperand(1))->getMetadata(); // If we found any, add them to CallSlots. if (!Assumes.empty()) { + Metadata *TypeId = + cast(CI->getArgOperand(1))->getMetadata(); Value *Ptr = CI->getArgOperand(0)->stripPointerCasts(); for (DevirtCallSite Call : DevirtCalls) { // Only add this CallSite if we haven't seen it before. The vtable @@ -1706,13 +1702,6 @@ void DevirtModule::scanTypeTestUsers( } } - // If we have any uses on type metadata, keep the type test assumes for - // later analysis. Otherwise remove as they aren't useful, and - // LowerTypeTests will think they are Unsat and lower to False, which - // breaks any uses on assumes. 
- if (TypeIdMap.count(TypeId)) - continue; - // We no longer need the assumes or the type test. for (auto Assume : Assumes) Assume->eraseFromParent(); @@ -1911,13 +1900,8 @@ bool DevirtModule::run() { (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty())) return false; - // Rebuild type metadata into a map for easy lookup. - std::vector Bits; - DenseMap> TypeIdMap; - buildTypeIdentifierMap(Bits, TypeIdMap); - if (TypeTestFunc && AssumeFunc) - scanTypeTestUsers(TypeTestFunc, TypeIdMap); + scanTypeTestUsers(TypeTestFunc); if (TypeCheckedLoadFunc) scanTypeCheckedLoadUsers(TypeCheckedLoadFunc); @@ -1939,6 +1923,10 @@ bool DevirtModule::run() { return true; } + // Rebuild type metadata into a map for easy lookup. + std::vector Bits; + DenseMap> TypeIdMap; + buildTypeIdentifierMap(Bits, TypeIdMap); if (TypeIdMap.empty()) return true; @@ -1995,18 +1983,14 @@ bool DevirtModule::run() { // function implementation at offset S.first.ByteOffset, and add to // TargetsForSlot. std::vector TargetsForSlot; - WholeProgramDevirtResolution *Res = nullptr; - if (ExportSummary && isa(S.first.TypeID) && - TypeIdMap.count(S.first.TypeID)) - // For any type id used on a global's type metadata, create the type id - // summary resolution regardless of whether we can devirtualize, so that - // lower type tests knows the type id is not Unsat. 
- Res = &ExportSummary - ->getOrInsertTypeIdSummary( - cast(S.first.TypeID)->getString()) - .WPDRes[S.first.ByteOffset]; if (tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID], S.first.ByteOffset)) { + WholeProgramDevirtResolution *Res = nullptr; + if (ExportSummary && isa(S.first.TypeID)) + Res = &ExportSummary + ->getOrInsertTypeIdSummary( + cast(S.first.TypeID)->getString()) + .WPDRes[S.first.ByteOffset]; if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) { DidVirtualConstProp |= @@ -2120,14 +2104,11 @@ void DevirtIndex::run() { std::vector TargetsForSlot; auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID); assert(TidSummary); - // Create the type id summary resolution regardlness of whether we can - // devirtualize, so that lower type tests knows the type id is used on - // a global and not Unsat. - WholeProgramDevirtResolution *Res = - &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID) - .WPDRes[S.first.ByteOffset]; if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary, S.first.ByteOffset)) { + WholeProgramDevirtResolution *Res = + &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID) + .WPDRes[S.first.ByteOffset]; if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res, DevirtTargets)) diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index d429fc24dc3343..ee436a4b319374 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -988,12 +988,12 @@ namespace { bool isHoistableAndSinkableInst(Instruction &I) { // Only these instructions are hoistable/sinkable. 
return (isa(I) || isa(I) || isa(I) || - isa(I) || isa(I) || - isa(I) || isa(I) || - isa(I) || isa(I) || isa(I) || + isa(I) || isa(I) || isa(I) || + isa(I) || isa(I) || + isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || - isa(I)); + isa(I) || isa(I)); } /// Return true if all of the alias sets within this AST are known not to /// contain a Mod, or if MSSA knows thare are no MemoryDefs in the loop. diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 6f060800f760d0..ee4d973b2326e1 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -3530,9 +3530,6 @@ static bool mayUsePostIncMode(const TargetTransformInfo &TTI, const SCEV *LoopStep = AR->getStepRecurrence(SE); if (!isa(LoopStep)) return false; - if (LU.AccessTy.getType()->getScalarSizeInBits() != - LoopStep->getType()->getScalarSizeInBits()) - return false; // Check if a post-indexed load/store can be used. 
if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) || TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) { diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 15ab2abbc1a849..a1df49300b9062 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -154,6 +154,10 @@ static cl::opt cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low.")); +static cl::opt UnrollAllowLoopNestsPeeling( + "unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, + cl::desc("Allows loop nests to be peeled.")); + static cl::opt UnrollUnrollRemainder( "unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled.")); @@ -215,6 +219,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.Force = false; UP.UpperBound = false; UP.AllowPeeling = true; + UP.AllowLoopNestsPeeling = false; UP.UnrollAndJam = false; UP.PeelProfiledIterations = true; UP.UnrollAndJamInnerLoopThreshold = 60; @@ -255,6 +260,8 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.UpperBound = false; if (UnrollAllowPeeling.getNumOccurrences() > 0) UP.AllowPeeling = UnrollAllowPeeling; + if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0) + UP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling; if (UnrollUnrollRemainder.getNumOccurrences() > 0) UP.UnrollRemainder = UnrollUnrollRemainder; diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp index 6889facbd05040..5a8127e465e8c4 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -903,30 +903,6 @@ bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val, return true; } -/// Recursively clone the specified loop and all of its children, -/// mapping the blocks with the specified map. 
-static Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, - LPPassManager *LPM) { - Loop &New = *LI->AllocateLoop(); - if (PL) - PL->addChildLoop(&New); - else - LI->addTopLevelLoop(&New); - LPM->addLoop(New); - - // Add all of the blocks in L to the new loop. - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) - if (LI->getLoopFor(*I) == L) - New.addBasicBlockToLoop(cast(VM[*I]), *LI); - - // Add all of the subloops to the new loop. - for (Loop *I : *L) - cloneLoop(I, &New, VM, LI, LPM); - - return &New; -} - /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst, /// otherwise branch to FalseDest. Insert the code immediately before OldBranch /// and remove (but not erase!) it from the function. diff --git a/llvm/lib/Transforms/Utils/KnowledgeRetention.cpp b/llvm/lib/Transforms/Utils/KnowledgeRetention.cpp index f3c9ee42b77f9d..f2f87f9200ed1a 100644 --- a/llvm/lib/Transforms/Utils/KnowledgeRetention.cpp +++ b/llvm/lib/Transforms/Utils/KnowledgeRetention.cpp @@ -171,6 +171,18 @@ CallInst *llvm::BuildAssumeFromInst(const Instruction *I, Module *M) { return Builder.build(); } +static bool BundleHasArguement(const CallBase::BundleOpInfo &BOI, + unsigned Idx) { + return BOI.End - BOI.Begin > Idx; +} + +static Value *getValueFromBundleOpInfo(IntrinsicInst &Assume, + const CallBase::BundleOpInfo &BOI, + unsigned Idx) { + assert(BundleHasArguement(BOI, Idx) && "index out of range"); + return (Assume.op_begin() + BOI.Begin + Idx)->get(); +} + #ifndef NDEBUG static bool isExistingAttribute(StringRef Name) { @@ -219,12 +231,6 @@ bool llvm::hasAttributeInAssume(CallInst &AssumeCI, Value *IsOn, return LHS < BOI.Tag->getKey(); })); - auto getValueFromBundleOpInfo = [&Assume](const CallBase::BundleOpInfo &BOI, - unsigned Idx) { - assert(BOI.End - BOI.Begin > Idx && "index out of range"); - return (Assume.op_begin() + BOI.Begin + Idx)->get(); - }; - if (Lookup == Assume.bundle_op_info_end() || 
Lookup->Tag->getKey() != AttrName) return false; @@ -235,7 +241,7 @@ bool llvm::hasAttributeInAssume(CallInst &AssumeCI, Value *IsOn, if (Lookup == Assume.bundle_op_info_end() || Lookup->Tag->getKey() != AttrName) return false; - if (getValueFromBundleOpInfo(*Lookup, BOIE_WasOn) == IsOn) + if (getValueFromBundleOpInfo(Assume, *Lookup, BOIE_WasOn) == IsOn) break; if (AQR == AssumeQuery::Highest && Lookup == Assume.bundle_op_info_begin()) @@ -247,12 +253,41 @@ bool llvm::hasAttributeInAssume(CallInst &AssumeCI, Value *IsOn, if (Lookup->End - Lookup->Begin < BOIE_Argument) return true; if (ArgVal) - *ArgVal = - cast(getValueFromBundleOpInfo(*Lookup, BOIE_Argument)) - ->getZExtValue(); + *ArgVal = cast( + getValueFromBundleOpInfo(Assume, *Lookup, BOIE_Argument)) + ->getZExtValue(); return true; } +void llvm::fillMapFromAssume(CallInst &AssumeCI, RetainedKnowledgeMap &Result) { + IntrinsicInst &Assume = cast(AssumeCI); + assert(Assume.getIntrinsicID() == Intrinsic::assume && + "this function is intended to be used on llvm.assume"); + for (auto &Bundles : Assume.bundle_op_infos()) { + std::pair Key{ + nullptr, Attribute::getAttrKindFromName(Bundles.Tag->getKey())}; + if (BundleHasArguement(Bundles, BOIE_WasOn)) + Key.first = getValueFromBundleOpInfo(Assume, Bundles, BOIE_WasOn); + + if (Key.first == nullptr && Key.second == Attribute::None) + continue; + if (!BundleHasArguement(Bundles, BOIE_Argument)) { + Result[Key] = {0, 0}; + continue; + } + unsigned Val = cast( + getValueFromBundleOpInfo(Assume, Bundles, BOIE_Argument)) + ->getZExtValue(); + auto Lookup = Result.find(Key); + if (Lookup == Result.end()) { + Result[Key] = {Val, Val}; + continue; + } + Lookup->second.Min = std::min(Val, Lookup->second.Min); + Lookup->second.Max = std::max(Val, Lookup->second.Max); + } +} + PreservedAnalyses AssumeBuilderPass::run(Function &F, FunctionAnalysisManager &AM) { for (Instruction &I : instructions(F)) diff --git a/llvm/lib/Transforms/Utils/Local.cpp 
b/llvm/lib/Transforms/Utils/Local.cpp index 0fb5ac8ebcc321..9cb73230086c2a 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -75,6 +75,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include @@ -1230,24 +1231,6 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, /// Dbg Intrinsic utilities /// -/// See if there is a dbg.value intrinsic for DIVar before I. -static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr, - Instruction *I) { - // Since we can't guarantee that the original dbg.declare instrinsic - // is removed by LowerDbgDeclare(), we need to make sure that we are - // not inserting the same dbg.value intrinsic over and over. - BasicBlock::InstListType::iterator PrevI(I); - if (PrevI != I->getParent()->getInstList().begin()) { - --PrevI; - if (DbgValueInst *DVI = dyn_cast(PrevI)) - if (DVI->getValue() == I->getOperand(0) && - DVI->getVariable() == DIVar && - DVI->getExpression() == DIExpr) - return true; - } - return false; -} - /// See if there is a dbg.value intrinsic for DIVar for the PHI node. static bool PhiHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr, @@ -1324,13 +1307,11 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, // know which part) we insert an dbg.value instrinsic to indicate that we // know nothing about the variable's content. 
DV = UndefValue::get(DV->getType()); - if (!LdStHasDebugValue(DIVar, DIExpr, SI)) - Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI); + Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI); return; } - if (!LdStHasDebugValue(DIVar, DIExpr, SI)) - Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI); + Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI); } /// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value @@ -1341,9 +1322,6 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, auto *DIExpr = DII->getExpression(); assert(DIVar && "Missing variable"); - if (LdStHasDebugValue(DIVar, DIExpr, LI)) - return; - if (!valueCoversEntireFragment(LI->getType(), DII)) { // FIXME: If only referring to a part of the variable described by the // dbg.declare, then we want to insert a dbg.value for the corresponding @@ -1410,6 +1388,7 @@ static bool isStructure(AllocaInst *AI) { /// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set /// of llvm.dbg.value intrinsics. bool llvm::LowerDbgDeclare(Function &F) { + bool Changed = false; DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false); SmallVector Dbgs; for (auto &FI : F) @@ -1418,7 +1397,7 @@ bool llvm::LowerDbgDeclare(Function &F) { Dbgs.push_back(DDI); if (Dbgs.empty()) - return false; + return Changed; for (auto &I : Dbgs) { DbgDeclareInst *DDI = I; @@ -1471,8 +1450,14 @@ bool llvm::LowerDbgDeclare(Function &F) { } } DDI->eraseFromParent(); + Changed = true; } - return true; + + if (Changed) + for (BasicBlock &BB : F) + RemoveRedundantDbgInstrs(&BB); + + return Changed; } /// Propagate dbg.value intrinsics through the newly inserted PHIs. 
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp index 7a168ff6f32b0d..afc4bbd7227d1c 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -289,8 +289,10 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (!canPeel(L)) return; - // Only try to peel innermost loops. - if (!L->empty()) + // Only try to peel innermost loops by default. + // The constraint can be relaxed by the target in TTI.getUnrollingPreferences + // or by the flag -unroll-allow-loop-nests-peeling. + if (!UP.AllowLoopNestsPeeling && !L->empty()) return; // If the user provided a peel count, use that. @@ -508,7 +510,10 @@ static void cloneLoopBlocks( BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F); NewBlocks.push_back(NewBB); - if (ParentLoop) + // If an original block is an immediate child of the loop L, its copy + // is a child of a ParentLoop after peeling. If a block is a child of + // a nested loop, it is handled in the cloneLoop() call below. + if (ParentLoop && LI->getLoopFor(*BB) == L) ParentLoop->addBasicBlockToLoop(NewBB, *LI); VMap[*BB] = NewBB; @@ -525,6 +530,12 @@ static void cloneLoopBlocks( } } + // Recursively create the new Loop objects for nested loops, if any, + // to preserve LoopInfo. + for (Loop *ChildLoop : *L) { + cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr); + } + // Hook-up the control flow for the newly inserted blocks. 
// The new header is hooked up directly to the "top", which is either // the original loop preheader (for the first iteration) or the previous diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 69020219d9d41d..b86a67faf64658 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -46,6 +46,11 @@ using namespace llvm; using namespace llvm::PatternMatch; +static cl::opt ForceReductionIntrinsic( + "force-reduction-intrinsics", cl::Hidden, + cl::desc("Force creating reduction intrinsics for testing."), + cl::init(false)); + #define DEBUG_TYPE "loop-utils" static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced"; @@ -1015,7 +1020,8 @@ Value *llvm::createSimpleTargetReduction( llvm_unreachable("Unhandled opcode"); break; } - if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) + if (ForceReductionIntrinsic || + TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) return BuildFunc(); return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); } @@ -1499,3 +1505,27 @@ void llvm::appendLoopsToWorklist(LoopInfo &LI, SmallPriorityWorklist &Worklist) { appendReversedLoopsToWorklist(LI, Worklist); } + +Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, + LoopInfo *LI, LPPassManager *LPM) { + Loop &New = *LI->AllocateLoop(); + if (PL) + PL->addChildLoop(&New); + else + LI->addTopLevelLoop(&New); + + if (LPM) + LPM->addLoop(New); + + // Add all of the blocks in L to the new loop. + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) + if (LI->getLoopFor(*I) == L) + New.addBasicBlockToLoop(cast(VM[*I]), *LI); + + // Add all of the subloops to the new loop. 
+ for (Loop *I : *L) + cloneLoop(I, &New, VM, LI, LPM); + + return &New; +} diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d3da26ece05125..283cc9cf87146d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7193,8 +7193,9 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( // --------------------------------------------------------------------------- // Create a dummy pre-entry VPBasicBlock to start building the VPlan. + auto Plan = std::make_unique(); VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); - auto Plan = std::make_unique(VPBB); + Plan->setEntry(VPBB); // Represent values that will have defs inside VPlan. for (Value *V : NeedDef) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index f1c708720ccf4b..02150f8c2fb2f2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -56,6 +56,32 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) { return OS; } +// Get the top-most entry block of \p Start. This is the entry block of the +// containing VPlan. 
This function is templated to support both const and non-const blocks +template static T *getPlanEntry(T *Start) { + T *Next = Start; + T *Current = Start; + while ((Next = Next->getParent())) + Current = Next; + + SmallSetVector WorkList; + WorkList.insert(Current); + + for (unsigned i = 0; i < WorkList.size(); i++) { + T *Current = WorkList[i]; + if (Current->getNumPredecessors() == 0) + return Current; + auto &Predecessors = Current->getPredecessors(); + WorkList.insert(Predecessors.begin(), Predecessors.end()); + } + + llvm_unreachable("VPlan without any entry node without predecessors"); +} + +VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; } + +const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; } + /// \return the VPBasicBlock that is the entry of Block, possibly indirectly. const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const { const VPBlockBase *Block = this; @@ -71,6 +97,12 @@ VPBasicBlock *VPBlockBase::getEntryBasicBlock() { return cast(Block); } +void VPBlockBase::setPlan(VPlan *ParentPlan) { + assert(ParentPlan->getEntry() == this && + "Can only set plan on its entry block."); + Plan = ParentPlan; +} + /// \return the VPBasicBlock that is the exit of Block, possibly indirectly. const VPBasicBlock *VPBlockBase::getExitBasicBlock() const { const VPBlockBase *Block = this; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index c65abc3639d731..914dfe603c5af7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -365,6 +365,10 @@ class VPBlockBase { /// Current block predicate - null if the block does not need a predicate. VPValue *Predicate = nullptr; + /// VPlan containing the block. Can only be set on the entry block of the + /// plan. + VPlan *Plan = nullptr; + /// Add \p Successor as the last successor to this block. 
void appendSuccessor(VPBlockBase *Successor) { assert(Successor && "Cannot add nullptr successor!"); @@ -418,6 +422,14 @@ class VPBlockBase { VPRegionBlock *getParent() { return Parent; } const VPRegionBlock *getParent() const { return Parent; } + /// \return A pointer to the plan containing the current block. + VPlan *getPlan(); + const VPlan *getPlan() const; + + /// Sets the pointer of the plan containing the block. The block must be the + /// entry block into the VPlan. + void setPlan(VPlan *ParentPlan); + void setParent(VPRegionBlock *P) { Parent = P; } /// \return the VPBasicBlock that is the entry of this VPBlockBase, @@ -1402,7 +1414,11 @@ class VPlan { VPBlockBase *getEntry() { return Entry; } const VPBlockBase *getEntry() const { return Entry; } - VPBlockBase *setEntry(VPBlockBase *Block) { return Entry = Block; } + VPBlockBase *setEntry(VPBlockBase *Block) { + Entry = Block; + Block->setPlan(this); + return Entry; + } /// The backedge taken count of the original loop. VPValue *getOrCreateBackedgeTakenCount() { diff --git a/llvm/test/Analysis/LoopNestAnalysis/imperfectnest.ll b/llvm/test/Analysis/LoopNestAnalysis/imperfectnest.ll new file mode 100644 index 00000000000000..4c8066ec58775e --- /dev/null +++ b/llvm/test/Analysis/LoopNestAnalysis/imperfectnest.ll @@ -0,0 +1,493 @@ +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +; Test an imperfect 2-dim loop nest of the form: +; for (int i = 0; i < nx; ++i) { +; x[i] = i; +; for (int j = 0; j < ny; ++j) +; y[j][i] = x[i] + j; +; } + +define void @imperf_nest_1(i32 signext %nx, i32 signext %ny) { +; CHECK-LABEL: IsPerfect=false, Depth=2, OutermostLoop: imperf_nest_1_loop_i, Loops: ( imperf_nest_1_loop_i imperf_nest_1_loop_j ) +entry: + %0 = zext i32 %ny to i64 + %1 = zext i32 %nx to i64 + %2 = mul nuw i64 %0, %1 + %vla = alloca double, i64 %2, align 8 + %3 = zext i32 %ny to i64 + %vla1 = alloca double, i64 %3, align 8 + br label %imperf_nest_1_loop_i + +imperf_nest_1_loop_i: + %i2.0 = 
phi i32 [ 0, %entry ], [ %inc16, %for.inc15 ] + %cmp = icmp slt i32 %i2.0, %nx + br i1 %cmp, label %for.body, label %for.end17 + +for.body: + %conv = sitofp i32 %i2.0 to double + %idxprom = sext i32 %i2.0 to i64 + %arrayidx = getelementptr inbounds double, double* %vla1, i64 %idxprom + store double %conv, double* %arrayidx, align 8 + br label %imperf_nest_1_loop_j + +imperf_nest_1_loop_j: + %j3.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %cmp5 = icmp slt i32 %j3.0, %ny + br i1 %cmp5, label %for.body7, label %for.end + +for.body7: + %idxprom8 = sext i32 %i2.0 to i64 + %arrayidx9 = getelementptr inbounds double, double* %vla1, i64 %idxprom8 + %4 = load double, double* %arrayidx9, align 8 + %conv10 = sitofp i32 %j3.0 to double + %add = fadd double %4, %conv10 + %idxprom11 = sext i32 %j3.0 to i64 + %5 = mul nsw i64 %idxprom11, %1 + %arrayidx12 = getelementptr inbounds double, double* %vla, i64 %5 + %idxprom13 = sext i32 %i2.0 to i64 + %arrayidx14 = getelementptr inbounds double, double* %arrayidx12, i64 %idxprom13 + store double %add, double* %arrayidx14, align 8 + br label %for.inc + +for.inc: + %inc = add nsw i32 %j3.0, 1 + br label %imperf_nest_1_loop_j + +for.end: + br label %for.inc15 + +for.inc15: + %inc16 = add nsw i32 %i2.0, 1 + br label %imperf_nest_1_loop_i + +for.end17: + ret void +} + +; Test an imperfect 2-dim loop nest of the form: +; for (int i = 0; i < nx; ++i) { +; for (int j = 0; j < ny; ++j) +; y[j][i] = x[i] + j; +; y[0][i] += i; +; } + +define void @imperf_nest_2(i32 signext %nx, i32 signext %ny) { +; CHECK-LABEL: IsPerfect=false, Depth=2, OutermostLoop: imperf_nest_2_loop_i, Loops: ( imperf_nest_2_loop_i imperf_nest_2_loop_j ) +entry: + %0 = zext i32 %ny to i64 + %1 = zext i32 %nx to i64 + %2 = mul nuw i64 %0, %1 + %vla = alloca double, i64 %2, align 8 + %3 = zext i32 %ny to i64 + %vla1 = alloca double, i64 %3, align 8 + br label %imperf_nest_2_loop_i + +imperf_nest_2_loop_i: + %i2.0 = phi i32 [ 0, %entry ], [ %inc17, %for.inc16 ] + %cmp = 
icmp slt i32 %i2.0, %nx + br i1 %cmp, label %for.body, label %for.end18 + +for.body: + br label %imperf_nest_2_loop_j + +imperf_nest_2_loop_j: + %j3.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %cmp5 = icmp slt i32 %j3.0, %ny + br i1 %cmp5, label %for.body6, label %for.end + +for.body6: + %idxprom = sext i32 %i2.0 to i64 + %arrayidx = getelementptr inbounds double, double* %vla1, i64 %idxprom + %4 = load double, double* %arrayidx, align 8 + %conv = sitofp i32 %j3.0 to double + %add = fadd double %4, %conv + %idxprom7 = sext i32 %j3.0 to i64 + %5 = mul nsw i64 %idxprom7, %1 + %arrayidx8 = getelementptr inbounds double, double* %vla, i64 %5 + %idxprom9 = sext i32 %i2.0 to i64 + %arrayidx10 = getelementptr inbounds double, double* %arrayidx8, i64 %idxprom9 + store double %add, double* %arrayidx10, align 8 + br label %for.inc + +for.inc: + %inc = add nsw i32 %j3.0, 1 + br label %imperf_nest_2_loop_j + +for.end: + %conv11 = sitofp i32 %i2.0 to double + %6 = mul nsw i64 0, %1 + %arrayidx12 = getelementptr inbounds double, double* %vla, i64 %6 + %idxprom13 = sext i32 %i2.0 to i64 + %arrayidx14 = getelementptr inbounds double, double* %arrayidx12, i64 %idxprom13 + %7 = load double, double* %arrayidx14, align 8 + %add15 = fadd double %7, %conv11 + store double %add15, double* %arrayidx14, align 8 + br label %for.inc16 + +for.inc16: + %inc17 = add nsw i32 %i2.0, 1 + br label %imperf_nest_2_loop_i + +for.end18: + ret void +} + +; Test an imperfect 2-dim loop nest of the form: +; for (i = 0; i < nx; ++i) { +; for (j = 0; j < ny-nk; ++j) +; y[i][j] = x[i] + j; +; for (j = ny-nk; j < ny; ++j) +; y[i][j] = x[i] - j; +; } + +define void @imperf_nest_3(i32 signext %nx, i32 signext %ny, i32 signext %nk) { +; CHECK-LABEL: IsPerfect=false, Depth=2, OutermostLoop: imperf_nest_3_loop_i, Loops: ( imperf_nest_3_loop_i imperf_nest_3_loop_j imperf_nest_3_loop_k ) +entry: + %0 = zext i32 %nx to i64 + %1 = zext i32 %ny to i64 + %2 = mul nuw i64 %0, %1 + %vla = alloca double, i64 %2, 
align 8 + %3 = zext i32 %ny to i64 + %vla1 = alloca double, i64 %3, align 8 + br label %imperf_nest_3_loop_i + +imperf_nest_3_loop_i: ; preds = %for.inc25, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc26, %for.inc25 ] + %cmp = icmp slt i32 %i.0, %nx + br i1 %cmp, label %for.body, label %for.end27 + +for.body: ; preds = %for.cond + br label %imperf_nest_3_loop_j + +imperf_nest_3_loop_j: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %sub = sub nsw i32 %ny, %nk + %cmp3 = icmp slt i32 %j.0, %sub + br i1 %cmp3, label %for.body4, label %for.end + +for.body4: ; preds = %imperf_nest_3_loop_j + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds double, double* %vla1, i64 %idxprom + %4 = load double, double* %arrayidx, align 8 + %conv = sitofp i32 %j.0 to double + %add = fadd double %4, %conv + %idxprom5 = sext i32 %i.0 to i64 + %5 = mul nsw i64 %idxprom5, %1 + %arrayidx6 = getelementptr inbounds double, double* %vla, i64 %5 + %idxprom7 = sext i32 %j.0 to i64 + %arrayidx8 = getelementptr inbounds double, double* %arrayidx6, i64 %idxprom7 + store double %add, double* %arrayidx8, align 8 + br label %for.inc + +for.inc: ; preds = %for.body4 + %inc = add nsw i32 %j.0, 1 + br label %imperf_nest_3_loop_j + +for.end: ; preds = %imperf_nest_3_loop_j + %sub9 = sub nsw i32 %ny, %nk + br label %imperf_nest_3_loop_k + +imperf_nest_3_loop_k: ; preds = %for.inc22, %for.end + %j.1 = phi i32 [ %sub9, %for.end ], [ %inc23, %for.inc22 ] + %cmp11 = icmp slt i32 %j.1, %ny + br i1 %cmp11, label %for.body13, label %for.end24 + +for.body13: ; preds = %imperf_nest_3_loop_k + %idxprom14 = sext i32 %i.0 to i64 + %arrayidx15 = getelementptr inbounds double, double* %vla1, i64 %idxprom14 + %6 = load double, double* %arrayidx15, align 8 + %conv16 = sitofp i32 %j.1 to double + %sub17 = fsub double %6, %conv16 + %idxprom18 = sext i32 %i.0 to i64 + %7 = mul nsw i64 %idxprom18, %1 + %arrayidx19 = getelementptr inbounds double, double* %vla, i64 %7 + 
%idxprom20 = sext i32 %j.1 to i64 + %arrayidx21 = getelementptr inbounds double, double* %arrayidx19, i64 %idxprom20 + store double %sub17, double* %arrayidx21, align 8 + br label %for.inc22 + +for.inc22: ; preds = %for.body13 + %inc23 = add nsw i32 %j.1, 1 + br label %imperf_nest_3_loop_k + +for.end24: ; preds = %imperf_nest_3_loop_k + br label %for.inc25 + +for.inc25: ; preds = %for.end24 + %inc26 = add nsw i32 %i.0, 1 + br label %imperf_nest_3_loop_i + +for.end27: ; preds = %for.cond + ret void +} + +; Test an imperfect loop nest of the form: +; for (i = 0; i < nx; ++i) { +; for (j = 0; j < ny-nk; ++j) +; for (k = 0; k < nk; ++k) +; y[i][j][k] = x[i+j] + k; +; for (j = ny-nk; j < ny; ++j) +; y[i][j][0] = x[i] - j; +; } + +define void @imperf_nest_4(i32 signext %nx, i32 signext %ny, i32 signext %nk) { +; CHECK-LABEL: IsPerfect=false, Depth=2, OutermostLoop: imperf_nest_4_loop_j, Loops: ( imperf_nest_4_loop_j imperf_nest_4_loop_k ) +; CHECK-LABEL: IsPerfect=false, Depth=3, OutermostLoop: imperf_nest_4_loop_i, Loops: ( imperf_nest_4_loop_i imperf_nest_4_loop_j imperf_nest_4_loop_j2 imperf_nest_4_loop_k ) +entry: + %0 = zext i32 %nx to i64 + %1 = zext i32 %ny to i64 + %2 = zext i32 %nk to i64 + %3 = mul nuw i64 %0, %1 + %4 = mul nuw i64 %3, %2 + %vla = alloca double, i64 %4, align 8 + %5 = zext i32 %ny to i64 + %vla1 = alloca double, i64 %5, align 8 + %cmp5 = icmp slt i32 0, %nx + br i1 %cmp5, label %imperf_nest_4_loop_i.lr.ph, label %for.end37 + +imperf_nest_4_loop_i.lr.ph: + br label %imperf_nest_4_loop_i + +imperf_nest_4_loop_i: + %i.0 = phi i32 [ 0, %imperf_nest_4_loop_i.lr.ph ], [ %inc36, %for.inc35 ] + %sub2 = sub nsw i32 %ny, %nk + %cmp33 = icmp slt i32 0, %sub2 + br i1 %cmp33, label %imperf_nest_4_loop_j.lr.ph, label %for.end17 + +imperf_nest_4_loop_j.lr.ph: + br label %imperf_nest_4_loop_j + +imperf_nest_4_loop_j: + %j.0 = phi i32 [ 0, %imperf_nest_4_loop_j.lr.ph ], [ %inc16, %for.inc15 ] + %cmp61 = icmp slt i32 0, %nk + br i1 %cmp61, label 
%imperf_nest_4_loop_k.lr.ph, label %for.end + +imperf_nest_4_loop_k.lr.ph: + br label %imperf_nest_4_loop_k + +imperf_nest_4_loop_k: + %k.0 = phi i32 [ 0, %imperf_nest_4_loop_k.lr.ph ], [ %inc, %for.inc ] + %add = add nsw i32 %i.0, %j.0 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds double, double* %vla1, i64 %idxprom + %6 = load double, double* %arrayidx, align 8 + %conv = sitofp i32 %k.0 to double + %add8 = fadd double %6, %conv + %idxprom9 = sext i32 %i.0 to i64 + %7 = mul nuw i64 %1, %2 + %8 = mul nsw i64 %idxprom9, %7 + %arrayidx10 = getelementptr inbounds double, double* %vla, i64 %8 + %idxprom11 = sext i32 %j.0 to i64 + %9 = mul nsw i64 %idxprom11, %2 + %arrayidx12 = getelementptr inbounds double, double* %arrayidx10, i64 %9 + %idxprom13 = sext i32 %k.0 to i64 + %arrayidx14 = getelementptr inbounds double, double* %arrayidx12, i64 %idxprom13 + store double %add8, double* %arrayidx14, align 8 + br label %for.inc + +for.inc: + %inc = add nsw i32 %k.0, 1 + %cmp6 = icmp slt i32 %inc, %nk + br i1 %cmp6, label %imperf_nest_4_loop_k, label %for.cond5.for.end_crit_edge + +for.cond5.for.end_crit_edge: + br label %for.end + +for.end: + br label %for.inc15 + +for.inc15: + %inc16 = add nsw i32 %j.0, 1 + %sub = sub nsw i32 %ny, %nk + %cmp3 = icmp slt i32 %inc16, %sub + br i1 %cmp3, label %imperf_nest_4_loop_j, label %for.cond2.for.end17_crit_edge + +for.cond2.for.end17_crit_edge: + br label %for.end17 + +for.end17: + %sub18 = sub nsw i32 %ny, %nk + %cmp204 = icmp slt i32 %sub18, %ny + br i1 %cmp204, label %imperf_nest_4_loop_j2.lr.ph, label %for.end34 + +imperf_nest_4_loop_j2.lr.ph: + br label %imperf_nest_4_loop_j2 + +imperf_nest_4_loop_j2: + %j.1 = phi i32 [ %sub18, %imperf_nest_4_loop_j2.lr.ph ], [ %inc33, %for.inc32 ] + %idxprom23 = sext i32 %i.0 to i64 + %arrayidx24 = getelementptr inbounds double, double* %vla1, i64 %idxprom23 + %10 = load double, double* %arrayidx24, align 8 + %conv25 = sitofp i32 %j.1 to double + %sub26 = fsub double %10, 
%conv25 + %idxprom27 = sext i32 %i.0 to i64 + %idxprom29 = sext i32 %j.1 to i64 + %11 = mul nsw i64 %idxprom29, %2 + %12 = mul nuw i64 %1, %2 + %13 = mul nsw i64 %idxprom27, %12 + %arrayidx28 = getelementptr inbounds double, double* %vla, i64 %13 + %arrayidx30 = getelementptr inbounds double, double* %arrayidx28, i64 %11 + %arrayidx31 = getelementptr inbounds double, double* %arrayidx30, i64 0 + store double %sub26, double* %arrayidx31, align 8 + br label %for.inc32 + +for.inc32: + %inc33 = add nsw i32 %j.1, 1 + %cmp20 = icmp slt i32 %inc33, %ny + br i1 %cmp20, label %imperf_nest_4_loop_j2, label %for.cond19.for.end34_crit_edge + +for.cond19.for.end34_crit_edge: + br label %for.end34 + +for.end34: + br label %for.inc35 + +for.inc35: + %inc36 = add nsw i32 %i.0, 1 + %cmp = icmp slt i32 %inc36, %nx + br i1 %cmp, label %imperf_nest_4_loop_i, label %for.cond.for.end37_crit_edge + +for.cond.for.end37_crit_edge: + br label %for.end37 + +for.end37: + ret void +} + +; Test an imperfect loop nest of the form: +; for (int i = 0; i < nx; ++i) +; if (i > 5) { +; for (int j = 0; j < ny; ++j) +; y[j][i] = x[i][j] + j; +; } + +define void @imperf_nest_5(i32** %y, i32** %x, i32 signext %nx, i32 signext %ny) { +; CHECK-LABEL: IsPerfect=false, Depth=2, OutermostLoop: imperf_nest_5_loop_i, Loops: ( imperf_nest_5_loop_i imperf_nest_5_loop_j ) +entry: + %cmp2 = icmp slt i32 0, %nx + br i1 %cmp2, label %imperf_nest_5_loop_i.lr.ph, label %for.end13 + +imperf_nest_5_loop_i.lr.ph: + br label %imperf_nest_5_loop_i + +imperf_nest_5_loop_i: + %i.0 = phi i32 [ 0, %imperf_nest_5_loop_i.lr.ph ], [ %inc12, %for.inc11 ] + %cmp1 = icmp sgt i32 %i.0, 5 + br i1 %cmp1, label %if.then, label %if.end + +if.then: + %cmp31 = icmp slt i32 0, %ny + br i1 %cmp31, label %imperf_nest_5_loop_j.lr.ph, label %for.end + +imperf_nest_5_loop_j.lr.ph: + br label %imperf_nest_5_loop_j + +imperf_nest_5_loop_j: + %j.0 = phi i32 [ 0, %imperf_nest_5_loop_j.lr.ph ], [ %inc, %for.inc ] + %idxprom = sext i32 %i.0 to i64 + 
%arrayidx = getelementptr inbounds i32*, i32** %x, i64 %idxprom + %0 = load i32*, i32** %arrayidx, align 8 + %idxprom5 = sext i32 %j.0 to i64 + %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %idxprom5 + %1 = load i32, i32* %arrayidx6, align 4 + %add = add nsw i32 %1, %j.0 + %idxprom7 = sext i32 %j.0 to i64 + %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %idxprom7 + %2 = load i32*, i32** %arrayidx8, align 8 + %idxprom9 = sext i32 %i.0 to i64 + %arrayidx10 = getelementptr inbounds i32, i32* %2, i64 %idxprom9 + store i32 %add, i32* %arrayidx10, align 4 + br label %for.inc + +for.inc: + %inc = add nsw i32 %j.0, 1 + %cmp3 = icmp slt i32 %inc, %ny + br i1 %cmp3, label %imperf_nest_5_loop_j, label %for.cond2.for.end_crit_edge + +for.cond2.for.end_crit_edge: + br label %for.end + +for.end: + br label %if.end + +if.end: + br label %for.inc11 + +for.inc11: + %inc12 = add nsw i32 %i.0, 1 + %cmp = icmp slt i32 %inc12, %nx + br i1 %cmp, label %imperf_nest_5_loop_i, label %for.cond.for.end13_crit_edge + +for.cond.for.end13_crit_edge: + br label %for.end13 + +for.end13: + ret void +} + +; Test an imperfect loop nest of the form: +; for (int i = 0; i < nx; ++i) +; if (i > 5) { // user branch +; for (int j = 1; j <= 5; j+=2) +; y[j][i] = x[i][j] + j; +; } + +define void @imperf_nest_6(i32** %y, i32** %x, i32 signext %nx, i32 signext %ny) { +; CHECK-LABEL: IsPerfect=false, Depth=2, OutermostLoop: imperf_nest_6_loop_i, Loops: ( imperf_nest_6_loop_i imperf_nest_6_loop_j ) +entry: + %cmp2 = icmp slt i32 0, %nx + br i1 %cmp2, label %imperf_nest_6_loop_i.lr.ph, label %for.end13 + +imperf_nest_6_loop_i.lr.ph: + br label %imperf_nest_6_loop_i + +imperf_nest_6_loop_i: + %i.0 = phi i32 [ 0, %imperf_nest_6_loop_i.lr.ph ], [ %inc12, %for.inc11 ] + %cmp1 = icmp sgt i32 %i.0, 5 + br i1 %cmp1, label %imperf_nest_6_loop_j.lr.ph, label %if.end + +imperf_nest_6_loop_j.lr.ph: + br label %imperf_nest_6_loop_j + +imperf_nest_6_loop_j: + %j.0 = phi i32 [ 1, %imperf_nest_6_loop_j.lr.ph 
], [ %inc, %for.inc ] + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %idxprom + %0 = load i32*, i32** %arrayidx, align 8 + %idxprom5 = sext i32 %j.0 to i64 + %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %idxprom5 + %1 = load i32, i32* %arrayidx6, align 4 + %add = add nsw i32 %1, %j.0 + %idxprom7 = sext i32 %j.0 to i64 + %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %idxprom7 + %2 = load i32*, i32** %arrayidx8, align 8 + %idxprom9 = sext i32 %i.0 to i64 + %arrayidx10 = getelementptr inbounds i32, i32* %2, i64 %idxprom9 + store i32 %add, i32* %arrayidx10, align 4 + br label %for.inc + +for.inc: + %inc = add nsw i32 %j.0, 2 + %cmp3 = icmp sle i32 %inc, 5 + br i1 %cmp3, label %imperf_nest_6_loop_j, label %for.cond2.for.end_crit_edge + +for.cond2.for.end_crit_edge: + br label %for.end + +for.end: + br label %if.end + +if.end: + br label %for.inc11 + +for.inc11: + %inc12 = add nsw i32 %i.0, 1 + %cmp = icmp slt i32 %inc12, %nx + br i1 %cmp, label %imperf_nest_6_loop_i, label %for.cond.for.end13_crit_edge + +for.cond.for.end13_crit_edge: + br label %for.end13 + +for.end13: + ret void +} diff --git a/llvm/test/Analysis/LoopNestAnalysis/infinite.ll b/llvm/test/Analysis/LoopNestAnalysis/infinite.ll new file mode 100644 index 00000000000000..7a6cf21584fff2 --- /dev/null +++ b/llvm/test/Analysis/LoopNestAnalysis/infinite.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +; Test that the loop nest analysis is able to analyze an infinite loop in a loop nest. 
+define void @test1(i32** %A, i1 %cond) { +; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: for.inner, Loops: ( for.inner ) +; CHECK-LABEL: IsPerfect=false, Depth=2, OutermostLoop: for.outer, Loops: ( for.outer for.inner ) +; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: for.infinite, Loops: ( for.infinite ) +entry: + br label %for.outer + +for.outer: + %i = phi i64 [ 0, %entry ], [ %inc_i, %for.outer.latch ] + br i1 %cond, label %for.inner, label %for.infinite + +for.inner: + %j = phi i64 [ 0, %for.outer ], [ %inc_j, %for.inner ] + %arrayidx_i = getelementptr inbounds i32*, i32** %A, i64 %i + %0 = load i32*, i32** %arrayidx_i, align 8 + %arrayidx_j = getelementptr inbounds i32, i32* %0, i64 %j + store i32 0, i32* %arrayidx_j, align 4 + %inc_j = add nsw i64 %j, 1 + %cmp_j = icmp slt i64 %inc_j, 100 + br i1 %cmp_j, label %for.inner, label %for.outer.latch + +for.infinite: + br label %for.infinite + +for.outer.latch: + %inc_i = add nsw i64 %i, 1 + %cmp_i = icmp slt i64 %inc_i, 100 + br i1 %cmp_i, label %for.outer, label %for.end + +for.end: + ret void +} diff --git a/llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll b/llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll new file mode 100644 index 00000000000000..b7b3b7a7c93e0c --- /dev/null +++ b/llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll @@ -0,0 +1,275 @@ +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +; Test a perfect 2-dim loop nest of the form: +; for(i=0; i +; CHECK: diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt index 9433fd1a31b0a9..d1bc970f3643a2 100644 --- a/llvm/test/CMakeLists.txt +++ b/llvm/test/CMakeLists.txt @@ -1,12 +1,12 @@ llvm_canonicalize_cmake_booleans( BUILD_SHARED_LIBS HAVE_LIBXAR + HAVE_LIBZ HAVE_OCAMLOPT HAVE_OCAML_OUNIT LLVM_ENABLE_DIA_SDK LLVM_ENABLE_FFI LLVM_ENABLE_THREADS - LLVM_ENABLE_ZLIB LLVM_INCLUDE_GO_TESTS LLVM_LIBXML2_ENABLED LLVM_LINK_LLVM_DYLIB diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-cfi.mir 
b/llvm/test/CodeGen/AArch64/machine-outliner-cfi.mir new file mode 100644 index 00000000000000..707785a566a217 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-cfi.mir @@ -0,0 +1,63 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-apple-unknown -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s + +# Outlining CFI instructions is unsafe. It is possible if the call is tail +# called, but otherwise, it requires fixups. Show that we don't include CFI +# instructions in outlined sequences right now. + +--- | + define void @foo() #0 { ret void } + define void @bar() #0 { ret void } + define void @baz() #0 { ret void } + attributes #0 = { noredzone } +... +--- +name: foo +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr + ; CHECK-LABEL: name: foo + ; CHECK: liveins: $lr + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa $w29, 16 + ; CHECK: TCRETURNdi @OUTLINED_FUNCTION_0, 0, implicit $sp, implicit-def $w12, implicit-def $w13, implicit-def $w14, implicit-def $w15 + frame-setup CFI_INSTRUCTION def_cfa $w29, 16 + $w12 = ORRWri $wzr, 1 + $w13 = ORRWri $wzr, 2 + $w14 = ORRWri $wzr, 3 + $w15 = ORRWri $wzr, 4 + RET undef $lr +... +--- +name: bar +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr + ; CHECK-LABEL: name: bar + ; CHECK: liveins: $lr + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa $w29, 16 + ; CHECK: TCRETURNdi @OUTLINED_FUNCTION_0, 0, implicit $sp, implicit-def $w12, implicit-def $w13, implicit-def $w14, implicit-def $w15 + frame-setup CFI_INSTRUCTION def_cfa $w29, 16 + $w12 = ORRWri $wzr, 1 + $w13 = ORRWri $wzr, 2 + $w14 = ORRWri $wzr, 3 + $w15 = ORRWri $wzr, 4 + RET undef $lr +... 
+--- +name: baz +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr + ; CHECK-LABEL: name: baz + ; CHECK: liveins: $lr + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa $w29, 16 + ; CHECK: TCRETURNdi @OUTLINED_FUNCTION_0, 0, implicit $sp, implicit-def $w12, implicit-def $w13, implicit-def $w14, implicit-def $w15 + frame-setup CFI_INSTRUCTION def_cfa $w29, 16 + $w12 = ORRWri $wzr, 1 + $w13 = ORRWri $wzr, 2 + $w14 = ORRWri $wzr, 3 + $w15 = ORRWri $wzr, 4 + RET undef $lr diff --git a/llvm/test/CodeGen/AMDGPU/at-least-one-def-value-assert.mir b/llvm/test/CodeGen/AMDGPU/at-least-one-def-value-assert.mir index eb244190e562e7..4503ed12cb6fb2 100644 --- a/llvm/test/CodeGen/AMDGPU/at-least-one-def-value-assert.mir +++ b/llvm/test/CodeGen/AMDGPU/at-least-one-def-value-assert.mir @@ -5,7 +5,7 @@ # CHECK-NEXT: - basic block: %bb.0 # CHECK-NEXT: - instruction: 48B dead undef %2.sub0:vreg_128 = COPY %0.sub0:vreg_128 # CHECK-NEXT: - operand 1: %0.sub0:vreg_128 -# CHECK-NEXT: - interval: %0 [16r,48r:0) 0@16r L00000002 [16r,32r:0) 0@16r weight:0.000000e+00 +# CHECK-NEXT: - interval: %0 [16r,48r:0) 0@16r L0000000000000002 [16r,32r:0) 0@16r weight:0.000000e+00 # This used to assert with: !SR.empty() && "At least one value should be defined by this mask" diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll index a9a91803b08e96..babb18f08576eb 100644 --- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll +++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll @@ -1,3 +1,10 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; NOTE: The checks for opt are NOT added by the update script. Those +; checks are looking for the absence of specific metadata, which +; cannot be expressed reliably by the generated checks. 
+ +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefix=ISA ; RUN: opt --amdgpu-annotate-uniform -S %s | FileCheck %s -check-prefix=UNIFORM ; RUN: opt --amdgpu-annotate-uniform --si-annotate-control-flow -S %s | FileCheck %s -check-prefix=CONTROLFLOW @@ -9,6 +16,56 @@ target triple = "amdgcn-mesa-mesa3d" define amdgpu_ps void @main(i32 %0, float %1) { +; ISA-LABEL: main: +; ISA: ; %bb.0: ; %start +; ISA-NEXT: v_readfirstlane_b32 s0, v0 +; ISA-NEXT: s_mov_b32 m0, s0 +; ISA-NEXT: s_mov_b32 s0, 0 +; ISA-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x +; ISA-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 +; ISA-NEXT: s_mov_b64 s[2:3], 0 +; ISA-NEXT: ; implicit-def: $sgpr6_sgpr7 +; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5 +; ISA-NEXT: s_branch BB0_3 +; ISA-NEXT: BB0_1: ; %Flow1 +; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; ISA-NEXT: s_or_b64 exec, exec, s[8:9] +; ISA-NEXT: s_add_i32 s0, s0, 1 +; ISA-NEXT: s_mov_b64 s[8:9], 0 +; ISA-NEXT: BB0_2: ; %Flow +; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; ISA-NEXT: s_and_b64 s[10:11], exec, s[6:7] +; ISA-NEXT: s_or_b64 s[2:3], s[10:11], s[2:3] +; ISA-NEXT: s_andn2_b64 s[4:5], s[4:5], exec +; ISA-NEXT: s_and_b64 s[8:9], s[8:9], exec +; ISA-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] +; ISA-NEXT: s_andn2_b64 exec, exec, s[2:3] +; ISA-NEXT: s_cbranch_execz BB0_6 +; ISA-NEXT: BB0_3: ; %loop +; ISA-NEXT: ; =>This Inner Loop Header: Depth=1 +; ISA-NEXT: s_or_b64 s[6:7], s[6:7], exec +; ISA-NEXT: s_cmp_lt_u32 s0, 32 +; ISA-NEXT: s_mov_b64 s[8:9], -1 +; ISA-NEXT: s_cbranch_scc0 BB0_2 +; ISA-NEXT: ; %bb.4: ; %endif1 +; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; ISA-NEXT: s_mov_b64 s[6:7], -1 +; ISA-NEXT: s_and_saveexec_b64 s[8:9], vcc +; ISA-NEXT: s_cbranch_execz BB0_1 +; ISA-NEXT: ; %bb.5: ; %endif2 +; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; ISA-NEXT: s_xor_b64 s[6:7], exec, -1 +; ISA-NEXT: s_branch BB0_1 +; ISA-NEXT: BB0_6: ; %Flow2 +; ISA-NEXT: s_or_b64 exec, exec, s[2:3] +; ISA-NEXT: v_mov_b32_e32 v1, 0 +; 
ISA-NEXT: s_and_saveexec_b64 s[0:1], s[4:5] +; ISA-NEXT: ; %bb.7: ; %if1 +; ISA-NEXT: v_sqrt_f32_e32 v1, v0 +; ISA-NEXT: ; %bb.8: ; %endloop +; ISA-NEXT: s_or_b64 exec, exec, s[0:1] +; ISA-NEXT: exp mrt0 v1, v1, v1, v1 done vm +; ISA-NEXT: s_endpgm start: %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0) br label %loop diff --git a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll index 75ad58df43b347..c18a076aad4e6c 100644 --- a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll @@ -1,13 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify %s | FileCheck -check-prefix=IR %s -; SI-LABEL: {{^}}infinite_loop: -; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7 -; SI: [[LOOP:BB[0-9]+_[0-9]+]]: ; %loop -; SI: s_waitcnt lgkmcnt(0) -; SI: buffer_store_dword [[REG]] -; SI: s_branch [[LOOP]] define amdgpu_kernel void @infinite_loop(i32 addrspace(1)* %out) { +; SI-LABEL: infinite_loop: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_mov_b32_e32 v0, 0x3e7 +; SI-NEXT: BB0_1: ; %loop +; SI-NEXT: ; =>This Inner Loop Header: Depth=1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_branch BB0_1 +; IR-LABEL: @infinite_loop( +; IR-NEXT: entry: +; IR-NEXT: br label [[LOOP:%.*]] +; IR: loop: +; IR-NEXT: store volatile i32 999, i32 addrspace(1)* [[OUT:%.*]], align 4 +; IR-NEXT: br label [[LOOP]] +; entry: br label %loop @@ -16,31 +30,36 @@ loop: br label %loop } - -; IR-LABEL: @infinite_loop_ret( -; IR: br i1 %cond, label %loop, label %UnifiedReturnBlock - -; IR: loop: -; IR: store 
volatile i32 999, i32 addrspace(1)* %out, align 4 -; IR: br i1 true, label %loop, label %UnifiedReturnBlock - -; IR: UnifiedReturnBlock: -; IR: ret void - - -; SI-LABEL: {{^}}infinite_loop_ret: -; SI: s_cbranch_execz [[RET:BB[0-9]+_[0-9]+]] - -; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7 -; SI: s_and_b64 vcc, exec, -1 -; SI: [[LOOP:BB[0-9]+_[0-9]+]]: ; %loop -; SI: s_waitcnt lgkmcnt(0) -; SI: buffer_store_dword [[REG]] -; SI: s_cbranch_vccnz [[LOOP]] - -; SI: [[RET]]: ; %UnifiedReturnBlock -; SI: s_endpgm define amdgpu_kernel void @infinite_loop_ret(i32 addrspace(1)* %out) { +; SI-LABEL: infinite_loop_ret: +; SI: ; %bb.0: ; %entry +; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc +; SI-NEXT: s_cbranch_execz BB1_3 +; SI-NEXT: ; %bb.1: ; %loop.preheader +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_mov_b32_e32 v0, 0x3e7 +; SI-NEXT: s_and_b64 vcc, exec, -1 +; SI-NEXT: BB1_2: ; %loop +; SI-NEXT: ; =>This Inner Loop Header: Depth=1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_cbranch_vccnz BB1_2 +; SI-NEXT: BB1_3: ; %UnifiedReturnBlock +; SI-NEXT: s_endpgm +; IR-LABEL: @infinite_loop_ret( +; IR-NEXT: entry: +; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; IR-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP]], 1 +; IR-NEXT: br i1 [[COND]], label [[LOOP:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]] +; IR: loop: +; IR-NEXT: store volatile i32 999, i32 addrspace(1)* [[OUT:%.*]], align 4 +; IR-NEXT: br i1 true, label [[LOOP]], label [[UNIFIEDRETURNBLOCK]] +; IR: UnifiedReturnBlock: +; IR-NEXT: ret void +; entry: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %cond = icmp eq i32 %tmp, 1 @@ -54,44 +73,44 @@ return: ret void } - -; IR-LABEL: @infinite_loops( -; IR: br i1 undef, label %loop1, label %loop2 - -; IR: loop1: -; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4 -; IR: br i1 true, label 
%loop1, label %DummyReturnBlock - -; IR: loop2: -; IR: store volatile i32 888, i32 addrspace(1)* %out, align 4 -; IR: br i1 true, label %loop2, label %DummyReturnBlock - -; IR: DummyReturnBlock: -; IR: ret void - - -; SI-LABEL: {{^}}infinite_loops: - -; SI: v_mov_b32_e32 [[REG1:v[0-9]+]], 0x3e7 -; SI: s_and_b64 vcc, exec, -1 - -; SI: [[LOOP1:BB[0-9]+_[0-9]+]]: ; %loop1 -; SI: s_waitcnt lgkmcnt(0) -; SI: buffer_store_dword [[REG1]] -; SI: s_cbranch_vccnz [[LOOP1]] -; SI: s_branch [[RET:BB[0-9]+_[0-9]+]] - -; SI: v_mov_b32_e32 [[REG2:v[0-9]+]], 0x378 -; SI: s_and_b64 vcc, exec, -1 - -; SI: [[LOOP2:BB[0-9]+_[0-9]+]]: ; %loop2 -; SI: s_waitcnt lgkmcnt(0) -; SI: buffer_store_dword [[REG2]] -; SI: s_cbranch_vccnz [[LOOP2]] - -; SI: [[RET]]: ; %DummyReturnBlock -; SI: s_endpgm define amdgpu_kernel void @infinite_loops(i32 addrspace(1)* %out) { +; SI-LABEL: infinite_loops: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_cbranch_scc0 BB2_3 +; SI-NEXT: ; %bb.1: ; %loop1.preheader +; SI-NEXT: v_mov_b32_e32 v0, 0x3e7 +; SI-NEXT: s_and_b64 vcc, exec, -1 +; SI-NEXT: BB2_2: ; %loop1 +; SI-NEXT: ; =>This Inner Loop Header: Depth=1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_cbranch_vccnz BB2_2 +; SI-NEXT: s_branch BB2_5 +; SI-NEXT: BB2_3: +; SI-NEXT: v_mov_b32_e32 v0, 0x378 +; SI-NEXT: s_and_b64 vcc, exec, -1 +; SI-NEXT: BB2_4: ; %loop2 +; SI-NEXT: ; =>This Inner Loop Header: Depth=1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_cbranch_vccnz BB2_4 +; SI-NEXT: BB2_5: ; %DummyReturnBlock +; SI-NEXT: s_endpgm +; IR-LABEL: @infinite_loops( +; IR-NEXT: entry: +; IR-NEXT: br i1 undef, label [[LOOP1:%.*]], label [[LOOP2:%.*]] +; IR: loop1: +; IR-NEXT: store volatile i32 999, i32 addrspace(1)* [[OUT:%.*]], align 4 +; IR-NEXT: br i1 true, label [[LOOP1]], label [[DUMMYRETURNBLOCK:%.*]] +; 
IR: loop2: +; IR-NEXT: store volatile i32 888, i32 addrspace(1)* [[OUT]], align 4 +; IR-NEXT: br i1 true, label [[LOOP2]], label [[DUMMYRETURNBLOCK]] +; IR: DummyReturnBlock: +; IR-NEXT: ret void +; entry: br i1 undef, label %loop1, label %loop2 @@ -104,55 +123,68 @@ loop2: br label %loop2 } - - -; IR-LABEL: @infinite_loop_nest_ret( -; IR: br i1 %cond1, label %outer_loop, label %UnifiedReturnBlock - -; IR: outer_loop: -; IR: br label %inner_loop - -; IR: inner_loop: -; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4 -; IR: %cond3 = icmp eq i32 %tmp, 3 -; IR: br i1 true, label %TransitionBlock, label %UnifiedReturnBlock - -; IR: TransitionBlock: -; IR: br i1 %cond3, label %inner_loop, label %outer_loop - -; IR: UnifiedReturnBlock: -; IR: ret void - -; SI-LABEL: {{^}}infinite_loop_nest_ret: -; SI: s_cbranch_execz [[RET:BB[0-9]+_[0-9]+]] - -; SI: s_mov_b32 -; SI: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %outer_loop - -; SI: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %inner_loop -; SI: s_waitcnt expcnt(0) -; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7 -; SI: s_waitcnt lgkmcnt(0) -; SI: buffer_store_dword [[REG]] - -; SI: s_andn2_b64 exec -; SI: s_cbranch_execnz [[INNER_LOOP]] - -; SI: s_andn2_b64 exec -; SI: s_cbranch_execnz [[OUTER_LOOP]] - -; SI: [[RET]]: ; %UnifiedReturnBlock -; SI: s_endpgm define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) { +; SI-LABEL: infinite_loop_nest_ret: +; SI: ; %bb.0: ; %entry +; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc +; SI-NEXT: s_cbranch_execz BB3_5 +; SI-NEXT: ; %bb.1: ; %outer_loop.preheader +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 +; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], 3, v0 +; SI-NEXT: s_mov_b64 s[2:3], 0 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: BB3_2: ; %outer_loop +; SI-NEXT: ; =>This Loop Header: Depth=1 +; SI-NEXT: ; Child Loop BB3_3 Depth 2 +; SI-NEXT: s_and_b64 s[8:9], exec, vcc +; SI-NEXT: 
s_or_b64 s[2:3], s[8:9], s[2:3] +; SI-NEXT: s_mov_b64 s[8:9], 0 +; SI-NEXT: BB3_3: ; %inner_loop +; SI-NEXT: ; Parent Loop BB3_2 Depth=1 +; SI-NEXT: ; => This Inner Loop Header: Depth=2 +; SI-NEXT: s_and_b64 s[10:11], exec, s[0:1] +; SI-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, 0x3e7 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: s_andn2_b64 exec, exec, s[8:9] +; SI-NEXT: s_cbranch_execnz BB3_3 +; SI-NEXT: ; %bb.4: ; %Flow +; SI-NEXT: ; in Loop: Header=BB3_2 Depth=1 +; SI-NEXT: s_or_b64 exec, exec, s[8:9] +; SI-NEXT: s_andn2_b64 exec, exec, s[2:3] +; SI-NEXT: s_cbranch_execnz BB3_2 +; SI-NEXT: BB3_5: ; %UnifiedReturnBlock +; SI-NEXT: s_endpgm +; IR-LABEL: @infinite_loop_nest_ret( +; IR-NEXT: entry: +; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; IR-NEXT: [[COND1:%.*]] = icmp eq i32 [[TMP]], 1 +; IR-NEXT: br i1 [[COND1]], label [[OUTER_LOOP:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]] +; IR: outer_loop: +; IR-NEXT: br label [[INNER_LOOP:%.*]] +; IR: inner_loop: +; IR-NEXT: store volatile i32 999, i32 addrspace(1)* [[OUT:%.*]], align 4 +; IR-NEXT: [[COND3:%.*]] = icmp eq i32 [[TMP]], 3 +; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK]] +; IR: TransitionBlock: +; IR-NEXT: br i1 [[COND3]], label [[INNER_LOOP]], label [[OUTER_LOOP]] +; IR: UnifiedReturnBlock: +; IR-NEXT: ret void +; entry: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %cond1 = icmp eq i32 %tmp, 1 br i1 %cond1, label %outer_loop, label %return outer_loop: - ; %cond2 = icmp eq i32 %tmp, 2 - ; br i1 %cond2, label %outer_loop, label %inner_loop - br label %inner_loop + ; %cond2 = icmp eq i32 %tmp, 2 + ; br i1 %cond2, label %outer_loop, label %inner_loop + br label %inner_loop inner_loop: ; preds = %LeafBlock, %LeafBlock1 store volatile i32 999, i32 addrspace(1)* %out, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll 
b/llvm/test/CodeGen/AMDGPU/loop_break.ll index d02d406689a9c0..b9788e8babf533 100644 --- a/llvm/test/CodeGen/AMDGPU/loop_break.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll @@ -1,56 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s ; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s ; Uses llvm.amdgcn.break -; OPT-LABEL: @break_loop( -; OPT: bb1: -; OPT: icmp slt i32 -; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow - -; OPT: bb4: -; OPT: load volatile -; OPT: icmp slt i32 -; OPT: xor i1 %cmp1 -; OPT: br label %Flow - -; OPT: Flow: -; OPT: call i64 @llvm.amdgcn.if.break.i64( -; OPT: call i1 @llvm.amdgcn.loop.i64(i64 -; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1 - -; OPT: bb9: -; OPT: call void @llvm.amdgcn.end.cf.i64(i64 - -; GCN-LABEL: {{^}}break_loop: -; GCN: s_mov_b64 [[ACCUM_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}} - -; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1 -; GCN: s_add_i32 s6, s6, 1 -; GCN: s_or_b64 [[INNER_MASK:s\[[0-9]+:[0-9]+\]]], [[INNER_MASK]], exec -; GCN: s_cmp_gt_i32 s6, -1 -; GCN: s_cbranch_scc1 [[FLOW:BB[0-9]+_[0-9]+]] - -; GCN: ; %bb4 -; GCN: buffer_load_dword -; GCN: v_cmp_ge_i32_e32 vcc -; GCN: s_andn2_b64 [[INNER_MASK]], [[INNER_MASK]], exec -; GCN: s_and_b64 [[BROKEN_MASK:s\[[0-9]+:[0-9]+\]]], vcc, exec -; GCN: s_or_b64 [[INNER_MASK]], [[INNER_MASK]], [[BROKEN_MASK]] - -; GCN: [[FLOW]]: ; %Flow -; GCN: ; in Loop: Header=BB0_1 Depth=1 -; GCN: s_and_b64 [[AND_MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INNER_MASK]] -; GCN-NEXT: s_or_b64 [[ACCUM_MASK]], [[AND_MASK]], [[ACCUM_MASK]] -; GCN-NEXT: s_andn2_b64 exec, exec, [[ACCUM_MASK]] -; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]] - -; GCN: ; %bb.4: ; %bb9 -; GCN-NEXT: s_endpgm define amdgpu_kernel void @break_loop(i32 %arg) #0 { 
+; OPT-LABEL: @break_loop( +; OPT-NEXT: bb: +; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]] +; OPT-NEXT: br label [[BB1:%.*]] +; OPT: bb1: +; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ] +; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ] +; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1 +; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0 +; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]] +; OPT: bb4: +; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 +; OPT-NEXT: [[CMP1:%.*]] = icmp slt i32 [[MY_TMP]], [[LOAD]] +; OPT-NEXT: [[TMP0:%.*]] = xor i1 [[CMP1]], true +; OPT-NEXT: br label [[FLOW]] +; OPT: Flow: +; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB4]] ], [ true, [[BB1]] ] +; OPT-NEXT: [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]]) +; OPT-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]]) +; OPT-NEXT: br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]] +; OPT: bb9: +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]]) +; OPT-NEXT: ret void +; +; GCN-LABEL: break_loop: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dword s3, s[0:1], 0x9 +; GCN-NEXT: s_mov_b64 s[0:1], 0 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5 +; GCN-NEXT: ; implicit-def: $sgpr6 +; GCN-NEXT: BB0_1: ; %bb1 +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_add_i32 s6, s6, 1 +; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec +; GCN-NEXT: s_cmp_gt_i32 s6, -1 +; GCN-NEXT: s_cbranch_scc1 BB0_3 +; GCN-NEXT: ; %bb.2: ; %bb4 +; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1 +; GCN-NEXT: 
s_andn2_b64 s[4:5], s[4:5], exec +; GCN-NEXT: s_and_b64 s[8:9], vcc, exec +; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] +; GCN-NEXT: BB0_3: ; %Flow +; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5] +; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1] +; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GCN-NEXT: s_cbranch_execnz BB0_1 +; GCN-NEXT: ; %bb.4: ; %bb9 +; GCN-NEXT: s_endpgm bb: %id = call i32 @llvm.amdgcn.workitem.id.x() - %tmp = sub i32 %id, %arg + %my.tmp = sub i32 %id, %arg br label %bb1 bb1: @@ -61,58 +76,98 @@ bb1: bb4: %load = load volatile i32, i32 addrspace(1)* undef, align 4 - %cmp1 = icmp slt i32 %tmp, %load + %cmp1 = icmp slt i32 %my.tmp, %load br i1 %cmp1, label %bb1, label %bb9 bb9: ret void } -; OPT-LABEL: @undef_phi_cond_break_loop( -; OPT: bb1: -; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ] -; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] -; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1 -; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0 -; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow - -; OPT: bb4: -; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4 -; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load -; OPT-NEXT: br label %Flow - -; OPT: Flow: -; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] -; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ] -; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64(i1 %tmp3, i64 %phi.broken) -; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0) -; OPT-NEXT: br i1 %1, label %bb9, label %bb1 - -; OPT: bb9: ; preds = %Flow -; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0) -; OPT-NEXT: store volatile i32 7 -; OPT-NEXT: ret void define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 { +; OPT-LABEL: @undef_phi_cond_break_loop( +; OPT-NEXT: bb: +; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]] +; OPT-NEXT: br label [[BB1:%.*]] +; 
OPT: bb1: +; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ] +; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ] +; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1 +; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0 +; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]] +; OPT: bb4: +; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 +; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]] +; OPT-NEXT: br label [[FLOW]] +; OPT: Flow: +; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ] +; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ] +; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]]) +; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]]) +; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]] +; OPT: bb9: +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]]) +; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef +; OPT-NEXT: ret void +; +; GCN-LABEL: undef_phi_cond_break_loop: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dword s3, s[0:1], 0x9 +; GCN-NEXT: s_mov_b64 s[0:1], 0 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: ; implicit-def: $sgpr6_sgpr7 +; GCN-NEXT: ; implicit-def: $sgpr4 +; GCN-NEXT: BB1_1: ; %bb1 +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec +; GCN-NEXT: s_and_b64 s[8:9], s[0:1], exec +; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] +; GCN-NEXT: s_cmp_gt_i32 s4, -1 +; GCN-NEXT: s_cbranch_scc1 BB1_3 +; GCN-NEXT: ; %bb.2: ; %bb4 +; GCN-NEXT: ; in Loop: Header=BB1_1 Depth=1 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1 +; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], 
exec +; GCN-NEXT: s_and_b64 s[8:9], vcc, exec +; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] +; GCN-NEXT: BB1_3: ; %Flow +; GCN-NEXT: ; in Loop: Header=BB1_1 Depth=1 +; GCN-NEXT: s_add_i32 s4, s4, 1 +; GCN-NEXT: s_and_b64 s[8:9], exec, s[6:7] +; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1] +; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GCN-NEXT: s_cbranch_execnz BB1_1 +; GCN-NEXT: ; %bb.4: ; %bb9 +; GCN-NEXT: s_or_b64 exec, exec, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v0, 7 +; GCN-NEXT: s_mov_b32 m0, -1 +; GCN-NEXT: ds_write_b32 v0, v0 +; GCN-NEXT: s_endpgm bb: %id = call i32 @llvm.amdgcn.workitem.id.x() - %tmp = sub i32 %id, %arg + %my.tmp = sub i32 %id, %arg br label %bb1 bb1: ; preds = %Flow, %bb - %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] + %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ] %lsr.iv.next = add i32 %lsr.iv, 1 %cmp0 = icmp slt i32 %lsr.iv.next, 0 br i1 %cmp0, label %bb4, label %Flow bb4: ; preds = %bb1 %load = load volatile i32, i32 addrspace(1)* undef, align 4 - %cmp1 = icmp sge i32 %tmp, %load + %cmp1 = icmp sge i32 %my.tmp, %load br label %Flow Flow: ; preds = %bb4, %bb1 - %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] - %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ] - br i1 %tmp3, label %bb9, label %bb1 + %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] + %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ] + br i1 %my.tmp3, label %bb9, label %bb1 bb9: ; preds = %Flow store volatile i32 7, i32 addrspace(3)* undef @@ -122,152 +177,271 @@ bb9: ; preds = %Flow ; FIXME: ConstantExpr compare of address to null folds away @lds = addrspace(3) global i32 undef -; OPT-LABEL: @constexpr_phi_cond_break_loop( -; OPT: bb1: -; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ] -; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] -; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1 -; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0 -; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow - -; OPT: bb4: -; OPT-NEXT: %load = load 
volatile i32, i32 addrspace(1)* undef, align 4 -; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load -; OPT-NEXT: br label %Flow - -; OPT: Flow: -; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] -; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ] -; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64(i1 %tmp3, i64 %phi.broken) -; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0) -; OPT-NEXT: br i1 %1, label %bb9, label %bb1 - -; OPT: bb9: ; preds = %Flow -; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0) -; OPT-NEXT: store volatile i32 7 -; OPT-NEXT: ret void define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 { +; OPT-LABEL: @constexpr_phi_cond_break_loop( +; OPT-NEXT: bb: +; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]] +; OPT-NEXT: br label [[BB1:%.*]] +; OPT: bb1: +; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ] +; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ] +; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1 +; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0 +; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]] +; OPT: bb4: +; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 +; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]] +; OPT-NEXT: br label [[FLOW]] +; OPT: Flow: +; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ] +; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), [[BB1]] ] +; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]]) +; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]]) +; OPT-NEXT: br i1 [[TMP1]], label 
[[BB9:%.*]], label [[BB1]] +; OPT: bb9: +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]]) +; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef +; OPT-NEXT: ret void +; +; GCN-LABEL: constexpr_phi_cond_break_loop: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dword s3, s[0:1], 0x9 +; GCN-NEXT: s_mov_b64 s[0:1], 0 +; GCN-NEXT: s_mov_b32 s2, lds@abs32@lo +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5 +; GCN-NEXT: ; implicit-def: $sgpr3 +; GCN-NEXT: BB2_1: ; %bb1 +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: v_cmp_ne_u32_e64 s[8:9], s2, 4 +; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec +; GCN-NEXT: s_and_b64 s[8:9], s[8:9], exec +; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] +; GCN-NEXT: s_cmp_gt_i32 s3, -1 +; GCN-NEXT: s_cbranch_scc1 BB2_3 +; GCN-NEXT: ; %bb.2: ; %bb4 +; GCN-NEXT: ; in Loop: Header=BB2_1 Depth=1 +; GCN-NEXT: buffer_load_dword v1, off, s[4:7], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1 +; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec +; GCN-NEXT: s_and_b64 s[8:9], vcc, exec +; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] +; GCN-NEXT: BB2_3: ; %Flow +; GCN-NEXT: ; in Loop: Header=BB2_1 Depth=1 +; GCN-NEXT: s_add_i32 s3, s3, 1 +; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5] +; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1] +; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GCN-NEXT: s_cbranch_execnz BB2_1 +; GCN-NEXT: ; %bb.4: ; %bb9 +; GCN-NEXT: s_or_b64 exec, exec, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v0, 7 +; GCN-NEXT: s_mov_b32 m0, -1 +; GCN-NEXT: ds_write_b32 v0, v0 +; GCN-NEXT: s_endpgm bb: %id = call i32 @llvm.amdgcn.workitem.id.x() - %tmp = sub i32 %id, %arg + %my.tmp = sub i32 %id, %arg br label %bb1 bb1: ; preds = %Flow, %bb - %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] + %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ] %lsr.iv.next = add i32 %lsr.iv, 1 %cmp0 
= icmp slt i32 %lsr.iv.next, 0 br i1 %cmp0, label %bb4, label %Flow bb4: ; preds = %bb1 %load = load volatile i32, i32 addrspace(1)* undef, align 4 - %cmp1 = icmp sge i32 %tmp, %load + %cmp1 = icmp sge i32 %my.tmp, %load br label %Flow Flow: ; preds = %bb4, %bb1 - %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] - %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ] - br i1 %tmp3, label %bb9, label %bb1 + %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] + %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ] + br i1 %my.tmp3, label %bb9, label %bb1 bb9: ; preds = %Flow store volatile i32 7, i32 addrspace(3)* undef ret void } -; OPT-LABEL: @true_phi_cond_break_loop( -; OPT: bb1: -; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ] -; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] -; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1 -; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0 -; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow - -; OPT: bb4: -; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4 -; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load -; OPT-NEXT: br label %Flow - -; OPT: Flow: -; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] -; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ] -; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64(i1 %tmp3, i64 %phi.broken) -; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0) -; OPT-NEXT: br i1 %1, label %bb9, label %bb1 - -; OPT: bb9: ; preds = %Flow -; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0) -; OPT-NEXT: store volatile i32 7 -; OPT-NEXT: ret void define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 { +; OPT-LABEL: @true_phi_cond_break_loop( +; OPT-NEXT: bb: +; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 
[[ID]], [[ARG:%.*]] +; OPT-NEXT: br label [[BB1:%.*]] +; OPT: bb1: +; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ] +; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ] +; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1 +; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0 +; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]] +; OPT: bb4: +; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 +; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]] +; OPT-NEXT: br label [[FLOW]] +; OPT: Flow: +; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ] +; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ] +; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]]) +; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]]) +; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]] +; OPT: bb9: +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]]) +; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef +; OPT-NEXT: ret void +; +; GCN-LABEL: true_phi_cond_break_loop: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dword s3, s[0:1], 0x9 +; GCN-NEXT: s_mov_b64 s[0:1], 0 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5 +; GCN-NEXT: ; implicit-def: $sgpr6 +; GCN-NEXT: BB3_1: ; %bb1 +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec +; GCN-NEXT: s_cmp_gt_i32 s6, -1 +; GCN-NEXT: s_cbranch_scc1 BB3_3 +; GCN-NEXT: ; %bb.2: ; %bb4 +; GCN-NEXT: ; in Loop: Header=BB3_1 Depth=1 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1 +; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec +; GCN-NEXT: s_and_b64 s[8:9], 
vcc, exec +; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] +; GCN-NEXT: BB3_3: ; %Flow +; GCN-NEXT: ; in Loop: Header=BB3_1 Depth=1 +; GCN-NEXT: s_add_i32 s6, s6, 1 +; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5] +; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1] +; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GCN-NEXT: s_cbranch_execnz BB3_1 +; GCN-NEXT: ; %bb.4: ; %bb9 +; GCN-NEXT: s_or_b64 exec, exec, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v0, 7 +; GCN-NEXT: s_mov_b32 m0, -1 +; GCN-NEXT: ds_write_b32 v0, v0 +; GCN-NEXT: s_endpgm bb: %id = call i32 @llvm.amdgcn.workitem.id.x() - %tmp = sub i32 %id, %arg + %my.tmp = sub i32 %id, %arg br label %bb1 bb1: ; preds = %Flow, %bb - %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] + %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ] %lsr.iv.next = add i32 %lsr.iv, 1 %cmp0 = icmp slt i32 %lsr.iv.next, 0 br i1 %cmp0, label %bb4, label %Flow bb4: ; preds = %bb1 %load = load volatile i32, i32 addrspace(1)* undef, align 4 - %cmp1 = icmp sge i32 %tmp, %load + %cmp1 = icmp sge i32 %my.tmp, %load br label %Flow Flow: ; preds = %bb4, %bb1 - %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] - %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ] - br i1 %tmp3, label %bb9, label %bb1 + %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] + %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ] + br i1 %my.tmp3, label %bb9, label %bb1 bb9: ; preds = %Flow store volatile i32 7, i32 addrspace(3)* undef ret void } -; OPT-LABEL: @false_phi_cond_break_loop( -; OPT: bb1: -; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ] -; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] -; OPT-NOT: call -; OPT: br i1 %cmp0, label %bb4, label %Flow - -; OPT: bb4: -; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4 -; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load -; OPT-NEXT: br label %Flow - -; OPT: Flow: -; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] -; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 
] -; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64(i1 %tmp3, i64 %phi.broken) -; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0) -; OPT-NEXT: br i1 %1, label %bb9, label %bb1 - -; OPT: bb9: ; preds = %Flow -; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0) -; OPT-NEXT: store volatile i32 7 -; OPT-NEXT: ret void define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 { +; OPT-LABEL: @false_phi_cond_break_loop( +; OPT-NEXT: bb: +; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]] +; OPT-NEXT: br label [[BB1:%.*]] +; OPT: bb1: +; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ] +; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ] +; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1 +; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0 +; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]] +; OPT: bb4: +; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 +; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]] +; OPT-NEXT: br label [[FLOW]] +; OPT: Flow: +; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ] +; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ] +; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]]) +; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]]) +; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]] +; OPT: bb9: +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]]) +; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef +; OPT-NEXT: ret void +; +; GCN-LABEL: false_phi_cond_break_loop: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dword s3, s[0:1], 0x9 +; GCN-NEXT: s_mov_b64 s[0:1], 0 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0 +; GCN-NEXT: 
s_mov_b32 s3, 0xf000 +; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5 +; GCN-NEXT: ; implicit-def: $sgpr6 +; GCN-NEXT: BB4_1: ; %bb1 +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec +; GCN-NEXT: s_cmp_gt_i32 s6, -1 +; GCN-NEXT: s_cbranch_scc1 BB4_3 +; GCN-NEXT: ; %bb.2: ; %bb4 +; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1 +; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec +; GCN-NEXT: s_and_b64 s[8:9], vcc, exec +; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] +; GCN-NEXT: BB4_3: ; %Flow +; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1 +; GCN-NEXT: s_add_i32 s6, s6, 1 +; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5] +; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1] +; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GCN-NEXT: s_cbranch_execnz BB4_1 +; GCN-NEXT: ; %bb.4: ; %bb9 +; GCN-NEXT: s_or_b64 exec, exec, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v0, 7 +; GCN-NEXT: s_mov_b32 m0, -1 +; GCN-NEXT: ds_write_b32 v0, v0 +; GCN-NEXT: s_endpgm bb: %id = call i32 @llvm.amdgcn.workitem.id.x() - %tmp = sub i32 %id, %arg + %my.tmp = sub i32 %id, %arg br label %bb1 bb1: ; preds = %Flow, %bb - %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] + %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ] %lsr.iv.next = add i32 %lsr.iv, 1 %cmp0 = icmp slt i32 %lsr.iv.next, 0 br i1 %cmp0, label %bb4, label %Flow bb4: ; preds = %bb1 %load = load volatile i32, i32 addrspace(1)* undef, align 4 - %cmp1 = icmp sge i32 %tmp, %load + %cmp1 = icmp sge i32 %my.tmp, %load br label %Flow Flow: ; preds = %bb4, %bb1 - %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] - %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ] - br i1 %tmp3, label %bb9, label %bb1 + %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] + %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ] + br i1 %my.tmp3, label %bb9, label %bb1 bb9: ; preds = %Flow store volatile i32 7, i32 addrspace(3)* 
undef @@ -277,52 +451,91 @@ bb9: ; preds = %Flow ; Swap order of branches in flow block so that the true phi is ; continue. -; OPT-LABEL: @invert_true_phi_cond_break_loop( -; OPT: bb1: -; OPT-NEXT: %phi.broken = phi i64 [ %1, %Flow ], [ 0, %bb ] -; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] -; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1 -; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0 -; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow - -; OPT: bb4: -; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4 -; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load -; OPT-NEXT: br label %Flow - -; OPT: Flow: -; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] -; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ] -; OPT-NEXT: %0 = xor i1 %tmp3, true -; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break.i64(i1 %0, i64 %phi.broken) -; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop.i64(i64 %1) -; OPT-NEXT: br i1 %2, label %bb9, label %bb1 - -; OPT: bb9: -; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %1) -; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef -; OPT-NEXT: ret void define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 { +; OPT-LABEL: @invert_true_phi_cond_break_loop( +; OPT-NEXT: bb: +; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]] +; OPT-NEXT: br label [[BB1:%.*]] +; OPT: bb1: +; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP1:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ] +; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ] +; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1 +; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0 +; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]] +; OPT: bb4: +; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 +; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]] +; OPT-NEXT: br label [[FLOW]] +; OPT: Flow: 
+; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ] +; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ] +; OPT-NEXT: [[TMP0:%.*]] = xor i1 [[MY_TMP3]], true +; OPT-NEXT: [[TMP1]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP0]], i64 [[PHI_BROKEN]]) +; OPT-NEXT: [[TMP2:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP1]]) +; OPT-NEXT: br i1 [[TMP2]], label [[BB9:%.*]], label [[BB1]] +; OPT: bb9: +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP1]]) +; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef +; OPT-NEXT: ret void +; +; GCN-LABEL: invert_true_phi_cond_break_loop: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dword s3, s[0:1], 0x9 +; GCN-NEXT: s_mov_b64 s[0:1], 0 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5 +; GCN-NEXT: ; implicit-def: $sgpr6 +; GCN-NEXT: BB5_1: ; %bb1 +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec +; GCN-NEXT: s_cmp_gt_i32 s6, -1 +; GCN-NEXT: s_cbranch_scc1 BB5_3 +; GCN-NEXT: ; %bb.2: ; %bb4 +; GCN-NEXT: ; in Loop: Header=BB5_1 Depth=1 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1 +; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec +; GCN-NEXT: s_and_b64 s[8:9], vcc, exec +; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] +; GCN-NEXT: BB5_3: ; %Flow +; GCN-NEXT: ; in Loop: Header=BB5_1 Depth=1 +; GCN-NEXT: s_add_i32 s6, s6, 1 +; GCN-NEXT: s_xor_b64 s[8:9], s[4:5], -1 +; GCN-NEXT: s_and_b64 s[8:9], exec, s[8:9] +; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1] +; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GCN-NEXT: s_cbranch_execnz BB5_1 +; GCN-NEXT: ; %bb.4: ; %bb9 +; GCN-NEXT: s_or_b64 exec, exec, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v0, 7 +; GCN-NEXT: s_mov_b32 m0, -1 +; GCN-NEXT: ds_write_b32 v0, v0 +; GCN-NEXT: s_endpgm bb: %id = 
call i32 @llvm.amdgcn.workitem.id.x() - %tmp = sub i32 %id, %arg + %my.tmp = sub i32 %id, %arg br label %bb1 bb1: ; preds = %Flow, %bb - %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ] + %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ] %lsr.iv.next = add i32 %lsr.iv, 1 %cmp0 = icmp slt i32 %lsr.iv.next, 0 br i1 %cmp0, label %bb4, label %Flow bb4: ; preds = %bb1 %load = load volatile i32, i32 addrspace(1)* undef, align 4 - %cmp1 = icmp sge i32 %tmp, %load + %cmp1 = icmp sge i32 %my.tmp, %load br label %Flow Flow: ; preds = %bb4, %bb1 - %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] - %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ] - br i1 %tmp3, label %bb1, label %bb9 + %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] + %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ] + br i1 %my.tmp3, label %bb1, label %bb9 bb9: ; preds = %Flow store volatile i32 7, i32 addrspace(3)* undef diff --git a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll index 6e846fd56498ca..e6712277a90b44 100644 --- a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll @@ -1,3 +1,5 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=IR %s ; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s @@ -6,74 +8,89 @@ ; the condition that appears to have no uses until the loop is ; completely processed. 
- -; IR-LABEL: @reduced_nested_loop_conditions( - -; IR: bb5: -; IR-NEXT: %phi.broken = phi i64 [ %3, %bb10 ], [ 0, %bb ] -; IR-NEXT: %tmp6 = phi i32 [ 0, %bb ], [ %tmp11, %bb10 ] -; IR-NEXT: %tmp7 = icmp eq i32 %tmp6, 1 -; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %tmp7) -; IR-NEXT: %1 = extractvalue { i1, i64 } %0, 0 -; IR-NEXT: %2 = extractvalue { i1, i64 } %0, 1 -; IR-NEXT: br i1 %1, label %bb8, label %Flow - -; IR: bb8: -; IR-NEXT: br label %bb13 - -; IR: bb10: -; IR-NEXT: %tmp11 = phi i32 [ %6, %Flow ] -; IR-NEXT: %tmp12 = phi i1 [ %5, %Flow ] -; IR-NEXT: %3 = call i64 @llvm.amdgcn.if.break.i64(i1 %tmp12, i64 %phi.broken) -; IR-NEXT: %4 = call i1 @llvm.amdgcn.loop.i64(i64 %3) -; IR-NEXT: br i1 %4, label %bb23, label %bb5 - -; IR: Flow: -; IR-NEXT: %5 = phi i1 [ %tmp22, %bb4 ], [ true, %bb5 ] -; IR-NEXT: %6 = phi i32 [ %tmp21, %bb4 ], [ undef, %bb5 ] -; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %2) -; IR-NEXT: br label %bb10 - -; IR: bb13: -; IR-NEXT: %tmp14 = phi i1 [ %tmp22, %bb3 ], [ true, %bb8 ] -; IR-NEXT: %tmp15 = bitcast i64 %tmp2 to <2 x i32> -; IR-NEXT: br i1 %tmp14, label %bb16, label %bb20 - -; IR: bb16: -; IR-NEXT: %tmp17 = extractelement <2 x i32> %tmp15, i64 1 -; IR-NEXT: %tmp18 = getelementptr inbounds i32, i32 addrspace(3)* undef, i32 %tmp17 -; IR-NEXT: %tmp19 = load volatile i32, i32 addrspace(3)* %tmp18 -; IR-NEXT: br label %bb20 - -; IR: bb20: -; IR-NEXT: %tmp21 = phi i32 [ %tmp19, %bb16 ], [ 0, %bb13 ] -; IR-NEXT: %tmp22 = phi i1 [ false, %bb16 ], [ %tmp14, %bb13 ] -; IR-NEXT: br label %bb9 - -; IR: bb23: -; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %3) -; IR-NEXT: ret void - -; GCN-LABEL: {{^}}reduced_nested_loop_conditions: - -; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1 -; GCN-NEXT: s_cbranch_scc0 - -; FIXME: Should fold to unconditional branch? 
-; GCN: ; implicit-def -; GCN: s_cbranch_vccnz - -; GCN: ds_read_b32 - -; GCN: [[BB9:BB[0-9]+_[0-9]+]]: ; %bb9 -; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: s_mov_b64 vcc, vcc -; GCN-NEXT: s_cbranch_vccnz [[BB9]] define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* nocapture %arg) #0 { +; GCN-LABEL: reduced_nested_loop_conditions: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dword s0, s[0:1], 0x9 +; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GCN-NEXT: s_mov_b32 m0, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_add_i32_e32 v0, vcc, s0, v0 +; GCN-NEXT: ds_read_b64 v[0:1], v0 +; GCN-NEXT: s_mov_b32 s0, 0 +; GCN-NEXT: s_and_b64 vcc, exec, 0 +; GCN-NEXT: BB0_1: ; %bb5 +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_cmp_lg_u32 s0, 1 +; GCN-NEXT: s_cbranch_scc0 BB0_3 +; GCN-NEXT: ; %bb.2: ; %bb10 +; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; GCN-NEXT: ; implicit-def: $sgpr0 +; GCN-NEXT: s_cbranch_vccnz BB0_1 +; GCN-NEXT: s_branch BB0_5 +; GCN-NEXT: BB0_3: ; %bb8 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: ds_read_b32 v0, v0 +; GCN-NEXT: s_and_b64 vcc, exec, -1 +; GCN-NEXT: BB0_4: ; %bb9 +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_cbranch_vccnz BB0_4 +; GCN-NEXT: BB0_5: ; %DummyReturnBlock +; GCN-NEXT: s_endpgm +; IR-LABEL: @reduced_nested_loop_conditions( +; IR-NEXT: bb: +; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #4 +; IR-NEXT: [[MY_TMP1:%.*]] = getelementptr inbounds i64, i64 addrspace(3)* [[ARG:%.*]], i32 [[MY_TMP]] +; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, i64 addrspace(3)* [[MY_TMP1]] +; IR-NEXT: br label [[BB5:%.*]] +; IR: bb3: +; IR-NEXT: br i1 true, label [[BB4:%.*]], label [[BB13:%.*]] +; IR: bb4: +; IR-NEXT: br label [[FLOW:%.*]] +; IR: bb5: +; IR-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP3:%.*]], [[BB10:%.*]] ], [ 0, [[BB:%.*]] ] +; IR-NEXT: [[MY_TMP6:%.*]] = phi i32 [ 0, [[BB]] ], [ [[MY_TMP11:%.*]], [[BB10]] ] +; IR-NEXT: [[MY_TMP7:%.*]] = icmp 
eq i32 [[MY_TMP6]], 1 +; IR-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[MY_TMP7]]) +; IR-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0 +; IR-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1 +; IR-NEXT: br i1 [[TMP1]], label [[BB8:%.*]], label [[FLOW]] +; IR: bb8: +; IR-NEXT: br label [[BB13]] +; IR: bb9: +; IR-NEXT: br i1 false, label [[BB3:%.*]], label [[BB9:%.*]] +; IR: bb10: +; IR-NEXT: [[MY_TMP11]] = phi i32 [ [[TMP6:%.*]], [[FLOW]] ] +; IR-NEXT: [[MY_TMP12:%.*]] = phi i1 [ [[TMP5:%.*]], [[FLOW]] ] +; IR-NEXT: [[TMP3]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP12]], i64 [[PHI_BROKEN]]) +; IR-NEXT: [[TMP4:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP3]]) +; IR-NEXT: br i1 [[TMP4]], label [[BB23:%.*]], label [[BB5]] +; IR: Flow: +; IR-NEXT: [[TMP5]] = phi i1 [ [[MY_TMP22:%.*]], [[BB4]] ], [ true, [[BB5]] ] +; IR-NEXT: [[TMP6]] = phi i32 [ [[MY_TMP21:%.*]], [[BB4]] ], [ undef, [[BB5]] ] +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]]) +; IR-NEXT: br label [[BB10]] +; IR: bb13: +; IR-NEXT: [[MY_TMP14:%.*]] = phi i1 [ [[MY_TMP22]], [[BB3]] ], [ true, [[BB8]] ] +; IR-NEXT: [[MY_TMP15:%.*]] = bitcast i64 [[MY_TMP2]] to <2 x i32> +; IR-NEXT: br i1 [[MY_TMP14]], label [[BB16:%.*]], label [[BB20:%.*]] +; IR: bb16: +; IR-NEXT: [[MY_TMP17:%.*]] = extractelement <2 x i32> [[MY_TMP15]], i64 1 +; IR-NEXT: [[MY_TMP18:%.*]] = getelementptr inbounds i32, i32 addrspace(3)* undef, i32 [[MY_TMP17]] +; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[MY_TMP18]] +; IR-NEXT: br label [[BB20]] +; IR: bb20: +; IR-NEXT: [[MY_TMP21]] = phi i32 [ [[MY_TMP19]], [[BB16]] ], [ 0, [[BB13]] ] +; IR-NEXT: [[MY_TMP22]] = phi i1 [ false, [[BB16]] ], [ [[MY_TMP14]], [[BB13]] ] +; IR-NEXT: br label [[BB9]] +; IR: bb23: +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP3]]) +; IR-NEXT: ret void +; bb: - %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1 - %tmp1 = getelementptr inbounds i64, i64 addrspace(3)* %arg, i32 
%tmp - %tmp2 = load volatile i64, i64 addrspace(3)* %tmp1 + %my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1 + %my.tmp1 = getelementptr inbounds i64, i64 addrspace(3)* %arg, i32 %my.tmp + %my.tmp2 = load volatile i64, i64 addrspace(3)* %my.tmp1 br label %bb5 bb3: ; preds = %bb9 @@ -83,9 +100,9 @@ bb4: ; preds = %bb3 br label %bb10 bb5: ; preds = %bb10, %bb - %tmp6 = phi i32 [ 0, %bb ], [ %tmp11, %bb10 ] - %tmp7 = icmp eq i32 %tmp6, 1 - br i1 %tmp7, label %bb8, label %bb10 + %my.tmp6 = phi i32 [ 0, %bb ], [ %my.tmp11, %bb10 ] + %my.tmp7 = icmp eq i32 %my.tmp6, 1 + br i1 %my.tmp7, label %bb8, label %bb10 bb8: ; preds = %bb5 br label %bb13 @@ -94,24 +111,24 @@ bb9: ; preds = %bb20, %bb9 br i1 false, label %bb3, label %bb9 bb10: ; preds = %bb5, %bb4 - %tmp11 = phi i32 [ %tmp21, %bb4 ], [ undef, %bb5 ] - %tmp12 = phi i1 [ %tmp22, %bb4 ], [ true, %bb5 ] - br i1 %tmp12, label %bb23, label %bb5 + %my.tmp11 = phi i32 [ %my.tmp21, %bb4 ], [ undef, %bb5 ] + %my.tmp12 = phi i1 [ %my.tmp22, %bb4 ], [ true, %bb5 ] + br i1 %my.tmp12, label %bb23, label %bb5 bb13: ; preds = %bb8, %bb3 - %tmp14 = phi i1 [ %tmp22, %bb3 ], [ true, %bb8 ] - %tmp15 = bitcast i64 %tmp2 to <2 x i32> - br i1 %tmp14, label %bb16, label %bb20 + %my.tmp14 = phi i1 [ %my.tmp22, %bb3 ], [ true, %bb8 ] + %my.tmp15 = bitcast i64 %my.tmp2 to <2 x i32> + br i1 %my.tmp14, label %bb16, label %bb20 bb16: ; preds = %bb13 - %tmp17 = extractelement <2 x i32> %tmp15, i64 1 - %tmp18 = getelementptr inbounds i32, i32 addrspace(3)* undef, i32 %tmp17 - %tmp19 = load volatile i32, i32 addrspace(3)* %tmp18 + %my.tmp17 = extractelement <2 x i32> %my.tmp15, i64 1 + %my.tmp18 = getelementptr inbounds i32, i32 addrspace(3)* undef, i32 %my.tmp17 + %my.tmp19 = load volatile i32, i32 addrspace(3)* %my.tmp18 br label %bb20 bb20: ; preds = %bb16, %bb13 - %tmp21 = phi i32 [ %tmp19, %bb16 ], [ 0, %bb13 ] - %tmp22 = phi i1 [ false, %bb16 ], [ %tmp14, %bb13 ] + %my.tmp21 = phi i32 [ %my.tmp19, %bb16 ], [ 0, %bb13 ] + %my.tmp22 = phi 
i1 [ false, %bb16 ], [ %my.tmp14, %bb13 ] br label %bb9 bb23: ; preds = %bb10 @@ -119,97 +136,146 @@ bb23: ; preds = %bb10 } ; Earlier version of above, before a run of the structurizer. -; IR-LABEL: @nested_loop_conditions( - -; IR: Flow3: -; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %21) -; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %14) -; IR-NEXT: %1 = extractvalue { i1, i64 } %0, 0 -; IR-NEXT: %2 = extractvalue { i1, i64 } %0, 1 -; IR-NEXT: br i1 %1, label %bb4.bb13_crit_edge, label %Flow4 - -; IR: Flow4: -; IR-NEXT: %3 = phi i1 [ true, %bb4.bb13_crit_edge ], [ false, %Flow3 ] -; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %2) -; IR-NEXT: br label %Flow - -; IR: Flow: -; IR-NEXT: %4 = phi i1 [ %3, %Flow4 ], [ true, %bb ] -; IR-NEXT: %5 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %4) -; IR-NEXT: %6 = extractvalue { i1, i64 } %5, 0 -; IR-NEXT: %7 = extractvalue { i1, i64 } %5, 1 -; IR-NEXT: br i1 %6, label %bb13, label %bb31 - -; IR: bb14: -; IR: %tmp15 = icmp eq i32 %tmp1037, 1 -; IR-NEXT: %8 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %tmp15) - -; IR: Flow1: -; IR-NEXT: %11 = phi <4 x i32> [ %tmp9, %bb21 ], [ undef, %bb14 ] -; IR-NEXT: %12 = phi i32 [ %tmp10, %bb21 ], [ undef, %bb14 ] -; IR-NEXT: %13 = phi i1 [ %18, %bb21 ], [ true, %bb14 ] -; IR-NEXT: %14 = phi i1 [ %18, %bb21 ], [ false, %bb14 ] -; IR-NEXT: %15 = phi i1 [ false, %bb21 ], [ true, %bb14 ] -; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %10) -; IR-NEXT: %16 = call i64 @llvm.amdgcn.if.break.i64(i1 %13, i64 %phi.broken) -; IR-NEXT: %17 = call i1 @llvm.amdgcn.loop.i64(i64 %16) -; IR-NEXT: br i1 %17, label %Flow2, label %bb14 - -; IR: bb21: -; IR: %tmp12 = icmp slt i32 %tmp11, 9 -; IR-NEXT: %18 = xor i1 %tmp12, true -; IR-NEXT: br label %Flow1 - -; IR: Flow2: -; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %16) -; IR-NEXT: %19 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %15) -; IR-NEXT: %20 = extractvalue { i1, i64 } %19, 0 -; IR-NEXT: %21 = extractvalue { i1, i64 } %19, 1 -; 
IR-NEXT: br i1 %20, label %bb31.loopexit, label %Flow3 -; IR: bb31: -; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %7) -; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef -; IR-NEXT: ret void - - -; GCN-LABEL: {{^}}nested_loop_conditions: - -; GCN: v_cmp_lt_i32_e32 vcc, 8, v -; GCN: s_and_b64 vcc, exec, vcc -; GCN: s_cbranch_vccnz [[BB31:BB[0-9]+_[0-9]+]] - -; GCN: [[BB14:BB[0-9]+_[0-9]+]]: ; %bb14 -; GCN: v_cmp_ne_u32_e32 vcc, 1, v -; GCN-NEXT: s_and_b64 vcc, exec, vcc -; GCN-NEXT: s_cbranch_vccnz [[BB31]] - -; GCN: [[BB18:BB[0-9]+_[0-9]+]]: ; %bb18 -; GCN: buffer_load_dword -; GCN: v_cmp_lt_i32_e32 vcc, 8, v -; GCN-NEXT: s_and_b64 vcc, exec, vcc -; GCN-NEXT: s_cbranch_vccnz [[BB18]] - -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: v_cmp_gt_i32_e32 vcc, 9 -; GCN-NEXT: s_and_b64 vcc, exec, vcc -; GCN-NEXT: s_cbranch_vccnz [[BB14]] - -; GCN: [[BB31]]: -; GCN: buffer_store_dword -; GCN: s_endpgm define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %arg) #0 { +; GCN-LABEL: nested_loop_conditions: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 8, v0 +; GCN-NEXT: s_and_b64 vcc, exec, vcc +; GCN-NEXT: s_cbranch_vccnz BB1_5 +; GCN-NEXT: ; %bb.1: ; %bb14.lr.ph +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; GCN-NEXT: BB1_2: ; %bb14 +; GCN-NEXT: ; =>This Loop Header: Depth=1 +; GCN-NEXT: ; Child Loop BB1_3 Depth 2 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 +; GCN-NEXT: s_and_b64 vcc, exec, vcc +; GCN-NEXT: s_cbranch_vccnz BB1_5 +; GCN-NEXT: BB1_3: ; %bb18 +; GCN-NEXT: ; Parent Loop BB1_2 Depth=1 +; GCN-NEXT: ; => This Inner Loop Header: Depth=2 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 8, v0 +; GCN-NEXT: s_and_b64 vcc, exec, vcc +; GCN-NEXT: s_cbranch_vccnz BB1_3 +; 
GCN-NEXT: ; %bb.4: ; %bb21 +; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 9, v1 +; GCN-NEXT: s_and_b64 vcc, exec, vcc +; GCN-NEXT: s_cbranch_vccnz BB1_2 +; GCN-NEXT: BB1_5: ; %bb31 +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm +; IR-LABEL: @nested_loop_conditions( +; IR-NEXT: bb: +; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #4 +; IR-NEXT: [[MY_TMP1:%.*]] = zext i32 [[MY_TMP]] to i64 +; IR-NEXT: [[MY_TMP2:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[ARG:%.*]], i64 [[MY_TMP1]] +; IR-NEXT: [[MY_TMP3:%.*]] = load i64, i64 addrspace(1)* [[MY_TMP2]], align 16 +; IR-NEXT: [[MY_TMP932:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* undef, align 16 +; IR-NEXT: [[MY_TMP1033:%.*]] = extractelement <4 x i32> [[MY_TMP932]], i64 0 +; IR-NEXT: [[MY_TMP1134:%.*]] = load volatile i32, i32 addrspace(1)* undef +; IR-NEXT: [[MY_TMP1235:%.*]] = icmp slt i32 [[MY_TMP1134]], 9 +; IR-NEXT: br i1 [[MY_TMP1235]], label [[BB14_LR_PH:%.*]], label [[FLOW:%.*]] +; IR: bb14.lr.ph: +; IR-NEXT: br label [[BB14:%.*]] +; IR: Flow3: +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP21:%.*]]) +; IR-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP14:%.*]]) +; IR-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0 +; IR-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1 +; IR-NEXT: br i1 [[TMP1]], label [[BB4_BB13_CRIT_EDGE:%.*]], label [[FLOW4:%.*]] +; IR: bb4.bb13_crit_edge: +; IR-NEXT: br label [[FLOW4]] +; IR: Flow4: +; IR-NEXT: [[TMP3:%.*]] = phi i1 [ true, [[BB4_BB13_CRIT_EDGE]] ], [ false, [[FLOW3:%.*]] ] +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]]) +; IR-NEXT: br label [[FLOW]] +; IR: bb13: +; IR-NEXT: br label [[BB31:%.*]] +; IR: Flow: +; IR-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP3]], 
[[FLOW4]] ], [ true, [[BB:%.*]] ] +; IR-NEXT: [[TMP5:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP4]]) +; IR-NEXT: [[TMP6:%.*]] = extractvalue { i1, i64 } [[TMP5]], 0 +; IR-NEXT: [[TMP7:%.*]] = extractvalue { i1, i64 } [[TMP5]], 1 +; IR-NEXT: br i1 [[TMP6]], label [[BB13:%.*]], label [[BB31]] +; IR: bb14: +; IR-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP16:%.*]], [[FLOW1:%.*]] ], [ 0, [[BB14_LR_PH]] ] +; IR-NEXT: [[MY_TMP1037:%.*]] = phi i32 [ [[MY_TMP1033]], [[BB14_LR_PH]] ], [ [[TMP12:%.*]], [[FLOW1]] ] +; IR-NEXT: [[MY_TMP936:%.*]] = phi <4 x i32> [ [[MY_TMP932]], [[BB14_LR_PH]] ], [ [[TMP11:%.*]], [[FLOW1]] ] +; IR-NEXT: [[MY_TMP15:%.*]] = icmp eq i32 [[MY_TMP1037]], 1 +; IR-NEXT: [[TMP8:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[MY_TMP15]]) +; IR-NEXT: [[TMP9:%.*]] = extractvalue { i1, i64 } [[TMP8]], 0 +; IR-NEXT: [[TMP10:%.*]] = extractvalue { i1, i64 } [[TMP8]], 1 +; IR-NEXT: br i1 [[TMP9]], label [[BB16:%.*]], label [[FLOW1]] +; IR: bb16: +; IR-NEXT: [[MY_TMP17:%.*]] = bitcast i64 [[MY_TMP3]] to <2 x i32> +; IR-NEXT: br label [[BB18:%.*]] +; IR: Flow1: +; IR-NEXT: [[TMP11]] = phi <4 x i32> [ [[MY_TMP9:%.*]], [[BB21:%.*]] ], [ undef, [[BB14]] ] +; IR-NEXT: [[TMP12]] = phi i32 [ [[MY_TMP10:%.*]], [[BB21]] ], [ undef, [[BB14]] ] +; IR-NEXT: [[TMP13:%.*]] = phi i1 [ [[TMP18:%.*]], [[BB21]] ], [ true, [[BB14]] ] +; IR-NEXT: [[TMP14]] = phi i1 [ [[TMP18]], [[BB21]] ], [ false, [[BB14]] ] +; IR-NEXT: [[TMP15:%.*]] = phi i1 [ false, [[BB21]] ], [ true, [[BB14]] ] +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP10]]) +; IR-NEXT: [[TMP16]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP13]], i64 [[PHI_BROKEN]]) +; IR-NEXT: [[TMP17:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP16]]) +; IR-NEXT: br i1 [[TMP17]], label [[FLOW2:%.*]], label [[BB14]] +; IR: bb18: +; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(1)* undef +; IR-NEXT: [[MY_TMP20:%.*]] = icmp slt i32 [[MY_TMP19]], 9 +; IR-NEXT: br i1 [[MY_TMP20]], label [[BB21]], label 
[[BB18]] +; IR: bb21: +; IR-NEXT: [[MY_TMP22:%.*]] = extractelement <2 x i32> [[MY_TMP17]], i64 1 +; IR-NEXT: [[MY_TMP23:%.*]] = lshr i32 [[MY_TMP22]], 16 +; IR-NEXT: [[MY_TMP24:%.*]] = select i1 undef, i32 undef, i32 [[MY_TMP23]] +; IR-NEXT: [[MY_TMP25:%.*]] = uitofp i32 [[MY_TMP24]] to float +; IR-NEXT: [[MY_TMP26:%.*]] = fmul float [[MY_TMP25]], 0x3EF0001000000000 +; IR-NEXT: [[MY_TMP27:%.*]] = fsub float [[MY_TMP26]], undef +; IR-NEXT: [[MY_TMP28:%.*]] = fcmp olt float [[MY_TMP27]], 5.000000e-01 +; IR-NEXT: [[MY_TMP29:%.*]] = select i1 [[MY_TMP28]], i64 1, i64 2 +; IR-NEXT: [[MY_TMP30:%.*]] = extractelement <4 x i32> [[MY_TMP936]], i64 [[MY_TMP29]] +; IR-NEXT: [[MY_TMP7:%.*]] = zext i32 [[MY_TMP30]] to i64 +; IR-NEXT: [[MY_TMP8:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* undef, i64 [[MY_TMP7]] +; IR-NEXT: [[MY_TMP9]] = load <4 x i32>, <4 x i32> addrspace(1)* [[MY_TMP8]], align 16 +; IR-NEXT: [[MY_TMP10]] = extractelement <4 x i32> [[MY_TMP9]], i64 0 +; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, i32 addrspace(1)* undef +; IR-NEXT: [[MY_TMP12:%.*]] = icmp slt i32 [[MY_TMP11]], 9 +; IR-NEXT: [[TMP18]] = xor i1 [[MY_TMP12]], true +; IR-NEXT: br label [[FLOW1]] +; IR: Flow2: +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP16]]) +; IR-NEXT: [[TMP19:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]]) +; IR-NEXT: [[TMP20:%.*]] = extractvalue { i1, i64 } [[TMP19]], 0 +; IR-NEXT: [[TMP21]] = extractvalue { i1, i64 } [[TMP19]], 1 +; IR-NEXT: br i1 [[TMP20]], label [[BB31_LOOPEXIT:%.*]], label [[FLOW3]] +; IR: bb31.loopexit: +; IR-NEXT: br label [[FLOW3]] +; IR: bb31: +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]]) +; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef +; IR-NEXT: ret void +; bb: - %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1 - %tmp1 = zext i32 %tmp to i64 - %tmp2 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i64 %tmp1 - %tmp3 = load i64, i64 addrspace(1)* %tmp2, align 16 - %tmp932 = 
load <4 x i32>, <4 x i32> addrspace(1)* undef, align 16 - %tmp1033 = extractelement <4 x i32> %tmp932, i64 0 - %tmp1134 = load volatile i32, i32 addrspace(1)* undef - %tmp1235 = icmp slt i32 %tmp1134, 9 - br i1 %tmp1235, label %bb14.lr.ph, label %bb13 + %my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1 + %my.tmp1 = zext i32 %my.tmp to i64 + %my.tmp2 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i64 %my.tmp1 + %my.tmp3 = load i64, i64 addrspace(1)* %my.tmp2, align 16 + %my.tmp932 = load <4 x i32>, <4 x i32> addrspace(1)* undef, align 16 + %my.tmp1033 = extractelement <4 x i32> %my.tmp932, i64 0 + %my.tmp1134 = load volatile i32, i32 addrspace(1)* undef + %my.tmp1235 = icmp slt i32 %my.tmp1134, 9 + br i1 %my.tmp1235, label %bb14.lr.ph, label %bb13 bb14.lr.ph: ; preds = %bb br label %bb14 @@ -221,37 +287,37 @@ bb13: ; preds = %bb4.bb13_crit_edge, br label %bb31 bb14: ; preds = %bb21, %bb14.lr.ph - %tmp1037 = phi i32 [ %tmp1033, %bb14.lr.ph ], [ %tmp10, %bb21 ] - %tmp936 = phi <4 x i32> [ %tmp932, %bb14.lr.ph ], [ %tmp9, %bb21 ] - %tmp15 = icmp eq i32 %tmp1037, 1 - br i1 %tmp15, label %bb16, label %bb31.loopexit + %my.tmp1037 = phi i32 [ %my.tmp1033, %bb14.lr.ph ], [ %my.tmp10, %bb21 ] + %my.tmp936 = phi <4 x i32> [ %my.tmp932, %bb14.lr.ph ], [ %my.tmp9, %bb21 ] + %my.tmp15 = icmp eq i32 %my.tmp1037, 1 + br i1 %my.tmp15, label %bb16, label %bb31.loopexit bb16: ; preds = %bb14 - %tmp17 = bitcast i64 %tmp3 to <2 x i32> + %my.tmp17 = bitcast i64 %my.tmp3 to <2 x i32> br label %bb18 bb18: ; preds = %bb18, %bb16 - %tmp19 = load volatile i32, i32 addrspace(1)* undef - %tmp20 = icmp slt i32 %tmp19, 9 - br i1 %tmp20, label %bb21, label %bb18 + %my.tmp19 = load volatile i32, i32 addrspace(1)* undef + %my.tmp20 = icmp slt i32 %my.tmp19, 9 + br i1 %my.tmp20, label %bb21, label %bb18 bb21: ; preds = %bb18 - %tmp22 = extractelement <2 x i32> %tmp17, i64 1 - %tmp23 = lshr i32 %tmp22, 16 - %tmp24 = select i1 undef, i32 undef, i32 %tmp23 - %tmp25 = uitofp i32 %tmp24 to 
float - %tmp26 = fmul float %tmp25, 0x3EF0001000000000 - %tmp27 = fsub float %tmp26, undef - %tmp28 = fcmp olt float %tmp27, 5.000000e-01 - %tmp29 = select i1 %tmp28, i64 1, i64 2 - %tmp30 = extractelement <4 x i32> %tmp936, i64 %tmp29 - %tmp7 = zext i32 %tmp30 to i64 - %tmp8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* undef, i64 %tmp7 - %tmp9 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp8, align 16 - %tmp10 = extractelement <4 x i32> %tmp9, i64 0 - %tmp11 = load volatile i32, i32 addrspace(1)* undef - %tmp12 = icmp slt i32 %tmp11, 9 - br i1 %tmp12, label %bb14, label %bb4.bb13_crit_edge + %my.tmp22 = extractelement <2 x i32> %my.tmp17, i64 1 + %my.tmp23 = lshr i32 %my.tmp22, 16 + %my.tmp24 = select i1 undef, i32 undef, i32 %my.tmp23 + %my.tmp25 = uitofp i32 %my.tmp24 to float + %my.tmp26 = fmul float %my.tmp25, 0x3EF0001000000000 + %my.tmp27 = fsub float %my.tmp26, undef + %my.tmp28 = fcmp olt float %my.tmp27, 5.000000e-01 + %my.tmp29 = select i1 %my.tmp28, i64 1, i64 2 + %my.tmp30 = extractelement <4 x i32> %my.tmp936, i64 %my.tmp29 + %my.tmp7 = zext i32 %my.tmp30 to i64 + %my.tmp8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* undef, i64 %my.tmp7 + %my.tmp9 = load <4 x i32>, <4 x i32> addrspace(1)* %my.tmp8, align 16 + %my.tmp10 = extractelement <4 x i32> %my.tmp9, i64 0 + %my.tmp11 = load volatile i32, i32 addrspace(1)* undef + %my.tmp12 = icmp slt i32 %my.tmp11, 9 + br i1 %my.tmp12, label %bb14, label %bb4.bb13_crit_edge bb31.loopexit: ; preds = %bb14 br label %bb31 diff --git a/llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir b/llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir index b034cae9926082..c77d6e0eb1be12 100644 --- a/llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir +++ b/llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir @@ -5,7 +5,7 @@ # CHECK-LABEL: bb.0: # CHECK: renamable $sgpr1 = COPY renamable $sgpr2 # CHECK-LABEL: bb.1: -# CHECK: liveins: $sgpr0_sgpr1:0x00000003 +# CHECK: liveins: $sgpr0_sgpr1:0x0000000000000003 # 
CHECK: renamable $vgpr1_vgpr2 = COPY renamable $sgpr0_sgpr1 --- diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll index 23bb18e738f54b..faf6ca4cbcb288 100644 --- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll @@ -1,16 +1,55 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s - -; FUNC-LABEL: {{^}}break_inserted_outside_of_loop: - -; SI: [[LOOP_LABEL:[A-Z0-9]+]]: -; Lowered break instructin: -; SI: s_or_b64 -; Lowered Loop instruction: -; SI: s_andn2_b64 -; s_cbranch_execnz [[LOOP_LABEL]] -; SI: s_endpgm +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=FLAT %s + define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) { +; SI-LABEL: break_inserted_outside_of_loop: +; SI: ; %bb.0: ; %main_body +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-NEXT: s_load_dword s0, s[0:1], 0xb +; SI-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_and_b32_e32 v0, s0, v0 +; SI-NEXT: v_and_b32_e32 v0, 1, v0 +; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SI-NEXT: s_mov_b64 s[0:1], 0 +; SI-NEXT: BB0_1: ; %ENDIF +; SI-NEXT: ; =>This Inner Loop Header: Depth=1 +; SI-NEXT: s_and_b64 s[2:3], exec, vcc +; SI-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; SI-NEXT: s_andn2_b64 exec, exec, s[0:1] +; SI-NEXT: s_cbranch_execnz BB0_1 +; SI-NEXT: ; %bb.2: ; %ENDLOOP +; SI-NEXT: s_or_b64 exec, exec, s[0:1] +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: v_mov_b32_e32 v0, 
0 +; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; FLAT-LABEL: break_inserted_outside_of_loop: +; FLAT: ; %bb.0: ; %main_body +; FLAT-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 +; FLAT-NEXT: s_load_dword s0, s[0:1], 0x2c +; FLAT-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 +; FLAT-NEXT: s_waitcnt lgkmcnt(0) +; FLAT-NEXT: v_and_b32_e32 v0, s0, v0 +; FLAT-NEXT: v_and_b32_e32 v0, 1, v0 +; FLAT-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; FLAT-NEXT: s_mov_b64 s[0:1], 0 +; FLAT-NEXT: BB0_1: ; %ENDIF +; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1 +; FLAT-NEXT: s_and_b64 s[2:3], exec, vcc +; FLAT-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; FLAT-NEXT: s_andn2_b64 exec, exec, s[0:1] +; FLAT-NEXT: s_cbranch_execnz BB0_1 +; FLAT-NEXT: ; %bb.2: ; %ENDLOOP +; FLAT-NEXT: s_or_b64 exec, exec, s[0:1] +; FLAT-NEXT: s_mov_b32 s7, 0xf000 +; FLAT-NEXT: s_mov_b32 s6, -1 +; FLAT-NEXT: v_mov_b32_e32 v0, 0 +; FLAT-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; FLAT-NEXT: s_endpgm main_body: %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %0 = and i32 %a, %tid @@ -25,25 +64,54 @@ ENDIF: br i1 %1, label %ENDLOOP, label %ENDIF } - -; FUNC-LABEL: {{^}}phi_cond_outside_loop: - -; SI: s_mov_b64 [[LEFT:s\[[0-9]+:[0-9]+\]]], 0 -; SI: s_mov_b64 [[PHI:s\[[0-9]+:[0-9]+\]]], 0 - -; SI: ; %else -; SI: v_cmp_eq_u32_e64 [[TMP:s\[[0-9]+:[0-9]+\]]], - -; SI: ; %endif - -; SI: [[LOOP_LABEL:BB[0-9]+_[0-9]+]]: ; %loop -; SI: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[PHI]] -; SI: s_or_b64 [[LEFT]], [[TMP1]], [[LEFT]] -; SI: s_andn2_b64 exec, exec, [[LEFT]] -; SI: s_cbranch_execnz [[LOOP_LABEL]] -; SI: s_endpgm - define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) { +; SI-LABEL: phi_cond_outside_loop: +; SI: ; %bb.0: ; %entry +; SI-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; SI-NEXT: s_mov_b64 s[2:3], 0 +; SI-NEXT: s_mov_b64 s[4:5], 0 +; SI-NEXT: s_and_saveexec_b64 s[6:7], vcc +; SI-NEXT: s_cbranch_execz BB1_2 +; SI-NEXT: ; %bb.1: ; %else +; 
SI-NEXT: s_load_dword s0, s[0:1], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], s0, 0 +; SI-NEXT: s_and_b64 s[4:5], s[0:1], exec +; SI-NEXT: BB1_2: ; %endif +; SI-NEXT: s_or_b64 exec, exec, s[6:7] +; SI-NEXT: BB1_3: ; %loop +; SI-NEXT: ; =>This Inner Loop Header: Depth=1 +; SI-NEXT: s_and_b64 s[0:1], exec, s[4:5] +; SI-NEXT: s_or_b64 s[2:3], s[0:1], s[2:3] +; SI-NEXT: s_andn2_b64 exec, exec, s[2:3] +; SI-NEXT: s_cbranch_execnz BB1_3 +; SI-NEXT: ; %bb.4: ; %exit +; SI-NEXT: s_endpgm +; +; FLAT-LABEL: phi_cond_outside_loop: +; FLAT: ; %bb.0: ; %entry +; FLAT-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 +; FLAT-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; FLAT-NEXT: s_mov_b64 s[2:3], 0 +; FLAT-NEXT: s_mov_b64 s[4:5], 0 +; FLAT-NEXT: s_and_saveexec_b64 s[6:7], vcc +; FLAT-NEXT: s_cbranch_execz BB1_2 +; FLAT-NEXT: ; %bb.1: ; %else +; FLAT-NEXT: s_load_dword s0, s[0:1], 0x24 +; FLAT-NEXT: s_waitcnt lgkmcnt(0) +; FLAT-NEXT: v_cmp_eq_u32_e64 s[0:1], s0, 0 +; FLAT-NEXT: s_and_b64 s[4:5], s[0:1], exec +; FLAT-NEXT: BB1_2: ; %endif +; FLAT-NEXT: s_or_b64 exec, exec, s[6:7] +; FLAT-NEXT: BB1_3: ; %loop +; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1 +; FLAT-NEXT: s_and_b64 s[0:1], exec, s[4:5] +; FLAT-NEXT: s_or_b64 s[2:3], s[0:1], s[2:3] +; FLAT-NEXT: s_andn2_b64 exec, exec, s[2:3] +; FLAT-NEXT: s_cbranch_execnz BB1_3 +; FLAT-NEXT: ; %bb.4: ; %exit +; FLAT-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %0 = icmp eq i32 %tid , 0 @@ -67,11 +135,12 @@ exit: ret void } -; FIXME: should emit s_endpgm -; CHECK-LABEL: {{^}}switch_unreachable: -; CHECK-NOT: s_endpgm -; CHECK: .Lfunc_end2 define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind { +; SI-LABEL: switch_unreachable: +; SI: ; %bb.0: ; %centry +; +; FLAT-LABEL: switch_unreachable: +; FLAT: ; %bb.0: ; %centry centry: switch i32 %x, label %sw.default [ i32 0, label %sw.bb @@ -90,29 +159,99 @@ sw.epilog: declare float 
@llvm.fabs.f32(float) nounwind readnone -; This broke the old AMDIL cfg structurizer -; FUNC-LABEL: {{^}}loop_land_info_assert: -; SI: v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}} -; SI: s_and_b64 [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]] - -; SI: [[WHILELOOP:BB[0-9]+_[0-9]+]]: ; %while.cond -; SI: s_cbranch_vccz [[FOR_COND_PH:BB[0-9]+_[0-9]+]] - -; SI: [[CONVEX_EXIT:BB[0-9_]+]] -; SI: s_mov_b64 vcc, -; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]] - -; SI: s_cbranch_vccnz [[WHILELOOP]] - -; SI: ; %if.else -; SI: buffer_store_dword - -; SI: [[FOR_COND_PH]]: ; %for.cond.preheader -; SI: s_cbranch_vccz [[ENDPGM]] - -; SI: [[ENDPGM]]: -; SI-NEXT: s_endpgm define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind { +; SI-LABEL: loop_land_info_assert: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0 +; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 +; SI-NEXT: s_load_dword s4, s[0:1], 0xc +; SI-NEXT: s_brev_b32 s5, 44 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_cmp_gt_i32_e64 s[0:1], s2, 0 +; SI-NEXT: v_cmp_lt_i32_e64 s[2:3], s3, 4 +; SI-NEXT: s_or_b64 s[8:9], s[0:1], s[2:3] +; SI-NEXT: s_and_b64 s[0:1], exec, s[2:3] +; SI-NEXT: s_and_b64 s[2:3], exec, s[8:9] +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cmp_lt_f32_e64 s[8:9], |v0|, s5 +; SI-NEXT: v_mov_b32_e32 v0, 3 +; SI-NEXT: BB3_1: ; %while.cond +; SI-NEXT: ; =>This Inner Loop Header: Depth=1 +; SI-NEXT: s_mov_b64 vcc, s[0:1] +; SI-NEXT: s_cbranch_vccz BB3_5 +; SI-NEXT: ; %bb.2: ; %convex.exit +; SI-NEXT: ; in Loop: Header=BB3_1 Depth=1 +; SI-NEXT: s_mov_b64 vcc, s[2:3] +; SI-NEXT: s_cbranch_vccnz BB3_8 +; SI-NEXT: ; %bb.3: ; %if.end +; SI-NEXT: ; in Loop: Header=BB3_1 Depth=1 +; SI-NEXT: s_andn2_b64 vcc, exec, s[8:9] +; SI-NEXT: s_cbranch_vccnz BB3_1 +; SI-NEXT: ; %bb.4: ; %if.else +; SI-NEXT: ; in Loop: Header=BB3_1 Depth=1 +; SI-NEXT: 
buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: s_branch BB3_1 +; SI-NEXT: BB3_5: ; %for.cond.preheader +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, 0x3e8 +; SI-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 +; SI-NEXT: s_and_b64 vcc, exec, vcc +; SI-NEXT: s_cbranch_vccz BB3_8 +; SI-NEXT: ; %bb.6: ; %for.body +; SI-NEXT: s_and_b64 vcc, exec, -1 +; SI-NEXT: BB3_7: ; %self.loop +; SI-NEXT: ; =>This Inner Loop Header: Depth=1 +; SI-NEXT: s_cbranch_vccnz BB3_7 +; SI-NEXT: BB3_8: ; %DummyReturnBlock +; SI-NEXT: s_endpgm +; +; FLAT-LABEL: loop_land_info_assert: +; FLAT: ; %bb.0: ; %entry +; FLAT-NEXT: s_mov_b32 s7, 0xf000 +; FLAT-NEXT: s_mov_b32 s6, -1 +; FLAT-NEXT: buffer_load_dword v0, off, s[4:7], 0 +; FLAT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; FLAT-NEXT: s_load_dword s4, s[0:1], 0x30 +; FLAT-NEXT: s_brev_b32 s5, 44 +; FLAT-NEXT: s_waitcnt lgkmcnt(0) +; FLAT-NEXT: v_cmp_gt_i32_e64 s[0:1], s2, 0 +; FLAT-NEXT: v_cmp_lt_i32_e64 s[2:3], s3, 4 +; FLAT-NEXT: s_or_b64 s[8:9], s[0:1], s[2:3] +; FLAT-NEXT: s_and_b64 s[0:1], exec, s[2:3] +; FLAT-NEXT: s_and_b64 s[2:3], exec, s[8:9] +; FLAT-NEXT: s_waitcnt vmcnt(0) +; FLAT-NEXT: v_cmp_lt_f32_e64 s[8:9], |v0|, s5 +; FLAT-NEXT: v_mov_b32_e32 v0, 3 +; FLAT-NEXT: BB3_1: ; %while.cond +; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1 +; FLAT-NEXT: s_mov_b64 vcc, s[0:1] +; FLAT-NEXT: s_cbranch_vccz BB3_5 +; FLAT-NEXT: ; %bb.2: ; %convex.exit +; FLAT-NEXT: ; in Loop: Header=BB3_1 Depth=1 +; FLAT-NEXT: s_mov_b64 vcc, s[2:3] +; FLAT-NEXT: s_cbranch_vccnz BB3_8 +; FLAT-NEXT: ; %bb.3: ; %if.end +; FLAT-NEXT: ; in Loop: Header=BB3_1 Depth=1 +; FLAT-NEXT: s_andn2_b64 vcc, exec, s[8:9] +; FLAT-NEXT: s_cbranch_vccnz BB3_1 +; FLAT-NEXT: ; %bb.4: ; %if.else +; FLAT-NEXT: ; in Loop: Header=BB3_1 Depth=1 +; FLAT-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; FLAT-NEXT: s_branch BB3_1 +; FLAT-NEXT: BB3_5: ; %for.cond.preheader +; FLAT-NEXT: v_mov_b32_e32 v0, 0x3e8 +; FLAT-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 +; FLAT-NEXT: s_and_b64 vcc, 
exec, vcc +; FLAT-NEXT: s_cbranch_vccz BB3_8 +; FLAT-NEXT: ; %bb.6: ; %for.body +; FLAT-NEXT: s_and_b64 vcc, exec, -1 +; FLAT-NEXT: BB3_7: ; %self.loop +; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1 +; FLAT-NEXT: s_cbranch_vccnz BB3_7 +; FLAT-NEXT: BB3_8: ; %DummyReturnBlock +; FLAT-NEXT: s_endpgm entry: %cmp = icmp sgt i32 %c0, 0 br label %while.cond.outer diff --git a/llvm/test/CodeGen/AMDGPU/switch-unreachable.ll b/llvm/test/CodeGen/AMDGPU/switch-unreachable.ll new file mode 100644 index 00000000000000..11d71f7fe2efaa --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/switch-unreachable.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s + +; This testcase was discovered in si-annotate-cf.ll, where none of the +; RUN lines was actually exercising it. See that files git log for its +; history. + +; FIXME: should emit s_endpgm +; CHECK-LABEL: {{^}}switch_unreachable: +; CHECK-NOT: s_endpgm +; CHECK: .Lfunc_end +define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind { +centry: + switch i32 %x, label %sw.default [ + i32 0, label %sw.bb + i32 60, label %sw.bb + ] + +sw.bb: + unreachable + +sw.default: + unreachable + +sw.epilog: + ret void +} diff --git a/llvm/test/CodeGen/AVR/PR37143.ll b/llvm/test/CodeGen/AVR/PR37143.ll index 72f4a2fd3722c3..c7cabd3cd0875d 100644 --- a/llvm/test/CodeGen/AVR/PR37143.ll +++ b/llvm/test/CodeGen/AVR/PR37143.ll @@ -1,4 +1,4 @@ -; RUN: llc -mattr=avr6,sram < %s -march=avr | FileCheck %s +; RUN: llc -mattr=avr6,sram < %s -march=avr -verify-machineinstrs | FileCheck %s ; CHECK: ld {{r[0-9]+}}, [[PTR:[XYZ]]] ; CHECK: ldd {{r[0-9]+}}, [[PTR]]+1 diff --git a/llvm/test/CodeGen/AVR/brind.ll b/llvm/test/CodeGen/AVR/brind.ll index ec8262e84a952d..4eea966062db79 100644 --- a/llvm/test/CodeGen/AVR/brind.ll +++ b/llvm/test/CodeGen/AVR/brind.ll @@ -1,4 +1,4 @@ -; RUN: llc -mattr=sram,eijmpcall < %s -march=avr | FileCheck %s +; RUN: llc 
-mattr=sram,eijmpcall < %s -march=avr -verify-machineinstrs | FileCheck %s @brind.k = private unnamed_addr constant [2 x i8*] [i8* blockaddress(@brind, %return), i8* blockaddress(@brind, %b)], align 1 diff --git a/llvm/test/CodeGen/AVR/load.ll b/llvm/test/CodeGen/AVR/load.ll index dbadacfd5e0de1..53748b3b100b92 100644 --- a/llvm/test/CodeGen/AVR/load.ll +++ b/llvm/test/CodeGen/AVR/load.ll @@ -1,4 +1,4 @@ -; RUN: llc -mattr=avr6,sram < %s -march=avr | FileCheck %s +; RUN: llc -mattr=avr6,sram < %s -march=avr -verify-machineinstrs | FileCheck %s define i8 @load8(i8* %x) { ; CHECK-LABEL: load8: diff --git a/llvm/test/CodeGen/AVR/pseudo/LDWRdPtr.mir b/llvm/test/CodeGen/AVR/pseudo/LDWRdPtr.mir index 5bd4bf2d431c8b..2343d0df49274a 100644 --- a/llvm/test/CodeGen/AVR/pseudo/LDWRdPtr.mir +++ b/llvm/test/CodeGen/AVR/pseudo/LDWRdPtr.mir @@ -17,7 +17,7 @@ body: | ; CHECK-LABEL: test_ldwrdptr - ; CHECK: $r0, $r31r30 = LDRdPtr + ; CHECK: $r0 = LDRdPtr $r31r30 ; CHECK-NEXT: $r1 = LDDRdPtrQ $r31r30, 1 $r1r0 = LDWRdPtr $r31r30 diff --git a/llvm/test/CodeGen/Hexagon/addrmode-align.ll b/llvm/test/CodeGen/Hexagon/addrmode-align.ll index 1a4df00d47cbdc..f39019a0b40ef5 100644 --- a/llvm/test/CodeGen/Hexagon/addrmode-align.ll +++ b/llvm/test/CodeGen/Hexagon/addrmode-align.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=hexagon < %s | FileCheck %s ; CHECK: [[REG0:(r[0-9]+)]] = add(r29 -; CHECK: [[REG1:(r[0-9]+)]] = add([[REG0]],#4) +; CHECK: [[REG1:(r[0-9]+)]] = add([[REG0]],#8) ; CHECK-DAG: memd([[REG1]]+#8) = ; CHECK-DAG: memd([[REG1]]+#0) = diff --git a/llvm/test/CodeGen/Hexagon/lsr-postinc-nested-loop.ll b/llvm/test/CodeGen/Hexagon/lsr-postinc-nested-loop.ll new file mode 100644 index 00000000000000..8fbf913a22cbb0 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/lsr-postinc-nested-loop.ll @@ -0,0 +1,50 @@ +; RUN: llc -O3 -march=hexagon < %s | FileCheck %s +; Test to ensure LSR does not optimize out addrec of the outerloop. 
+; This will help to generate post-increment instructions, otherwise +; it end up an as extra reg+reg add inside the loop. +; CHECK: loop0(.LBB0_[[LOOP:.]], +; CHECK: .LBB0_[[LOOP]]: +; CHECK: memuh{{.*}}++ +; CHECK: endloop + + +define dso_local signext i16 @foo(i16* nocapture readonly %filt, i16* nocapture readonly %inp, i32 %c1, i32 %c2) local_unnamed_addr { +entry: + %cmp28 = icmp sgt i32 %c1, 0 + %cmp221 = icmp sgt i32 %c2, 0 + %or.cond = and i1 %cmp28, %cmp221 + br i1 %or.cond, label %for.cond1.preheader.us, label %for.cond.cleanup + +for.cond1.preheader.us: ; preds = %entry, %for.cond1.for.cond.cleanup3_crit_edge.us + %filt.addr.032.us = phi i16* [ %scevgep, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %filt, %entry ] + %inp.addr.031.us = phi i16* [ %scevgep35, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %inp, %entry ] + %l.030.us = phi i32 [ %inc11.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %entry ] + %sum0.029.us = phi i16 [ %add8.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %entry ] + %scevgep = getelementptr i16, i16* %filt.addr.032.us, i32 %c2 + br label %for.body4.us + +for.body4.us: ; preds = %for.body4.us, %for.cond1.preheader.us + %z.025.us = phi i32 [ 0, %for.cond1.preheader.us ], [ %inc.us, %for.body4.us ] + %filt.addr.124.us = phi i16* [ %filt.addr.032.us, %for.cond1.preheader.us ], [ %incdec.ptr.us, %for.body4.us ] + %inp.addr.123.us = phi i16* [ %inp.addr.031.us, %for.cond1.preheader.us ], [ %incdec.ptr5.us, %for.body4.us ] + %sum0.122.us = phi i16 [ %sum0.029.us, %for.cond1.preheader.us ], [ %add8.us, %for.body4.us ] + %incdec.ptr.us = getelementptr inbounds i16, i16* %filt.addr.124.us, i32 1 + %0 = load i16, i16* %filt.addr.124.us, align 2 + %incdec.ptr5.us = getelementptr inbounds i16, i16* %inp.addr.123.us, i32 1 + %1 = load i16, i16* %inp.addr.123.us, align 2 + %add.us = add i16 %0, %sum0.122.us + %add8.us = add i16 %add.us, %1 + %inc.us = add nuw nsw i32 %z.025.us, 1 + %exitcond = icmp eq i32 %inc.us, %c2 + br i1 
%exitcond, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us + +for.cond1.for.cond.cleanup3_crit_edge.us: ; preds = %for.body4.us + %scevgep35 = getelementptr i16, i16* %inp.addr.031.us, i32 %c2 + %inc11.us = add nuw nsw i32 %l.030.us, 1 + %exitcond36 = icmp eq i32 %inc11.us, %c1 + br i1 %exitcond36, label %for.cond.cleanup, label %for.cond1.preheader.us + +for.cond.cleanup: ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %entry + %sum0.0.lcssa = phi i16 [ 0, %entry ], [ %add8.us, %for.cond1.for.cond.cleanup3_crit_edge.us ] + ret i16 %sum0.0.lcssa +} diff --git a/llvm/test/CodeGen/Hexagon/verify-liveness-at-def.mir b/llvm/test/CodeGen/Hexagon/verify-liveness-at-def.mir index fefe245140990b..d57325e5b27db8 100644 --- a/llvm/test/CodeGen/Hexagon/verify-liveness-at-def.mir +++ b/llvm/test/CodeGen/Hexagon/verify-liveness-at-def.mir @@ -40,21 +40,21 @@ body: | # CHECK-SUB: Bad machine code: Live range continues after dead def flag # CHECK_SUB-NEXT: function: test_fail # CHECK-SUB: v. register: %0 -# CHECK-SUB: lanemask: 00000002 +# CHECK-SUB: lanemask: 0000000000000002 # # CHECK-SUB-NOT: Bad machine code # # CHECK-SUB: Bad machine code: Live range continues after dead def flag # CHECK-SUB-NEXT: function: test_fail # CHECK-SUB: v. register: %1 -# CHECK-SUB: lanemask: 00000002 +# CHECK-SUB: lanemask: 0000000000000002 # # CHECK-SUB-NOT: Bad machine code # # CHECK-SUB: Bad machine code: Live range continues after dead def flag # CHECK-SUB-NEXT: function: test_fail # CHECK-SUB: v. 
register: %1 -# CHECK-SUB: lanemask: 00000001 +# CHECK-SUB: lanemask: 0000000000000001 # # CHECK-SUB: Bad machine code: Live range continues after dead def flag # CHECK-SUB-NEXT: function: test_fail diff --git a/llvm/test/CodeGen/MIR/Hexagon/parse-lane-masks.mir b/llvm/test/CodeGen/MIR/Hexagon/parse-lane-masks.mir index 1b6dc3b4c41bfa..915c354b5a0ff7 100644 --- a/llvm/test/CodeGen/MIR/Hexagon/parse-lane-masks.mir +++ b/llvm/test/CodeGen/MIR/Hexagon/parse-lane-masks.mir @@ -3,7 +3,7 @@ # CHECK-LABEL: name: foo # CHECK: bb.0: -# CHECK: liveins: $d0:0x00000002, $d1, $d2:0x00000010 +# CHECK: liveins: $d0:0x0000000000000002, $d1, $d2:0x0000000000000010 --- | define void @foo() { diff --git a/llvm/test/CodeGen/SystemZ/codegenprepare-form-OF-ops.ll b/llvm/test/CodeGen/SystemZ/codegenprepare-form-OF-ops.ll new file mode 100644 index 00000000000000..161f4bc2b7658d --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/codegenprepare-form-OF-ops.ll @@ -0,0 +1,54 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 | FileCheck %s +; +; Check that CodeGenPrepare transforms these functions to use +; uadd.with.overflow / usub.with.overflow intrinsics so that the compare +; instruction is eliminated. 
+ +define i32 @uaddo_32(i32 %arg) { +; CHECK-LABEL: uaddo_32: +; CHECK: alhsik %r0, %r2, -1 +; CHECK: locrnle %r2, %r0 +; CHECK: br %r14 + +bb: + %tmp10 = icmp ne i32 %arg, 0 + %tmp11 = add nsw i32 %arg, -1 + %tmp12 = select i1 %tmp10, i32 %tmp11, i32 %arg + ret i32 %tmp12 +} + +define i64 @uaddo_64(i64 %arg) { +; CHECK-LABEL: uaddo_64: +; CHECK: alghsik %r0, %r2, -1 +; CHECK: locgrnle %r2, %r0 +; CHECK: br %r14 +bb: + %tmp10 = icmp ne i64 %arg, 0 + %tmp11 = add nsw i64 %arg, -1 + %tmp12 = select i1 %tmp10, i64 %tmp11, i64 %arg + ret i64 %tmp12 +} + +define i32 @usubo_32(i32 %arg) { +; CHECK-LABEL: usubo_32: +; CHECK: alhsik %r0, %r2, -1 +; CHECK: locrle %r2, %r0 +; CHECK: br %r14 +bb: + %tmp10 = icmp eq i32 %arg, 0 + %tmp11 = sub nsw i32 %arg, 1 + %tmp12 = select i1 %tmp10, i32 %tmp11, i32 %arg + ret i32 %tmp12 +} + +define i64 @usubo_64(i64 %arg) { +; CHECK-LABEL: usubo_64: +; CHECK: alghsik %r0, %r2, -1 +; CHECK: locgrle %r2, %r0 +; CHECK: br %r14 +bb: + %tmp10 = icmp eq i64 %arg, 0 + %tmp11 = sub nsw i64 %arg, 1 + %tmp12 = select i1 %tmp10, i64 %tmp11, i64 %arg + ret i64 %tmp12 +} diff --git a/llvm/test/CodeGen/SystemZ/dag-combine-05.ll b/llvm/test/CodeGen/SystemZ/dag-combine-05.ll index 78b129fc2f731c..eb9fcc29692108 100644 --- a/llvm/test/CodeGen/SystemZ/dag-combine-05.ll +++ b/llvm/test/CodeGen/SystemZ/dag-combine-05.ll @@ -26,10 +26,13 @@ bb: %tmp = icmp ult i16 %arg0, 9616 %tmp1 = zext i1 %tmp to i32 %tmp2 = load i16, i16* %src - %tmp3 = add i16 %tmp2, -1 - %tmp4 = icmp ne i16 %tmp2, 0 - %tmp5 = zext i1 %tmp4 to i32 + %0 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %tmp2, i16 -1) + %math = extractvalue { i16, i1 } %0, 0 + %ov = extractvalue { i16, i1 } %0, 1 + %tmp5 = zext i1 %ov to i32 %tmp6 = add nuw nsw i32 %tmp5, %tmp1 store i32 %tmp6, i32* %dst ret void } + +declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) #1 diff --git a/llvm/test/CodeGen/SystemZ/frame-25.ll b/llvm/test/CodeGen/SystemZ/frame-25.ll new file mode 100644 index 
00000000000000..64c175bd4ecaa2 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/frame-25.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; +; Test that space is allocated for the incoming back chain also in cases +; where no GPRs are saved / restored. + +define void @fun0() #0 { +; CHECK-LABEL: fun0: +; CHECK: lgr %r1, %r15 +; CHECK-NEXT: aghi %r15, -24 +; CHECK-NEXT: stg %r1, 152(%r15) +; CHECK-NEXT: #APP +; CHECK-NEXT: stcke 160(%r15) +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: aghi %r15, 24 +; CHECK-NEXT: br %r14 + +entry: + %b = alloca [16 x i8], align 1 + %0 = getelementptr inbounds [16 x i8], [16 x i8]* %b, i64 0, i64 0 + call void asm "stcke $0", "=*Q"([16 x i8]* nonnull %b) #2 + ret void +} + +attributes #0 = { nounwind "packed-stack" "backchain" "use-soft-float"="true" } diff --git a/llvm/test/CodeGen/Thumb/remove-unneeded-push-pop.ll b/llvm/test/CodeGen/Thumb/remove-unneeded-push-pop.ll deleted file mode 100644 index 054be2ea858721..00000000000000 --- a/llvm/test/CodeGen/Thumb/remove-unneeded-push-pop.ll +++ /dev/null @@ -1,1052 +0,0 @@ -; RUN: llc -O0 -mtriple thumbv6m-arm-none-eabi < %s | FileCheck %s - -@a = external hidden global i32*, align 4 -@f = external hidden global i32, align 4 - -define hidden void @foo() { -entry: -; CHECK-NOT: push {lr} -; CHECK-NOT: pop {pc} - store i32 24654, i32* @f, align 4 - br label %if.end - -if.end: ; preds = %entry - %0 = load i32*, i32** @a, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %0, i32 2 - %1 = load i32, i32* %arrayidx1, align 4 - %tobool2 = icmp ne i32 %1, 0 - br i1 %tobool2, label %if.then3, label %if.end4 - -if.then3: ; preds = %if.end - store i32 17785, i32* @f, align 4 - br label %if.end4 - -if.end4: ; preds = %if.then3, %if.end - %2 = load i32*, i32** @a, align 4 - %arrayidx5 = getelementptr inbounds i32, i32* %2, i32 3 - %3 = load i32, i32* %arrayidx5, align 4 - %tobool6 = icmp ne i32 %3, 0 - br i1 %tobool6, label %if.then7, label %if.end8 - -if.then7: ; preds = 
%if.end4 - store i32 10342, i32* @f, align 4 - br label %if.end8 - -if.end8: ; preds = %if.then7, %if.end4 - %4 = load i32*, i32** @a, align 4 - %arrayidx9 = getelementptr inbounds i32, i32* %4, i32 4 - %5 = load i32, i32* %arrayidx9, align 4 - %tobool10 = icmp ne i32 %5, 0 - br i1 %tobool10, label %if.then11, label %if.end12 - -if.then11: ; preds = %if.end8 - store i32 29082, i32* @f, align 4 - br label %if.end12 - -if.end12: ; preds = %if.then11, %if.end8 - %6 = load i32*, i32** @a, align 4 - %arrayidx13 = getelementptr inbounds i32, i32* %6, i32 5 - %7 = load i32, i32* %arrayidx13, align 4 - %tobool14 = icmp ne i32 %7, 0 - br i1 %tobool14, label %if.then15, label %if.end16 - -if.then15: ; preds = %if.end12 - store i32 29893, i32* @f, align 4 - br label %if.end16 - -if.end16: ; preds = %if.then15, %if.end12 - %8 = load i32*, i32** @a, align 4 - %arrayidx17 = getelementptr inbounds i32, i32* %8, i32 6 - %9 = load i32, i32* %arrayidx17, align 4 - %tobool18 = icmp ne i32 %9, 0 - br i1 %tobool18, label %if.then19, label %if.end20 - -if.then19: ; preds = %if.end16 - store i32 19071, i32* @f, align 4 - br label %if.end20 - -if.end20: ; preds = %if.then19, %if.end16 - %10 = load i32*, i32** @a, align 4 - %arrayidx21 = getelementptr inbounds i32, i32* %10, i32 7 - %11 = load i32, i32* %arrayidx21, align 4 - %tobool22 = icmp ne i32 %11, 0 - br i1 %tobool22, label %if.then23, label %if.end24 - -if.then23: ; preds = %if.end20 - store i32 6154, i32* @f, align 4 - br label %if.end24 - -if.end24: ; preds = %if.then23, %if.end20 - %12 = load i32*, i32** @a, align 4 - %arrayidx25 = getelementptr inbounds i32, i32* %12, i32 8 - %13 = load i32, i32* %arrayidx25, align 4 - %tobool26 = icmp ne i32 %13, 0 - br i1 %tobool26, label %if.then27, label %if.end28 - -if.then27: ; preds = %if.end24 - store i32 30498, i32* @f, align 4 - br label %if.end28 - -if.end28: ; preds = %if.then27, %if.end24 - %14 = load i32*, i32** @a, align 4 - %arrayidx29 = getelementptr inbounds i32, i32* %14, i32 
9 - %15 = load i32, i32* %arrayidx29, align 4 - %tobool30 = icmp ne i32 %15, 0 - br i1 %tobool30, label %if.then31, label %if.end32 - -if.then31: ; preds = %if.end28 - store i32 16667, i32* @f, align 4 - br label %if.end32 - -if.end32: ; preds = %if.then31, %if.end28 - %16 = load i32*, i32** @a, align 4 - %arrayidx33 = getelementptr inbounds i32, i32* %16, i32 10 - %17 = load i32, i32* %arrayidx33, align 4 - %tobool34 = icmp ne i32 %17, 0 - br i1 %tobool34, label %if.then35, label %if.end36 - -if.then35: ; preds = %if.end32 - store i32 195, i32* @f, align 4 - br label %if.end36 - -if.end36: ; preds = %if.then35, %if.end32 - %18 = load i32*, i32** @a, align 4 - %arrayidx37 = getelementptr inbounds i32, i32* %18, i32 11 - %19 = load i32, i32* %arrayidx37, align 4 - %tobool38 = icmp ne i32 %19, 0 - br i1 %tobool38, label %if.then39, label %if.end40 - -if.then39: ; preds = %if.end36 - store i32 14665, i32* @f, align 4 - br label %if.end40 - -if.end40: ; preds = %if.then39, %if.end36 - %20 = load i32*, i32** @a, align 4 - %arrayidx41 = getelementptr inbounds i32, i32* %20, i32 12 - %21 = load i32, i32* %arrayidx41, align 4 - %tobool42 = icmp ne i32 %21, 0 - br i1 %tobool42, label %if.then43, label %if.end44 - -if.then43: ; preds = %if.end40 - store i32 19305, i32* @f, align 4 - br label %if.end44 - -if.end44: ; preds = %if.then43, %if.end40 - %22 = load i32*, i32** @a, align 4 - %arrayidx45 = getelementptr inbounds i32, i32* %22, i32 13 - %23 = load i32, i32* %arrayidx45, align 4 - %tobool46 = icmp ne i32 %23, 0 - br i1 %tobool46, label %if.then47, label %if.end48 - -if.then47: ; preds = %if.end44 - store i32 15133, i32* @f, align 4 - br label %if.end48 - -if.end48: ; preds = %if.then47, %if.end44 - %24 = load i32*, i32** @a, align 4 - %arrayidx49 = getelementptr inbounds i32, i32* %24, i32 14 - %25 = load i32, i32* %arrayidx49, align 4 - %tobool50 = icmp ne i32 %25, 0 - br i1 %tobool50, label %if.then51, label %if.end52 - -if.then51: ; preds = %if.end48 - store i32 
19173, i32* @f, align 4 - br label %if.end52 - -if.end52: ; preds = %if.then51, %if.end48 - br label %if.then55 - -if.then55: ; preds = %if.end52 - store i32 14025, i32* @f, align 4 - br label %if.end56 - -if.end56: ; preds = %if.then55 - %26 = load i32*, i32** @a, align 4 - %arrayidx57 = getelementptr inbounds i32, i32* %26, i32 16 - %27 = load i32, i32* %arrayidx57, align 4 - %tobool58 = icmp ne i32 %27, 0 - br i1 %tobool58, label %if.then59, label %if.end60 - -if.then59: ; preds = %if.end56 - store i32 8209, i32* @f, align 4 - br label %if.end60 - -if.end60: ; preds = %if.then59, %if.end56 - %28 = load i32*, i32** @a, align 4 - %arrayidx61 = getelementptr inbounds i32, i32* %28, i32 17 - %29 = load i32, i32* %arrayidx61, align 4 - %tobool62 = icmp ne i32 %29, 0 - br i1 %tobool62, label %if.then63, label %if.end64 - -if.then63: ; preds = %if.end60 - store i32 29621, i32* @f, align 4 - br label %if.end64 - -if.end64: ; preds = %if.then63, %if.end60 - %30 = load i32*, i32** @a, align 4 - %arrayidx65 = getelementptr inbounds i32, i32* %30, i32 18 - %31 = load i32, i32* %arrayidx65, align 4 - %tobool66 = icmp ne i32 %31, 0 - br i1 %tobool66, label %if.then67, label %if.end68 - -if.then67: ; preds = %if.end64 - store i32 14963, i32* @f, align 4 - br label %if.end68 - -if.end68: ; preds = %if.then67, %if.end64 - %32 = load i32*, i32** @a, align 4 - %arrayidx69 = getelementptr inbounds i32, i32* %32, i32 19 - %33 = load i32, i32* %arrayidx69, align 4 - %tobool70 = icmp ne i32 %33, 0 - br i1 %tobool70, label %if.then71, label %if.end72 - -if.then71: ; preds = %if.end68 - store i32 32282, i32* @f, align 4 - br label %if.end72 - -if.end72: ; preds = %if.then71, %if.end68 - %34 = load i32*, i32** @a, align 4 - %arrayidx73 = getelementptr inbounds i32, i32* %34, i32 20 - %35 = load i32, i32* %arrayidx73, align 4 - %tobool74 = icmp ne i32 %35, 0 - br i1 %tobool74, label %if.then75, label %if.end76 - -if.then75: ; preds = %if.end72 - store i32 3072, i32* @f, align 4 - br label 
%if.end76 - -if.end76: ; preds = %if.then75, %if.end72 - %36 = load i32*, i32** @a, align 4 - %arrayidx77 = getelementptr inbounds i32, i32* %36, i32 21 - %37 = load i32, i32* %arrayidx77, align 4 - %tobool78 = icmp ne i32 %37, 0 - br i1 %tobool78, label %if.then79, label %if.end80 - -if.then79: ; preds = %if.end76 - store i32 1992, i32* @f, align 4 - br label %if.end80 - -if.end80: ; preds = %if.then79, %if.end76 - %38 = load i32*, i32** @a, align 4 - %arrayidx81 = getelementptr inbounds i32, i32* %38, i32 22 - %39 = load i32, i32* %arrayidx81, align 4 - %tobool82 = icmp ne i32 %39, 0 - br i1 %tobool82, label %if.then83, label %if.end84 - -if.then83: ; preds = %if.end80 - store i32 9614, i32* @f, align 4 - br label %if.end84 - -if.end84: ; preds = %if.then83, %if.end80 - %40 = load i32*, i32** @a, align 4 - %arrayidx85 = getelementptr inbounds i32, i32* %40, i32 23 - %41 = load i32, i32* %arrayidx85, align 4 - %tobool86 = icmp ne i32 %41, 0 - br i1 %tobool86, label %if.then87, label %if.end88 - -if.then87: ; preds = %if.end84 - store i32 25931, i32* @f, align 4 - br label %if.end88 - -if.end88: ; preds = %if.then87, %if.end84 - %42 = load i32*, i32** @a, align 4 - %arrayidx89 = getelementptr inbounds i32, i32* %42, i32 24 - %43 = load i32, i32* %arrayidx89, align 4 - %tobool90 = icmp ne i32 %43, 0 - br i1 %tobool90, label %if.then91, label %if.end92 - -if.then91: ; preds = %if.end88 - store i32 22035, i32* @f, align 4 - br label %if.end92 - -if.end92: ; preds = %if.then91, %if.end88 - %44 = load i32*, i32** @a, align 4 - %arrayidx93 = getelementptr inbounds i32, i32* %44, i32 25 - %45 = load i32, i32* %arrayidx93, align 4 - %tobool94 = icmp ne i32 %45, 0 - br i1 %tobool94, label %if.then95, label %if.end96 - -if.then95: ; preds = %if.end92 - store i32 10712, i32* @f, align 4 - br label %if.end96 - -if.end96: ; preds = %if.then95, %if.end92 - %46 = load i32*, i32** @a, align 4 - %arrayidx97 = getelementptr inbounds i32, i32* %46, i32 26 - %47 = load i32, i32* 
%arrayidx97, align 4 - %tobool98 = icmp ne i32 %47, 0 - br i1 %tobool98, label %if.then99, label %if.end100 - -if.then99: ; preds = %if.end96 - store i32 18267, i32* @f, align 4 - br label %if.end100 - -if.end100: ; preds = %if.then99, %if.end96 - %48 = load i32*, i32** @a, align 4 - %arrayidx101 = getelementptr inbounds i32, i32* %48, i32 27 - %49 = load i32, i32* %arrayidx101, align 4 - %tobool102 = icmp ne i32 %49, 0 - br i1 %tobool102, label %if.then103, label %if.end104 - -if.then103: ; preds = %if.end100 - store i32 30432, i32* @f, align 4 - br label %if.end104 - -if.end104: ; preds = %if.then103, %if.end100 - %50 = load i32*, i32** @a, align 4 - %arrayidx105 = getelementptr inbounds i32, i32* %50, i32 28 - %51 = load i32, i32* %arrayidx105, align 4 - %tobool106 = icmp ne i32 %51, 0 - br i1 %tobool106, label %if.then107, label %if.end108 - -if.then107: ; preds = %if.end104 - store i32 5847, i32* @f, align 4 - br label %if.end108 - -if.end108: ; preds = %if.then107, %if.end104 - %52 = load i32*, i32** @a, align 4 - %arrayidx109 = getelementptr inbounds i32, i32* %52, i32 29 - %53 = load i32, i32* %arrayidx109, align 4 - %tobool110 = icmp ne i32 %53, 0 - br i1 %tobool110, label %if.then111, label %if.end112 - -if.then111: ; preds = %if.end108 - store i32 14705, i32* @f, align 4 - br label %if.end112 - -if.end112: ; preds = %if.then111, %if.end108 - %54 = load i32*, i32** @a, align 4 - %arrayidx113 = getelementptr inbounds i32, i32* %54, i32 30 - %55 = load i32, i32* %arrayidx113, align 4 - %tobool114 = icmp ne i32 %55, 0 - br i1 %tobool114, label %if.then115, label %if.end116 - -if.then115: ; preds = %if.end112 - store i32 28488, i32* @f, align 4 - br label %if.end116 - -if.end116: ; preds = %if.then115, %if.end112 - %56 = load i32*, i32** @a, align 4 - %arrayidx117 = getelementptr inbounds i32, i32* %56, i32 31 - %57 = load i32, i32* %arrayidx117, align 4 - %tobool118 = icmp ne i32 %57, 0 - br i1 %tobool118, label %if.then119, label %if.end120 - -if.then119: ; 
preds = %if.end116 - store i32 13853, i32* @f, align 4 - br label %if.end120 - -if.end120: ; preds = %if.then119, %if.end116 - %58 = load i32*, i32** @a, align 4 - %arrayidx121 = getelementptr inbounds i32, i32* %58, i32 32 - %59 = load i32, i32* %arrayidx121, align 4 - %tobool122 = icmp ne i32 %59, 0 - br i1 %tobool122, label %if.then123, label %if.end124 - -if.then123: ; preds = %if.end120 - store i32 31379, i32* @f, align 4 - br label %if.end124 - -if.end124: ; preds = %if.then123, %if.end120 - %60 = load i32*, i32** @a, align 4 - %arrayidx125 = getelementptr inbounds i32, i32* %60, i32 33 - %61 = load i32, i32* %arrayidx125, align 4 - %tobool126 = icmp ne i32 %61, 0 - br i1 %tobool126, label %if.then127, label %if.end128 - -if.then127: ; preds = %if.end124 - store i32 7010, i32* @f, align 4 - br label %if.end128 - -if.end128: ; preds = %if.then127, %if.end124 - br label %if.then131 - -if.then131: ; preds = %if.end128 - store i32 31840, i32* @f, align 4 - br label %if.end132 - -if.end132: ; preds = %if.then131 - %62 = load i32*, i32** @a, align 4 - %arrayidx133 = getelementptr inbounds i32, i32* %62, i32 35 - %63 = load i32, i32* %arrayidx133, align 4 - %tobool134 = icmp ne i32 %63, 0 - br i1 %tobool134, label %if.then135, label %if.end136 - -if.then135: ; preds = %if.end132 - store i32 16119, i32* @f, align 4 - br label %if.end136 - -if.end136: ; preds = %if.then135, %if.end132 - %64 = load i32*, i32** @a, align 4 - %arrayidx137 = getelementptr inbounds i32, i32* %64, i32 36 - %65 = load i32, i32* %arrayidx137, align 4 - %tobool138 = icmp ne i32 %65, 0 - br i1 %tobool138, label %if.then139, label %if.end140 - -if.then139: ; preds = %if.end136 - store i32 7119, i32* @f, align 4 - br label %if.end140 - -if.end140: ; preds = %if.then139, %if.end136 - %66 = load i32*, i32** @a, align 4 - %arrayidx141 = getelementptr inbounds i32, i32* %66, i32 37 - %67 = load i32, i32* %arrayidx141, align 4 - %tobool142 = icmp ne i32 %67, 0 - br i1 %tobool142, label %if.then143, 
label %if.end144 - -if.then143: ; preds = %if.end140 - store i32 3333, i32* @f, align 4 - br label %if.end144 - -if.end144: ; preds = %if.then143, %if.end140 - %68 = load i32*, i32** @a, align 4 - %arrayidx145 = getelementptr inbounds i32, i32* %68, i32 38 - %69 = load i32, i32* %arrayidx145, align 4 - %tobool146 = icmp ne i32 %69, 0 - br i1 %tobool146, label %if.then147, label %if.end148 - -if.then147: ; preds = %if.end144 - store i32 6430, i32* @f, align 4 - br label %if.end148 - -if.end148: ; preds = %if.then147, %if.end144 - %70 = load i32*, i32** @a, align 4 - %arrayidx149 = getelementptr inbounds i32, i32* %70, i32 39 - %71 = load i32, i32* %arrayidx149, align 4 - %tobool150 = icmp ne i32 %71, 0 - br i1 %tobool150, label %if.then151, label %if.end152 - -if.then151: ; preds = %if.end148 - store i32 19857, i32* @f, align 4 - br label %if.end152 - -if.end152: ; preds = %if.then151, %if.end148 - %72 = load i32*, i32** @a, align 4 - %arrayidx153 = getelementptr inbounds i32, i32* %72, i32 40 - %73 = load i32, i32* %arrayidx153, align 4 - %tobool154 = icmp ne i32 %73, 0 - br i1 %tobool154, label %if.then155, label %if.end156 - -if.then155: ; preds = %if.end152 - store i32 13237, i32* @f, align 4 - br label %if.end156 - -if.end156: ; preds = %if.then155, %if.end152 - br label %if.then159 - -if.then159: ; preds = %if.end156 - store i32 163, i32* @f, align 4 - br label %if.end160 - -if.end160: ; preds = %if.then159 - %74 = load i32*, i32** @a, align 4 - %arrayidx161 = getelementptr inbounds i32, i32* %74, i32 42 - %75 = load i32, i32* %arrayidx161, align 4 - %tobool162 = icmp ne i32 %75, 0 - br i1 %tobool162, label %if.then163, label %if.end164 - -if.then163: ; preds = %if.end160 - store i32 1961, i32* @f, align 4 - br label %if.end164 - -if.end164: ; preds = %if.then163, %if.end160 - %76 = load i32*, i32** @a, align 4 - %arrayidx165 = getelementptr inbounds i32, i32* %76, i32 43 - %77 = load i32, i32* %arrayidx165, align 4 - %tobool166 = icmp ne i32 %77, 0 - br i1 
%tobool166, label %if.then167, label %if.end168 - -if.then167: ; preds = %if.end164 - store i32 11325, i32* @f, align 4 - br label %if.end168 - -if.end168: ; preds = %if.then167, %if.end164 - %78 = load i32*, i32** @a, align 4 - %arrayidx169 = getelementptr inbounds i32, i32* %78, i32 44 - %79 = load i32, i32* %arrayidx169, align 4 - %tobool170 = icmp ne i32 %79, 0 - br i1 %tobool170, label %if.then171, label %if.end172 - -if.then171: ; preds = %if.end168 - store i32 12189, i32* @f, align 4 - br label %if.end172 - -if.end172: ; preds = %if.then171, %if.end168 - %80 = load i32*, i32** @a, align 4 - %arrayidx173 = getelementptr inbounds i32, i32* %80, i32 45 - %81 = load i32, i32* %arrayidx173, align 4 - %tobool174 = icmp ne i32 %81, 0 - br i1 %tobool174, label %if.then175, label %if.end176 - -if.then175: ; preds = %if.end172 - store i32 15172, i32* @f, align 4 - br label %if.end176 - -if.end176: ; preds = %if.then175, %if.end172 - br label %if.then179 - -if.then179: ; preds = %if.end176 - store i32 13491, i32* @f, align 4 - br label %if.end180 - -if.end180: ; preds = %if.then179 - %82 = load i32*, i32** @a, align 4 - %arrayidx181 = getelementptr inbounds i32, i32* %82, i32 47 - %83 = load i32, i32* %arrayidx181, align 4 - %tobool182 = icmp ne i32 %83, 0 - br i1 %tobool182, label %if.then183, label %if.end184 - -if.then183: ; preds = %if.end180 - store i32 9521, i32* @f, align 4 - br label %if.end184 - -if.end184: ; preds = %if.then183, %if.end180 - %84 = load i32*, i32** @a, align 4 - %arrayidx185 = getelementptr inbounds i32, i32* %84, i32 48 - %85 = load i32, i32* %arrayidx185, align 4 - %tobool186 = icmp ne i32 %85, 0 - br i1 %tobool186, label %if.then187, label %if.end188 - -if.then187: ; preds = %if.end184 - store i32 448, i32* @f, align 4 - br label %if.end188 - -if.end188: ; preds = %if.then187, %if.end184 - %86 = load i32*, i32** @a, align 4 - %arrayidx189 = getelementptr inbounds i32, i32* %86, i32 49 - %87 = load i32, i32* %arrayidx189, align 4 - 
%tobool190 = icmp ne i32 %87, 0 - br i1 %tobool190, label %if.then191, label %if.end192 - -if.then191: ; preds = %if.end188 - store i32 13468, i32* @f, align 4 - br label %if.end192 - -if.end192: ; preds = %if.then191, %if.end188 - %88 = load i32*, i32** @a, align 4 - %arrayidx193 = getelementptr inbounds i32, i32* %88, i32 50 - %89 = load i32, i32* %arrayidx193, align 4 - %tobool194 = icmp ne i32 %89, 0 - br i1 %tobool194, label %if.then195, label %if.end196 - -if.then195: ; preds = %if.end192 - store i32 16190, i32* @f, align 4 - br label %if.end196 - -if.end196: ; preds = %if.then195, %if.end192 - %90 = load i32*, i32** @a, align 4 - %arrayidx197 = getelementptr inbounds i32, i32* %90, i32 51 - %91 = load i32, i32* %arrayidx197, align 4 - %tobool198 = icmp ne i32 %91, 0 - br i1 %tobool198, label %if.then199, label %if.end200 - -if.then199: ; preds = %if.end196 - store i32 8602, i32* @f, align 4 - br label %if.end200 - -if.end200: ; preds = %if.then199, %if.end196 - %92 = load i32*, i32** @a, align 4 - %arrayidx201 = getelementptr inbounds i32, i32* %92, i32 52 - %93 = load i32, i32* %arrayidx201, align 4 - %tobool202 = icmp ne i32 %93, 0 - br i1 %tobool202, label %if.then203, label %if.end204 - -if.then203: ; preds = %if.end200 - store i32 21083, i32* @f, align 4 - br label %if.end204 - -if.end204: ; preds = %if.then203, %if.end200 - %94 = load i32*, i32** @a, align 4 - %arrayidx205 = getelementptr inbounds i32, i32* %94, i32 53 - %95 = load i32, i32* %arrayidx205, align 4 - %tobool206 = icmp ne i32 %95, 0 - br i1 %tobool206, label %if.then207, label %if.end208 - -if.then207: ; preds = %if.end204 - store i32 5172, i32* @f, align 4 - br label %if.end208 - -if.end208: ; preds = %if.then207, %if.end204 - %96 = load i32*, i32** @a, align 4 - %arrayidx209 = getelementptr inbounds i32, i32* %96, i32 54 - %97 = load i32, i32* %arrayidx209, align 4 - %tobool210 = icmp ne i32 %97, 0 - br i1 %tobool210, label %if.then211, label %if.end212 - -if.then211: ; preds = 
%if.end208 - store i32 32505, i32* @f, align 4 - br label %if.end212 - -if.end212: ; preds = %if.then211, %if.end208 - br label %if.then215 - -if.then215: ; preds = %if.end212 - store i32 23490, i32* @f, align 4 - br label %if.end216 - -if.end216: ; preds = %if.then215 - %98 = load i32*, i32** @a, align 4 - %arrayidx217 = getelementptr inbounds i32, i32* %98, i32 56 - %99 = load i32, i32* %arrayidx217, align 4 - %tobool218 = icmp ne i32 %99, 0 - br i1 %tobool218, label %if.then219, label %if.end220 - -if.then219: ; preds = %if.end216 - store i32 30699, i32* @f, align 4 - br label %if.end220 - -if.end220: ; preds = %if.then219, %if.end216 - %100 = load i32*, i32** @a, align 4 - %arrayidx221 = getelementptr inbounds i32, i32* %100, i32 57 - %101 = load i32, i32* %arrayidx221, align 4 - %tobool222 = icmp ne i32 %101, 0 - br i1 %tobool222, label %if.then223, label %if.end224 - -if.then223: ; preds = %if.end220 - store i32 16286, i32* @f, align 4 - br label %if.end224 - -if.end224: ; preds = %if.then223, %if.end220 - %102 = load i32*, i32** @a, align 4 - %arrayidx225 = getelementptr inbounds i32, i32* %102, i32 58 - %103 = load i32, i32* %arrayidx225, align 4 - %tobool226 = icmp ne i32 %103, 0 - br i1 %tobool226, label %if.then227, label %if.end228 - -if.then227: ; preds = %if.end224 - store i32 17939, i32* @f, align 4 - br label %if.end228 - -if.end228: ; preds = %if.then227, %if.end224 - %104 = load i32*, i32** @a, align 4 - %arrayidx229 = getelementptr inbounds i32, i32* %104, i32 59 - %105 = load i32, i32* %arrayidx229, align 4 - %tobool230 = icmp ne i32 %105, 0 - br i1 %tobool230, label %if.then231, label %if.end232 - -if.then231: ; preds = %if.end228 - store i32 25148, i32* @f, align 4 - br label %if.end232 - -if.end232: ; preds = %if.then231, %if.end228 - %106 = load i32*, i32** @a, align 4 - %arrayidx233 = getelementptr inbounds i32, i32* %106, i32 60 - %107 = load i32, i32* %arrayidx233, align 4 - %tobool234 = icmp ne i32 %107, 0 - br i1 %tobool234, label 
%if.then235, label %if.end236 - -if.then235: ; preds = %if.end232 - store i32 644, i32* @f, align 4 - br label %if.end236 - -if.end236: ; preds = %if.then235, %if.end232 - br label %if.then239 - -if.then239: ; preds = %if.end236 - store i32 23457, i32* @f, align 4 - br label %if.end240 - -if.end240: ; preds = %if.then239 - %108 = load i32*, i32** @a, align 4 - %arrayidx241 = getelementptr inbounds i32, i32* %108, i32 62 - %109 = load i32, i32* %arrayidx241, align 4 - %tobool242 = icmp ne i32 %109, 0 - br i1 %tobool242, label %if.then243, label %if.end244 - -if.then243: ; preds = %if.end240 - store i32 21116, i32* @f, align 4 - br label %if.end244 - -if.end244: ; preds = %if.then243, %if.end240 - br label %if.then247 - -if.then247: ; preds = %if.end244 - store i32 10066, i32* @f, align 4 - br label %if.end248 - -if.end248: ; preds = %if.then247 - %110 = load i32*, i32** @a, align 4 - %arrayidx249 = getelementptr inbounds i32, i32* %110, i32 64 - %111 = load i32, i32* %arrayidx249, align 4 - %tobool250 = icmp ne i32 %111, 0 - br i1 %tobool250, label %if.then251, label %if.end252 - -if.then251: ; preds = %if.end248 - store i32 9058, i32* @f, align 4 - br label %if.end252 - -if.end252: ; preds = %if.then251, %if.end248 - %112 = load i32*, i32** @a, align 4 - %arrayidx253 = getelementptr inbounds i32, i32* %112, i32 65 - %113 = load i32, i32* %arrayidx253, align 4 - %tobool254 = icmp ne i32 %113, 0 - br i1 %tobool254, label %if.then255, label %if.end256 - -if.then255: ; preds = %if.end252 - store i32 8383, i32* @f, align 4 - br label %if.end256 - -if.end256: ; preds = %if.then255, %if.end252 - %114 = load i32*, i32** @a, align 4 - %arrayidx257 = getelementptr inbounds i32, i32* %114, i32 66 - %115 = load i32, i32* %arrayidx257, align 4 - %tobool258 = icmp ne i32 %115, 0 - br i1 %tobool258, label %if.then259, label %if.end260 - -if.then259: ; preds = %if.end256 - store i32 31069, i32* @f, align 4 - br label %if.end260 - -if.end260: ; preds = %if.then259, %if.end256 - 
%116 = load i32*, i32** @a, align 4 - %arrayidx261 = getelementptr inbounds i32, i32* %116, i32 67 - %117 = load i32, i32* %arrayidx261, align 4 - %tobool262 = icmp ne i32 %117, 0 - br i1 %tobool262, label %if.then263, label %if.end264 - -if.then263: ; preds = %if.end260 - store i32 32280, i32* @f, align 4 - br label %if.end264 - -if.end264: ; preds = %if.then263, %if.end260 - br label %if.then267 - -if.then267: ; preds = %if.end264 - store i32 1553, i32* @f, align 4 - br label %if.end268 - -if.end268: ; preds = %if.then267 - %118 = load i32*, i32** @a, align 4 - %arrayidx269 = getelementptr inbounds i32, i32* %118, i32 69 - %119 = load i32, i32* %arrayidx269, align 4 - %tobool270 = icmp ne i32 %119, 0 - br i1 %tobool270, label %if.then271, label %if.end272 - -if.then271: ; preds = %if.end268 - store i32 8118, i32* @f, align 4 - br label %if.end272 - -if.end272: ; preds = %if.then271, %if.end268 - %120 = load i32*, i32** @a, align 4 - %arrayidx273 = getelementptr inbounds i32, i32* %120, i32 70 - %121 = load i32, i32* %arrayidx273, align 4 - %tobool274 = icmp ne i32 %121, 0 - br i1 %tobool274, label %if.then275, label %if.end276 - -if.then275: ; preds = %if.end272 - store i32 12959, i32* @f, align 4 - br label %if.end276 - -if.end276: ; preds = %if.then275, %if.end272 - %122 = load i32*, i32** @a, align 4 - %arrayidx277 = getelementptr inbounds i32, i32* %122, i32 71 - %123 = load i32, i32* %arrayidx277, align 4 - %tobool278 = icmp ne i32 %123, 0 - br i1 %tobool278, label %if.then279, label %if.end280 - -if.then279: ; preds = %if.end276 - store i32 675, i32* @f, align 4 - br label %if.end280 - -if.end280: ; preds = %if.then279, %if.end276 - %124 = load i32*, i32** @a, align 4 - %arrayidx281 = getelementptr inbounds i32, i32* %124, i32 72 - %125 = load i32, i32* %arrayidx281, align 4 - %tobool282 = icmp ne i32 %125, 0 - br i1 %tobool282, label %if.then283, label %if.end284 - -if.then283: ; preds = %if.end280 - store i32 29144, i32* @f, align 4 - br label %if.end284 
- -if.end284: ; preds = %if.then283, %if.end280 - %126 = load i32*, i32** @a, align 4 - %arrayidx285 = getelementptr inbounds i32, i32* %126, i32 73 - %127 = load i32, i32* %arrayidx285, align 4 - %tobool286 = icmp ne i32 %127, 0 - br i1 %tobool286, label %if.then287, label %if.end288 - -if.then287: ; preds = %if.end284 - store i32 26130, i32* @f, align 4 - br label %if.end288 - -if.end288: ; preds = %if.then287, %if.end284 - %128 = load i32*, i32** @a, align 4 - %arrayidx289 = getelementptr inbounds i32, i32* %128, i32 74 - %129 = load i32, i32* %arrayidx289, align 4 - %tobool290 = icmp ne i32 %129, 0 - br i1 %tobool290, label %if.then291, label %if.end292 - -if.then291: ; preds = %if.end288 - store i32 31934, i32* @f, align 4 - br label %if.end292 - -if.end292: ; preds = %if.then291, %if.end288 - %130 = load i32*, i32** @a, align 4 - %arrayidx293 = getelementptr inbounds i32, i32* %130, i32 75 - %131 = load i32, i32* %arrayidx293, align 4 - %tobool294 = icmp ne i32 %131, 0 - br i1 %tobool294, label %if.then295, label %if.end296 - -if.then295: ; preds = %if.end292 - store i32 25862, i32* @f, align 4 - br label %if.end296 - -if.end296: ; preds = %if.then295, %if.end292 - %132 = load i32*, i32** @a, align 4 - %arrayidx297 = getelementptr inbounds i32, i32* %132, i32 76 - %133 = load i32, i32* %arrayidx297, align 4 - %tobool298 = icmp ne i32 %133, 0 - br i1 %tobool298, label %if.then299, label %if.end300 - -if.then299: ; preds = %if.end296 - store i32 10642, i32* @f, align 4 - br label %if.end300 - -if.end300: ; preds = %if.then299, %if.end296 - %134 = load i32*, i32** @a, align 4 - %arrayidx301 = getelementptr inbounds i32, i32* %134, i32 77 - %135 = load i32, i32* %arrayidx301, align 4 - %tobool302 = icmp ne i32 %135, 0 - br i1 %tobool302, label %if.then303, label %if.end304 - -if.then303: ; preds = %if.end300 - store i32 20209, i32* @f, align 4 - br label %if.end304 - -if.end304: ; preds = %if.then303, %if.end300 - %136 = load i32*, i32** @a, align 4 - 
%arrayidx305 = getelementptr inbounds i32, i32* %136, i32 78 - %137 = load i32, i32* %arrayidx305, align 4 - %tobool306 = icmp ne i32 %137, 0 - br i1 %tobool306, label %if.then307, label %if.end308 - -if.then307: ; preds = %if.end304 - store i32 30889, i32* @f, align 4 - br label %if.end308 - -if.end308: ; preds = %if.then307, %if.end304 - %138 = load i32*, i32** @a, align 4 - %arrayidx309 = getelementptr inbounds i32, i32* %138, i32 79 - %139 = load i32, i32* %arrayidx309, align 4 - %tobool310 = icmp ne i32 %139, 0 - br i1 %tobool310, label %if.then311, label %if.end312 - -if.then311: ; preds = %if.end308 - store i32 18688, i32* @f, align 4 - br label %if.end312 - -if.end312: ; preds = %if.then311, %if.end308 - %140 = load i32*, i32** @a, align 4 - %arrayidx313 = getelementptr inbounds i32, i32* %140, i32 80 - %141 = load i32, i32* %arrayidx313, align 4 - %tobool314 = icmp ne i32 %141, 0 - br i1 %tobool314, label %if.then315, label %if.end316 - -if.then315: ; preds = %if.end312 - store i32 28726, i32* @f, align 4 - br label %if.end316 - -if.end316: ; preds = %if.then315, %if.end312 - %142 = load i32*, i32** @a, align 4 - %arrayidx317 = getelementptr inbounds i32, i32* %142, i32 81 - %143 = load i32, i32* %arrayidx317, align 4 - %tobool318 = icmp ne i32 %143, 0 - br i1 %tobool318, label %if.then319, label %if.end320 - -if.then319: ; preds = %if.end316 - store i32 4266, i32* @f, align 4 - br label %if.end320 - -if.end320: ; preds = %if.then319, %if.end316 - %144 = load i32*, i32** @a, align 4 - %arrayidx321 = getelementptr inbounds i32, i32* %144, i32 82 - %145 = load i32, i32* %arrayidx321, align 4 - %tobool322 = icmp ne i32 %145, 0 - br i1 %tobool322, label %if.then323, label %if.end324 - -if.then323: ; preds = %if.end320 - store i32 15461, i32* @f, align 4 - br label %if.end324 - -if.end324: ; preds = %if.then323, %if.end320 - %146 = load i32*, i32** @a, align 4 - %arrayidx325 = getelementptr inbounds i32, i32* %146, i32 83 - %147 = load i32, i32* %arrayidx325, 
align 4 - %tobool326 = icmp ne i32 %147, 0 - br i1 %tobool326, label %if.then327, label %if.end328 - -if.then327: ; preds = %if.end324 - store i32 24716, i32* @f, align 4 - br label %if.end328 - -if.end328: ; preds = %if.then327, %if.end324 - br label %if.then331 - -if.then331: ; preds = %if.end328 - store i32 18727, i32* @f, align 4 - br label %if.end332 - -if.end332: ; preds = %if.then331 - %148 = load i32*, i32** @a, align 4 - %arrayidx333 = getelementptr inbounds i32, i32* %148, i32 85 - %149 = load i32, i32* %arrayidx333, align 4 - %tobool334 = icmp ne i32 %149, 0 - br i1 %tobool334, label %if.then335, label %if.end336 - -if.then335: ; preds = %if.end332 - store i32 29505, i32* @f, align 4 - br label %if.end336 - -if.end336: ; preds = %if.then335, %if.end332 - %150 = load i32*, i32** @a, align 4 - %arrayidx337 = getelementptr inbounds i32, i32* %150, i32 86 - %151 = load i32, i32* %arrayidx337, align 4 - %tobool338 = icmp ne i32 %151, 0 - br i1 %tobool338, label %if.then339, label %if.end340 - -if.then339: ; preds = %if.end336 - store i32 27008, i32* @f, align 4 - br label %if.end340 - -if.end340: ; preds = %if.then339, %if.end336 - %152 = load i32*, i32** @a, align 4 - %arrayidx341 = getelementptr inbounds i32, i32* %152, i32 87 - %153 = load i32, i32* %arrayidx341, align 4 - %tobool342 = icmp ne i32 %153, 0 - br i1 %tobool342, label %if.then343, label %if.end344 - -if.then343: ; preds = %if.end340 - store i32 6550, i32* @f, align 4 - br label %if.end344 - -if.end344: ; preds = %if.then343, %if.end340 - br label %if.then347 - -if.then347: ; preds = %if.end344 - store i32 1117, i32* @f, align 4 - br label %if.end348 - -if.end348: ; preds = %if.then347 - %154 = load i32*, i32** @a, align 4 - %arrayidx349 = getelementptr inbounds i32, i32* %154, i32 89 - %155 = load i32, i32* %arrayidx349, align 4 - %tobool350 = icmp ne i32 %155, 0 - br i1 %tobool350, label %if.then351, label %if.end352 - -if.then351: ; preds = %if.end348 - store i32 20118, i32* @f, align 4 - br 
label %if.end352 - -if.end352: ; preds = %if.then351, %if.end348 - %156 = load i32*, i32** @a, align 4 - %arrayidx353 = getelementptr inbounds i32, i32* %156, i32 90 - %157 = load i32, i32* %arrayidx353, align 4 - %tobool354 = icmp ne i32 %157, 0 - br i1 %tobool354, label %if.then355, label %if.end356 - -if.then355: ; preds = %if.end352 - store i32 13650, i32* @f, align 4 - br label %if.end356 - -if.end356: ; preds = %if.then355, %if.end352 - br label %if.then359 - -if.then359: ; preds = %if.end356 - store i32 18642, i32* @f, align 4 - br label %if.end360 - -if.end360: ; preds = %if.then359 - %158 = load i32*, i32** @a, align 4 - %arrayidx361 = getelementptr inbounds i32, i32* %158, i32 92 - %159 = load i32, i32* %arrayidx361, align 4 - %tobool362 = icmp ne i32 %159, 0 - br i1 %tobool362, label %if.then363, label %if.end364 - -if.then363: ; preds = %if.end360 - store i32 30662, i32* @f, align 4 - br label %if.end364 - -if.end364: ; preds = %if.then363, %if.end360 - %160 = load i32*, i32** @a, align 4 - %arrayidx365 = getelementptr inbounds i32, i32* %160, i32 93 - %161 = load i32, i32* %arrayidx365, align 4 - %tobool366 = icmp ne i32 %161, 0 - br i1 %tobool366, label %if.then367, label %if.end368 - -if.then367: ; preds = %if.end364 - store i32 8095, i32* @f, align 4 - br label %if.end368 - -if.end368: ; preds = %if.then367, %if.end364 - %162 = load i32*, i32** @a, align 4 - %arrayidx369 = getelementptr inbounds i32, i32* %162, i32 94 - %163 = load i32, i32* %arrayidx369, align 4 - %tobool370 = icmp ne i32 %163, 0 - br i1 %tobool370, label %if.then371, label %if.end372 - -if.then371: ; preds = %if.end368 - store i32 8442, i32* @f, align 4 - br label %if.end372 - -if.end372: ; preds = %if.then371, %if.end368 - %164 = load i32*, i32** @a, align 4 - %arrayidx373 = getelementptr inbounds i32, i32* %164, i32 95 - %165 = load i32, i32* %arrayidx373, align 4 - %tobool374 = icmp ne i32 %165, 0 - br i1 %tobool374, label %if.then375, label %if.end376 - -if.then375: ; preds = 
%if.end372 - store i32 8153, i32* @f, align 4 - br label %if.end376 - -if.end376: ; preds = %if.then375, %if.end372 - br label %if.then379 - -if.then379: ; preds = %if.end376 - store i32 12965, i32* @f, align 4 - br label %if.end380 - -if.end380: ; preds = %if.then379 - %166 = load i32*, i32** @a, align 4 - %arrayidx381 = getelementptr inbounds i32, i32* %166, i32 97 - %167 = load i32, i32* %arrayidx381, align 4 - %tobool382 = icmp ne i32 %167, 0 - br i1 %tobool382, label %if.then383, label %if.end384 - -if.then383: ; preds = %if.end380 - store i32 14277, i32* @f, align 4 - br label %if.end384 - -if.end384: ; preds = %if.then383, %if.end380 - br label %if.then387 - -if.then387: ; preds = %if.end384 - store i32 1997, i32* @f, align 4 - br label %if.end388 - -if.end388: ; preds = %if.then387 - %168 = load i32*, i32** @a, align 4 - %arrayidx389 = getelementptr inbounds i32, i32* %168, i32 99 - %169 = load i32, i32* %arrayidx389, align 4 - %tobool390 = icmp ne i32 %169, 0 - br i1 %tobool390, label %if.then391, label %if.end392 - -if.then391: ; preds = %if.end388 - store i32 31385, i32* @f, align 4 - br label %if.end392 - -if.end392: ; preds = %if.then391, %if.end388 - %170 = load i32*, i32** @a, align 4 - %arrayidx393 = getelementptr inbounds i32, i32* %170, i32 100 - %171 = load i32, i32* %arrayidx393, align 4 - %tobool394 = icmp ne i32 %171, 0 - br i1 %tobool394, label %if.then395, label %if.end396 - -if.then395: ; preds = %if.end392 - store i32 8286, i32* @f, align 4 - br label %if.end396 - -if.end396: ; preds = %if.then395, %if.end392 - ret void -} diff --git a/llvm/test/CodeGen/Thumb/stack-mis-alignment.ll b/llvm/test/CodeGen/Thumb/stack-mis-alignment.ll new file mode 100644 index 00000000000000..c000fb6a618eed --- /dev/null +++ b/llvm/test/CodeGen/Thumb/stack-mis-alignment.ll @@ -0,0 +1,18 @@ +; RUN: llc -O0 < %s | FileCheck %s + +; For noreturn function with StackAlignment 8 (function contains call/alloc), +; check that lr is saved to keep the stack aligned. 
+; CHECK: push {lr} + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv5e-none-linux-gnueabi" + +define dso_local i32 @f() noreturn nounwind { +entry: + call i32 @llvm.arm.space(i32 2048, i32 undef) + tail call i32 @exit(i32 0) + unreachable +} + +declare i32 @llvm.arm.space(i32, i32) +declare dso_local i32 @exit(i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll index ebd93db9bdbe95..905b6d14bf080c 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll @@ -1778,11 +1778,11 @@ for.body: ; preds = %for.body, %for.body define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) { ; CHECK-LABEL: half_short_mac: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: cbz r2, .LBB11_3 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: subs r3, r2, #1 -; CHECK-NEXT: and r7, r2, #3 +; CHECK-NEXT: and r6, r2, #3 ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhs .LBB11_4 ; CHECK-NEXT: @ %bb.2: @@ -1799,33 +1799,33 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n ; CHECK-NEXT: vldr s0, .LCPI11_0 ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: add.w lr, r3, r2, lsr #2 -; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: adds r3, r1, #4 +; CHECK-NEXT: adds r2, r0, #4 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB11_5: @ %for.body -; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r2, r1, r3 -; CHECK-NEXT: adds r6, r0, r3 -; CHECK-NEXT: vldr.16 s2, [r6, #6] +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrsh.w r4, [r3, #2] +; CHECK-NEXT: vldr.16 s2, [r2, #2] ; CHECK-NEXT: add.w r12, r12, #4 -; CHECK-NEXT: ldrsh.w r4, [r2, #2] -; CHECK-NEXT: ldrsh.w r5, [r2, #4] -; CHECK-NEXT: ldrsh.w r2, 
[r2, #6] -; CHECK-NEXT: vmov s8, r4 -; CHECK-NEXT: vmov s6, r5 -; CHECK-NEXT: vmov s4, r2 +; CHECK-NEXT: vmov s4, r4 ; CHECK-NEXT: vcvt.f16.s32 s4, s4 +; CHECK-NEXT: ldrsh.w r4, [r3] ; CHECK-NEXT: vmul.f16 s2, s2, s4 -; CHECK-NEXT: vldr.16 s4, [r6, #4] +; CHECK-NEXT: vldr.16 s4, [r2] +; CHECK-NEXT: vmov s6, r4 ; CHECK-NEXT: vcvt.f16.s32 s6, s6 +; CHECK-NEXT: ldrsh r5, [r3, #-2] +; CHECK-NEXT: ldrsh r4, [r3, #-4] ; CHECK-NEXT: vmul.f16 s4, s4, s6 -; CHECK-NEXT: vldr.16 s6, [r6, #2] +; CHECK-NEXT: vldr.16 s6, [r2, #-2] +; CHECK-NEXT: adds r3, #8 +; CHECK-NEXT: vmov s8, r5 ; CHECK-NEXT: vcvt.f16.s32 s8, s8 -; CHECK-NEXT: ldrsh r2, [r1, r3] +; CHECK-NEXT: vmov s10, r4 ; CHECK-NEXT: vmul.f16 s6, s6, s8 -; CHECK-NEXT: vldr.16 s8, [r6] -; CHECK-NEXT: adds r3, #8 -; CHECK-NEXT: vmov s10, r2 +; CHECK-NEXT: vldr.16 s8, [r2, #-4] ; CHECK-NEXT: vcvt.f16.s32 s10, s10 +; CHECK-NEXT: adds r2, #8 ; CHECK-NEXT: vmul.f16 s8, s8, s10 ; CHECK-NEXT: vcvtb.f32.f16 s8, s8 ; CHECK-NEXT: vcvtb.f32.f16 s6, s6 @@ -1837,11 +1837,11 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n ; CHECK-NEXT: vadd.f32 s0, s0, s2 ; CHECK-NEXT: le lr, .LBB11_5 ; CHECK-NEXT: .LBB11_6: @ %for.cond.cleanup.loopexit.unr-lcssa -; CHECK-NEXT: wls lr, r7, .LBB11_9 +; CHECK-NEXT: wls lr, r6, .LBB11_9 ; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader ; CHECK-NEXT: add.w r0, r0, r12, lsl #1 ; CHECK-NEXT: add.w r1, r1, r12, lsl #1 -; CHECK-NEXT: mov lr, r7 +; CHECK-NEXT: mov lr, r6 ; CHECK-NEXT: .LBB11_8: @ %for.body.epil ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsh r2, [r1], #2 @@ -1854,7 +1854,7 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n ; CHECK-NEXT: vadd.f32 s0, s0, s2 ; CHECK-NEXT: le lr, .LBB11_8 ; CHECK-NEXT: .LBB11_9: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: pop {r4, r5, r6, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.10: ; CHECK-NEXT: .LCPI11_0: diff --git 
a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll index 5fd03a78132264..0b8a20e8256949 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -372,29 +372,29 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: beq.w .LBB5_11 ; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph -; CHECK-NEXT: add.w r4, r3, r12, lsl #2 -; CHECK-NEXT: add.w r5, r1, r12 -; CHECK-NEXT: cmp r4, r1 -; CHECK-NEXT: add.w r6, r0, r12 -; CHECK-NEXT: cset r7, hi -; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: cset r5, hi -; CHECK-NEXT: cmp r4, r0 +; CHECK-NEXT: add.w r6, r3, r12, lsl #2 +; CHECK-NEXT: add.w r4, r1, r12 +; CHECK-NEXT: cmp r6, r1 +; CHECK-NEXT: add.w r5, r0, r12 +; CHECK-NEXT: cset lr, hi +; CHECK-NEXT: cmp r4, r3 ; CHECK-NEXT: cset r4, hi -; CHECK-NEXT: cmp r6, r3 +; CHECK-NEXT: cmp r6, r0 ; CHECK-NEXT: cset r6, hi -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: ands r6, r4 -; CHECK-NEXT: lsls r6, r6, #31 -; CHECK-NEXT: itt eq -; CHECK-NEXT: andeq.w r4, r5, r7 -; CHECK-NEXT: lslseq.w r4, r4, #31 -; CHECK-NEXT: beq .LBB5_4 +; CHECK-NEXT: cmp r5, r3 +; CHECK-NEXT: cset r5, hi +; CHECK-NEXT: ands r5, r6 +; CHECK-NEXT: movs r6, #1 +; CHECK-NEXT: lsls r5, r5, #31 +; CHECK-NEXT: itt eq +; CHECK-NEXT: andeq.w r5, r4, lr +; CHECK-NEXT: lslseq.w r5, r5, #31 +; CHECK-NEXT: beq .LBB5_4 ; CHECK-NEXT: @ %bb.2: @ %for.body.preheader -; CHECK-NEXT: sub.w r4, r12, #1 -; CHECK-NEXT: and r9, r12, #3 -; CHECK-NEXT: cmp r4, #3 -; CHECK-NEXT: bhs .LBB5_6 +; CHECK-NEXT: sub.w r5, r12, #1 +; CHECK-NEXT: and r9, r12, #3 +; CHECK-NEXT: cmp r5, #3 +; CHECK-NEXT: bhs .LBB5_6 ; CHECK-NEXT: @ %bb.3: ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: b .LBB5_8 @@ -409,35 +409,37 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly ; CHECK-NEXT: letp lr, 
.LBB5_5 ; CHECK-NEXT: b .LBB5_11 ; CHECK-NEXT: .LBB5_6: @ %for.body.preheader.new -; CHECK-NEXT: bic r7, r12, #3 +; CHECK-NEXT: bic r5, r12, #3 ; CHECK-NEXT: add.w r4, r3, #8 -; CHECK-NEXT: subs r7, #4 +; CHECK-NEXT: subs r5, #4 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: add.w lr, lr, r7, lsr #2 +; CHECK-NEXT: add.w lr, r6, r5, lsr #2 +; CHECK-NEXT: adds r5, r0, #3 +; CHECK-NEXT: adds r6, r1, #1 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB5_7: @ %for.body -; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrb.w r5, [r0, r12] -; CHECK-NEXT: add.w r7, r1, r12 -; CHECK-NEXT: ldrb.w r6, [r1, r12] -; CHECK-NEXT: smlabb r5, r6, r5, r2 -; CHECK-NEXT: str r5, [r4, #-8] -; CHECK-NEXT: add.w r5, r0, r12 -; CHECK-NEXT: ldrb r6, [r7, #1] -; CHECK-NEXT: add.w r12, r12, #4 -; CHECK-NEXT: ldrb.w r8, [r5, #1] -; CHECK-NEXT: smlabb r6, r6, r8, r2 -; CHECK-NEXT: str r6, [r4, #-4] -; CHECK-NEXT: ldrb.w r8, [r5, #2] -; CHECK-NEXT: ldrb r6, [r7, #2] -; CHECK-NEXT: smlabb r6, r6, r8, r2 -; CHECK-NEXT: str r6, [r4] -; CHECK-NEXT: ldrb r5, [r5, #3] -; CHECK-NEXT: ldrb r6, [r7, #3] -; CHECK-NEXT: smlabb r5, r6, r5, r2 -; CHECK-NEXT: str r5, [r4, #4] -; CHECK-NEXT: adds r4, #16 -; CHECK-NEXT: le lr, .LBB5_7 +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrb r8, [r5, #-3] +; CHECK-NEXT: add.w r12, r12, #4 +; CHECK-NEXT: ldrb r7, [r6, #-1] +; CHECK-NEXT: smlabb r7, r7, r8, r2 +; CHECK-NEXT: str r7, [r4, #-8] +; CHECK-NEXT: ldrb r8, [r5, #-2] +; CHECK-NEXT: ldrb r7, [r6] +; CHECK-NEXT: smlabb r7, r7, r8, r2 +; CHECK-NEXT: str r7, [r4, #-4] +; CHECK-NEXT: ldrb r8, [r5, #-1] +; CHECK-NEXT: ldrb r7, [r6, #1] +; CHECK-NEXT: smlabb r7, r7, r8, r2 +; CHECK-NEXT: str r7, [r4] +; CHECK-NEXT: ldrb.w r8, [r5] +; CHECK-NEXT: adds r5, #4 +; CHECK-NEXT: ldrb r7, [r6, #2] +; CHECK-NEXT: adds r6, #4 +; CHECK-NEXT: smlabb r7, r7, r8, r2 +; CHECK-NEXT: str r7, [r4, #4] +; CHECK-NEXT: adds r4, #16 +; CHECK-NEXT: le lr, .LBB5_7 ; CHECK-NEXT: .LBB5_8: @ 
%for.cond.cleanup.loopexit.unr-lcssa ; CHECK-NEXT: wls lr, r9, .LBB5_11 ; CHECK-NEXT: @ %bb.9: @ %for.body.epil.preheader @@ -447,10 +449,10 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly ; CHECK-NEXT: mov lr, r9 ; CHECK-NEXT: .LBB5_10: @ %for.body.epil ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrb r7, [r0], #1 -; CHECK-NEXT: ldrb r6, [r1], #1 -; CHECK-NEXT: smlabb r7, r6, r7, r2 -; CHECK-NEXT: str r7, [r3], #4 +; CHECK-NEXT: ldrb r6, [r0], #1 +; CHECK-NEXT: ldrb r5, [r1], #1 +; CHECK-NEXT: smlabb r6, r5, r6, r2 +; CHECK-NEXT: str r6, [r3], #4 ; CHECK-NEXT: le lr, .LBB5_10 ; CHECK-NEXT: .LBB5_11: @ %for.cond.cleanup ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} @@ -663,28 +665,28 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: beq.w .LBB7_11 ; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph -; CHECK-NEXT: add.w r4, r3, r12, lsl #2 -; CHECK-NEXT: add.w r5, r1, r12 -; CHECK-NEXT: cmp r4, r1 -; CHECK-NEXT: add.w r6, r0, r12 -; CHECK-NEXT: cset r7, hi -; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: cset r5, hi -; CHECK-NEXT: cmp r4, r0 +; CHECK-NEXT: add.w r6, r3, r12, lsl #2 +; CHECK-NEXT: add.w r4, r1, r12 +; CHECK-NEXT: cmp r6, r1 +; CHECK-NEXT: add.w r5, r0, r12 +; CHECK-NEXT: cset lr, hi +; CHECK-NEXT: cmp r4, r3 ; CHECK-NEXT: cset r4, hi -; CHECK-NEXT: cmp r6, r3 +; CHECK-NEXT: cmp r6, r0 ; CHECK-NEXT: cset r6, hi -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: ands r6, r4 -; CHECK-NEXT: lsls r6, r6, #31 +; CHECK-NEXT: cmp r5, r3 +; CHECK-NEXT: cset r5, hi +; CHECK-NEXT: ands r5, r6 +; CHECK-NEXT: movs r6, #1 +; CHECK-NEXT: lsls r5, r5, #31 ; CHECK-NEXT: itt eq -; CHECK-NEXT: andeq.w r4, r5, r7 -; CHECK-NEXT: lslseq.w r4, r4, #31 +; CHECK-NEXT: andeq.w r5, r4, lr +; CHECK-NEXT: lslseq.w r5, r5, #31 ; CHECK-NEXT: beq .LBB7_4 ; CHECK-NEXT: @ %bb.2: @ %for.body.preheader -; CHECK-NEXT: sub.w r4, r12, #1 +; CHECK-NEXT: sub.w r5, r12, #1 ; 
CHECK-NEXT: and r9, r12, #3 -; CHECK-NEXT: cmp r4, #3 +; CHECK-NEXT: cmp r5, #3 ; CHECK-NEXT: bhs .LBB7_6 ; CHECK-NEXT: @ %bb.3: ; CHECK-NEXT: mov.w r12, #0 @@ -700,33 +702,35 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl ; CHECK-NEXT: letp lr, .LBB7_5 ; CHECK-NEXT: b .LBB7_11 ; CHECK-NEXT: .LBB7_6: @ %for.body.preheader.new -; CHECK-NEXT: bic r7, r12, #3 +; CHECK-NEXT: bic r5, r12, #3 ; CHECK-NEXT: add.w r4, r3, #8 -; CHECK-NEXT: subs r7, #4 +; CHECK-NEXT: subs r5, #4 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: add.w lr, lr, r7, lsr #2 +; CHECK-NEXT: add.w lr, r6, r5, lsr #2 +; CHECK-NEXT: adds r5, r0, #3 +; CHECK-NEXT: adds r6, r1, #1 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB7_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrb.w r5, [r0, r12] -; CHECK-NEXT: add.w r7, r1, r12 -; CHECK-NEXT: ldrb.w r6, [r1, r12] -; CHECK-NEXT: smlabb r5, r6, r5, r2 -; CHECK-NEXT: str r5, [r4, #-8] -; CHECK-NEXT: add.w r5, r0, r12 -; CHECK-NEXT: ldrb r6, [r7, #1] +; CHECK-NEXT: ldrb r8, [r5, #-3] ; CHECK-NEXT: add.w r12, r12, #4 -; CHECK-NEXT: ldrb.w r8, [r5, #1] -; CHECK-NEXT: smlabb r6, r6, r8, r2 -; CHECK-NEXT: str r6, [r4, #-4] -; CHECK-NEXT: ldrb.w r8, [r5, #2] -; CHECK-NEXT: ldrb r6, [r7, #2] -; CHECK-NEXT: smlabb r6, r6, r8, r2 -; CHECK-NEXT: str r6, [r4] -; CHECK-NEXT: ldrb r5, [r5, #3] -; CHECK-NEXT: ldrb r6, [r7, #3] -; CHECK-NEXT: smlabb r5, r6, r5, r2 -; CHECK-NEXT: str r5, [r4, #4] +; CHECK-NEXT: ldrb r7, [r6, #-1] +; CHECK-NEXT: smlabb r7, r7, r8, r2 +; CHECK-NEXT: str r7, [r4, #-8] +; CHECK-NEXT: ldrb r8, [r5, #-2] +; CHECK-NEXT: ldrb r7, [r6] +; CHECK-NEXT: smlabb r7, r7, r8, r2 +; CHECK-NEXT: str r7, [r4, #-4] +; CHECK-NEXT: ldrb r8, [r5, #-1] +; CHECK-NEXT: ldrb r7, [r6, #1] +; CHECK-NEXT: smlabb r7, r7, r8, r2 +; CHECK-NEXT: str r7, [r4] +; CHECK-NEXT: ldrb.w r8, [r5] +; CHECK-NEXT: adds r5, #4 +; CHECK-NEXT: ldrb r7, [r6, #2] +; CHECK-NEXT: adds r6, #4 +; CHECK-NEXT: smlabb r7, r7, r8, r2 +; 
CHECK-NEXT: str r7, [r4, #4] ; CHECK-NEXT: adds r4, #16 ; CHECK-NEXT: le lr, .LBB7_7 ; CHECK-NEXT: .LBB7_8: @ %for.cond.cleanup.loopexit.unr-lcssa @@ -738,10 +742,10 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl ; CHECK-NEXT: mov lr, r9 ; CHECK-NEXT: .LBB7_10: @ %for.body.epil ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrb r7, [r0], #1 -; CHECK-NEXT: ldrb r6, [r1], #1 -; CHECK-NEXT: smlabb r7, r6, r7, r2 -; CHECK-NEXT: str r7, [r3], #4 +; CHECK-NEXT: ldrb r6, [r0], #1 +; CHECK-NEXT: ldrb r5, [r1], #1 +; CHECK-NEXT: smlabb r6, r5, r6, r2 +; CHECK-NEXT: str r6, [r3], #4 ; CHECK-NEXT: le lr, .LBB7_10 ; CHECK-NEXT: .LBB7_11: @ %for.cond.cleanup ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll new file mode 100644 index 00000000000000..d74f3bbfb2e0e3 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -0,0 +1,698 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s + +; Check some LSR loop postinc + +; fma loop with a destination that is the same as one of the sources +define void @fma(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %n) { +; CHECK-LABEL: fma: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: blt .LBB0_8 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-NEXT: cmp r3, #3 +; CHECK-NEXT: bhi .LBB0_3 +; CHECK-NEXT: @ %bb.2: +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: b .LBB0_6 +; CHECK-NEXT: .LBB0_3: @ %vector.ph +; CHECK-NEXT: bic r12, r3, #3 +; CHECK-NEXT: movs r5, #1 +; CHECK-NEXT: sub.w r6, r12, #4 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: add.w lr, r5, r6, lsr #2 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: 
mov r6, r2 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: .LBB0_4: @ %vector.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q0, [r4], #16 +; CHECK-NEXT: vldrw.u32 q1, [r5], #16 +; CHECK-NEXT: vldrw.u32 q2, [r6] +; CHECK-NEXT: vfma.f32 q2, q1, q0 +; CHECK-NEXT: vstrb.8 q2, [r6], #16 +; CHECK-NEXT: le lr, .LBB0_4 +; CHECK-NEXT: @ %bb.5: @ %middle.block +; CHECK-NEXT: cmp r12, r3 +; CHECK-NEXT: it eq +; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: .LBB0_6: @ %for.body.preheader12 +; CHECK-NEXT: sub.w lr, r3, r12 +; CHECK-NEXT: add.w r0, r0, r12, lsl #2 +; CHECK-NEXT: add.w r1, r1, r12, lsl #2 +; CHECK-NEXT: add.w r2, r2, r12, lsl #2 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: .LBB0_7: @ %for.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldr s0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldr s2, [r1] +; CHECK-NEXT: adds r1, #4 +; CHECK-NEXT: vldr s4, [r2] +; CHECK-NEXT: vfma.f32 s4, s2, s0 +; CHECK-NEXT: vstr s4, [r2] +; CHECK-NEXT: adds r2, #4 +; CHECK-NEXT: le lr, .LBB0_7 +; CHECK-NEXT: .LBB0_8: @ %for.cond.cleanup +; CHECK-NEXT: pop {r4, r5, r6, pc} +entry: + %cmp8 = icmp sgt i32 %n, 0 + br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + %min.iters.check = icmp ult i32 %n, 4 + br i1 %min.iters.check, label %for.body.preheader12, label %vector.ph + +for.body.preheader12: ; preds = %middle.block, %for.body.preheader + %i.09.ph = phi i32 [ 0, %for.body.preheader ], [ %n.vec, %middle.block ] + br label %for.body + +vector.ph: ; preds = %for.body.preheader + %n.vec = and i32 %n, -4 + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds float, float* %A, i32 %index + %1 = bitcast float* %0 to <4 x float>* + %wide.load = load <4 x float>, <4 x float>* %1, align 4 + %2 = getelementptr inbounds float, float* %B, i32 %index + %3 = bitcast float* 
%2 to <4 x float>* + %wide.load10 = load <4 x float>, <4 x float>* %3, align 4 + %4 = fmul fast <4 x float> %wide.load10, %wide.load + %5 = getelementptr inbounds float, float* %C, i32 %index + %6 = bitcast float* %5 to <4 x float>* + %wide.load11 = load <4 x float>, <4 x float>* %6, align 4 + %7 = fadd fast <4 x float> %wide.load11, %4 + %8 = bitcast float* %5 to <4 x float>* + store <4 x float> %7, <4 x float>* %8, align 4 + %index.next = add i32 %index, 4 + %9 = icmp eq i32 %index.next, %n.vec + br i1 %9, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body + %cmp.n = icmp eq i32 %n.vec, %n + br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader12 + +for.cond.cleanup: ; preds = %for.body, %middle.block, %entry + ret void + +for.body: ; preds = %for.body.preheader12, %for.body + %i.09 = phi i32 [ %inc, %for.body ], [ %i.09.ph, %for.body.preheader12 ] + %arrayidx = getelementptr inbounds float, float* %A, i32 %i.09 + %10 = load float, float* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.09 + %11 = load float, float* %arrayidx1, align 4 + %mul = fmul fast float %11, %10 + %arrayidx2 = getelementptr inbounds float, float* %C, i32 %i.09 + %12 = load float, float* %arrayidx2, align 4 + %add = fadd fast float %12, %mul + store float %add, float* %arrayidx2, align 4 + %inc = add nuw nsw i32 %i.09, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + + +; Same as above but tail predicated +; FIXME: The postinc here is put on the load, not the store. An extra mov is needed in the loop because of it. 
+define void @fma_tailpred(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %n) { +; CHECK-LABEL: fma_tailpred: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: blt .LBB1_3 +; CHECK-NEXT: @ %bb.1: @ %vector.ph +; CHECK-NEXT: add.w r12, r3, #3 +; CHECK-NEXT: adr r4, .LCPI1_0 +; CHECK-NEXT: bic r12, r12, #3 +; CHECK-NEXT: mov.w lr, #1 +; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: subs r3, #1 +; CHECK-NEXT: vldrw.u32 q0, [r4] +; CHECK-NEXT: vdup.32 q1, r3 +; CHECK-NEXT: add.w lr, lr, r12, lsr #2 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: mov r3, r2 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: .LBB1_2: @ %vector.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vdup.32 q2, r12 +; CHECK-NEXT: add.w r12, r12, #4 +; CHECK-NEXT: vorr q2, q2, q0 +; CHECK-NEXT: vpttt.u32 cs, q1, q2 +; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 +; CHECK-NEXT: vldrwt.u32 q3, [r1], #16 +; CHECK-NEXT: vldrwt.u32 q4, [r3], #16 +; CHECK-NEXT: vfma.f32 q4, q3, q2 +; CHECK-NEXT: vpst +; CHECK-NEXT: vstrwt.32 q4, [r2] +; CHECK-NEXT: mov r2, r3 +; CHECK-NEXT: le lr, .LBB1_2 +; CHECK-NEXT: .LBB1_3: @ %for.cond.cleanup +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.4: +; CHECK-NEXT: .LCPI1_0: +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .long 1 @ 0x1 +; CHECK-NEXT: .long 2 @ 0x2 +; CHECK-NEXT: .long 3 @ 0x3 +entry: + %cmp8 = icmp sgt i32 %n, 0 + br i1 %cmp8, label %vector.ph, label %for.cond.cleanup + +vector.ph: ; preds = %entry + %n.rnd.up = add i32 %n, 3 + %n.vec = and i32 %n.rnd.up, -4 + %trip.count.minus.1 = add i32 %n, -1 + %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 + %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer + br 
label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + %induction = or <4 x i32> %broadcast.splat, + %0 = getelementptr inbounds float, float* %A, i32 %index + %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 + %2 = bitcast float* %0 to <4 x float>* + %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) + %3 = getelementptr inbounds float, float* %B, i32 %index + %4 = bitcast float* %3 to <4 x float>* + %wide.masked.load12 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %4, i32 4, <4 x i1> %1, <4 x float> undef) + %5 = fmul fast <4 x float> %wide.masked.load12, %wide.masked.load + %6 = getelementptr inbounds float, float* %C, i32 %index + %7 = bitcast float* %6 to <4 x float>* + %wide.masked.load13 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %7, i32 4, <4 x i1> %1, <4 x float> undef) + %8 = fadd fast <4 x float> %wide.masked.load13, %5 + %9 = bitcast float* %6 to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %8, <4 x float>* %9, i32 4, <4 x i1> %1) + %index.next = add i32 %index, 4 + %10 = icmp eq i32 %index.next, %n.vec + br i1 %10, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body, %entry + ret void +} + + +; Multiple loads of the loop with a common base +define i8* @test(i8* nocapture readonly %input_row, i8* nocapture readonly %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, i32 %col_offset, i16 signext %activation_min, i16 zeroext %row_len, i32* nocapture readonly %bias, i8* returned %out) { +; CHECK-LABEL: test: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; 
CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #20 +; CHECK-NEXT: sub sp, #20 +; CHECK-NEXT: cmp r3, #4 +; CHECK-NEXT: strd r0, r1, [sp, #12] @ 8-byte Folded Spill +; CHECK-NEXT: bne .LBB2_8 +; CHECK-NEXT: @ %bb.1: @ %for.cond.preheader +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: beq .LBB2_8 +; CHECK-NEXT: @ %bb.2: @ %for.body.lr.ph +; CHECK-NEXT: ldr r3, [sp, #64] +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr.w r9, [sp, #56] +; CHECK-NEXT: add.w r0, r1, r3, lsl #1 +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: adds r0, r1, r3 +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: add.w r0, r3, r3, lsl #1 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-NEXT: adds r0, r3, #7 +; CHECK-NEXT: lsrs r0, r0, #3 +; CHECK-NEXT: b .LBB2_5 +; CHECK-NEXT: .LBB2_3: @ in Loop: Header=BB2_5 Depth=1 +; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: mov r10, r12 +; CHECK-NEXT: mov r6, r12 +; CHECK-NEXT: .LBB2_4: @ %for.cond.cleanup23 +; CHECK-NEXT: @ in Loop: Header=BB2_5 Depth=1 +; CHECK-NEXT: ldr r3, [sp, #72] +; CHECK-NEXT: add.w r1, r10, r8 +; CHECK-NEXT: add r1, r6 +; CHECK-NEXT: add r1, r12 +; CHECK-NEXT: strb.w r1, [r3, r11] +; CHECK-NEXT: add.w r11, r11, #1 +; CHECK-NEXT: cmp r11, r2 +; CHECK-NEXT: beq .LBB2_8 +; CHECK-NEXT: .LBB2_5: @ %for.body +; CHECK-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-NEXT: @ Child Loop BB2_7 Depth 2 +; CHECK-NEXT: ldr r1, [sp, #68] +; CHECK-NEXT: subs.w lr, r0, r0 +; CHECK-NEXT: ldr.w r12, [r1, r11, lsl #2] +; CHECK-NEXT: ble .LBB2_3 +; CHECK-NEXT: @ %bb.6: @ %for.body24.preheader +; CHECK-NEXT: @ in Loop: Header=BB2_5 Depth=1 +; CHECK-NEXT: ldr r3, [sp, #64] +; CHECK-NEXT: mov r6, r12 +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r10, r12 +; CHECK-NEXT: mla r7, r11, r3, r1 +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; 
CHECK-NEXT: ldrd r4, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: .LBB2_7: @ %for.body24 +; CHECK-NEXT: @ Parent Loop BB2_5 Depth=1 +; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-NEXT: vldrb.s16 q0, [r4], #8 +; CHECK-NEXT: vadd.i16 q1, q0, r9 +; CHECK-NEXT: vldrb.s16 q0, [r7], #8 +; CHECK-NEXT: vmlava.s16 r12, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r5], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r9 +; CHECK-NEXT: vmlava.s16 r6, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r3], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r9 +; CHECK-NEXT: vmlava.s16 r10, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r1], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r9 +; CHECK-NEXT: vmlava.s16 r8, q0, q1 +; CHECK-NEXT: le lr, .LBB2_7 +; CHECK-NEXT: b .LBB2_4 +; CHECK-NEXT: .LBB2_8: @ %if.end +; CHECK-NEXT: ldr r0, [sp, #72] +; CHECK-NEXT: add sp, #20 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +entry: + %cmp = icmp eq i16 %num_cols, 4 + br i1 %cmp, label %for.cond.preheader, label %if.end + +for.cond.preheader: ; preds = %entry + %conv2 = zext i16 %output_ch to i32 + %cmp3114 = icmp eq i16 %output_ch, 0 + br i1 %cmp3114, label %if.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %for.cond.preheader + %conv5 = zext i16 %row_len to i32 + %add.ptr9 = getelementptr inbounds i8, i8* %input_col, i32 %conv5 + %mul11 = shl nuw nsw i32 %conv5, 1 + %add.ptr12 = getelementptr inbounds i8, i8* %input_col, i32 %mul11 + %mul14 = mul nuw nsw i32 %conv5, 3 + %add.ptr15 = getelementptr inbounds i8, i8* %input_col, i32 %mul14 + %add = add nuw nsw i32 %conv5, 7 + %div = lshr i32 %add, 3 + %conv25 = trunc i32 %col_offset to i16 + %.splatinsert = insertelement <8 x i16> undef, i16 %conv25, i32 0 + %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer + br label %for.body + +for.body: ; preds = %for.cond.cleanup23, %for.body.lr.ph + %i_out_ch.0116 = phi i32 [ 0, %for.body.lr.ph ], [ %inc37, %for.cond.cleanup23 ] + %i_row_loop.0115 = phi i32 [ 
undef, %for.body.lr.ph ], [ %i_row_loop.1.lcssa, %for.cond.cleanup23 ] + %arrayidx = getelementptr inbounds i32, i32* %bias, i32 %i_out_ch.0116 + %0 = load i32, i32* %arrayidx, align 4 + %cmp2199 = icmp slt i32 %i_row_loop.0115, %div + br i1 %cmp2199, label %for.body24.preheader, label %for.cond.cleanup23 + +for.body24.preheader: ; preds = %for.body + %mul = mul nuw nsw i32 %i_out_ch.0116, %conv5 + %add.ptr = getelementptr inbounds i8, i8* %input_row, i32 %mul + br label %for.body24 + +for.cond.cleanup23: ; preds = %for.body24, %for.body + %acc_0.0.lcssa = phi i32 [ %0, %for.body ], [ %20, %for.body24 ] + %acc_1.0.lcssa = phi i32 [ %0, %for.body ], [ %21, %for.body24 ] + %acc_2.0.lcssa = phi i32 [ %0, %for.body ], [ %22, %for.body24 ] + %acc_3.0.lcssa = phi i32 [ %0, %for.body ], [ %23, %for.body24 ] + %i_row_loop.1.lcssa = phi i32 [ %i_row_loop.0115, %for.body ], [ %div, %for.body24 ] + %add31 = add nsw i32 %acc_1.0.lcssa, %acc_0.0.lcssa + %add32 = add nsw i32 %add31, %acc_2.0.lcssa + %add33 = add nsw i32 %add32, %acc_3.0.lcssa + %conv34 = trunc i32 %add33 to i8 + %arrayidx35 = getelementptr inbounds i8, i8* %out, i32 %i_out_ch.0116 + store i8 %conv34, i8* %arrayidx35, align 1 + %inc37 = add nuw nsw i32 %i_out_ch.0116, 1 + %exitcond120 = icmp eq i32 %inc37, %conv2 + br i1 %exitcond120, label %if.end, label %for.body + +for.body24: ; preds = %for.body24, %for.body24.preheader + %ip_r0.0109 = phi i8* [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ] + %ip_c0.0108 = phi i8* [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ] + %ip_c1.0107 = phi i8* [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ] + %ip_c2.0106 = phi i8* [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ] + %i_row_loop.1105 = phi i32 [ %inc, %for.body24 ], [ %i_row_loop.0115, %for.body24.preheader ] + %ip_c3.0104 = phi i8* [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ] + %acc_3.0103 = phi i32 [ %23, %for.body24 ], [ %0, 
%for.body24.preheader ] + %acc_2.0102 = phi i32 [ %22, %for.body24 ], [ %0, %for.body24.preheader ] + %acc_1.0101 = phi i32 [ %21, %for.body24 ], [ %0, %for.body24.preheader ] + %acc_0.0100 = phi i32 [ %20, %for.body24 ], [ %0, %for.body24.preheader ] + %1 = bitcast i8* %ip_r0.0109 to <8 x i8>* + %2 = load <8 x i8>, <8 x i8>* %1, align 1 + %3 = sext <8 x i8> %2 to <8 x i16> + %add.ptr26 = getelementptr inbounds i8, i8* %ip_r0.0109, i32 8 + %4 = bitcast i8* %ip_c0.0108 to <8 x i8>* + %5 = load <8 x i8>, <8 x i8>* %4, align 1 + %6 = sext <8 x i8> %5 to <8 x i16> + %add.ptr27 = getelementptr inbounds i8, i8* %ip_c0.0108, i32 8 + %7 = add <8 x i16> %.splat, %6 + %8 = bitcast i8* %ip_c1.0107 to <8 x i8>* + %9 = load <8 x i8>, <8 x i8>* %8, align 1 + %10 = sext <8 x i8> %9 to <8 x i16> + %add.ptr28 = getelementptr inbounds i8, i8* %ip_c1.0107, i32 8 + %11 = add <8 x i16> %.splat, %10 + %12 = bitcast i8* %ip_c2.0106 to <8 x i8>* + %13 = load <8 x i8>, <8 x i8>* %12, align 1 + %14 = sext <8 x i8> %13 to <8 x i16> + %add.ptr29 = getelementptr inbounds i8, i8* %ip_c2.0106, i32 8 + %15 = add <8 x i16> %.splat, %14 + %16 = bitcast i8* %ip_c3.0104 to <8 x i8>* + %17 = load <8 x i8>, <8 x i8>* %16, align 1 + %18 = sext <8 x i8> %17 to <8 x i16> + %add.ptr30 = getelementptr inbounds i8, i8* %ip_c3.0104, i32 8 + %19 = add <8 x i16> %.splat, %18 + %20 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %acc_0.0100, <8 x i16> %3, <8 x i16> %7) + %21 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %acc_1.0101, <8 x i16> %3, <8 x i16> %11) + %22 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %acc_2.0102, <8 x i16> %3, <8 x i16> %15) + %23 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %acc_3.0103, <8 x i16> %3, <8 x i16> %19) + %inc = add nsw i32 %i_row_loop.1105, 1 + %exitcond = icmp eq i32 %inc, %div + br i1 %exitcond, label %for.cond.cleanup23, label %for.body24 + +if.end: ; preds = 
%for.cond.cleanup23, %for.cond.preheader, %entry + ret i8* %out +} + +; Same as above with optsize +define i8* @test_optsize(i8* nocapture readonly %input_row, i8* nocapture readonly %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, i32 %col_offset, i16 signext %activation_min, i16 zeroext %row_len, i32* nocapture readonly %bias, i8* returned %out) optsize { +; CHECK-LABEL: test_optsize: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #20 +; CHECK-NEXT: sub sp, #20 +; CHECK-NEXT: cmp r3, #4 +; CHECK-NEXT: strd r0, r1, [sp, #12] @ 8-byte Folded Spill +; CHECK-NEXT: bne .LBB3_8 +; CHECK-NEXT: @ %bb.1: @ %for.cond.preheader +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: beq .LBB3_8 +; CHECK-NEXT: @ %bb.2: @ %for.body.lr.ph +; CHECK-NEXT: ldr r3, [sp, #64] +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr.w r9, [sp, #56] +; CHECK-NEXT: add.w r0, r1, r3, lsl #1 +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: adds r0, r1, r3 +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: add.w r0, r3, r3, lsl #1 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-NEXT: adds r0, r3, #7 +; CHECK-NEXT: lsrs r0, r0, #3 +; CHECK-NEXT: .LBB3_3: @ %for.body +; CHECK-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-NEXT: @ Child Loop BB3_5 Depth 2 +; CHECK-NEXT: ldr r1, [sp, #68] +; CHECK-NEXT: subs.w lr, r0, r0 +; CHECK-NEXT: ldr.w r12, [r1, r11, lsl #2] +; CHECK-NEXT: ble .LBB3_6 +; CHECK-NEXT: @ %bb.4: @ %for.body24.preheader +; CHECK-NEXT: @ in Loop: Header=BB3_3 Depth=1 +; CHECK-NEXT: ldr r3, [sp, #64] +; CHECK-NEXT: mov r6, r12 +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r10, r12 +; CHECK-NEXT: mla r7, r11, r3, r1 +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; 
CHECK-NEXT: ldrd r4, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: .LBB3_5: @ %for.body24 +; CHECK-NEXT: @ Parent Loop BB3_3 Depth=1 +; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-NEXT: vldrb.s16 q0, [r4], #8 +; CHECK-NEXT: vadd.i16 q1, q0, r9 +; CHECK-NEXT: vldrb.s16 q0, [r7], #8 +; CHECK-NEXT: vmlava.s16 r12, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r5], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r9 +; CHECK-NEXT: vmlava.s16 r6, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r3], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r9 +; CHECK-NEXT: vmlava.s16 r10, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r1], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r9 +; CHECK-NEXT: vmlava.s16 r8, q0, q1 +; CHECK-NEXT: le lr, .LBB3_5 +; CHECK-NEXT: b .LBB3_7 +; CHECK-NEXT: .LBB3_6: @ in Loop: Header=BB3_3 Depth=1 +; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: mov r10, r12 +; CHECK-NEXT: mov r6, r12 +; CHECK-NEXT: .LBB3_7: @ %for.cond.cleanup23 +; CHECK-NEXT: @ in Loop: Header=BB3_3 Depth=1 +; CHECK-NEXT: ldr r3, [sp, #72] +; CHECK-NEXT: add.w r1, r10, r8 +; CHECK-NEXT: add r1, r6 +; CHECK-NEXT: add r1, r12 +; CHECK-NEXT: strb.w r1, [r3, r11] +; CHECK-NEXT: add.w r11, r11, #1 +; CHECK-NEXT: cmp r11, r2 +; CHECK-NEXT: bne .LBB3_3 +; CHECK-NEXT: .LBB3_8: @ %if.end +; CHECK-NEXT: ldr r0, [sp, #72] +; CHECK-NEXT: add sp, #20 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +entry: + %cmp = icmp eq i16 %num_cols, 4 + br i1 %cmp, label %for.cond.preheader, label %if.end + +for.cond.preheader: ; preds = %entry + %conv2 = zext i16 %output_ch to i32 + %cmp3114 = icmp eq i16 %output_ch, 0 + br i1 %cmp3114, label %if.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %for.cond.preheader + %conv5 = zext i16 %row_len to i32 + %add.ptr9 = getelementptr inbounds i8, i8* %input_col, i32 %conv5 + %mul11 = shl nuw nsw i32 %conv5, 1 + %add.ptr12 = getelementptr inbounds i8, i8* %input_col, i32 %mul11 + %mul14 = mul nuw nsw i32 %conv5, 3 + %add.ptr15 = getelementptr inbounds i8, i8* %input_col, 
i32 %mul14 + %add = add nuw nsw i32 %conv5, 7 + %div = lshr i32 %add, 3 + %conv25 = trunc i32 %col_offset to i16 + %.splatinsert = insertelement <8 x i16> undef, i16 %conv25, i32 0 + %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer + br label %for.body + +for.body: ; preds = %for.cond.cleanup23, %for.body.lr.ph + %i_out_ch.0116 = phi i32 [ 0, %for.body.lr.ph ], [ %inc37, %for.cond.cleanup23 ] + %i_row_loop.0115 = phi i32 [ undef, %for.body.lr.ph ], [ %i_row_loop.1.lcssa, %for.cond.cleanup23 ] + %arrayidx = getelementptr inbounds i32, i32* %bias, i32 %i_out_ch.0116 + %0 = load i32, i32* %arrayidx, align 4 + %cmp2199 = icmp slt i32 %i_row_loop.0115, %div + br i1 %cmp2199, label %for.body24.preheader, label %for.cond.cleanup23 + +for.body24.preheader: ; preds = %for.body + %mul = mul nuw nsw i32 %i_out_ch.0116, %conv5 + %add.ptr = getelementptr inbounds i8, i8* %input_row, i32 %mul + br label %for.body24 + +for.cond.cleanup23: ; preds = %for.body24, %for.body + %acc_0.0.lcssa = phi i32 [ %0, %for.body ], [ %20, %for.body24 ] + %acc_1.0.lcssa = phi i32 [ %0, %for.body ], [ %21, %for.body24 ] + %acc_2.0.lcssa = phi i32 [ %0, %for.body ], [ %22, %for.body24 ] + %acc_3.0.lcssa = phi i32 [ %0, %for.body ], [ %23, %for.body24 ] + %i_row_loop.1.lcssa = phi i32 [ %i_row_loop.0115, %for.body ], [ %div, %for.body24 ] + %add31 = add nsw i32 %acc_1.0.lcssa, %acc_0.0.lcssa + %add32 = add nsw i32 %add31, %acc_2.0.lcssa + %add33 = add nsw i32 %add32, %acc_3.0.lcssa + %conv34 = trunc i32 %add33 to i8 + %arrayidx35 = getelementptr inbounds i8, i8* %out, i32 %i_out_ch.0116 + store i8 %conv34, i8* %arrayidx35, align 1 + %inc37 = add nuw nsw i32 %i_out_ch.0116, 1 + %exitcond120 = icmp eq i32 %inc37, %conv2 + br i1 %exitcond120, label %if.end, label %for.body + +for.body24: ; preds = %for.body24, %for.body24.preheader + %ip_r0.0109 = phi i8* [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ] + %ip_c0.0108 = phi i8* [ %add.ptr27, 
%for.body24 ], [ %input_col, %for.body24.preheader ] + %ip_c1.0107 = phi i8* [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ] + %ip_c2.0106 = phi i8* [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ] + %i_row_loop.1105 = phi i32 [ %inc, %for.body24 ], [ %i_row_loop.0115, %for.body24.preheader ] + %ip_c3.0104 = phi i8* [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ] + %acc_3.0103 = phi i32 [ %23, %for.body24 ], [ %0, %for.body24.preheader ] + %acc_2.0102 = phi i32 [ %22, %for.body24 ], [ %0, %for.body24.preheader ] + %acc_1.0101 = phi i32 [ %21, %for.body24 ], [ %0, %for.body24.preheader ] + %acc_0.0100 = phi i32 [ %20, %for.body24 ], [ %0, %for.body24.preheader ] + %1 = bitcast i8* %ip_r0.0109 to <8 x i8>* + %2 = load <8 x i8>, <8 x i8>* %1, align 1 + %3 = sext <8 x i8> %2 to <8 x i16> + %add.ptr26 = getelementptr inbounds i8, i8* %ip_r0.0109, i32 8 + %4 = bitcast i8* %ip_c0.0108 to <8 x i8>* + %5 = load <8 x i8>, <8 x i8>* %4, align 1 + %6 = sext <8 x i8> %5 to <8 x i16> + %add.ptr27 = getelementptr inbounds i8, i8* %ip_c0.0108, i32 8 + %7 = add <8 x i16> %.splat, %6 + %8 = bitcast i8* %ip_c1.0107 to <8 x i8>* + %9 = load <8 x i8>, <8 x i8>* %8, align 1 + %10 = sext <8 x i8> %9 to <8 x i16> + %add.ptr28 = getelementptr inbounds i8, i8* %ip_c1.0107, i32 8 + %11 = add <8 x i16> %.splat, %10 + %12 = bitcast i8* %ip_c2.0106 to <8 x i8>* + %13 = load <8 x i8>, <8 x i8>* %12, align 1 + %14 = sext <8 x i8> %13 to <8 x i16> + %add.ptr29 = getelementptr inbounds i8, i8* %ip_c2.0106, i32 8 + %15 = add <8 x i16> %.splat, %14 + %16 = bitcast i8* %ip_c3.0104 to <8 x i8>* + %17 = load <8 x i8>, <8 x i8>* %16, align 1 + %18 = sext <8 x i8> %17 to <8 x i16> + %add.ptr30 = getelementptr inbounds i8, i8* %ip_c3.0104, i32 8 + %19 = add <8 x i16> %.splat, %18 + %20 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %acc_0.0100, <8 x i16> %3, <8 x i16> %7) + %21 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, 
i32 0, i32 0, i32 %acc_1.0101, <8 x i16> %3, <8 x i16> %11) + %22 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %acc_2.0102, <8 x i16> %3, <8 x i16> %15) + %23 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %acc_3.0103, <8 x i16> %3, <8 x i16> %19) + %inc = add nsw i32 %i_row_loop.1105, 1 + %exitcond = icmp eq i32 %inc, %div + br i1 %exitcond, label %for.cond.cleanup23, label %for.body24 + +if.end: ; preds = %for.cond.cleanup23, %for.cond.preheader, %entry + ret i8* %out +} + + +; Similar but predicated +define i32 @arm_nn_mat_mul_core_4x_s8(i32 %row_elements, i32 %offset, i8* %row_base, i8* %col_base, i32* nocapture readnone %sum_col, i32* nocapture %output) { +; CHECK-LABEL: arm_nn_mat_mul_core_4x_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r10, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r10, lr} +; CHECK-NEXT: add.w r7, r0, #15 +; CHECK-NEXT: ldr.w r12, [sp, #32] +; CHECK-NEXT: mov.w lr, #1 +; CHECK-NEXT: asrs r6, r7, #31 +; CHECK-NEXT: add.w r4, r7, r6, lsr #28 +; CHECK-NEXT: asrs r5, r4, #4 +; CHECK-NEXT: cmp r5, #1 +; CHECK-NEXT: it gt +; CHECK-NEXT: asrgt.w lr, r4, #4 +; CHECK-NEXT: cmp r0, #1 +; CHECK-NEXT: blt .LBB4_3 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-NEXT: adds r5, r2, r1 +; CHECK-NEXT: add.w r7, r2, r1, lsl #1 +; CHECK-NEXT: add.w r1, r1, r1, lsl #1 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: add r1, r2 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: dlstp.8 lr, r0 +; CHECK-NEXT: .LBB4_2: @ %for.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrb.u8 q0, [r3], #16 +; CHECK-NEXT: vldrb.u8 q1, [r1], #16 +; CHECK-NEXT: vmlava.s8 r10, q1, q0 +; CHECK-NEXT: vldrb.u8 q1, [r7], #16 +; CHECK-NEXT: vmlava.s8 r4, q1, q0 +; CHECK-NEXT: vldrb.u8 q1, [r5], #16 +; CHECK-NEXT: vmlava.s8 r6, q1, q0 +; CHECK-NEXT: vldrb.u8 q1, [r2], #16 +; CHECK-NEXT: vmlava.s8 r8, q1, q0 +; CHECK-NEXT: letp lr, 
.LBB4_2 +; CHECK-NEXT: b .LBB4_4 +; CHECK-NEXT: .LBB4_3: +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: .LBB4_4: @ %for.cond.cleanup +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: strd r8, r6, [r12] +; CHECK-NEXT: strd r4, r10, [r12, #8] +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r10, pc} +entry: + %add = add nsw i32 %row_elements, 15 + %div = sdiv i32 %add, 16 + %cmp84 = icmp sgt i32 %row_elements, 0 + br i1 %cmp84, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + %mul2 = mul nsw i32 %offset, 3 + %add.ptr3 = getelementptr inbounds i8, i8* %row_base, i32 %mul2 + %mul = shl nsw i32 %offset, 1 + %add.ptr1 = getelementptr inbounds i8, i8* %row_base, i32 %mul + %add.ptr = getelementptr inbounds i8, i8* %row_base, i32 %offset + %0 = icmp sgt i32 %div, 1 + %smax = select i1 %0, i32 %div, i32 1 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %acc_n.sroa.12.0.lcssa = phi i32 [ 0, %entry ], [ %15, %for.body ] + %acc_n.sroa.9.0.lcssa = phi i32 [ 0, %entry ], [ %12, %for.body ] + %acc_n.sroa.6.0.lcssa = phi i32 [ 0, %entry ], [ %9, %for.body ] + %acc_n.sroa.0.0.lcssa = phi i32 [ 0, %entry ], [ %6, %for.body ] + store i32 %acc_n.sroa.0.0.lcssa, i32* %output, align 4 + %arrayidx19 = getelementptr inbounds i32, i32* %output, i32 1 + store i32 %acc_n.sroa.6.0.lcssa, i32* %arrayidx19, align 4 + %arrayidx21 = getelementptr inbounds i32, i32* %output, i32 2 + store i32 %acc_n.sroa.9.0.lcssa, i32* %arrayidx21, align 4 + %arrayidx23 = getelementptr inbounds i32, i32* %output, i32 3 + store i32 %acc_n.sroa.12.0.lcssa, i32* %arrayidx23, align 4 + ret i32 0 + +for.body: ; preds = %for.body, %for.body.preheader + %col_base.addr.095 = phi i8* [ %add.ptr4, %for.body ], [ %col_base, %for.body.preheader ] + %acc_n.sroa.0.094 = phi i32 [ %6, %for.body ], [ 0, %for.body.preheader ] + %acc_n.sroa.6.093 = phi i32 [ %9, %for.body ], [ 0, %for.body.preheader ] 
+ %acc_n.sroa.9.092 = phi i32 [ %12, %for.body ], [ 0, %for.body.preheader ] + %i.091 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %row_elem.090 = phi i32 [ %sub, %for.body ], [ %row_elements, %for.body.preheader ] + %acc_n.sroa.12.089 = phi i32 [ %15, %for.body ], [ 0, %for.body.preheader ] + %ip_row_3.088 = phi i8* [ %add.ptr15, %for.body ], [ %add.ptr3, %for.body.preheader ] + %ip_row_2.087 = phi i8* [ %add.ptr14, %for.body ], [ %add.ptr1, %for.body.preheader ] + %ip_row_1.086 = phi i8* [ %add.ptr13, %for.body ], [ %add.ptr, %for.body.preheader ] + %ip_row_0.085 = phi i8* [ %add.ptr12, %for.body ], [ %row_base, %for.body.preheader ] + %1 = tail call <16 x i1> @llvm.arm.mve.vctp8(i32 %row_elem.090) + %sub = add nsw i32 %row_elem.090, -16 + %2 = bitcast i8* %col_base.addr.095 to <16 x i8>* + %3 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %2, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) + %add.ptr4 = getelementptr inbounds i8, i8* %col_base.addr.095, i32 16 + %4 = bitcast i8* %ip_row_0.085 to <16 x i8>* + %5 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %4, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) + %6 = tail call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 0, i32 0, i32 %acc_n.sroa.0.094, <16 x i8> %5, <16 x i8> %3, <16 x i1> %1) + %7 = bitcast i8* %ip_row_1.086 to <16 x i8>* + %8 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %7, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) + %9 = tail call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 0, i32 0, i32 %acc_n.sroa.6.093, <16 x i8> %8, <16 x i8> %3, <16 x i1> %1) + %10 = bitcast i8* %ip_row_2.087 to <16 x i8>* + %11 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %10, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) + %12 = tail call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 0, i32 0, i32 %acc_n.sroa.9.092, <16 x i8> %11, <16 x i8> %3, <16 x i1> %1) + %13 = bitcast i8* 
%ip_row_3.088 to <16 x i8>* + %14 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %13, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) + %15 = tail call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 0, i32 0, i32 %acc_n.sroa.12.089, <16 x i8> %14, <16 x i8> %3, <16 x i1> %1) + %add.ptr12 = getelementptr inbounds i8, i8* %ip_row_0.085, i32 16 + %add.ptr13 = getelementptr inbounds i8, i8* %ip_row_1.086, i32 16 + %add.ptr14 = getelementptr inbounds i8, i8* %ip_row_2.087, i32 16 + %add.ptr15 = getelementptr inbounds i8, i8* %ip_row_3.088, i32 16 + %inc = add nuw nsw i32 %i.091, 1 + %exitcond = icmp eq i32 %inc, %smax + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +declare <16 x i1> @llvm.arm.mve.vctp8(i32) +declare i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1>) +declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #1 +declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>) #2 +declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) +declare i32 @llvm.experimental.vector.reduce.add.v16i8(<16 x i32> %ext4) +declare i32 @llvm.arm.mve.vmldava.v8i16(i32, i32, i32, i32, <8 x i16>, <8 x i16>) +declare i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32, i32, i32, i32, <16 x i8>, <16 x i8>, <16 x i1>) diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll index 793b4e39284fe9..76b5f6ff228fe2 100644 --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -93,6 +93,65 @@ define i32 @length2(i8* %X, i8* %Y) nounwind { ret i32 %m } +define i32 @length2_const(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length2_const: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax +; X86-NEXT: movzwl .L.str+1, %ecx +; X86-NEXT: rolw $8, %ax +; X86-NEXT: rolw $8, %cx +; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: movzwl %cx, %ecx +; X86-NEXT: subl 
%ecx, %eax +; X86-NEXT: retl +; +; X64-LABEL: length2_const: +; X64: # %bb.0: +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: movzwl .L.str+{{.*}}(%rip), %ecx +; X64-NEXT: rolw $8, %ax +; X64-NEXT: rolw $8, %cx +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: movzwl %cx, %ecx +; X64-NEXT: subl %ecx, %eax +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind + ret i32 %m +} + +define i1 @length2_gt_const(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length2_gt_const: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax +; X86-NEXT: movzwl .L.str+1, %ecx +; X86-NEXT: rolw $8, %ax +; X86-NEXT: rolw $8, %cx +; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: movzwl %cx, %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length2_gt_const: +; X64: # %bb.0: +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: movzwl .L.str+{{.*}}(%rip), %ecx +; X64-NEXT: rolw $8, %ax +; X64-NEXT: rolw $8, %cx +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: movzwl %cx, %ecx +; X64-NEXT: subl %ecx, %eax +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + define i1 @length2_eq(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length2_eq: ; X86: # %bb.0: @@ -238,14 +297,14 @@ define i32 @length3(i8* %X, i8* %Y) nounwind { ; X86-NEXT: rolw $8, %dx ; X86-NEXT: rolw $8, %si ; X86-NEXT: cmpw %si, %dx -; X86-NEXT: jne .LBB9_1 +; X86-NEXT: jne .LBB11_1 ; X86-NEXT: # %bb.2: # %loadbb1 ; X86-NEXT: movzbl 2(%eax), %eax ; X86-NEXT: movzbl 2(%ecx), %ecx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl -; X86-NEXT: .LBB9_1: # %res_block +; X86-NEXT: .LBB11_1: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal 
-1(%eax,%eax), %eax @@ -259,13 +318,13 @@ define i32 @length3(i8* %X, i8* %Y) nounwind { ; X64-NEXT: rolw $8, %ax ; X64-NEXT: rolw $8, %cx ; X64-NEXT: cmpw %cx, %ax -; X64-NEXT: jne .LBB9_1 +; X64-NEXT: jne .LBB11_1 ; X64-NEXT: # %bb.2: # %loadbb1 ; X64-NEXT: movzbl 2(%rdi), %eax ; X64-NEXT: movzbl 2(%rsi), %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq -; X64-NEXT: .LBB9_1: # %res_block +; X64-NEXT: .LBB11_1: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax @@ -453,14 +512,14 @@ define i32 @length5(i8* %X, i8* %Y) nounwind { ; X86-NEXT: bswapl %edx ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB16_1 +; X86-NEXT: jne .LBB18_1 ; X86-NEXT: # %bb.2: # %loadbb1 ; X86-NEXT: movzbl 4(%eax), %eax ; X86-NEXT: movzbl 4(%ecx), %ecx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl -; X86-NEXT: .LBB16_1: # %res_block +; X86-NEXT: .LBB18_1: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax @@ -474,13 +533,13 @@ define i32 @length5(i8* %X, i8* %Y) nounwind { ; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB16_1 +; X64-NEXT: jne .LBB18_1 ; X64-NEXT: # %bb.2: # %loadbb1 ; X64-NEXT: movzbl 4(%rdi), %eax ; X64-NEXT: movzbl 4(%rsi), %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq -; X64-NEXT: .LBB16_1: # %res_block +; X64-NEXT: .LBB18_1: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax @@ -529,17 +588,17 @@ define i1 @length5_lt(i8* %X, i8* %Y) nounwind { ; X86-NEXT: bswapl %edx ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB18_1 +; X86-NEXT: jne .LBB20_1 ; X86-NEXT: # %bb.2: # %loadbb1 ; X86-NEXT: movzbl 4(%eax), %eax ; X86-NEXT: movzbl 4(%ecx), %ecx ; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB18_3 -; X86-NEXT: .LBB18_1: # %res_block +; X86-NEXT: jmp .LBB20_3 +; X86-NEXT: .LBB20_1: # %res_block ; 
X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax -; X86-NEXT: .LBB18_3: # %endblock +; X86-NEXT: .LBB20_3: # %endblock ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: popl %esi @@ -552,7 +611,7 @@ define i1 @length5_lt(i8* %X, i8* %Y) nounwind { ; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB18_1 +; X64-NEXT: jne .LBB20_1 ; X64-NEXT: # %bb.2: # %loadbb1 ; X64-NEXT: movzbl 4(%rdi), %eax ; X64-NEXT: movzbl 4(%rsi), %ecx @@ -560,7 +619,7 @@ define i1 @length5_lt(i8* %X, i8* %Y) nounwind { ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq -; X64-NEXT: .LBB18_1: # %res_block +; X64-NEXT: .LBB20_1: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax @@ -610,7 +669,7 @@ define i32 @length8(i8* %X, i8* %Y) nounwind { ; X86-NEXT: bswapl %ecx ; X86-NEXT: bswapl %edx ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB20_2 +; X86-NEXT: jne .LBB22_2 ; X86-NEXT: # %bb.1: # %loadbb1 ; X86-NEXT: movl 4(%esi), %ecx ; X86-NEXT: movl 4(%eax), %edx @@ -618,13 +677,13 @@ define i32 @length8(i8* %X, i8* %Y) nounwind { ; X86-NEXT: bswapl %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB20_3 -; X86-NEXT: .LBB20_2: # %res_block +; X86-NEXT: je .LBB22_3 +; X86-NEXT: .LBB22_2: # %res_block ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx ; X86-NEXT: setae %al ; X86-NEXT: leal -1(%eax,%eax), %eax -; X86-NEXT: .LBB20_3: # %endblock +; X86-NEXT: .LBB22_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -818,7 +877,7 @@ define i32 @length12(i8* %X, i8* %Y) nounwind { ; X64-NEXT: bswapq %rcx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB27_2 +; X64-NEXT: jne .LBB29_2 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movl 8(%rdi), %ecx ; X64-NEXT: movl 8(%rsi), %edx @@ -826,13 +885,13 @@ define i32 @length12(i8* 
%X, i8* %Y) nounwind { ; X64-NEXT: bswapl %edx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB27_3 -; X64-NEXT: .LBB27_2: # %res_block +; X64-NEXT: je .LBB29_3 +; X64-NEXT: .LBB29_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: setae %al ; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB27_3: # %endblock +; X64-NEXT: .LBB29_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind ret i32 %m @@ -892,6 +951,26 @@ define i1 @length14_eq(i8* %X, i8* %Y) nounwind { ret i1 %c } +define i32 @length15_const(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length15_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $15 +; X86-NEXT: pushl $.L.str+1 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length15_const: +; X64: # %bb.0: +; X64-NEXT: movl $.L.str+1, %esi +; X64-NEXT: movl $15, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 15) nounwind + ret i32 %m +} + define i1 @length15_eq(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length15_eq: ; X86: # %bb.0: @@ -919,6 +998,34 @@ define i1 @length15_eq(i8* %X, i8* %Y) nounwind { ret i1 %c } +define i1 @length15_gt_const(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length15_gt_const: +; X86: # %bb.0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $15 +; X86-NEXT: pushl $.L.str+1 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setg %al +; X86-NEXT: retl +; +; X64-LABEL: length15_gt_const: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $.L.str+1, %esi +; X64-NEXT: movl $15, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setg %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds 
([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 15) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + ; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 define i32 @length16(i8* %X, i8* %Y) nounwind { @@ -939,7 +1046,7 @@ define i32 @length16(i8* %X, i8* %Y) nounwind { ; X64-NEXT: bswapq %rcx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB31_2 +; X64-NEXT: jne .LBB35_2 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rcx ; X64-NEXT: movq 8(%rsi), %rdx @@ -947,13 +1054,13 @@ define i32 @length16(i8* %X, i8* %Y) nounwind { ; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB31_3 -; X64-NEXT: .LBB31_2: # %res_block +; X64-NEXT: je .LBB35_3 +; X64-NEXT: .LBB35_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: setae %al ; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB31_3: # %endblock +; X64-NEXT: .LBB35_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind ret i32 %m @@ -1068,7 +1175,7 @@ define i1 @length16_lt(i8* %x, i8* %y) nounwind { ; X64-NEXT: bswapq %rcx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB33_2 +; X64-NEXT: jne .LBB37_2 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rcx ; X64-NEXT: movq 8(%rsi), %rdx @@ -1076,13 +1183,13 @@ define i1 @length16_lt(i8* %x, i8* %y) nounwind { ; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB33_3 -; X64-NEXT: .LBB33_2: # %res_block +; X64-NEXT: je .LBB37_3 +; X64-NEXT: .LBB37_2: # %res_block ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: setae %al ; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB33_3: # %endblock +; X64-NEXT: .LBB37_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -1111,7 +1218,7 @@ define i1 @length16_gt(i8* %x, i8* %y) nounwind { ; X64-NEXT: bswapq %rax ; 
X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB34_2 +; X64-NEXT: jne .LBB38_2 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rax ; X64-NEXT: movq 8(%rsi), %rcx @@ -1119,13 +1226,13 @@ define i1 @length16_gt(i8* %x, i8* %y) nounwind { ; X64-NEXT: bswapq %rcx ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB34_3 -; X64-NEXT: .LBB34_2: # %res_block +; X64-NEXT: je .LBB38_3 +; X64-NEXT: .LBB38_2: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: setae %dl ; X64-NEXT: leal -1(%rdx,%rdx), %edx -; X64-NEXT: .LBB34_3: # %endblock +; X64-NEXT: .LBB38_3: # %endblock ; X64-NEXT: testl %edx, %edx ; X64-NEXT: setg %al ; X64-NEXT: retq diff --git a/llvm/test/DebugInfo/X86/debug-macinfo-split-dwarf.ll b/llvm/test/DebugInfo/X86/debug-macinfo-split-dwarf.ll index e75f138354e9ae..f7cbff4013ca77 100644 --- a/llvm/test/DebugInfo/X86/debug-macinfo-split-dwarf.ll +++ b/llvm/test/DebugInfo/X86/debug-macinfo-split-dwarf.ll @@ -8,6 +8,7 @@ ; CHECK: DW_AT_macro_info (0x00000000) ;CHECK-LABEL:.debug_macinfo.dwo contents: +;CHECK-NEXT: 0x00000000: ;CHECK-NEXT: DW_MACINFO_start_file - lineno: 0 filenum: 1 ;CHECK-NEXT: DW_MACINFO_start_file - lineno: 1 filenum: 2 ;CHECK-NEXT: DW_MACINFO_define - lineno: 1 macro: define_1 12 diff --git a/llvm/test/DebugInfo/X86/debug-macro.ll b/llvm/test/DebugInfo/X86/debug-macro.ll index 6a5ae1e7021631..fbcfab6610d5e5 100644 --- a/llvm/test/DebugInfo/X86/debug-macro.ll +++ b/llvm/test/DebugInfo/X86/debug-macro.ll @@ -16,6 +16,7 @@ ; CHECK-NOT: DW_AT_macro_info ; CHECK-LABEL: .debug_macinfo contents: +; CHECK-NEXT: 0x00000000: ; CHECK-NEXT: DW_MACINFO_define - lineno: 0 macro: NameCMD ValueCMD ; CHECK-NEXT: DW_MACINFO_start_file - lineno: 0 filenum: 1 ; CHECK-NEXT: DW_MACINFO_start_file - lineno: 9 filenum: 2 @@ -24,8 +25,9 @@ ; CHECK-NEXT: DW_MACINFO_end_file ; CHECK-NEXT: DW_MACINFO_undef - lineno: 10 macro: NameUndef2 ; CHECK-NEXT: DW_MACINFO_end_file - -; CHECK: 
DW_MACINFO_start_file - lineno: 0 filenum: 1 +; CHECK-EMPTY: +; CHECK-NEXT: 0x00000045: +; CHECK-NEXT: DW_MACINFO_start_file - lineno: 0 filenum: 1 ; CHECK-NEXT: DW_MACINFO_end_file ; CHECK-LABEL: .debug_line contents: diff --git a/llvm/test/DebugInfo/duplicate_dbgvalue.ll b/llvm/test/DebugInfo/duplicate_dbgvalue.ll new file mode 100644 index 00000000000000..2145b6ef5c83bf --- /dev/null +++ b/llvm/test/DebugInfo/duplicate_dbgvalue.ll @@ -0,0 +1,169 @@ +; RUN: opt -instcombine -S -o - < %s | FileCheck %s + +; CHECK-LABEL: %4 = load i32, i32* %i1_311 +; CHECK: call void @llvm.dbg.value(metadata i32 %4 +; Next instruction should not be duplicate dbg.value intrinsic. +; CHECK-NEXT: @f90io_sc_i_ldw + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;program main +;integer :: res +; res = mfun() +; print *, res +;contains +; function mfun() +; integer :: i1 +; i1 = 5 +; mfun = fun(i1) +; write (*,*) i1 +; end function +; function fun(a) +; integer, intent (in) :: a +; fun = a +; end function +;end program main +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; ModuleID = 'duplicate_dbgvalue.ll' +source_filename = "duplicate_dbgvalue.ll" +target datalayout = "e-p:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.BSS1 = type <{ [4 x i8] }> + +@.BSS1 = internal unnamed_addr global %struct.BSS1 zeroinitializer, align 32, !dbg !0 +@.C303_MAIN_ = internal constant i32 6 +@.C300_MAIN_ = internal constant [22 x i8] c"duplicate_dbgvalue.f90" +@.C302_MAIN_ = internal constant i32 4 +@.C283_MAIN_ = internal constant i32 0 +@.C283_main_mfun = internal constant i32 0 +@.C302_main_mfun = internal constant i32 6 +@.C300_main_mfun = internal constant [22 x i8] c"duplicate_dbgvalue.f90" +@.C313_main_mfun = internal constant i32 10 + +define void @MAIN_() local_unnamed_addr !dbg !2 { +L.entry: + call void (i8*, ...) 
bitcast (void (...)* @fort_init to void (i8*, ...)*)(i8* bitcast (i32* @.C283_MAIN_ to i8*)), !dbg !16 + %0 = call fastcc i32 @main_mfun(), !dbg !18 + store i32 %0, i32* bitcast (%struct.BSS1* @.BSS1 to i32*), align 32, !dbg !18 + call void (i8*, i8*, i64, ...) bitcast (void (...)* @f90io_src_info03a to void (i8*, i8*, i64, ...)*)(i8* bitcast (i32* @.C302_MAIN_ to i8*), i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.C300_MAIN_, i64 0, i64 0), i64 22), !dbg !23 + %1 = call i32 (i8*, i8*, i8*, i8*, ...) bitcast (i32 (...)* @f90io_print_init to i32 (i8*, i8*, i8*, i8*, ...)*)(i8* bitcast (i32* @.C303_MAIN_ to i8*), i8* null, i8* bitcast (i32* @.C283_MAIN_ to i8*), i8* bitcast (i32* @.C283_MAIN_ to i8*)), !dbg !23 + call void @llvm.dbg.value(metadata i32 %1, metadata !24, metadata !DIExpression()), !dbg !25 + %2 = load i32, i32* bitcast (%struct.BSS1* @.BSS1 to i32*), align 32, !dbg !23 + %3 = call i32 (i32, i32, ...) bitcast (i32 (...)* @f90io_sc_i_ldw to i32 (i32, i32, ...)*)(i32 %2, i32 25), !dbg !23 + call void @llvm.dbg.value(metadata i32 %3, metadata !24, metadata !DIExpression()), !dbg !25 + %4 = call i32 (...) @f90io_ldw_end(), !dbg !23 + call void @llvm.dbg.value(metadata i32 %4, metadata !24, metadata !DIExpression()), !dbg !25 + ret void, !dbg !26 +} + +define internal fastcc signext i32 @main_mfun() unnamed_addr !dbg !27 { +L.entry: + %i1_311 = alloca i32, align 4 + call void @llvm.dbg.declare(metadata i64* undef, metadata !31, metadata !DIExpression()), !dbg !33 + call void @llvm.dbg.declare(metadata i32* %i1_311, metadata !35, metadata !DIExpression()), !dbg !33 + store i32 5, i32* %i1_311, align 4, !dbg !36 + %0 = bitcast i32* %i1_311 to i64*, !dbg !41 + %1 = call fastcc float @main_fun(i64* %0), !dbg !41 + %2 = fptosi float %1 to i32, !dbg !41 + call void (i8*, i8*, i64, ...) 
bitcast (void (...)* @f90io_src_info03a to void (i8*, i8*, i64, ...)*)(i8* bitcast (i32* @.C313_main_mfun to i8*), i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.C300_main_mfun, i32 0, i32 0), i64 22), !dbg !42 + %3 = call i32 (i8*, i8*, i8*, i8*, ...) bitcast (i32 (...)* @f90io_print_init to i32 (i8*, i8*, i8*, i8*, ...)*)(i8* bitcast (i32* @.C302_main_mfun to i8*), i8* null, i8* bitcast (i32* @.C283_main_mfun to i8*), i8* bitcast (i32* @.C283_main_mfun to i8*)), !dbg !42 + call void @llvm.dbg.value(metadata i32 %3, metadata !43, metadata !DIExpression()), !dbg !33 + %4 = load i32, i32* %i1_311, align 4, !dbg !42 + call void @llvm.dbg.value(metadata i32 %4, metadata !35, metadata !DIExpression()), !dbg !33 + %5 = call i32 (i32, i32, ...) bitcast (i32 (...)* @f90io_sc_i_ldw to i32 (i32, i32, ...)*)(i32 %4, i32 25), !dbg !42 + call void @llvm.dbg.value(metadata i32 %5, metadata !43, metadata !DIExpression()), !dbg !33 + %6 = call i32 (...) @f90io_ldw_end(), !dbg !42 + call void @llvm.dbg.value(metadata i32 %6, metadata !43, metadata !DIExpression()), !dbg !33 + ret i32 %2, !dbg !44 +} + +define internal fastcc float @main_fun(i64* noalias %a) unnamed_addr !dbg !45 { +L.entry: + call void @llvm.dbg.declare(metadata i64* %a, metadata !50, metadata !DIExpression()), !dbg !51 + call void @llvm.dbg.declare(metadata i64* undef, metadata !53, metadata !DIExpression()), !dbg !51 + %0 = bitcast i64* %a to i32*, !dbg !54 + %1 = load i32, i32* %0, align 4, !dbg !54 + %2 = sitofp i32 %1 to float, !dbg !54 + ret float %2, !dbg !59 +} + +declare signext i32 @f90io_ldw_end(...) local_unnamed_addr + +declare signext i32 @f90io_sc_i_ldw(...) local_unnamed_addr + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +declare signext i32 @f90io_print_init(...) local_unnamed_addr + +declare void @f90io_src_info03a(...) local_unnamed_addr + +declare void @fort_init(...) 
local_unnamed_addr + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.module.flags = !{!14, !15} +!llvm.dbg.cu = !{!4} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "res", scope: !2, file: !3, type: !9, isLocal: true, isDefinition: true) +!2 = distinct !DISubprogram(name: "main", scope: !4, file: !3, line: 1, type: !12, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagMainSubprogram, unit: !4) +!3 = !DIFile(filename: "duplicate-dbgvalue.f90", directory: "/dir") +!4 = distinct !DICompileUnit(language: DW_LANG_Fortran90, file: !3, producer: " F90 Flang - 1.5", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !5, retainedTypes: !5, globals: !6, imports: !5) +!5 = !{} +!6 = !{!0, !7, !10} +!7 = !DIGlobalVariableExpression(var: !8, expr: !DIExpression()) +!8 = distinct !DIGlobalVariable(name: "res", scope: !4, file: !3, type: !9, isLocal: true, isDefinition: true) +!9 = !DIBasicType(name: "integer", size: 32, align: 32, encoding: DW_ATE_signed) +!10 = !DIGlobalVariableExpression(var: !11, expr: !DIExpression()) +!11 = distinct !DIGlobalVariable(name: "res", scope: !4, file: !3, type: !9, isLocal: true, isDefinition: true) +!12 = !DISubroutineType(cc: DW_CC_program, types: !13) +!13 = !{null} +!14 = !{i32 2, !"Dwarf Version", i32 4} +!15 = !{i32 2, !"Debug Info Version", i32 3} +!16 = !DILocation(line: 1, column: 1, scope: !17) +!17 = !DILexicalBlock(scope: !2, file: !3, line: 1, column: 1) +!18 = !DILocation(line: 3, column: 1, scope: !17) +!19 = !{!20, !20, i64 0} +!20 = !{!"t1.2", !21, i64 0} +!21 = !{!"unlimited ptr", !22, i64 0} +!22 = !{!"Flang FAA 1"} +!23 = !DILocation(line: 4, column: 1, scope: !17) +!24 = !DILocalVariable(scope: !17, file: !3, type: !9, flags: DIFlagArtificial) +!25 = !DILocation(line: 0, scope: !17) +!26 = !DILocation(line: 5, column: 1, scope: !17) +!27 = distinct 
!DISubprogram(name: "mfun", scope: !2, file: !3, line: 6, type: !28, scopeLine: 6, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !4) +!28 = !DISubroutineType(types: !29) +!29 = !{!30} +!30 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, align: 64) +!31 = !DILocalVariable(arg: 1, scope: !27, file: !3, type: !32, flags: DIFlagArtificial) +!32 = !DIBasicType(name: "uinteger*8", size: 64, align: 64, encoding: DW_ATE_unsigned) +!33 = !DILocation(line: 0, scope: !34) +!34 = !DILexicalBlock(scope: !27, file: !3, line: 6, column: 1) +!35 = !DILocalVariable(name: "i1", scope: !34, file: !3, type: !9) +!36 = !DILocation(line: 8, column: 1, scope: !34) +!37 = !{!38, !38, i64 0} +!38 = !{!"t2.2", !39, i64 0} +!39 = !{!"unlimited ptr", !40, i64 0} +!40 = !{!"Flang FAA 2"} +!41 = !DILocation(line: 9, column: 1, scope: !34) +!42 = !DILocation(line: 10, column: 1, scope: !34) +!43 = !DILocalVariable(scope: !34, file: !3, type: !9, flags: DIFlagArtificial) +!44 = !DILocation(line: 11, column: 1, scope: !34) +!45 = distinct !DISubprogram(name: "fun", scope: !2, file: !3, line: 12, type: !46, scopeLine: 12, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !4) +!46 = !DISubroutineType(types: !47) +!47 = !{!48, !9} +!48 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !49, size: 64, align: 64) +!49 = !DIBasicType(name: "real", size: 32, align: 32, encoding: DW_ATE_float) +!50 = !DILocalVariable(name: "a", arg: 1, scope: !45, file: !3, type: !9) +!51 = !DILocation(line: 0, scope: !52) +!52 = !DILexicalBlock(scope: !45, file: !3, line: 12, column: 1) +!53 = !DILocalVariable(arg: 2, scope: !45, file: !3, type: !32, flags: DIFlagArtificial) +!54 = !DILocation(line: 14, column: 1, scope: !52) +!55 = !{!56, !56, i64 0} +!56 = !{!"t3.2", !57, i64 0} +!57 = !{!"unlimited ptr", !58, i64 0} +!58 = !{!"Flang FAA 3"} +!59 = !DILocation(line: 15, column: 1, scope: !52) diff --git a/llvm/test/MC/X86/align-branch-64-system.s 
b/llvm/test/MC/X86/align-branch-64-system.s new file mode 100644 index 00000000000000..b62a4e3e136f8c --- /dev/null +++ b/llvm/test/MC/X86/align-branch-64-system.s @@ -0,0 +1,68 @@ + # RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu --x86-align-branch-boundary=32 --x86-align-branch=jmp %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s + + # Exercise cases where we're enabling interrupts with one instruction delay + # and thus can't add a nop in between without changing behavior. + + .text + + # CHECK: 1e: sti + # CHECK: 1f: jmp + .p2align 5 + .rept 30 + int3 + .endr + sti + jmp baz + + # CHECK: 5c: movq %rax, %ss + # CHECK: 5f: jmp + .p2align 5 + .rept 28 + int3 + .endr + movq %rax, %ss + jmp baz + + # CHECK: 9d: movl %esi, %ss + # CHECK: 9f: jmp + .p2align 5 + .rept 29 + int3 + .endr + movl %esi, %ss + jmp baz + + # movw and movl are interchangeable since we're only using the low 16 bits. + # Both are generated as "MOV Sreg,r/m16**" (8E /r), but disassembled as movl + # CHECK: dd: movl %esi, %ss + # CHECK: df: jmp + .p2align 5 + .rept 29 + int3 + .endr + movw %si, %ss + jmp baz + + # CHECK: 11b: movw (%esi), %ss + # CHECK: 11e: jmp + .p2align 5 + .rept 27 + int3 + .endr + movw (%esi), %ss + jmp baz + + # CHECK: 15b: movw (%rsi), %ss + # CHECK: 15d: jmp + .p2align 5 + .rept 27 + int3 + .endr + movw (%rsi), %ss + jmp baz + + + int3 + .section ".text.other" +bar: + retq diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll index bab23c924d64b8..4bfee73720f746 100644 --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -92,7 +92,6 @@ ; CHECK-O2-NEXT: Running analysis: DemandedBitsAnalysis ; CHECK-O2-NEXT: Running pass: CrossDSOCFIPass ; CHECK-O2-NEXT: Running pass: LowerTypeTestsPass -; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}SimplifyCFGPass> ; CHECK-O2-NEXT: Running pass: EliminateAvailableExternallyPass 
; CHECK-O2-NEXT: Running pass: GlobalDCEPass diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll index 6326bec87a59f4..a6faeccb30dd19 100644 --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -80,7 +80,6 @@ ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: AttributorPass -; CHECK-POSTLINK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index 9c5fdc6458a54b..0c2e9328c11a4c 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -49,7 +49,6 @@ ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. 
; CHECK-O-NEXT: Running pass: AttributorPass -; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index 01d951703a6b67..29d379ab54ad79 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -60,7 +60,6 @@ ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion ; CHECK-O-NEXT: Running pass: AttributorPass -; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/ThinLTO/X86/Inputs/cfi-unsat.ll b/llvm/test/ThinLTO/X86/Inputs/cfi-unsat.ll deleted file mode 100644 index bc7a0e36dfa3b5..00000000000000 --- a/llvm/test/ThinLTO/X86/Inputs/cfi-unsat.ll +++ /dev/null @@ -1,50 +0,0 @@ -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-grtev4-linux-gnu" - -%struct.A = type { i32 (...)** } -%struct.B = type { i32 (...)** } - -@_ZTV1B = linkonce_odr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1nEi to i8*)] }, !type !0 - -$test = comdat any - -; CHECK-IR-LABEL: define i32 @test -define linkonce_odr i32 @test(%struct.A* %obj, i32 %a) comdat { -entry: - %0 = bitcast %struct.A* %obj to i8** - %vtable5 = load i8*, i8** %0 - - %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable5, i32 8, metadata !"_ZTS1A") - %2 = extractvalue { i8*, i1 } %1, 1 - br i1 %2, label %cont, label %trap - -trap: - tail call void 
@llvm.trap() - unreachable - -cont: - %3 = extractvalue { i8*, i1 } %1, 0 - %4 = bitcast i8* %3 to i32 (%struct.A*, i32)* - - ; Check that the call was devirtualized. - ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi - %call = tail call i32 %4(%struct.A* nonnull %obj, i32 %a) - - ret i32 %call -} -; CHECK-IR-LABEL: ret i32 -; CHECK-IR-LABEL: } - -declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) -declare void @llvm.trap() - -define internal i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) { -entry: - ret i32 0 -} -define internal i32 @_ZN1B1nEi(%struct.B* %this, i32 %a) { -entry: - ret i32 0 -} - -!0 = !{i64 16, !"_ZTS1B"} diff --git a/llvm/test/ThinLTO/X86/cfi-unsat.ll b/llvm/test/ThinLTO/X86/cfi-unsat.ll deleted file mode 100644 index 24e837303c2ade..00000000000000 --- a/llvm/test/ThinLTO/X86/cfi-unsat.ll +++ /dev/null @@ -1,74 +0,0 @@ -; REQUIRES: x86-registered-target - -; Test CFI devirtualization through the thin link and backend when -; a type id is Unsat (not used on any global's type metadata). -; -; In this test case, the first module is split and will import a resolution -; for its type test. The resolution would be exported by the second -; module, which is set up so that it does not get split (treated as regular -; LTO because it does not have any external globals from which to create -; a unique module ID). We should not actually get any resolution for the -; type id in this case, since no globals include it in their type metadata, -; so the resolution is Unsat and the type.checked.load instructions are -; converted to type tests that evaluate to false. - -; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s -; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1.o %p/Inputs/cfi-unsat.ll - -; RUN: llvm-lto2 run %t.o %t1.o -save-temps -use-new-pm -pass-remarks=. 
\ -; RUN: -whole-program-visibility \ -; RUN: -o %t3 \ -; RUN: -r=%t.o,test2,px \ -; RUN: -r=%t1.o,_ZTV1B,px \ -; RUN: -r=%t1.o,test,px -; RUN: llvm-dis %t3.index.bc -o - | FileCheck %s --check-prefix=INDEX -; RUN: llvm-dis %t3.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR0 -; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1 - -; INDEX-NOT: "typeid:" - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-grtev4-linux-gnu" - -%struct.A = type { i32 (...)** } - -$test2 = comdat any - -; CHECK-IR0: define weak_odr i32 @test -define linkonce_odr i32 @test2(%struct.A* %obj, i32 %a) comdat { -; CHECK-IR0-NEXT: entry: -entry: -; CHECK-IR0-NEXT: %0 = bitcast - %0 = bitcast %struct.A* %obj to i8** -; CHECK-IR0-NEXT: %vtable5 = - %vtable5 = load i8*, i8** %0 - -; CHECK-IR0-NEXT: tail call void @llvm.trap() -; CHECK-IR0-NEXT: unreachable - - %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable5, i32 8, metadata !"_ZTS1A") - %2 = extractvalue { i8*, i1 } %1, 1 - br i1 %2, label %cont, label %trap - -trap: - tail call void @llvm.trap() - unreachable - -cont: - %3 = extractvalue { i8*, i1 } %1, 0 - %4 = bitcast i8* %3 to i32 (%struct.A*, i32)* - - %call = tail call i32 %4(%struct.A* nonnull %obj, i32 %a) - - ret i32 %call -; CHECK-IR0-NEXT: } -} - -; CHECK-IR1: define weak_odr i32 @test2 -; CHECK-IR1-NEXT: entry: -; CHECK-IR1-NEXT: tail call void @llvm.trap() -; CHECK-IR1-NEXT: unreachable -; CHECK-IR1-NEXT: } - -declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) -declare void @llvm.trap() diff --git a/llvm/test/Transforms/InstCombine/multi-use-load-casts.ll b/llvm/test/Transforms/InstCombine/multi-use-load-casts.ll new file mode 100644 index 00000000000000..147d893e285eea --- /dev/null +++ b/llvm/test/Transforms/InstCombine/multi-use-load-casts.ll @@ -0,0 +1,153 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S 
-instcombine < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +; Positive test - all uses are identical casts. +define void @t0(i1 zeroext %c0, i1 zeroext %c1, i64* nocapture readonly %src) { +; CHECK-LABEL: @t0( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[DATA:%.*]] = load i64, i64* [[SRC:%.*]], align 8 +; CHECK-NEXT: br i1 [[C0:%.*]], label [[BB3:%.*]], label [[BB7:%.*]] +; CHECK: bb3: +; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB4:%.*]], label [[BB5:%.*]] +; CHECK: bb4: +; CHECK-NEXT: tail call void @abort() +; CHECK-NEXT: unreachable +; CHECK: bb5: +; CHECK-NEXT: [[PTR0:%.*]] = inttoptr i64 [[DATA]] to i32* +; CHECK-NEXT: tail call void @sink0(i32* [[PTR0]]) +; CHECK-NEXT: br label [[BB9:%.*]] +; CHECK: bb7: +; CHECK-NEXT: [[PTR1:%.*]] = inttoptr i64 [[DATA]] to i32* +; CHECK-NEXT: tail call void @sink1(i32* [[PTR1]]) +; CHECK-NEXT: br label [[BB9]] +; CHECK: bb9: +; CHECK-NEXT: ret void +; +bb: + %data = load i64, i64* %src, align 8 + br i1 %c0, label %bb3, label %bb7 + +bb3: + br i1 %c1, label %bb4, label %bb5 + +bb4: + tail call void @abort() + unreachable + +bb5: + %ptr0 = inttoptr i64 %data to i32* + tail call void @sink0(i32* %ptr0) + br label %bb9 + +bb7: + %ptr1 = inttoptr i64 %data to i32* + tail call void @sink1(i32* %ptr1) + br label %bb9 + +bb9: + ret void +} + +; Negative test - all uses are casts, but non-identical ones. 
+define void @n1(i1 zeroext %c0, i1 zeroext %c1, i64* nocapture readonly %src) { +; CHECK-LABEL: @n1( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[DATA:%.*]] = load i64, i64* [[SRC:%.*]], align 8 +; CHECK-NEXT: br i1 [[C0:%.*]], label [[BB3:%.*]], label [[BB7:%.*]] +; CHECK: bb3: +; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB4:%.*]], label [[BB5:%.*]] +; CHECK: bb4: +; CHECK-NEXT: tail call void @abort() +; CHECK-NEXT: unreachable +; CHECK: bb5: +; CHECK-NEXT: [[PTR0:%.*]] = inttoptr i64 [[DATA]] to i32* +; CHECK-NEXT: tail call void @sink0(i32* [[PTR0]]) +; CHECK-NEXT: br label [[BB9:%.*]] +; CHECK: bb7: +; CHECK-NEXT: [[VEC:%.*]] = bitcast i64 [[DATA]] to <2 x i32> +; CHECK-NEXT: tail call void @sink2(<2 x i32> [[VEC]]) +; CHECK-NEXT: br label [[BB9]] +; CHECK: bb9: +; CHECK-NEXT: ret void +; +bb: + %data = load i64, i64* %src, align 8 + br i1 %c0, label %bb3, label %bb7 + +bb3: + br i1 %c1, label %bb4, label %bb5 + +bb4: + tail call void @abort() + unreachable + +bb5: + %ptr0 = inttoptr i64 %data to i32* + tail call void @sink0(i32* %ptr0) + br label %bb9 + +bb7: + %vec = bitcast i64 %data to <2 x i32> ; different cast + tail call void @sink2(<2 x i32> %vec) + br label %bb9 + +bb9: + ret void +} + +; Negative test - have non-cast users. 
+define void @n2(i1 zeroext %c0, i1 zeroext %c1, i64* nocapture readonly %src) { +; CHECK-LABEL: @n2( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[DATA:%.*]] = load i64, i64* [[SRC:%.*]], align 8 +; CHECK-NEXT: br i1 [[C0:%.*]], label [[BB3:%.*]], label [[BB7:%.*]] +; CHECK: bb3: +; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB4:%.*]], label [[BB5:%.*]] +; CHECK: bb4: +; CHECK-NEXT: tail call void @abort() +; CHECK-NEXT: unreachable +; CHECK: bb5: +; CHECK-NEXT: [[PTR0:%.*]] = inttoptr i64 [[DATA]] to i32* +; CHECK-NEXT: tail call void @sink0(i32* [[PTR0]]) +; CHECK-NEXT: br label [[BB9:%.*]] +; CHECK: bb7: +; CHECK-NEXT: tail call void @sink3(i64 [[DATA]]) +; CHECK-NEXT: br label [[BB9]] +; CHECK: bb9: +; CHECK-NEXT: ret void +; +bb: + %data = load i64, i64* %src, align 8 + br i1 %c0, label %bb3, label %bb7 + +bb3: + br i1 %c1, label %bb4, label %bb5 + +bb4: + tail call void @abort() + unreachable + +bb5: + %ptr0 = inttoptr i64 %data to i32* + tail call void @sink0(i32* %ptr0) + br label %bb9 + +bb7: + tail call void @sink3(i64 %data) ; non-cast use + br label %bb9 + +bb9: + ret void +} + +declare void @abort() + +declare void @sink0(i32*) + +declare void @sink1(i32*) + +declare void @sink2(<2 x i32>) + +declare void @sink3(i64) diff --git a/llvm/test/Transforms/LICM/freeze.ll b/llvm/test/Transforms/LICM/freeze.ll new file mode 100644 index 00000000000000..f17c270c97bec7 --- /dev/null +++ b/llvm/test/Transforms/LICM/freeze.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -S < %s | FileCheck %s + +define void @hoist(i1 %a) { +; CHECK-LABEL: @hoist( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = freeze i1 [[A:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: call void @use(i1 [[B]]) +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop +loop: + %b = freeze i1 %a + call void @use(i1 %b) + br label %loop +} + +define i1 @sink(i1 %a) { +; CHECK-LABEL: @sink( +; CHECK-NEXT: entry: +; 
CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[C:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[FR_LE:%.*]] = freeze i1 [[A:%.*]] +; CHECK-NEXT: ret i1 [[FR_LE]] +; +entry: + br label %loop +loop: + %fr = freeze i1 %a + %c = call i1 @cond() + br i1 %c, label %loop, label %exit +exit: + ret i1 %fr +} + +declare i1 @cond() +declare void @use(i1) diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll index 5c84884c66dea0..f0fbf3d6d49b1d 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll @@ -403,76 +403,11 @@ for.end: ret void } -; In this case we cannot peel the inner loop, because the condition involves -; the outer induction variable. -define void @test5(i32 %k) { -; CHECK-LABEL: @test5( -; CHECK-NEXT: for.body.lr.ph: -; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] -; CHECK: outer.header: -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[J]], 2 -; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: -; CHECK-NEXT: call void @f1() -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: if.else: -; CHECK-NEXT: call void @f2() -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]] -; CHECK: outer.inc: -; CHECK-NEXT: [[J_INC]] = add nsw i32 [[J]], 1 -; CHECK-NEXT: [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]] -; CHECK-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label 
[[FOR_END:%.*]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -for.body.lr.ph: - br label %outer.header - -outer.header: - %j = phi i32 [ 0, %for.body.lr.ph ], [ %j.inc, %outer.inc ] - br label %for.body - -for.body: - %i.05 = phi i32 [ 0, %outer.header ], [ %inc, %for.inc ] - %cmp1 = icmp ult i32 %j, 2 - br i1 %cmp1, label %if.then, label %if.else - -if.then: - call void @f1() - br label %for.inc - -if.else: - call void @f2() - br label %for.inc - -for.inc: - %inc = add nsw i32 %i.05, 1 - %cmp = icmp slt i32 %inc, %k - br i1 %cmp, label %for.body, label %outer.inc - -outer.inc: - %j.inc = add nsw i32 %j, 1 - %outer.cmp = icmp slt i32 %j.inc, %k - br i1 %outer.cmp, label %outer.header, label %for.end - - -for.end: - ret void -} - ; In this test, the condition involves 2 AddRecs. Without evaluating both ; AddRecs, we cannot prove that the condition becomes known in the loop body ; after peeling. -define void @test6(i32 %k) { -; CHECK-LABEL: @test6( +define void @test5(i32 %k) { +; CHECK-LABEL: @test5( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -521,8 +456,8 @@ for.end: ret void } -define void @test7(i32 %k) { -; CHECK-LABEL: @test7( +define void @test6(i32 %k) { +; CHECK-LABEL: @test6( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] ; CHECK: for.body.peel.begin: @@ -615,8 +550,8 @@ for.end: ret void } -define void @test8(i32 %k) { -; CHECK-LABEL: @test8( +define void @test7(i32 %k) { +; CHECK-LABEL: @test7( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] ; CHECK: for.body.peel.begin: @@ -711,8 +646,8 @@ for.end: ; Comparison with non-monotonic predicate due to possible wrapping, loop ; body cannot be simplified. 
-define void @test9(i32 %k) { -; CHECK-LABEL: @test9( +define void @test8(i32 %k) { +; CHECK-LABEL: @test8( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -751,8 +686,8 @@ for.end: } ; CHECK-NOT: llvm.loop.unroll.disable -define void @test_10__peel_first_iter_via_slt_pred(i32 %len) { -; CHECK-LABEL: @test_10__peel_first_iter_via_slt_pred( +define void @test_9__peel_first_iter_via_slt_pred(i32 %len) { +; CHECK-LABEL: @test_9__peel_first_iter_via_slt_pred( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] @@ -818,8 +753,8 @@ if.end: ; preds = %if.then, %for.body br i1 %exitcond, label %for.cond.cleanup, label %for.body } -define void @test_11__peel_first_iter_via_sgt_pred(i32 %len) { -; CHECK-LABEL: @test_11__peel_first_iter_via_sgt_pred( +define void @test_10__peel_first_iter_via_sgt_pred(i32 %len) { +; CHECK-LABEL: @test_10__peel_first_iter_via_sgt_pred( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] @@ -887,8 +822,8 @@ if.end: ; preds = %if.then, %for.body ; NOTE: here we should only peel the first iteration, ; i.e. all calls to sink() must stay in loop. -define void @test12__peel_first_iter_via_eq_pred(i32 %len) { -; CHECK-LABEL: @test12__peel_first_iter_via_eq_pred( +define void @test11__peel_first_iter_via_eq_pred(i32 %len) { +; CHECK-LABEL: @test11__peel_first_iter_via_eq_pred( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] @@ -956,8 +891,8 @@ if.end: ; preds = %if.then, %for.body ; NOTE: here we should only peel the first iteration, ; i.e. all calls to sink() must stay in loop. 
-define void @test13__peel_first_iter_via_ne_pred(i32 %len) { -; CHECK-LABEL: @test13__peel_first_iter_via_ne_pred( +define void @test12__peel_first_iter_via_ne_pred(i32 %len) { +; CHECK-LABEL: @test12__peel_first_iter_via_ne_pred( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] @@ -1024,8 +959,8 @@ if.end: ; preds = %if.then, %for.body } ; No peeling is profitable here. -define void @test14__ivar_mod2_is_1(i32 %len) { -; CHECK-LABEL: @test14__ivar_mod2_is_1( +define void @test13__ivar_mod2_is_1(i32 %len) { +; CHECK-LABEL: @test13__ivar_mod2_is_1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] @@ -1074,8 +1009,8 @@ if.end: ; preds = %if.then, %for.body } ; No peeling is profitable here. -define void @test15__ivar_mod2_is_0(i32 %len) { -; CHECK-LABEL: @test15__ivar_mod2_is_0( +define void @test14__ivar_mod2_is_0(i32 %len) { +; CHECK-LABEL: @test14__ivar_mod2_is_0( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] @@ -1123,10 +1058,10 @@ if.end: ; preds = %if.then, %for.body br i1 %exitcond, label %for.cond.cleanup, label %for.body } -; Similar to @test7, we need to peel one extra iteration, and we can't do that +; Similar to @test6, we need to peel one extra iteration, and we can't do that ; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all. 
-define void @test16(i32 %k) { -; CHECK-LABEL: @test16( +define void @test15(i32 %k) { +; CHECK-LABEL: @test15( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -1164,10 +1099,10 @@ for.end: ret void } -; Similar to @test8, we need to peel one extra iteration, and we can't do that +; Similar to @test7, we need to peel one extra iteration, and we can't do that ; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all. -define void @test17(i32 %k) { -; CHECK-LABEL: @test17( +define void @test16(i32 %k) { +; CHECK-LABEL: @test16( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-nests.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-nests.ll new file mode 100644 index 00000000000000..dc1d9be860736b --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-nests.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -verify-dom-info | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -unroll-allow-loop-nests-peeling -verify-dom-info | FileCheck %s --check-prefix PEELED + +declare void @f1() +declare void @f2() + +; In this case we cannot peel the inner loop, because the condition involves +; the outer induction variable. +; Peel the loop nest if allowed by the flag -unroll-allow-loop-nests-peeling. 
+define void @test1(i32 %k) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: for.body.lr.ph: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[J]], 2 +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: if.else: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]] +; CHECK: outer.inc: +; CHECK-NEXT: [[J_INC]] = add nsw i32 [[J]], 1 +; CHECK-NEXT: [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]] +; CHECK-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END:%.*]], !llvm.loop !{{.*}} +; CHECK: for.end: +; CHECK-NEXT: ret void +; +; PEELED-LABEL: @test1( +; PEELED-NEXT: for.body.lr.ph: +; PEELED-NEXT: br label [[OUTER_HEADER_PEEL_BEGIN:%.*]] +; PEELED: outer.header.peel.begin: +; PEELED-NEXT: br label [[OUTER_HEADER_PEEL:%.*]] +; PEELED: outer.header.peel: +; PEELED-NEXT: br label [[FOR_BODY_PEEL:%.*]] +; PEELED: for.body.peel: +; PEELED-NEXT: [[I_05_PEEL:%.*]] = phi i32 [ 0, [[OUTER_HEADER_PEEL]] ], [ [[INC_PEEL:%.*]], [[FOR_INC_PEEL:%.*]] ] +; PEELED-NEXT: [[CMP1_PEEL:%.*]] = icmp ult i32 0, 2 +; PEELED-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[IF_ELSE_PEEL:%.*]] +; PEELED: if.else.peel: +; PEELED-NEXT: call void @f2() +; PEELED-NEXT: br label [[FOR_INC_PEEL]] +; PEELED: if.then.peel: +; PEELED-NEXT: call void @f1() +; PEELED-NEXT: br label [[FOR_INC_PEEL]] +; PEELED: for.inc.peel: +; 
PEELED-NEXT: [[INC_PEEL]] = add nsw i32 [[I_05_PEEL]], 1 +; PEELED-NEXT: [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]] +; PEELED-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL]], label [[OUTER_INC_PEEL:%.*]] +; PEELED: outer.inc.peel: +; PEELED-NEXT: [[J_INC_PEEL:%.*]] = add nsw i32 0, 1 +; PEELED-NEXT: [[OUTER_CMP_PEEL:%.*]] = icmp slt i32 [[J_INC_PEEL]], [[K]] +; PEELED-NEXT: br i1 [[OUTER_CMP_PEEL]], label [[OUTER_HEADER_PEEL_NEXT:%.*]], label [[FOR_END:%[^,]*]] +; Verify that MD_loop metadata is dropped. +; PEELED-NOT: , !llvm.loop !{{[0-9]*}} +; PEELED: outer.header.peel.next: +; PEELED-NEXT: br label [[OUTER_HEADER_PEEL2:%.*]] +; PEELED: outer.header.peel2: +; PEELED-NEXT: br label [[FOR_BODY_PEEL3:%.*]] +; PEELED: for.body.peel3: +; PEELED-NEXT: [[I_05_PEEL4:%.*]] = phi i32 [ 0, [[OUTER_HEADER_PEEL2]] ], [ [[INC_PEEL9:%.*]], [[FOR_INC_PEEL8:%.*]] ] +; PEELED-NEXT: [[CMP1_PEEL5:%.*]] = icmp ult i32 [[J_INC_PEEL]], 2 +; PEELED-NEXT: br i1 [[CMP1_PEEL5]], label [[IF_THEN_PEEL7:%.*]], label [[IF_ELSE_PEEL6:%.*]] +; PEELED: if.else.peel6: +; PEELED-NEXT: call void @f2() +; PEELED-NEXT: br label [[FOR_INC_PEEL8]] +; PEELED: if.then.peel7: +; PEELED-NEXT: call void @f1() +; PEELED-NEXT: br label [[FOR_INC_PEEL8]] +; PEELED: for.inc.peel8: +; PEELED-NEXT: [[INC_PEEL9]] = add nsw i32 [[I_05_PEEL4]], 1 +; PEELED-NEXT: [[CMP_PEEL10:%.*]] = icmp slt i32 [[INC_PEEL9]], [[K]] +; PEELED-NEXT: br i1 [[CMP_PEEL10]], label [[FOR_BODY_PEEL3]], label [[OUTER_INC_PEEL11:%.*]] +; PEELED: outer.inc.peel11: +; PEELED-NEXT: [[J_INC_PEEL12:%.*]] = add nsw i32 [[J_INC_PEEL]], 1 +; PEELED-NEXT: [[OUTER_CMP_PEEL13:%.*]] = icmp slt i32 [[J_INC_PEEL12]], [[K]] +; PEELED-NEXT: br i1 [[OUTER_CMP_PEEL13]], label [[OUTER_HEADER_PEEL_NEXT1:%.*]], label [[FOR_END]] +; Verify that MD_loop metadata is dropped. 
+; PEELED-NOT: , !llvm.loop !{{[0-9]*}} +; PEELED: outer.header.peel.next1: +; PEELED-NEXT: br label [[OUTER_HEADER_PEEL_NEXT14:%.*]] +; PEELED: outer.header.peel.next14: +; PEELED-NEXT: br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]] +; PEELED: for.body.lr.ph.peel.newph: +; PEELED-NEXT: br label [[OUTER_HEADER:%.*]] +; PEELED: outer.header: +; PEELED-NEXT: [[J:%.*]] = phi i32 [ [[J_INC_PEEL12]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ] +; PEELED-NEXT: br label [[FOR_BODY:%.*]] +; PEELED: for.body: +; PEELED-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; PEELED-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; PEELED: if.then: +; PEELED-NEXT: call void @f1() +; PEELED-NEXT: br label [[FOR_INC]] +; PEELED: if.else: +; PEELED-NEXT: call void @f2() +; PEELED-NEXT: br label [[FOR_INC]] +; PEELED: for.inc: +; PEELED-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 +; PEELED-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]] +; PEELED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]] +; PEELED: outer.inc: +; PEELED-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1 +; PEELED-NEXT: [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]] +; PEELED-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !{{.*}} +; PEELED: for.end.loopexit: +; PEELED-NEXT: br label [[FOR_END]] +; PEELED: for.end: +; PEELED-NEXT: ret void +; +for.body.lr.ph: + br label %outer.header + +outer.header: + %j = phi i32 [ 0, %for.body.lr.ph ], [ %j.inc, %outer.inc ] + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %outer.header ], [ %inc, %for.inc ] + %cmp1 = icmp ult i32 %j, 2 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + call void @f1() + br label %for.inc + +if.else: + call void @f2() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %outer.inc + +outer.inc: + %j.inc = add nsw i32 %j, 1 + 
%outer.cmp = icmp slt i32 %j.inc, %k + br i1 %outer.cmp, label %outer.header, label %for.end, !llvm.loop !0 + +for.end: + ret void +} + +!0 = distinct !{!0} diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll new file mode 100644 index 00000000000000..0886b8eca2ef3d --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -0,0 +1,1358 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -force-reduction-intrinsics -dce -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { +; CHECK-LABEL: @reduction_sum( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] +; CHECK: .lr.ph.preheader: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 3 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP4]], 8589934588 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND2]] +; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP9]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP11]] = add <4 x i32> [[TMP10]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[TMP21:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[SUM_02]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], [[TMP15]] +; CHECK-NEXT: [[TMP21]] = add i32 [[TMP20]], [[TMP17]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop !2 +; CHECK: ._crit_edge.loopexit: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP21]], [[DOTLR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] +; CHECK: ._crit_edge: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %5 = load i32, i32* %4, align 4 + %6 = trunc i64 %indvars.iv to i32 + %7 = add i32 %sum.02, %6 + %8 = add i32 %7, %3 + %9 = add i32 %8, %5 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] + ret i32 %sum.0.lcssa +} + +define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { +; CHECK-LABEL: @reduction_prod( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; 
CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] +; CHECK: .lr.ph.preheader: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 3 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP4]], 8589934588 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[VEC_PHI]], [[VEC_IND2]] +; CHECK-NEXT: [[TMP10:%.*]] = mul <4 x i32> [[TMP9]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP11]] = mul <4 x i32> [[TMP10]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[TMP11]]) +; CHECK-NEXT: 
[[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 1, [[DOTLR_PH_PREHEADER]] ] +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[PROD_02:%.*]] = phi i32 [ [[TMP21:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[PROD_02]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = mul i32 [[TMP19]], [[TMP15]] +; CHECK-NEXT: [[TMP21]] = mul i32 [[TMP20]], [[TMP17]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop !5 +; CHECK: ._crit_edge.loopexit: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP21]], [[DOTLR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] +; CHECK: ._crit_edge: +; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = phi i32 [ 1, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[PROD_0_LCSSA]] +; + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + 
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %prod.02 = phi i32 [ %9, %.lr.ph ], [ 1, %0 ] + %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %5 = load i32, i32* %4, align 4 + %6 = trunc i64 %indvars.iv to i32 + %7 = mul i32 %prod.02, %6 + %8 = mul i32 %7, %3 + %9 = mul i32 %8, %5 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + %prod.0.lcssa = phi i32 [ 1, %0 ], [ %9, %.lr.ph ] + ret i32 %prod.0.lcssa +} + +define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { +; CHECK-LABEL: @reduction_mix( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] +; CHECK: .lr.ph.preheader: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 3 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP4]], 8589934588 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 
x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND2]] +; CHECK-NEXT: [[TMP11]] = add <4 x i32> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[TMP21:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = mul nsw i32 [[TMP17]], 
[[TMP15]] +; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[SUM_02]], [[TMP19]] +; CHECK-NEXT: [[TMP21]] = add i32 [[TMP20]], [[TMP18]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop !7 +; CHECK: ._crit_edge.loopexit: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP21]], [[DOTLR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] +; CHECK: ._crit_edge: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %5 = load i32, i32* %4, align 4 + %6 = mul nsw i32 %5, %3 + %7 = trunc i64 %indvars.iv to i32 + %8 = add i32 %sum.02, %7 + %9 = add i32 %8, %6 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] + ret i32 %sum.0.lcssa +} + +define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { +; CHECK-LABEL: @reduction_mul( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] +; CHECK: 
.lr.ph.preheader: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 3 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP4]], 8589934588 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP10]] = mul <4 x i32> [[TMP9]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[TMP10]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], 
[[MIDDLE_BLOCK]] ], [ 19, [[DOTLR_PH_PREHEADER]] ] +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[TMP18:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[SUM_02]], [[TMP14]] +; CHECK-NEXT: [[TMP18]] = mul i32 [[TMP17]], [[TMP16]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop !9 +; CHECK: ._crit_edge.loopexit: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP18]], [[DOTLR_PH]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] +; CHECK: ._crit_edge: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %sum.02 = phi i32 [ %7, %.lr.ph ], [ 19, %0 ] + %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %5 = load i32, i32* %4, align 4 + %6 = mul i32 %sum.02, %3 + %7 = mul i32 %6, %5 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = 
icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + %sum.0.lcssa = phi i32 [ 0, %0 ], [ %7, %.lr.ph ] + ret i32 %sum.0.lcssa +} + +define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* nocapture %out, i32 %n) nounwind uwtable readonly ssp { +; CHECK-LABEL: @start_at_non_zero( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[COEFF:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8]] = add <4 x i32> [[TMP7]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 
[[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !10 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 120, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_09:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[COEFF]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], [[TMP11]] +; CHECK-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[SUM_09]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !11 +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 120, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; +entry: + %cmp7 = icmp sgt 
i32 %n, 0 + br i1 %cmp7, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %sum.09 = phi i32 [ %add, %for.body ], [ 120, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %in, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %coeff, i64 %indvars.iv + %1 = load i32, i32* %arrayidx2, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, %sum.09 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %sum.0.lcssa = phi i32 [ 120, %entry ], [ %add, %for.body ] + ret i32 %sum.0.lcssa +} + +define i32 @reduction_and(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { +; CHECK-LABEL: @reduction_and( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* +; CHECK-NEXT: 
[[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8]] = and <4 x i32> [[TMP7]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !12 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ -1, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[AND:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = and i32 [[RESULT_08]], [[TMP11]] +; CHECK-NEXT: [[AND]] = and i32 [[ADD]], [[TMP12]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] 
= trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !13 +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[AND_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %1 = load i32, i32* %arrayidx2, align 4 + %add = and i32 %result.08, %0 + %and = and i32 %add, %1 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ -1, %entry ], [ %and, %for.body ] + ret i32 %result.0.lcssa +} + +define i32 @reduction_or(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { +; CHECK-LABEL: @reduction_or( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], 
label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8]] = or <4 x i32> [[TMP7]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !14 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[OR:%.*]], 
[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +; CHECK-NEXT: [[OR]] = or i32 [[ADD]], [[RESULT_08]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !15 +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OR_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %or = or i32 %add, %result.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %or, %for.body ] + ret i32 %result.0.lcssa +} + +define i32 
@reduction_xor(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { +; CHECK-LABEL: @reduction_xor( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8]] = xor <4 x i32> [[TMP7]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !16 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 
[[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[XOR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +; CHECK-NEXT: [[XOR]] = xor i32 [[ADD]], [[RESULT_08]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !17 +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[XOR_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ] + %arrayidx = 
getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %xor = xor i32 %add, %result.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ] + ret i32 %result.0.lcssa +} + +define float @reduction_fadd(i32 %n, float* nocapture %A, float* nocapture %B) nounwind uwtable readonly { +; CHECK-LABEL: @reduction_fadd( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* 
[[TMP5]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8]] = fadd fast <4 x float> [[TMP7]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !18 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[FADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[RESULT_08]], [[TMP11]] +; CHECK-NEXT: [[FADD]] = fadd fast float [[ADD]], [[TMP12]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 
[[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !19 +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[FADD_LCSSA:%.*]] = phi float [ [[FADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[FADD_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] +; +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ] + %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv + %1 = load float, float* %arrayidx2, align 4 + %add = fadd fast float %result.08, %0 + %fadd = fadd fast float %add, %1 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi float [ 0.0, %entry ], [ %fadd, %for.body ] + ret float %result.0.lcssa +} + +define float @reduction_fmul(i32 %n, float* nocapture %A, float* nocapture %B) nounwind uwtable readonly { +; CHECK-LABEL: @reduction_fmul( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label 
[[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8]] = fmul fast <4 x float> [[TMP7]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !20 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], 
[[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[FMUL:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = fmul fast float [[RESULT_08]], [[TMP11]] +; CHECK-NEXT: [[FMUL]] = fmul fast float [[ADD]], [[TMP12]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !21 +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[FMUL_LCSSA:%.*]] = phi float [ [[FMUL]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[FMUL_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] +; +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ] + %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv + %1 = load float, float* %arrayidx2, align 4 + %add = fmul fast float %result.08, %0 + %fmul = fmul fast float %add, %1 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label 
%for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi float [ 0.0, %entry ], [ %fmul, %for.body ] + ret float %result.0.lcssa +} + +define i32 @reduction_min(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { +; CHECK-LABEL: @reduction_min( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !22 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP6]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br 
i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 1000, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[RESULT_08]], [[TMP9]] +; CHECK-NEXT: [[V0]] = select i1 [[C0]], i32 [[RESULT_08]], i32 [[TMP9]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !23 +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[V0_LCSSA:%.*]] = phi i32 [ [[V0]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[V0_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %c0 = icmp slt i32 %result.08, %0 + %v0 = select i1 %c0, i32 %result.08, i32 %0 + 
%indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %v0, %for.body ] + ret i32 %result.0.lcssa +} + +define i32 @reduction_max(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { +; CHECK-LABEL: @reduction_max( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !24 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP8:%.*]] = 
call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> [[TMP6]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 1000, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[C0:%.*]] = icmp ugt i32 [[RESULT_08]], [[TMP9]] +; CHECK-NEXT: [[V0]] = select i1 [[C0]], i32 [[RESULT_08]], i32 [[TMP9]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !25 +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[V0_LCSSA:%.*]] = phi i32 [ [[V0]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[V0_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ] + %arrayidx = getelementptr inbounds i32, i32* 
%A, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %c0 = icmp ugt i32 %result.08, %0 + %v0 = select i1 %c0, i32 %result.08, i32 %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ 0, %entry ], [ %v0, %for.body ] + ret i32 %result.0.lcssa +} + +; Sub we can create a reduction, but not inloop +define i32 @reduction_sub_lhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly { +; CHECK-LABEL: @reduction_sub_lhs( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5]] = sub <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !26 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[X_05:%.*]] = phi i32 [ [[SUB:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[SUB]] = sub nsw i32 [[X_05]], [[TMP8]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !27 +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[SUB_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[X_0_LCSSA]] +; +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds 
i32, i32* %A, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %sub = sub nsw i32 %x.05, %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ] + ret i32 %x.0.lcssa +} + +; Conditional reductions with multi-input phis. +define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) { +; CHECK-LABEL: @reduction_conditional( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> , float [[S:%.*]], i32 0 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[TMP1]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], +; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i1> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], +; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> 
[[TMP8]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP5]], +; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP9]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]] +; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP13]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !28 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI3]]) +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br i1 undef, label [[IF_THEN8:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then8: +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: if.else: +; CHECK-NEXT: br i1 undef, label [[IF_THEN16:%.*]], label [[FOR_INC]] +; CHECK: if.then16: +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: br i1 undef, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop !29 +; CHECK: for.end: +; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi float [ undef, [[FOR_INC]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret float [[SUM_1_LCSSA]] +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] + %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ] + %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv + %1 = load float, float* 
%arrayidx2, align 4 + %cmp3 = fcmp ogt float %0, %1 + br i1 %cmp3, label %if.then, label %for.inc + +if.then: + %cmp6 = fcmp ogt float %1, 1.000000e+00 + br i1 %cmp6, label %if.then8, label %if.else + +if.then8: + %add = fadd fast float %sum.033, %0 + br label %for.inc + +if.else: + %cmp14 = fcmp ogt float %0, 2.000000e+00 + br i1 %cmp14, label %if.then16, label %for.inc + +if.then16: + %add19 = fadd fast float %sum.033, %1 + br label %for.inc + +for.inc: + %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ %sum.033, %if.else ], [ %sum.033, %for.body ] + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, 128 + br i1 %exitcond, label %for.body, label %for.end + +for.end: + %sum.1.lcssa = phi float [ %sum.1, %for.inc ] + ret float %sum.1.lcssa +} + +define i32 @reduction_sum_multiuse(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) { +; CHECK-LABEL: @reduction_sum_multiuse( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[END:%.*]] +; CHECK: .lr.ph.preheader: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 3 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP4]], 8589934588 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND2]] +; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP9]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP11]] = add <4 x i32> [[TMP10]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !30 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[TMP21:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[SUM_02]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], [[TMP15]] +; CHECK-NEXT: [[TMP21]] = add i32 [[TMP20]], [[TMP17]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !31 +; CHECK: ._crit_edge: +; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[TMP21]], [[DOTLR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUM_COPY:%.*]] = phi i32 [ [[TMP21]], [[DOTLR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[F1:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[SUM_LCSSA]], [[DOT_CRIT_EDGE]] ] +; CHECK-NEXT: [[F2:%.*]] = phi i32 [ 0, [[TMP0]] ], [ [[SUM_COPY]], [[DOT_CRIT_EDGE]] ] +; CHECK-NEXT: [[FINAL:%.*]] = add i32 [[F1]], [[F2]] +; CHECK-NEXT: ret i32 [[FINAL]] +; + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph.preheader, label %end +.lr.ph.preheader: ; preds = %0 + br label %.lr.ph + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ] + %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %.lr.ph.preheader ] + %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %5 = load i32, i32* %4, align 4 + %6 = trunc i64 %indvars.iv to i32 + %7 = add i32 %sum.02, %6 + %8 = add i32 %7, %3 + %9 = add i32 %8, %5 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + %sum.lcssa = phi i32 [ %9, 
%.lr.ph ] + %sum.copy = phi i32 [ %9, %.lr.ph ] + br label %end + +end: + %f1 = phi i32 [ 0, %0 ], [ %sum.lcssa, %._crit_edge ] + %f2 = phi i32 [ 0, %0 ], [ %sum.copy, %._crit_edge ] + %final = add i32 %f1, %f2 + ret i32 %final +} + +; Predicated loop, cannot (yet) use in-loop reductions. +define i32 @reduction_predicated(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { +; CHECK-LABEL: @reduction_predicated( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] +; CHECK: .lr.ph.preheader: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[TMP3]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 8589934588 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[TMP3]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[PRED_LOAD_CONTINUE14]] ] +; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[PRED_LOAD_CONTINUE14]] ] +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: 
[[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 +; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK: pred.load.if: +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> undef, i32 [[TMP10]], i32 0 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] +; CHECK: pred.load.continue: +; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP11]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1 +; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK: pred.load.if1: +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP15]], i32 1 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] +; CHECK: pred.load.continue2: +; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2 +; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK: pred.load.if3: +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP20]], i32 2 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] +; CHECK: pred.load.continue4: +; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP21]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3 +; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_LOAD_IF5:%.*]], label 
[[PRED_LOAD_CONTINUE6:%.*]] +; CHECK: pred.load.if5: +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP22]], i32 [[TMP25]], i32 3 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] +; CHECK: pred.load.continue6: +; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i32> [ [[TMP22]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP26]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 +; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] +; CHECK: pred.load.if7: +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> undef, i32 [[TMP30]], i32 0 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] +; CHECK: pred.load.continue8: +; CHECK-NEXT: [[TMP32:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP31]], [[PRED_LOAD_IF7]] ] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1 +; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] +; CHECK: pred.load.if9: +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP35]], i32 1 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]] +; CHECK: pred.load.continue10: +; CHECK-NEXT: [[TMP37:%.*]] = phi <4 x i32> [ [[TMP32]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], [[PRED_LOAD_IF9]] ] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2 +; CHECK-NEXT: br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] +; CHECK: pred.load.if11: +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, i32* [[B]], 
i64 [[TMP5]] +; CHECK-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP40]], i32 2 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]] +; CHECK: pred.load.continue12: +; CHECK-NEXT: [[TMP42:%.*]] = phi <4 x i32> [ [[TMP37]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP41]], [[PRED_LOAD_IF11]] ] +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3 +; CHECK-NEXT: br i1 [[TMP43]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] +; CHECK: pred.load.if13: +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x i32> [[TMP42]], i32 [[TMP45]], i32 3 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] +; CHECK: pred.load.continue14: +; CHECK-NEXT: [[TMP47:%.*]] = phi <4 x i32> [ [[TMP42]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP46]], [[PRED_LOAD_IF13]] ] +; CHECK-NEXT: [[TMP48:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND15]] +; CHECK-NEXT: [[TMP49:%.*]] = add <4 x i32> [[TMP48]], [[TMP27]] +; CHECK-NEXT: [[TMP50]] = add <4 x i32> [[TMP49]], [[TMP47]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], +; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !32 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP52:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[TMP50]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP53:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP52]]) +; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE_LOOPEXIT]], label 
[[DOTLR_PH]], !llvm.loop !33 +; CHECK: ._crit_edge.loopexit: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP53]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] +; CHECK: ._crit_edge: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %5 = load i32, i32* %4, align 4 + %6 = trunc i64 %indvars.iv to i32 + %7 = add i32 %sum.02, %6 + %8 = add i32 %7, %3 + %9 = add i32 %8, %5 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph, !llvm.loop !6 + +._crit_edge: ; preds = %.lr.ph, %0 + %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] + ret i32 %sum.0.lcssa +} + +!6 = distinct !{!6, !7, !8} +!7 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} +!8 = !{!"llvm.loop.vectorize.enable", i1 true} diff --git a/llvm/test/Transforms/Util/dbg-call-bitcast.ll b/llvm/test/Transforms/Util/dbg-call-bitcast.ll index 6625b469b06e4a..2d602c13635e22 100644 --- a/llvm/test/Transforms/Util/dbg-call-bitcast.ll +++ b/llvm/test/Transforms/Util/dbg-call-bitcast.ll @@ -10,6 +10,26 @@ define dso_local void @_Z1fv() { ; CHECK: call void @_Z1gPv call void @_Z1gPv(i8* nonnull %2) %3 = bitcast i32* %1 to i8* +; CHECK-NOT: call void @llvm.dbg.value +; CHECK: call void @_Z1gPv + call void @_Z1gPv(i8* nonnull %3) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) + ret void, !dbg !21 +} + +define dso_local void @_Z2fv() { + %1 = alloca i32, align 4 + %2 
= bitcast i32* %1 to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) + call void @llvm.dbg.declare(metadata i32* %1, metadata !16, metadata !DIExpression()), !dbg !19 +; CHECK: %[[A:.*]] = alloca i32, align 4 +; CHECK: call void @llvm.dbg.value(metadata i32* %[[A]], {{.*}}, metadata !DIExpression(DW_OP_deref) +; CHECK: call void @_Z1gPv + call void @_Z1gPv(i8* nonnull %2) + br label %block2 + +block2: + %3 = bitcast i32* %1 to i8* ; CHECK: call void @llvm.dbg.value(metadata i32* %[[A]], {{.*}}, metadata !DIExpression(DW_OP_deref) ; CHECK: call void @_Z1gPv call void @_Z1gPv(i8* nonnull %3) diff --git a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll index bf7c8547f27102..32d964819fee5c 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll @@ -10,7 +10,7 @@ ; SUMMARY: TypeIdMap: ; SUMMARY-NEXT: typeid3: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: Kind: Unsat ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 @@ -23,7 +23,7 @@ ; SUMMARY-NEXT: ResByArg: ; SUMMARY-NEXT: typeid1: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: Kind: Unsat ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 @@ -36,7 +36,7 @@ ; SUMMARY-NEXT: ResByArg: ; SUMMARY-NEXT: typeid2: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: Kind: Unsat ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll index 7c85114239cf27..63ccfb833d4560 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll @@ -14,7 +14,7 @@ ; RUN: 
-wholeprogramdevirt-summary-action=export -o /dev/null 2>&1 | FileCheck %s --check-prefix=MISSING-MODULE ; Check single impl devirtulation in summary -; CHECK: typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: unknown, sizeM1BitWidth: 0), wpdResolutions: ((offset: 0, wpdRes: (kind: singleImpl, singleImplName: "_ZNK1A1fEv"))))) ; guid +; CHECK: typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: unsat, sizeM1BitWidth: 0), wpdResolutions: ((offset: 0, wpdRes: (kind: singleImpl, singleImplName: "_ZNK1A1fEv"))))) ; guid ; MISSING-MODULE: combined summary should contain Regular LTO module diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll index 861f5f6584898a..33ff9e1afe50f6 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll @@ -4,7 +4,7 @@ ; SUMMARY: TypeIdMap: ; SUMMARY-NEXT: typeid3: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: Kind: Unsat ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 @@ -17,7 +17,7 @@ ; SUMMARY-NEXT: ResByArg: ; SUMMARY-NEXT: typeid1: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: Kind: Unsat ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 @@ -30,7 +30,7 @@ ; SUMMARY-NEXT: ResByArg: ; SUMMARY-NEXT: typeid2: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: Kind: Unsat ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 @@ -43,7 +43,7 @@ ; SUMMARY-NEXT: ResByArg: ; SUMMARY-NEXT: typeid4: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: Kind: Unsat ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll 
b/llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll index 634eaa12196eb0..cb2fddd75d1d0e 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll @@ -6,7 +6,7 @@ ; SUMMARY: TypeIdMap: ; SUMMARY-NEXT: typeid4: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: Kind: Unsat ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll b/llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll index 7b646341ece278..0f780a3873687c 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll @@ -6,7 +6,7 @@ ; SUMMARY: TypeIdMap: ; SUMMARY-NEXT: typeid3: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: Kind: Unsat ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 @@ -24,7 +24,7 @@ ; SUMMARY-NEXT: Bit: 0 ; SUMMARY-NEXT: typeid4: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: Kind: Unsat ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll b/llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll index e33abd259625a8..eb7b36e87dd62b 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll @@ -9,7 +9,7 @@ target datalayout = "e-p:64:64" ; SUMMARY: TypeIdMap: ; SUMMARY-NEXT: typeid3: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: Kind: Unsat ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 @@ -29,7 +29,7 @@ target datalayout = "e-p:64:64" ; SUMMARY-ARM-NEXT: Bit: 1 ; SUMMARY-NEXT: typeid4: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: 
Unknown +; SUMMARY-NEXT: Kind: Unsat ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 diff --git a/llvm/test/Transforms/WholeProgramDevirt/import-indir.ll b/llvm/test/Transforms/WholeProgramDevirt/import-indir.ll index 19ee68be955a0f..5c2be7d8629631 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/import-indir.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/import-indir.ll @@ -32,7 +32,7 @@ ; SUMMARY-NEXT: TypeIdMap: ; SUMMARY-NEXT: typeid1: ; SUMMARY-NEXT: TTRes: -; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: Kind: Unsat ; SUMMARY-NEXT: SizeM1BitWidth: 0 ; SUMMARY-NEXT: AlignLog2: 0 ; SUMMARY-NEXT: SizeM1: 0 diff --git a/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll b/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll index 16f9ef822d6f3c..7626aba24c1ab5 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll @@ -25,7 +25,7 @@ define i32 @call(i8* %obj) { %fptr = load i8*, i8** %fptrptr %fptr_casted = bitcast i8* %fptr to i32 (i8*)* %result = call i32 %fptr_casted(i8* %obj) - ; CHECK-NOT: call i32 % + ; CHECK-NOT: call ; CHECK: ret i32 123 ret i32 %result } diff --git a/llvm/test/Verifier/disubprogram-name-match-only.ll b/llvm/test/Verifier/disubprogram-name-match-only.ll new file mode 100644 index 00000000000000..ae23ae201d55a5 --- /dev/null +++ b/llvm/test/Verifier/disubprogram-name-match-only.ll @@ -0,0 +1,26 @@ +; RUN: llvm-as -disable-output <%s 2>&1| FileCheck %s + +define void @f() !dbg !14 { + ret void, !dbg !5 +} + +!llvm.module.flags = !{!15} +!llvm.dbg.cu = !{!4} + +!0 = !{null} +!1 = distinct !DICompositeType(tag: DW_TAG_structure_type) +!2 = !DIFile(filename: "f.c", directory: "/") +!3 = !DISubroutineType(types: !0) +!4 = distinct !DICompileUnit(language: DW_LANG_C, file: !2) +; CHECK: !dbg attachment points at wrong subprogram for function +; CHECK: warning: ignoring invalid debug info +!5 = !DILocation(line: 
1, scope: !9) +!9 = distinct !DISubprogram(name: "f", scope: !1, + file: !2, line: 1, type: !3, isLocal: true, + isDefinition: true, scopeLine: 2, + unit: !4) +!14 = distinct !DISubprogram(name: "f", scope: !1, + file: !2, line: 1, type: !3, isLocal: true, + isDefinition: true, scopeLine: 2, + unit: !4) +!15 = !{i32 1, !"Debug Info Version", i32 3} diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in index b872a8a0a6edeb..6f4d5f79082813 100644 --- a/llvm/test/lit.site.cfg.py.in +++ b/llvm/test/lit.site.cfg.py.in @@ -33,7 +33,7 @@ config.host_cxx = "@HOST_CXX@" config.host_ldflags = '@HOST_LDFLAGS@' config.llvm_use_intel_jitevents = @LLVM_USE_INTEL_JITEVENTS@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" -config.have_zlib = @LLVM_ENABLE_ZLIB@ +config.have_zlib = @HAVE_LIBZ@ config.have_libxar = @HAVE_LIBXAR@ config.have_dia_sdk = @LLVM_ENABLE_DIA_SDK@ config.enable_ffi = @LLVM_ENABLE_FFI@ diff --git a/llvm/test/tools/llvm-ar/response-utf8.test b/llvm/test/tools/llvm-ar/response-utf8.test new file mode 100644 index 00000000000000..b3e405f854902c --- /dev/null +++ b/llvm/test/tools/llvm-ar/response-utf8.test @@ -0,0 +1,11 @@ +## Check that response files can cope with non-ascii characters. + +# RUN: echo 'contents' > %t-£.txt + +# RUN: rm -f %t-£.a +# RUN: echo 'r %t-£.a %t-£.txt' > %t-replace.txt +# RUN: llvm-ar @%t-replace.txt + +# RUN: echo 'p %t-£.a %t-£.txt' > %t-print.txt +# RUN: llvm-ar @%t-print.txt | FileCheck %s +# CHECK: contents diff --git a/llvm/test/tools/llvm-ar/response.test b/llvm/test/tools/llvm-ar/response.test new file mode 100644 index 00000000000000..a08a63e88182b7 --- /dev/null +++ b/llvm/test/tools/llvm-ar/response.test @@ -0,0 +1,34 @@ +## llvm-ar should be able to consume response files. + +# RUN: echo 'contents' > %t.txt +# RUN: echo 'rc %t1.a %t.txt' > %t.response1.txt +# RUN: llvm-ar @%t.response1.txt +# RUN: llvm-ar p %t1.a | FileCheck %s --check-prefix=CONTENTS + +## Quotes and Spaces. 
+# RUN: echo 'contents' > '%t space.txt' +## Python is used here to ensure the quotes are written to the response file +# RUN: %python -c "import os; open(r'%t.response2.txt', 'w').write(r'%t2.a \"%t space.txt\"'+ '\n')" +# RUN: llvm-ar rc @%t.response2.txt +# RUN: llvm-ar p %t2.a | FileCheck %s --check-prefix=CONTENTS + +## Arguments after the response file. +# RUN: echo 'rc %t3.a' > %t.response3.txt +# RUN: llvm-ar @%t.response3.txt %t.txt +# RUN: llvm-ar p %t3.a | FileCheck %s --check-prefix=CONTENTS + +# CONTENTS: contents + +## rsp-quoting +# RUN: not llvm-ar --rsp-quoting=foobar @%t.response1.txt 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERROR +# ERROR: Invalid response file quoting style foobar + +# RUN: echo -e 'rc %/t.a blah\\foo' > %t-rsp.txt +# RUN: not llvm-ar --rsp-quoting=windows @%t-rsp.txt 2>&1 | \ +# RUN: FileCheck %s --check-prefix=WIN +# WIN: error: blah\foo: {{[Nn]}}o such file or directory + +# RUN: not llvm-ar -rsp-quoting posix @%t-rsp.txt 2>&1 | \ +# RUN: FileCheck %s --check-prefix=POSIX +# POSIX: error: blahfoo: {{[Nn]}}o such file or directory diff --git a/llvm/test/tools/llvm-gsymutil/fat-macho-dwarf.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/fat-macho-dwarf.yaml similarity index 100% rename from llvm/test/tools/llvm-gsymutil/fat-macho-dwarf.yaml rename to llvm/test/tools/llvm-gsymutil/ARM_AArch64/fat-macho-dwarf.yaml diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/lit.local.cfg b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/lit.local.cfg new file mode 100644 index 00000000000000..e06c15ef14138a --- /dev/null +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/lit.local.cfg @@ -0,0 +1,4 @@ +if not ('ARM' in config.root.targets and 'AArch64' in config.root.targets): + config.unsupported = True + +config.suffixes = ['.test', '.yaml'] diff --git a/llvm/test/tools/llvm-gsymutil/elf-dwarf.yaml b/llvm/test/tools/llvm-gsymutil/X86/elf-dwarf.yaml similarity index 100% rename from llvm/test/tools/llvm-gsymutil/elf-dwarf.yaml rename to 
llvm/test/tools/llvm-gsymutil/X86/elf-dwarf.yaml diff --git a/llvm/test/tools/llvm-gsymutil/X86/lit.local.cfg b/llvm/test/tools/llvm-gsymutil/X86/lit.local.cfg new file mode 100644 index 00000000000000..52c762f5cfb8b1 --- /dev/null +++ b/llvm/test/tools/llvm-gsymutil/X86/lit.local.cfg @@ -0,0 +1,4 @@ +if not 'X86' in config.root.targets: + config.unsupported = True + +config.suffixes = ['.test', '.yaml'] diff --git a/llvm/test/tools/llvm-gsymutil/mach-dwarf.yaml b/llvm/test/tools/llvm-gsymutil/X86/mach-dwarf.yaml similarity index 100% rename from llvm/test/tools/llvm-gsymutil/mach-dwarf.yaml rename to llvm/test/tools/llvm-gsymutil/X86/mach-dwarf.yaml diff --git a/llvm/test/tools/obj2yaml/duplicate-symbol-and-section-names.test b/llvm/test/tools/obj2yaml/duplicate-symbol-and-section-names.test index 9dc198392c9b91..0eba412ffa471e 100644 --- a/llvm/test/tools/obj2yaml/duplicate-symbol-and-section-names.test +++ b/llvm/test/tools/obj2yaml/duplicate-symbol-and-section-names.test @@ -125,13 +125,7 @@ Symbols: # RUN: yaml2obj --docnum=3 %s -o %t3 # RUN: obj2yaml %t3 | FileCheck %s --check-prefix=CASE3 -# CASE3: --- !ELF -# CASE3-NEXT: FileHeader: -# CASE3-NEXT: Class: ELFCLASS64 -# CASE3-NEXT: Data: ELFDATA2LSB -# CASE3-NEXT: Type: ET_DYN -# CASE3-NEXT: Machine: EM_X86_64 -# CASE3-NEXT: Symbols: +# CASE3: Symbols: # CASE3-NEXT: - Name: foo # CASE3-NEXT: Binding: STB_GLOBAL # CASE3-NEXT: DynamicSymbols: diff --git a/llvm/test/tools/obj2yaml/implicit-sections-order.yaml b/llvm/test/tools/obj2yaml/implicit-sections-order.yaml new file mode 100644 index 00000000000000..555b1f3edc0423 --- /dev/null +++ b/llvm/test/tools/obj2yaml/implicit-sections-order.yaml @@ -0,0 +1,163 @@ +## Check that obj2yaml dumps SHT_STRTAB/SHT_SYMTAB/SHT_DYNSYM sections +## when they are allocatable. 
+ +## In the following test we check the normal case: when .dynsym (SHT_DYNSYM) +## and .dynstr (SHT_STRTAB) are allocatable sections and .symtab (SHT_SYMTAB), +## .strtab (SHT_STRTAB) and .shstrtab (SHT_STRTAB) are not. +## Check we explicitly declare allocatable sections. + +# RUN: yaml2obj %s -o %t1.so -D FLAG1=SHF_ALLOC -D FLAG2="" +# RUN: llvm-readelf -S %t1.so | FileCheck %s --check-prefixes=RE,RE-1 +# RUN: obj2yaml %t1.so | FileCheck %s --check-prefix=OUTPUT + +## Check the information about sections using an independent tool. + +# RE: Section Headers: +# RE-NEXT: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# RE-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 +# RE-NEXT: [ 1] .foo.1 PROGBITS 0000000000000000 000040 000000 00 0 0 0 +# RE-1-NEXT: [ 2] .dynsym DYNSYM 0000000000001000 000040 000030 18 A 4 2 0 +# RE-2-NEXT: [ 2] .dynsym DYNSYM 0000000000001000 000040 000030 18 4 2 0 +# RE-NEXT: [ 3] .foo.2 PROGBITS 0000000000000000 000070 000000 00 0 0 0 +# RE-1-NEXT: [ 4] .dynstr STRTAB 0000000000002000 000070 000005 00 A 0 0 0 +# RE-2-NEXT: [ 4] .dynstr STRTAB 0000000000002000 000070 000005 00 0 0 0 +# RE-NEXT: [ 5] .foo.3 PROGBITS 0000000000000000 000075 000000 00 0 0 0 +# RE-1-NEXT: [ 6] .symtab SYMTAB 0000000000003000 000075 000030 18 8 2 0 +# RE-2-NEXT: [ 6] .symtab SYMTAB 0000000000003000 000075 000030 18 A 8 2 0 +# RE-NEXT: [ 7] .foo.4 PROGBITS 0000000000000000 0000a5 000000 00 0 0 0 +# RE-1-NEXT: [ 8] .strtab STRTAB 0000000000004000 0000a5 000005 00 0 0 0 +# RE-2-NEXT: [ 8] .strtab STRTAB 0000000000004000 0000a5 000005 00 A 0 0 0 +# RE-NEXT: [ 9] .foo.5 PROGBITS 0000000000000000 0000aa 000000 00 0 0 0 +# RE-1-NEXT: [10] .shstrtab STRTAB 0000000000005000 0000aa 000055 00 0 0 0 +# RE-2-NEXT: [10] .shstrtab STRTAB 0000000000005000 0000aa 000055 00 A 0 0 0 +# RE-NEXT: [11] .foo.6 PROGBITS 0000000000000000 0000ff 000000 00 0 0 0 + +# OUTPUT: --- !ELF +# OUTPUT-NEXT: FileHeader: +# OUTPUT-NEXT: Class: ELFCLASS64 +# OUTPUT-NEXT: Data: ELFDATA2LSB 
+# OUTPUT-NEXT: Type: ET_DYN +# OUTPUT-NEXT: Machine: EM_X86_64 +# OUTPUT-NEXT: Sections: +# OUTPUT-NEXT: - Name: .foo.1 +# OUTPUT-NEXT: Type: SHT_PROGBITS +# OUTPUT-NEXT: - Name: .dynsym +# OUTPUT-NEXT: Type: SHT_DYNSYM +# OUTPUT-NEXT: Flags: [ SHF_ALLOC ] +# OUTPUT-NEXT: Address: 0x0000000000001000 +# OUTPUT-NEXT: Link: .dynstr +# OUTPUT-NEXT: EntSize: 0x0000000000000018 +# OUTPUT-NEXT: - Name: .foo.2 +# OUTPUT-NEXT: Type: SHT_PROGBITS +# OUTPUT-NEXT: - Name: .dynstr +# OUTPUT-NEXT: Type: SHT_STRTAB +# OUTPUT-NEXT: Flags: [ SHF_ALLOC ] +# OUTPUT-NEXT: Address: 0x0000000000002000 +# OUTPUT-NEXT: - Name: .foo.3 +# OUTPUT-NEXT: Type: SHT_PROGBITS +# OUTPUT-NEXT: - Name: .foo.4 +# OUTPUT-NEXT: Type: SHT_PROGBITS +# OUTPUT-NEXT: - Name: .foo.5 +# OUTPUT-NEXT: Type: SHT_PROGBITS +# OUTPUT-NEXT: - Name: .foo.6 +# OUTPUT-NEXT: Type: SHT_PROGBITS +# OUTPUT-NEXT: Symbols: +# OUTPUT-NEXT: - Name: foo +# OUTPUT-NEXT: DynamicSymbols: +# OUTPUT-NEXT: - Name: bar +# OUTPUT-NEXT: ... + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .foo.1 + Type: SHT_PROGBITS + - Name: .dynsym + Type: SHT_DYNSYM + Address: 0x1000 + Flags: [ [[FLAG1]] ] + - Name: .foo.2 + Type: SHT_PROGBITS + - Name: .dynstr + Type: SHT_STRTAB + Address: 0x2000 + Flags: [ [[FLAG1]] ] + - Name: .foo.3 + Type: SHT_PROGBITS + - Name: .symtab + Type: SHT_SYMTAB + Address: 0x3000 + Flags: [ [[FLAG2]] ] + - Name: .foo.4 + Type: SHT_PROGBITS + - Name: .strtab + Type: SHT_STRTAB + Address: 0x4000 + Flags: [ [[FLAG2]] ] + - Name: .foo.5 + Type: SHT_PROGBITS + - Name: .shstrtab + Type: SHT_STRTAB + Address: 0x5000 + Flags: [ [[FLAG2]] ] + - Name: .foo.6 + Type: SHT_PROGBITS +Symbols: + - Name: foo +DynamicSymbols: + - Name: bar + +## Now test the abnormal case: when .symtab (SHT_SYMTAB), +## .strtab (SHT_STRTAB) and .shstrtab (SHT_STRTAB) are +## allocatable sections, but .dynsym (SHT_DYNSYM) and +## .dynstr (SHT_STRTAB) are not. 
+## Check that only allocatable versions are explicitly declared. + +# RUN: yaml2obj %s -o %t2.so -D FLAG1="" -D FLAG2=SHF_ALLOC +# RUN: llvm-readelf -S %t2.so | FileCheck %s --check-prefixes=RE,RE-2 +# RUN: obj2yaml %t2.so | FileCheck %s --check-prefix=OUTPUT2 + +## Check we explicitly declare only allocatable +## SHT_STRTAB/SHT_SYMTAB/SHT_DYNSYM sections. +# OUTPUT2: --- !ELF +# OUTPUT2-NEXT: FileHeader: +# OUTPUT2-NEXT: Class: ELFCLASS64 +# OUTPUT2-NEXT: Data: ELFDATA2LSB +# OUTPUT2-NEXT: Type: ET_DYN +# OUTPUT2-NEXT: Machine: EM_X86_64 +# OUTPUT2-NEXT: Sections: +# OUTPUT2-NEXT: - Name: .foo.1 +# OUTPUT2-NEXT: Type: SHT_PROGBITS +# OUTPUT2-NEXT: - Name: .foo.2 +# OUTPUT2-NEXT: Type: SHT_PROGBITS +# OUTPUT2-NEXT: - Name: .foo.3 +# OUTPUT2-NEXT: Type: SHT_PROGBITS +# OUTPUT2-NEXT: - Name: .symtab +# OUTPUT2-NEXT: Type: SHT_SYMTAB +# OUTPUT2-NEXT: Flags: [ SHF_ALLOC ] +# OUTPUT2-NEXT: Address: 0x0000000000003000 +# OUTPUT2-NEXT: Link: .strtab +# OUTPUT2-NEXT: EntSize: 0x0000000000000018 +# OUTPUT2-NEXT: - Name: .foo.4 +# OUTPUT2-NEXT: Type: SHT_PROGBITS +# OUTPUT2-NEXT: - Name: .strtab +# OUTPUT2-NEXT: Type: SHT_STRTAB +# OUTPUT2-NEXT: Flags: [ SHF_ALLOC ] +# OUTPUT2-NEXT: Address: 0x0000000000004000 +# OUTPUT2-NEXT: - Name: .foo.5 +# OUTPUT2-NEXT: Type: SHT_PROGBITS +# OUTPUT2-NEXT: - Name: .shstrtab +# OUTPUT2-NEXT: Type: SHT_STRTAB +# OUTPUT2-NEXT: Flags: [ SHF_ALLOC ] +# OUTPUT2-NEXT: Address: 0x0000000000005000 +# OUTPUT2-NEXT: - Name: .foo.6 +# OUTPUT2-NEXT: Type: SHT_PROGBITS +# OUTPUT2-NEXT: Symbols: +# OUTPUT2-NEXT: - Name: foo +# OUTPUT2-NEXT: DynamicSymbols: +# OUTPUT2-NEXT: - Name: bar +# OUTPUT2-NEXT: ... 
diff --git a/llvm/test/tools/obj2yaml/versym-section.yaml b/llvm/test/tools/obj2yaml/versym-section.yaml index 38836960615c17..0a04b3165ce210 100644 --- a/llvm/test/tools/obj2yaml/versym-section.yaml +++ b/llvm/test/tools/obj2yaml/versym-section.yaml @@ -19,7 +19,8 @@ # CHECK-NEXT: AddressAlign: 0x0000000000000002 # CHECK-NEXT: EntSize: 0x0000000000000002 # CHECK-NEXT: Entries: [ 0, 3, 4 ] -# CHECK-NEXT: DynamicSymbols: +# CHECK-NEXT: - Name: +# CHECK: DynamicSymbols: # CHECK-NEXT: - Name: f1 # CHECK-NEXT: Binding: STB_GLOBAL # CHECK-NEXT: - Name: f2 diff --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp index 401dc7f8c7d2b7..8e7a85f5a9b06d 100644 --- a/llvm/tools/llvm-ar/llvm-ar.cpp +++ b/llvm/tools/llvm-ar/llvm-ar.cpp @@ -83,6 +83,9 @@ USAGE: llvm-ar [options] [-][modifiers] [relpos] [count] [f =bsd - bsd --plugin= - ignored for compatibility -h --help - display this help and exit + --rsp-quoting - quoting style for response files + =posix - posix + =windows - windows --version - print the version and exit @ - read options from @@ -1096,61 +1099,105 @@ static bool handleGenericOption(StringRef arg) { return false; } +static const char *matchFlagWithArg(StringRef Expected, + ArrayRef::iterator &ArgIt, + ArrayRef Args) { + StringRef Arg = *ArgIt; + + if (Arg.startswith("--")) + Arg = Arg.substr(2); + else if (Arg.startswith("-")) + Arg = Arg.substr(1); + + size_t len = Expected.size(); + if (Arg == Expected) { + if (++ArgIt == Args.end()) + fail(std::string(Expected) + " requires an argument"); + + return *ArgIt; + } + if (Arg.startswith(Expected) && Arg.size() > len && Arg[len] == '=') + return Arg.data() + len + 1; + + return nullptr; +} + +static cl::TokenizerCallback getRspQuoting(ArrayRef ArgsArr) { + cl::TokenizerCallback Ret = + Triple(sys::getProcessTriple()).getOS() == Triple::Win32 + ? 
cl::TokenizeWindowsCommandLine + : cl::TokenizeGNUCommandLine; + + for (ArrayRef::iterator ArgIt = ArgsArr.begin(); + ArgIt != ArgsArr.end(); ++ArgIt) { + if (const char *Match = matchFlagWithArg("rsp-quoting", ArgIt, ArgsArr)) { + StringRef MatchRef = Match; + if (MatchRef == "posix") + Ret = cl::TokenizeGNUCommandLine; + else if (MatchRef == "windows") + Ret = cl::TokenizeWindowsCommandLine; + else + fail(std::string("Invalid response file quoting style ") + Match); + } + } + + return Ret; +} + static int ar_main(int argc, char **argv) { - SmallVector Argv(argv, argv + argc); + SmallVector Argv(argv + 1, argv + argc); StringSaver Saver(Alloc); - cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv); - for (size_t i = 1; i < Argv.size(); ++i) { - StringRef Arg = Argv[i]; - const char *match = nullptr; - auto MatchFlagWithArg = [&](const char *expected) { - size_t len = strlen(expected); - if (Arg == expected) { - if (++i >= Argv.size()) - fail(std::string(expected) + " requires an argument"); - match = Argv[i]; - return true; - } - if (Arg.startswith(expected) && Arg.size() > len && Arg[len] == '=') { - match = Arg.data() + len + 1; - return true; - } - return false; - }; - if (handleGenericOption(Argv[i])) + + cl::ExpandResponseFiles(Saver, getRspQuoting(makeArrayRef(argv, argc)), Argv); + + ArrayRef ArgsArr = makeArrayRef(argv, argc); + + for (ArrayRef::iterator ArgIt = Argv.begin(); + ArgIt != Argv.end(); ++ArgIt) { + const char *Match = nullptr; + + if (handleGenericOption(*ArgIt)) return 0; - if (Arg == "--") { - for (; i < Argv.size(); ++i) - PositionalArgs.push_back(Argv[i]); + if (strcmp(*ArgIt, "--") == 0) { + ++ArgIt; + for (; ArgIt != Argv.end(); ++ArgIt) + PositionalArgs.push_back(*ArgIt); break; } - if (Arg[0] == '-') { - if (Arg.startswith("--")) - Arg = Argv[i] + 2; + + if (*ArgIt[0] != '-') { + if (Options.empty()) + Options += *ArgIt; else - Arg = Argv[i] + 1; - if (Arg == "M") { - MRI = true; - } else if (MatchFlagWithArg("format")) { 
- FormatType = StringSwitch(match) - .Case("default", Default) - .Case("gnu", GNU) - .Case("darwin", DARWIN) - .Case("bsd", BSD) - .Default(Unknown); - if (FormatType == Unknown) - fail(std::string("Invalid format ") + match); - } else if (MatchFlagWithArg("plugin")) { - // Ignored. - } else { - Options += Argv[i] + 1; - } - } else if (Options.empty()) { - Options += Argv[i]; - } else { - PositionalArgs.push_back(Argv[i]); + PositionalArgs.push_back(*ArgIt); + continue; } + + if (strcmp(*ArgIt, "-M") == 0) { + MRI = true; + continue; + } + + Match = matchFlagWithArg("format", ArgIt, Argv); + if (Match) { + FormatType = StringSwitch(Match) + .Case("default", Default) + .Case("gnu", GNU) + .Case("darwin", DARWIN) + .Case("bsd", BSD) + .Default(Unknown); + if (FormatType == Unknown) + fail(std::string("Invalid format ") + Match); + continue; + } + + if (matchFlagWithArg("plugin", ArgIt, Argv) || + matchFlagWithArg("rsp-quoting", ArgIt, Argv)) + continue; + + Options += *ArgIt + 1; } + ArchiveOperation Operation = parseCommandLine(); return performOperation(Operation, nullptr); } diff --git a/llvm/tools/llvm-gsym/llvm-gsymutil.cpp b/llvm/tools/llvm-gsym/llvm-gsymutil.cpp index c7d6cf33da67a6..a3be9e3149dbe9 100644 --- a/llvm/tools/llvm-gsym/llvm-gsymutil.cpp +++ b/llvm/tools/llvm-gsym/llvm-gsymutil.cpp @@ -179,7 +179,8 @@ static bool filterArch(MachOObjectFile &Obj) { if (ArchFilters.empty()) return true; - StringRef ObjArch = Obj.getArchTriple().getArchName(); + Triple ObjTriple(Obj.getArchTriple()); + StringRef ObjArch = ObjTriple.getArchName(); for (auto Arch : ArchFilters) { // Match name. 
@@ -350,7 +351,8 @@ static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer, error(Filename, errorToErrorCode(BinOrErr.takeError())); if (auto *Obj = dyn_cast(BinOrErr->get())) { - auto ArchName = Obj->makeTriple().getArchName(); + Triple ObjTriple(Obj->makeTriple()); + auto ArchName = ObjTriple.getArchName(); outs() << "Output file (" << ArchName << "): " << OutFile << "\n"; if (auto Err = handleObjectFile(*Obj, OutFile.c_str())) return Err; @@ -374,7 +376,8 @@ static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer, // Now handle each architecture we need to convert. for (auto &Obj: FilterObjs) { - auto ArchName = Obj->getArchTriple().getArchName(); + Triple ObjTriple(Obj->getArchTriple()); + auto ArchName = ObjTriple.getArchName(); std::string ArchOutFile(OutFile); // If we are only handling a single architecture, then we will use the // normal output file. If we are handling multiple architectures append diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index 08c3587a821d5e..180457bb6d91eb 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -55,6 +55,7 @@ class ELFDumper { std::vector &Symbols); Error dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab, StringRef StrTable, ELFYAML::Symbol &S); + Expected>> dumpSections(); Error dumpCommonSection(const Elf_Shdr *Shdr, ELFYAML::Section &S); Error dumpCommonRelocationSection(const Elf_Shdr *Shdr, ELFYAML::RelocationSection &S); @@ -228,26 +229,53 @@ template Expected ELFDumper::dump() { return std::move(E); } + if (Expected>> ChunksOrErr = + dumpSections()) + Y->Chunks = std::move(*ChunksOrErr); + else + return ChunksOrErr.takeError(); + + return Y.release(); +} + +template +Expected>> +ELFDumper::dumpSections() { + std::vector> Ret; + for (const Elf_Shdr &Sec : Sections) { switch (Sec.sh_type) { case ELF::SHT_DYNAMIC: { Expected SecOrErr = dumpDynamicSection(&Sec); if (!SecOrErr) return SecOrErr.takeError(); 
- Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); break; } case ELF::SHT_STRTAB: case ELF::SHT_SYMTAB: - case ELF::SHT_DYNSYM: - // Do not dump these sections. + case ELF::SHT_DYNSYM: { + // The contents of these sections are described by other parts of the YAML + // file. We still dump them so that their positions in the section header + // table are correctly recorded. We only dump allocatable section because + // their positions and addresses are important, e.g. for creating program + // headers. Some sections, like .symtab or .strtab normally are not + // allocatable and do not have virtual addresses. We want to avoid noise + // in the YAML output and assume that they are placed at the end. + if (Sec.sh_flags & ELF::SHF_ALLOC) { + auto S = std::make_unique(); + if (Error E = dumpCommonSection(&Sec, *S.get())) + return std::move(E); + Ret.emplace_back(std::move(S)); + } break; + } case ELF::SHT_SYMTAB_SHNDX: { Expected SecOrErr = dumpSymtabShndxSection(&Sec); if (!SecOrErr) return SecOrErr.takeError(); - Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); break; } case ELF::SHT_REL: @@ -255,84 +283,84 @@ template Expected ELFDumper::dump() { Expected SecOrErr = dumpRelocSection(&Sec); if (!SecOrErr) return SecOrErr.takeError(); - Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); break; } case ELF::SHT_RELR: { Expected SecOrErr = dumpRelrSection(&Sec); if (!SecOrErr) return SecOrErr.takeError(); - Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); break; } case ELF::SHT_GROUP: { Expected GroupOrErr = dumpGroup(&Sec); if (!GroupOrErr) return GroupOrErr.takeError(); - Y->Chunks.emplace_back(*GroupOrErr); + Ret.emplace_back(*GroupOrErr); break; } case ELF::SHT_MIPS_ABIFLAGS: { Expected SecOrErr = dumpMipsABIFlags(&Sec); if (!SecOrErr) return SecOrErr.takeError(); - Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); break; } case ELF::SHT_NOBITS: { Expected SecOrErr = 
dumpNoBitsSection(&Sec); if (!SecOrErr) return SecOrErr.takeError(); - Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); break; } case ELF::SHT_NOTE: { Expected SecOrErr = dumpNoteSection(&Sec); if (!SecOrErr) return SecOrErr.takeError(); - Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); break; } case ELF::SHT_HASH: { Expected SecOrErr = dumpHashSection(&Sec); if (!SecOrErr) return SecOrErr.takeError(); - Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); break; } case ELF::SHT_GNU_HASH: { Expected SecOrErr = dumpGnuHashSection(&Sec); if (!SecOrErr) return SecOrErr.takeError(); - Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); break; } case ELF::SHT_GNU_verdef: { Expected SecOrErr = dumpVerdefSection(&Sec); if (!SecOrErr) return SecOrErr.takeError(); - Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); break; } case ELF::SHT_GNU_versym: { Expected SecOrErr = dumpSymverSection(&Sec); if (!SecOrErr) return SecOrErr.takeError(); - Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); break; } case ELF::SHT_GNU_verneed: { Expected SecOrErr = dumpVerneedSection(&Sec); if (!SecOrErr) return SecOrErr.takeError(); - Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); break; } case ELF::SHT_LLVM_ADDRSIG: { Expected SecOrErr = dumpAddrsigSection(&Sec); if (!SecOrErr) return SecOrErr.takeError(); - Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); break; } case ELF::SHT_LLVM_LINKER_OPTIONS: { @@ -340,7 +368,7 @@ template Expected ELFDumper::dump() { dumpLinkerOptionsSection(&Sec); if (!SecOrErr) return SecOrErr.takeError(); - Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); break; } case ELF::SHT_LLVM_DEPENDENT_LIBRARIES: { @@ -348,7 +376,7 @@ template Expected ELFDumper::dump() { dumpDependentLibrariesSection(&Sec); if (!SecOrErr) return SecOrErr.takeError(); - Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); break; } 
case ELF::SHT_LLVM_CALL_GRAPH_PROFILE: { @@ -356,7 +384,7 @@ template Expected ELFDumper::dump() { dumpCallGraphProfileSection(&Sec); if (!SecOrErr) return SecOrErr.takeError(); - Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); break; } case ELF::SHT_NULL: { @@ -378,7 +406,7 @@ template Expected ELFDumper::dump() { if (!SpecialSecOrErr) return SpecialSecOrErr.takeError(); if (*SpecialSecOrErr) { - Y->Chunks.emplace_back(*SpecialSecOrErr); + Ret.emplace_back(*SpecialSecOrErr); break; } } @@ -387,12 +415,11 @@ template Expected ELFDumper::dump() { dumpContentSection(&Sec); if (!SecOrErr) return SecOrErr.takeError(); - Y->Chunks.emplace_back(*SecOrErr); + Ret.emplace_back(*SecOrErr); } } } - - return Y.release(); + return std::move(Ret); } template diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt index d66dd39c601367..6fabd940a74af7 100644 --- a/llvm/unittests/Analysis/CMakeLists.txt +++ b/llvm/unittests/Analysis/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_unittest(AnalysisTests LazyCallGraphTest.cpp LoadsTest.cpp LoopInfoTest.cpp + LoopNestTest.cpp MemoryBuiltinsTest.cpp MemorySSATest.cpp OrderedInstructionsTest.cpp diff --git a/llvm/unittests/Analysis/LoopNestTest.cpp b/llvm/unittests/Analysis/LoopNestTest.cpp new file mode 100644 index 00000000000000..4e31b1f2e9046c --- /dev/null +++ b/llvm/unittests/Analysis/LoopNestTest.cpp @@ -0,0 +1,194 @@ +//===- LoopNestTest.cpp - LoopNestAnalysis unit tests ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopNestAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/Dominators.h" +#include "llvm/Support/SourceMgr.h" +#include "gtest/gtest.h" + +using namespace llvm; + +/// Build the loop nest analysis for a loop nest and run the given test \p Test. +static void runTest( + Module &M, StringRef FuncName, + function_ref Test) { + auto *F = M.getFunction(FuncName); + ASSERT_NE(F, nullptr) << "Could not find " << FuncName; + + TargetLibraryInfoImpl TLII; + TargetLibraryInfo TLI(TLII); + AssumptionCache AC(*F); + DominatorTree DT(*F); + LoopInfo LI(DT); + ScalarEvolution SE(*F, TLI, AC, DT, LI); + + Test(*F, LI, SE); +} + +static std::unique_ptr makeLLVMModule(LLVMContext &Context, + const char *ModuleStr) { + SMDiagnostic Err; + return parseAssemblyString(ModuleStr, Err, Context); +} + +TEST(LoopNestTest, PerfectLoopNest) { + const char *ModuleStr = + "target datalayout = \"e-m:o-i64:64-f80:128-n8:16:32:64-S128\"\n" + "define void @foo(i64 signext %nx, i64 signext %ny) {\n" + "entry:\n" + " br label %for.outer\n" + "for.outer:\n" + " %i = phi i64 [ 0, %entry ], [ %inc13, %for.outer.latch ]\n" + " %cmp21 = icmp slt i64 0, %ny\n" + " br i1 %cmp21, label %for.inner.preheader, label %for.outer.latch\n" + "for.inner.preheader:\n" + " br label %for.inner\n" + "for.inner:\n" + " %j = phi i64 [ 0, %for.inner.preheader ], [ %inc, %for.inner.latch ]\n" + " br label %for.inner.latch\n" + "for.inner.latch:\n" + " %inc = add nsw i64 %j, 1\n" + " %cmp2 = icmp slt i64 %inc, %ny\n" + " br i1 %cmp2, label %for.inner, label %for.inner.exit\n" + "for.inner.exit:\n" + " br label %for.outer.latch\n" + "for.outer.latch:\n" + " %inc13 = add nsw i64 %i, 1\n" + " %cmp = icmp slt i64 %inc13, %nx\n" + " br i1 %cmp, label 
%for.outer, label %for.outer.exit\n" + "for.outer.exit:\n" + " br label %for.end\n" + "for.end:\n" + " ret void\n" + "}\n"; + + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleStr); + + runTest(*M, "foo", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + Function::iterator FI = F.begin(); + // Skip the first basic block (entry), get to the outer loop header. + BasicBlock *Header = &*(++FI); + assert(Header->getName() == "for.outer"); + Loop *L = LI.getLoopFor(Header); + EXPECT_NE(L, nullptr); + + LoopNest LN(*L, SE); + EXPECT_TRUE(LN.areAllLoopsSimplifyForm()); + + // Ensure that we can identify the outermost loop in the nest. + const Loop &OL = LN.getOutermostLoop(); + EXPECT_EQ(OL.getName(), "for.outer"); + + // Ensure that we can identify the innermost loop in the nest. + const Loop *IL = LN.getInnermostLoop(); + EXPECT_NE(IL, nullptr); + EXPECT_EQ(IL->getName(), "for.inner"); + + // Ensure the loop nest is recognized as having 2 loops. + const ArrayRef Loops = LN.getLoops(); + EXPECT_EQ(Loops.size(), 2ull); + + // Ensure the loop nest is recognized as perfect in its entirety. + const SmallVector &PLV = LN.getPerfectLoops(SE); + EXPECT_EQ(PLV.size(), 1ull); + EXPECT_EQ(PLV.front().size(), 2ull); + + // Ensure the nest depth and perfect nest depth are computed correctly. 
+ EXPECT_EQ(LN.getNestDepth(), 2u); + EXPECT_EQ(LN.getMaxPerfectDepth(), 2u); + }); +} + +TEST(LoopNestTest, ImperfectLoopNest) { + const char *ModuleStr = + "target datalayout = \"e-m:o-i64:64-f80:128-n8:16:32:64-S128\"\n" + "define void @foo(i32 signext %nx, i32 signext %ny, i32 signext %nk) {\n" + "entry:\n" + " br label %loop.i\n" + "loop.i:\n" + " %i = phi i32 [ 0, %entry ], [ %inci, %for.inci ]\n" + " %cmp21 = icmp slt i32 0, %ny\n" + " br i1 %cmp21, label %loop.j.preheader, label %for.inci\n" + "loop.j.preheader:\n" + " br label %loop.j\n" + "loop.j:\n" + " %j = phi i32 [ %incj, %for.incj ], [ 0, %loop.j.preheader ]\n" + " %cmp22 = icmp slt i32 0, %nk\n" + " br i1 %cmp22, label %loop.k.preheader, label %for.incj\n" + "loop.k.preheader:\n" + " call void @bar()\n" + " br label %loop.k\n" + "loop.k:\n" + " %k = phi i32 [ %inck, %for.inck ], [ 0, %loop.k.preheader ]\n" + " br label %for.inck\n" + "for.inck:\n" + " %inck = add nsw i32 %k, 1\n" + " %cmp5 = icmp slt i32 %inck, %nk\n" + " br i1 %cmp5, label %loop.k, label %for.incj.loopexit\n" + "for.incj.loopexit:\n" + " br label %for.incj\n" + "for.incj:\n" + " %incj = add nsw i32 %j, 1\n" + " %cmp2 = icmp slt i32 %incj, %ny\n" + " br i1 %cmp2, label %loop.j, label %for.inci.loopexit\n" + "for.inci.loopexit:\n" + " br label %for.inci\n" + "for.inci:\n" + " %inci = add nsw i32 %i, 1\n" + " %cmp = icmp slt i32 %inci, %nx\n" + " br i1 %cmp, label %loop.i, label %loop.i.end\n" + "loop.i.end:\n" + " ret void\n" + "}\n" + "declare void @bar()\n"; + + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleStr); + + runTest(*M, "foo", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + Function::iterator FI = F.begin(); + // Skip the first basic block (entry), get to the outermost loop header. 
+ BasicBlock *Header = &*(++FI); + assert(Header->getName() == "loop.i"); + Loop *L = LI.getLoopFor(Header); + EXPECT_NE(L, nullptr); + + LoopNest LN(*L, SE); + EXPECT_TRUE(LN.areAllLoopsSimplifyForm()); + + dbgs() << "LN: " << LN << "\n"; + + // Ensure that we can identify the outermost loop in the nest. + const Loop &OL = LN.getOutermostLoop(); + EXPECT_EQ(OL.getName(), "loop.i"); + + // Ensure that we can identify the innermost loop in the nest. + const Loop *IL = LN.getInnermostLoop(); + EXPECT_NE(IL, nullptr); + EXPECT_EQ(IL->getName(), "loop.k"); + + // Ensure the loop nest is recognized as having 3 loops. + const ArrayRef Loops = LN.getLoops(); + EXPECT_EQ(Loops.size(), 3ull); + + // Ensure the loop nest is recognized as having 2 separate perfect loops groups. + const SmallVector &PLV = LN.getPerfectLoops(SE); + EXPECT_EQ(PLV.size(), 2ull); + EXPECT_EQ(PLV.front().size(), 2ull); + EXPECT_EQ(PLV.back().size(), 1ull); + + // Ensure the nest depth and perfect nest depth are computed correctly. 
+ EXPECT_EQ(LN.getNestDepth(), 3u); + EXPECT_EQ(LN.getMaxPerfectDepth(), 2u); + }); +} + diff --git a/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp b/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp index 8072e05c7cea0c..6c4e42aa7e05f1 100644 --- a/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp +++ b/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp @@ -65,6 +65,20 @@ class ProfileSummaryInfoTest : public testing::Test { " %y2 = phi i32 [0, %bb1], [1, %bb2] \n" " ret i32 %y2\n" "}\n" + "define i32 @l(i32 %x) {{\n" + "bb0:\n" + " %y1 = icmp eq i32 %x, 0 \n" + " br i1 %y1, label %bb1, label %bb2, !prof !23 \n" + "bb1:\n" + " %z1 = call i32 @g(i32 %x)\n" + " br label %bb3\n" + "bb2:\n" + " %z2 = call i32 @h(i32 %x)\n" + " br label %bb3\n" + "bb3:\n" + " %y2 = phi i32 [0, %bb1], [1, %bb2] \n" + " ret i32 %y2\n" + "}\n" "!20 = !{{!\"function_entry_count\", i64 400}\n" "!21 = !{{!\"function_entry_count\", i64 1}\n" "!22 = !{{!\"function_entry_count\", i64 100}\n" @@ -141,14 +155,26 @@ TEST_F(ProfileSummaryInfoTest, TestCommon) { EXPECT_FALSE(PSI.isHotCountNthPercentile(990000, 100)); EXPECT_FALSE(PSI.isHotCountNthPercentile(990000, 2)); + EXPECT_FALSE(PSI.isColdCountNthPercentile(990000, 400)); + EXPECT_TRUE(PSI.isColdCountNthPercentile(990000, 100)); + EXPECT_TRUE(PSI.isColdCountNthPercentile(990000, 2)); + EXPECT_TRUE(PSI.isHotCountNthPercentile(999999, 400)); EXPECT_TRUE(PSI.isHotCountNthPercentile(999999, 100)); EXPECT_FALSE(PSI.isHotCountNthPercentile(999999, 2)); + EXPECT_FALSE(PSI.isColdCountNthPercentile(999999, 400)); + EXPECT_FALSE(PSI.isColdCountNthPercentile(999999, 100)); + EXPECT_TRUE(PSI.isColdCountNthPercentile(999999, 2)); + EXPECT_FALSE(PSI.isHotCountNthPercentile(10000, 400)); EXPECT_FALSE(PSI.isHotCountNthPercentile(10000, 100)); EXPECT_FALSE(PSI.isHotCountNthPercentile(10000, 2)); + EXPECT_TRUE(PSI.isColdCountNthPercentile(10000, 400)); + EXPECT_TRUE(PSI.isColdCountNthPercentile(10000, 100)); + 
EXPECT_TRUE(PSI.isColdCountNthPercentile(10000, 2)); + EXPECT_TRUE(PSI.isFunctionEntryHot(F)); EXPECT_FALSE(PSI.isFunctionEntryHot(G)); EXPECT_FALSE(PSI.isFunctionEntryHot(H)); @@ -177,16 +203,31 @@ TEST_F(ProfileSummaryInfoTest, InstrProf) { EXPECT_FALSE(PSI.isHotBlockNthPercentile(990000, BB2, &BFI)); EXPECT_TRUE(PSI.isHotBlockNthPercentile(990000, BB3, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, &BB0, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, BB1, &BFI)); + EXPECT_TRUE(PSI.isColdBlockNthPercentile(990000, BB2, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, BB3, &BFI)); + EXPECT_TRUE(PSI.isHotBlockNthPercentile(999900, &BB0, &BFI)); EXPECT_TRUE(PSI.isHotBlockNthPercentile(999900, BB1, &BFI)); EXPECT_TRUE(PSI.isHotBlockNthPercentile(999900, BB2, &BFI)); EXPECT_TRUE(PSI.isHotBlockNthPercentile(999900, BB3, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(999900, &BB0, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(999900, BB1, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(999900, BB2, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(999900, BB3, &BFI)); + EXPECT_FALSE(PSI.isHotBlockNthPercentile(10000, &BB0, &BFI)); EXPECT_FALSE(PSI.isHotBlockNthPercentile(10000, BB1, &BFI)); EXPECT_FALSE(PSI.isHotBlockNthPercentile(10000, BB2, &BFI)); EXPECT_FALSE(PSI.isHotBlockNthPercentile(10000, BB3, &BFI)); + EXPECT_TRUE(PSI.isColdBlockNthPercentile(10000, &BB0, &BFI)); + EXPECT_TRUE(PSI.isColdBlockNthPercentile(10000, BB1, &BFI)); + EXPECT_TRUE(PSI.isColdBlockNthPercentile(10000, BB2, &BFI)); + EXPECT_TRUE(PSI.isColdBlockNthPercentile(10000, BB3, &BFI)); + CallSite CS1(BB1->getFirstNonPHI()); auto *CI2 = BB2->getFirstNonPHI(); CallSite CS2(CI2); @@ -201,6 +242,31 @@ TEST_F(ProfileSummaryInfoTest, InstrProf) { EXPECT_FALSE(PSI.isHotCallSite(CS2, &BFI)); } +TEST_F(ProfileSummaryInfoTest, InstrProfNoFuncEntryCount) { + auto M = makeLLVMModule("InstrProf"); + Function *F = M->getFunction("l"); + ProfileSummaryInfo 
PSI = buildPSI(M.get()); + EXPECT_TRUE(PSI.hasProfileSummary()); + EXPECT_TRUE(PSI.hasInstrumentationProfile()); + + BasicBlock &BB0 = F->getEntryBlock(); + BasicBlock *BB1 = BB0.getTerminator()->getSuccessor(0); + BasicBlock *BB2 = BB0.getTerminator()->getSuccessor(1); + BasicBlock *BB3 = BB1->getSingleSuccessor(); + + BlockFrequencyInfo BFI = buildBFI(*F); + + // Without the entry count, all should return false. + EXPECT_FALSE(PSI.isHotBlockNthPercentile(990000, &BB0, &BFI)); + EXPECT_FALSE(PSI.isHotBlockNthPercentile(990000, BB1, &BFI)); + EXPECT_FALSE(PSI.isHotBlockNthPercentile(990000, BB2, &BFI)); + EXPECT_FALSE(PSI.isHotBlockNthPercentile(990000, BB3, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, &BB0, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, BB1, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, BB2, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, BB3, &BFI)); +} + TEST_F(ProfileSummaryInfoTest, SampleProf) { auto M = makeLLVMModule("SampleProfile"); Function *F = M->getFunction("f"); @@ -224,16 +290,31 @@ TEST_F(ProfileSummaryInfoTest, SampleProf) { EXPECT_FALSE(PSI.isHotBlockNthPercentile(990000, BB2, &BFI)); EXPECT_TRUE(PSI.isHotBlockNthPercentile(990000, BB3, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, &BB0, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, BB1, &BFI)); + EXPECT_TRUE(PSI.isColdBlockNthPercentile(990000, BB2, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, BB3, &BFI)); + EXPECT_TRUE(PSI.isHotBlockNthPercentile(999900, &BB0, &BFI)); EXPECT_TRUE(PSI.isHotBlockNthPercentile(999900, BB1, &BFI)); EXPECT_TRUE(PSI.isHotBlockNthPercentile(999900, BB2, &BFI)); EXPECT_TRUE(PSI.isHotBlockNthPercentile(999900, BB3, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(999900, &BB0, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(999900, BB1, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(999900, BB2, &BFI)); + 
EXPECT_FALSE(PSI.isColdBlockNthPercentile(999900, BB3, &BFI)); + EXPECT_FALSE(PSI.isHotBlockNthPercentile(10000, &BB0, &BFI)); EXPECT_FALSE(PSI.isHotBlockNthPercentile(10000, BB1, &BFI)); EXPECT_FALSE(PSI.isHotBlockNthPercentile(10000, BB2, &BFI)); EXPECT_FALSE(PSI.isHotBlockNthPercentile(10000, BB3, &BFI)); + EXPECT_TRUE(PSI.isColdBlockNthPercentile(10000, &BB0, &BFI)); + EXPECT_TRUE(PSI.isColdBlockNthPercentile(10000, BB1, &BFI)); + EXPECT_TRUE(PSI.isColdBlockNthPercentile(10000, BB2, &BFI)); + EXPECT_TRUE(PSI.isColdBlockNthPercentile(10000, BB3, &BFI)); + CallSite CS1(BB1->getFirstNonPHI()); auto *CI2 = BB2->getFirstNonPHI(); // Manually attach branch weights metadata to the call instruction. @@ -250,6 +331,51 @@ TEST_F(ProfileSummaryInfoTest, SampleProf) { // weights that exceed the hot count threshold. CI2->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights({400})); EXPECT_TRUE(PSI.isHotCallSite(CS2, &BFI)); + + { + Function *F = M->getFunction("l"); + BlockFrequencyInfo BFI = buildBFI(*F); + BasicBlock &BB0 = F->getEntryBlock(); + BasicBlock *BB1 = BB0.getTerminator()->getSuccessor(0); + BasicBlock *BB2 = BB0.getTerminator()->getSuccessor(1); + BasicBlock *BB3 = BB1->getSingleSuccessor(); + + // Without the entry count, all should return false. 
+ EXPECT_FALSE(PSI.isHotBlockNthPercentile(990000, &BB0, &BFI)); + EXPECT_FALSE(PSI.isHotBlockNthPercentile(990000, BB1, &BFI)); + EXPECT_FALSE(PSI.isHotBlockNthPercentile(990000, BB2, &BFI)); + EXPECT_FALSE(PSI.isHotBlockNthPercentile(990000, BB3, &BFI)); + + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, &BB0, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, BB1, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, BB2, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, BB3, &BFI)); + } +} + +TEST_F(ProfileSummaryInfoTest, SampleProfNoFuncEntryCount) { + auto M = makeLLVMModule("SampleProfile"); + Function *F = M->getFunction("l"); + ProfileSummaryInfo PSI = buildPSI(M.get()); + EXPECT_TRUE(PSI.hasProfileSummary()); + EXPECT_TRUE(PSI.hasSampleProfile()); + + BasicBlock &BB0 = F->getEntryBlock(); + BasicBlock *BB1 = BB0.getTerminator()->getSuccessor(0); + BasicBlock *BB2 = BB0.getTerminator()->getSuccessor(1); + BasicBlock *BB3 = BB1->getSingleSuccessor(); + + BlockFrequencyInfo BFI = buildBFI(*F); + + // Without the entry count, all should return false. 
+ EXPECT_FALSE(PSI.isHotBlockNthPercentile(990000, &BB0, &BFI)); + EXPECT_FALSE(PSI.isHotBlockNthPercentile(990000, BB1, &BFI)); + EXPECT_FALSE(PSI.isHotBlockNthPercentile(990000, BB2, &BFI)); + EXPECT_FALSE(PSI.isHotBlockNthPercentile(990000, BB3, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, &BB0, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, BB1, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, BB2, &BFI)); + EXPECT_FALSE(PSI.isColdBlockNthPercentile(990000, BB3, &BFI)); } } // end anonymous namespace diff --git a/llvm/unittests/Support/CompressionTest.cpp b/llvm/unittests/Support/CompressionTest.cpp index 51723898e950d0..cc7be431b62bc3 100644 --- a/llvm/unittests/Support/CompressionTest.cpp +++ b/llvm/unittests/Support/CompressionTest.cpp @@ -21,7 +21,7 @@ using namespace llvm; namespace { -#if LLVM_ENABLE_ZLIB +#if LLVM_ENABLE_ZLIB == 1 && HAVE_LIBZ void TestZlibCompression(StringRef Input, int Level) { SmallString<32> Compressed; diff --git a/llvm/unittests/Transforms/Utils/KnowledgeRetentionTest.cpp b/llvm/unittests/Transforms/Utils/KnowledgeRetentionTest.cpp index 5c84a25745e594..08f2c6441645bb 100644 --- a/llvm/unittests/Transforms/Utils/KnowledgeRetentionTest.cpp +++ b/llvm/unittests/Transforms/Utils/KnowledgeRetentionTest.cpp @@ -41,7 +41,7 @@ static void RunTest( } } -void AssertMatchesExactlyAttributes(CallInst *Assume, Value *WasOn, +static void AssertMatchesExactlyAttributes(CallInst *Assume, Value *WasOn, StringRef AttrToMatch) { Regex Reg(AttrToMatch); SmallVector Matches; @@ -57,7 +57,7 @@ void AssertMatchesExactlyAttributes(CallInst *Assume, Value *WasOn, } } -void AssertHasTheRightValue(CallInst *Assume, Value *WasOn, +static void AssertHasTheRightValue(CallInst *Assume, Value *WasOn, Attribute::AttrKind Kind, unsigned Value, bool Both, AssumeQuery AQ = AssumeQuery::Highest) { if (!Both) { @@ -80,7 +80,7 @@ void AssertHasTheRightValue(CallInst *Assume, Value *WasOn, } } -TEST(AssumeQueryAPI, Basic) { 
+TEST(AssumeQueryAPI, hasAttributeInAssume) { StringRef Head = "declare void @llvm.assume(i1)\n" "declare void @func(i32*, i32*)\n" @@ -216,3 +216,174 @@ TEST(AssumeQueryAPI, Basic) { })); RunTest(Head, Tail, Tests); } + +static void AssertFindExactlyAttributes(RetainedKnowledgeMap &Map, Value *WasOn, + StringRef AttrToMatch) { + Regex Reg(AttrToMatch); + SmallVector Matches; + for (StringRef Attr : { +#define GET_ATTR_NAMES +#define ATTRIBUTE_ENUM(ENUM_NAME, DISPLAY_NAME) StringRef(#DISPLAY_NAME), +#include "llvm/IR/Attributes.inc" + }) { + bool ShouldHaveAttr = Reg.match(Attr, &Matches) && Matches[0] == Attr; + + if (ShouldHaveAttr != (Map.find(RetainedKnowledgeKey{WasOn, Attribute::getAttrKindFromName(Attr)}) != Map.end())) { + ASSERT_TRUE(false); + } + } +} + +static void AssertMapHasRightValue(RetainedKnowledgeMap &Map, + RetainedKnowledgeKey Key, MinMax MM) { + auto LookupIt = Map.find(Key); + ASSERT_TRUE(LookupIt != Map.end()); + ASSERT_TRUE(LookupIt->second.Min == MM.Min); + ASSERT_TRUE(LookupIt->second.Max == MM.Max); +} + +TEST(AssumeQueryAPI, fillMapFromAssume) { + StringRef Head = + "declare void @llvm.assume(i1)\n" + "declare void @func(i32*, i32*)\n" + "declare void @func1(i32*, i32*, i32*, i32*)\n" + "declare void @func_many(i32*) \"no-jump-tables\" nounwind " + "\"less-precise-fpmad\" willreturn norecurse\n" + "define void @test(i32* %P, i32* %P1, i32* %P2, i32* %P3) {\n"; + StringRef Tail = "ret void\n" + "}"; + std::vector>> + Tests; + Tests.push_back(std::make_pair( + "call void @func(i32* nonnull align 4 dereferenceable(16) %P, i32* align " + "8 noalias %P1)\n", + [](Instruction *I) { + CallInst *Assume = BuildAssumeFromInst(I); + Assume->insertBefore(I); + + RetainedKnowledgeMap Map; + fillMapFromAssume(*Assume, Map); + AssertFindExactlyAttributes(Map, I->getOperand(0), + "(nonnull|align|dereferenceable)"); + AssertFindExactlyAttributes(Map, I->getOperand(1), + "(noalias|align)"); + AssertMapHasRightValue( + Map, {I->getOperand(0), 
Attribute::Dereferenceable}, {16, 16}); + AssertMapHasRightValue(Map, {I->getOperand(0), Attribute::Alignment}, + {4, 4}); + AssertMapHasRightValue(Map, {I->getOperand(0), Attribute::Alignment}, + {4, 4}); + })); + Tests.push_back(std::make_pair( + "call void @func1(i32* nonnull align 32 dereferenceable(48) %P, i32* " + "nonnull " + "align 8 dereferenceable(28) %P, i32* nonnull align 64 " + "dereferenceable(4) " + "%P, i32* nonnull align 16 dereferenceable(12) %P)\n", + [](Instruction *I) { + CallInst *Assume = BuildAssumeFromInst(I); + Assume->insertBefore(I); + + RetainedKnowledgeMap Map; + fillMapFromAssume(*Assume, Map); + + AssertFindExactlyAttributes(Map, I->getOperand(0), + "(nonnull|align|dereferenceable)"); + AssertFindExactlyAttributes(Map, I->getOperand(1), + "(nonnull|align|dereferenceable)"); + AssertFindExactlyAttributes(Map, I->getOperand(2), + "(nonnull|align|dereferenceable)"); + AssertFindExactlyAttributes(Map, I->getOperand(3), + "(nonnull|align|dereferenceable)"); + AssertMapHasRightValue( + Map, {I->getOperand(0), Attribute::Dereferenceable}, {4, 48}); + AssertMapHasRightValue(Map, {I->getOperand(0), Attribute::Alignment}, + {8, 64}); + })); + Tests.push_back(std::make_pair( + "call void @func_many(i32* align 8 %P1) cold\n", [](Instruction *I) { + ShouldPreserveAllAttributes.setValue(true); + CallInst *Assume = BuildAssumeFromInst(I); + Assume->insertBefore(I); + + RetainedKnowledgeMap Map; + fillMapFromAssume(*Assume, Map); + + AssertFindExactlyAttributes( + Map, nullptr, "(nounwind|norecurse|willreturn|cold)"); + ShouldPreserveAllAttributes.setValue(false); + })); + Tests.push_back( + std::make_pair("call void @llvm.assume(i1 true)\n", [](Instruction *I) { + RetainedKnowledgeMap Map; + fillMapFromAssume(*cast(I), Map); + + AssertFindExactlyAttributes(Map, nullptr, ""); + ASSERT_TRUE(Map.empty()); + })); + Tests.push_back(std::make_pair( + "call void @func1(i32* readnone align 32 " + "dereferenceable(48) noalias %P, i32* " + "align 8 
dereferenceable(28) %P1, i32* align 64 " + "dereferenceable(4) " + "%P2, i32* nonnull align 16 dereferenceable(12) %P3)\n", + [](Instruction *I) { + CallInst *Assume = BuildAssumeFromInst(I); + Assume->insertBefore(I); + + RetainedKnowledgeMap Map; + fillMapFromAssume(*Assume, Map); + + AssertFindExactlyAttributes(Map, I->getOperand(0), + "(readnone|align|dereferenceable|noalias)"); + AssertFindExactlyAttributes(Map, I->getOperand(1), + "(align|dereferenceable)"); + AssertFindExactlyAttributes(Map, I->getOperand(2), + "(align|dereferenceable)"); + AssertFindExactlyAttributes(Map, I->getOperand(3), + "(nonnull|align|dereferenceable)"); + AssertMapHasRightValue(Map, {I->getOperand(0), Attribute::Alignment}, + {32, 32}); + AssertMapHasRightValue( + Map, {I->getOperand(0), Attribute::Dereferenceable}, {48, 48}); + AssertMapHasRightValue( + Map, {I->getOperand(0), Attribute::NoAlias}, {0, 0}); + AssertMapHasRightValue( + Map, {I->getOperand(1), Attribute::Dereferenceable}, {28, 28}); + AssertMapHasRightValue(Map, {I->getOperand(1), Attribute::Alignment}, + {8, 8}); + AssertMapHasRightValue(Map, {I->getOperand(2), Attribute::Alignment}, + {64, 64}); + AssertMapHasRightValue( + Map, {I->getOperand(2), Attribute::Dereferenceable}, {4, 4}); + AssertMapHasRightValue(Map, {I->getOperand(3), Attribute::Alignment}, + {16, 16}); + AssertMapHasRightValue( + Map, {I->getOperand(3), Attribute::Dereferenceable}, {12, 12}); + })); + + /// Keep this test last as it modifies the function. 
+ Tests.push_back(std::make_pair( + "call void @func(i32* nonnull align 4 dereferenceable(16) %P, i32* align " + "8 noalias %P1)\n", + [](Instruction *I) { + CallInst *Assume = BuildAssumeFromInst(I); + Assume->insertBefore(I); + + RetainedKnowledgeMap Map; + fillMapFromAssume(*Assume, Map); + + Value *New = I->getFunction()->getArg(3); + Value *Old = I->getOperand(0); + AssertFindExactlyAttributes(Map, New, ""); + AssertFindExactlyAttributes(Map, Old, + "(nonnull|align|dereferenceable)"); + Old->replaceAllUsesWith(New); + Map.clear(); + fillMapFromAssume(*Assume, Map); + AssertFindExactlyAttributes(Map, New, + "(nonnull|align|dereferenceable)"); + AssertFindExactlyAttributes(Map, Old, ""); + })); + RunTest(Head, Tail, Tests); +} diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp index e08ea0c701fe09..3a25620f744a6f 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -50,6 +50,7 @@ TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoop) { EXPECT_EQ(7u, VecBB->size()); EXPECT_EQ(2u, VecBB->getNumPredecessors()); EXPECT_EQ(2u, VecBB->getNumSuccessors()); + EXPECT_EQ(&*Plan, VecBB->getPlan()); auto Iter = VecBB->begin(); VPInstruction *Phi = dyn_cast(&*Iter++); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanPredicatorTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanPredicatorTest.cpp index 81ed3cee3d2a81..dccbe9c4cf6534 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanPredicatorTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanPredicatorTest.cpp @@ -102,6 +102,13 @@ TEST_F(VPlanPredicatorTest, BasicPredicatorTest) { EXPECT_EQ(InnerLoopLinSucc, OuterIf); EXPECT_EQ(OuterIfLinSucc, InnerIf); EXPECT_EQ(InnerIfLinSucc, InnerLoopLatch); + + // Check that the containing VPlan is set correctly. 
+ EXPECT_EQ(&*Plan, InnerLoopLinSucc->getPlan()); + EXPECT_EQ(&*Plan, OuterIfLinSucc->getPlan()); + EXPECT_EQ(&*Plan, InnerIfLinSucc->getPlan()); + EXPECT_EQ(&*Plan, InnerIf->getPlan()); + EXPECT_EQ(&*Plan, InnerLoopLatch->getPlan()); } // Test generation of Not and Or during predication. diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 67936a83efaf68..855016a1248836 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -86,5 +86,95 @@ TEST(VPInstructionTest, moveAfter) { EXPECT_EQ(I3->getParent(), I4->getParent()); } +TEST(VPBasicBlockTest, getPlan) { + { + VPBasicBlock *VPBB1 = new VPBasicBlock(); + VPBasicBlock *VPBB2 = new VPBasicBlock(); + VPBasicBlock *VPBB3 = new VPBasicBlock(); + VPBasicBlock *VPBB4 = new VPBasicBlock(); + + // VPBB1 + // / \ + // VPBB2 VPBB3 + // \ / + // VPBB4 + VPBlockUtils::connectBlocks(VPBB1, VPBB2); + VPBlockUtils::connectBlocks(VPBB1, VPBB3); + VPBlockUtils::connectBlocks(VPBB2, VPBB4); + VPBlockUtils::connectBlocks(VPBB3, VPBB4); + + VPlan Plan; + Plan.setEntry(VPBB1); + + EXPECT_EQ(&Plan, VPBB1->getPlan()); + EXPECT_EQ(&Plan, VPBB2->getPlan()); + EXPECT_EQ(&Plan, VPBB3->getPlan()); + EXPECT_EQ(&Plan, VPBB4->getPlan()); + } + + { + // Region block is entry into VPlan. + VPBasicBlock *R1BB1 = new VPBasicBlock(); + VPBasicBlock *R1BB2 = new VPBasicBlock(); + VPRegionBlock *R1 = new VPRegionBlock(R1BB1, R1BB2, "R1"); + VPBlockUtils::connectBlocks(R1BB1, R1BB2); + + VPlan Plan; + Plan.setEntry(R1); + EXPECT_EQ(&Plan, R1->getPlan()); + EXPECT_EQ(&Plan, R1BB1->getPlan()); + EXPECT_EQ(&Plan, R1BB2->getPlan()); + } + + { + // VPBasicBlock is the entry into the VPlan, followed by a region. 
+ VPBasicBlock *R1BB1 = new VPBasicBlock(); + VPBasicBlock *R1BB2 = new VPBasicBlock(); + VPRegionBlock *R1 = new VPRegionBlock(R1BB1, R1BB2, "R1"); + VPBlockUtils::connectBlocks(R1BB1, R1BB2); + + VPBasicBlock *VPBB1 = new VPBasicBlock(); + VPBlockUtils::connectBlocks(VPBB1, R1); + + VPlan Plan; + Plan.setEntry(VPBB1); + EXPECT_EQ(&Plan, VPBB1->getPlan()); + EXPECT_EQ(&Plan, R1->getPlan()); + EXPECT_EQ(&Plan, R1BB1->getPlan()); + EXPECT_EQ(&Plan, R1BB2->getPlan()); + } + + { + VPBasicBlock *R1BB1 = new VPBasicBlock(); + VPBasicBlock *R1BB2 = new VPBasicBlock(); + VPRegionBlock *R1 = new VPRegionBlock(R1BB1, R1BB2, "R1"); + VPBlockUtils::connectBlocks(R1BB1, R1BB2); + + VPBasicBlock *R2BB1 = new VPBasicBlock(); + VPBasicBlock *R2BB2 = new VPBasicBlock(); + VPRegionBlock *R2 = new VPRegionBlock(R2BB1, R2BB2, "R2"); + VPBlockUtils::connectBlocks(R2BB1, R2BB2); + + VPBasicBlock *VPBB1 = new VPBasicBlock(); + VPBlockUtils::connectBlocks(VPBB1, R1); + VPBlockUtils::connectBlocks(VPBB1, R2); + + VPBasicBlock *VPBB2 = new VPBasicBlock(); + VPBlockUtils::connectBlocks(R1, VPBB2); + VPBlockUtils::connectBlocks(R2, VPBB2); + + VPlan Plan; + Plan.setEntry(VPBB1); + EXPECT_EQ(&Plan, VPBB1->getPlan()); + EXPECT_EQ(&Plan, R1->getPlan()); + EXPECT_EQ(&Plan, R1BB1->getPlan()); + EXPECT_EQ(&Plan, R1BB2->getPlan()); + EXPECT_EQ(&Plan, R2->getPlan()); + EXPECT_EQ(&Plan, R2BB1->getPlan()); + EXPECT_EQ(&Plan, R2BB2->getPlan()); + EXPECT_EQ(&Plan, VPBB2->getPlan()); + } +} + } // namespace } // namespace llvm diff --git a/llvm/utils/TableGen/CodeGenHwModes.h b/llvm/utils/TableGen/CodeGenHwModes.h index 1ff2faaa0e5245..55507cbca37ddd 100644 --- a/llvm/utils/TableGen/CodeGenHwModes.h +++ b/llvm/utils/TableGen/CodeGenHwModes.h @@ -12,6 +12,7 @@ #define LLVM_UTILS_TABLEGEN_CODEGENHWMODES_H #include "llvm/ADT/StringMap.h" +#include #include #include #include diff --git a/llvm/utils/TableGen/CodeGenInstruction.h b/llvm/utils/TableGen/CodeGenInstruction.h index 1f08ce481a8989..af851a11676b12 
100644 --- a/llvm/utils/TableGen/CodeGenInstruction.h +++ b/llvm/utils/TableGen/CodeGenInstruction.h @@ -16,6 +16,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/SMLoc.h" +#include #include #include #include diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn index ec2db1a9e2ded2..db789dcd880bf9 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn @@ -59,6 +59,7 @@ static_library("clangd") { "ClangdServer.cpp", "CodeComplete.cpp", "CodeCompletionStrings.cpp", + "CollectMacros.cpp", "CompileCommands.cpp", "Compiler.cpp", "Context.cpp", diff --git a/llvm/utils/gn/secondary/clang/test/BUILD.gn b/llvm/utils/gn/secondary/clang/test/BUILD.gn index 2b5ecb166c249f..874891e89c81a2 100644 --- a/llvm/utils/gn/secondary/clang/test/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/test/BUILD.gn @@ -75,9 +75,9 @@ write_lit_config("lit_site_cfg") { } if (llvm_enable_zlib) { - extra_values += [ "LLVM_ENABLE_ZLIB=1" ] + extra_values += [ "HAVE_LIBZ=1" ] } else { - extra_values += [ "LLVM_ENABLE_ZLIB=0" ] # Must be 0. + extra_values += [ "HAVE_LIBZ=0" ] # Must be 0. 
} if (host_cpu == "x64") { diff --git a/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn index b0c690193a2e6f..c03399193babac 100644 --- a/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn @@ -85,8 +85,8 @@ write_cmake_config("lit_common_configured") { } if (llvm_enable_zlib) { - values += [ "LLVM_ENABLE_ZLIB=1" ] + values += [ "HAVE_LIBZ=1" ] } else { - values += [ "LLVM_ENABLE_ZLIB=0" ] + values += [ "HAVE_LIBZ=0" ] } } diff --git a/llvm/utils/gn/secondary/lld/test/BUILD.gn b/llvm/utils/gn/secondary/lld/test/BUILD.gn index 1da191cba15170..5408ea8b6b24b3 100644 --- a/llvm/utils/gn/secondary/lld/test/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/test/BUILD.gn @@ -49,9 +49,9 @@ write_lit_cfg("lit_site_cfg") { } if (llvm_enable_zlib) { - extra_values += [ "LLVM_ENABLE_ZLIB=1" ] + extra_values += [ "HAVE_LIBZ=1" ] } else { - extra_values += [ "LLVM_ENABLE_ZLIB=0" ] # Must be 0. + extra_values += [ "HAVE_LIBZ=0" ] # Must be 0. 
} if (current_cpu == "x64" || current_cpu == "arm64" || diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index 9f94540e4cff83..f8e1026475f5db 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -206,6 +206,7 @@ write_cmake_config("config") { "HAVE_ISATTY=", "HAVE_LIBPTHREAD=", "HAVE_PTHREAD_SETNAME_NP=", + "HAVE_LIBZ=", "HAVE_PREAD=", "HAVE_PTHREAD_GETSPECIFIC=", "HAVE_PTHREAD_H=", @@ -224,6 +225,7 @@ write_cmake_config("config") { "HAVE_SYS_TIME_H=", "HAVE_TERMIOS_H=", "HAVE_UNISTD_H=", + "HAVE_ZLIB_H=", "HAVE__CHSIZE_S=1", "HAVE__UNWIND_BACKTRACE=", "stricmp=_stricmp", @@ -242,6 +244,7 @@ write_cmake_config("config") { "HAVE_ISATTY=1", "HAVE_LIBPTHREAD=1", "HAVE_PTHREAD_SETNAME_NP=1", + "HAVE_LIBZ=1", "HAVE_PREAD=1", "HAVE_PTHREAD_GETSPECIFIC=1", "HAVE_PTHREAD_H=1", @@ -260,6 +263,7 @@ write_cmake_config("config") { "HAVE_SYS_TIME_H=1", "HAVE_TERMIOS_H=1", "HAVE_UNISTD_H=1", + "HAVE_ZLIB_H=1", "HAVE__CHSIZE_S=", "HAVE__UNWIND_BACKTRACE=1", "stricmp=", diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn index 328d819fd1f057..82bf99de1bc9b9 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -68,6 +68,7 @@ static_library("Analysis") { "LoopAnalysisManager.cpp", "LoopCacheAnalysis.cpp", "LoopInfo.cpp", + "LoopNestAnalysis.cpp", "LoopPass.cpp", "LoopUnrollAnalyzer.cpp", "MemDepPrinter.cpp", diff --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn index f5e0b1222c826b..7aaaf867e11982 100644 --- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/test/BUILD.gn @@ -166,9 +166,9 @@ write_lit_config("lit_site_cfg") { } if (llvm_enable_zlib) { - extra_values += [ "LLVM_ENABLE_ZLIB=1" ] + extra_values += 
[ "HAVE_LIBZ=1" ] } else { - extra_values += [ "LLVM_ENABLE_ZLIB=0" ] # Must be 0. + extra_values += [ "HAVE_LIBZ=0" ] # Must be 0. } } diff --git a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn index 47bc50212651df..d73088885aa869 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn @@ -25,6 +25,7 @@ unittest("AnalysisTests") { "LazyCallGraphTest.cpp", "LoadsTest.cpp", "LoopInfoTest.cpp", + "LoopNestTest.cpp", "MemoryBuiltinsTest.cpp", "MemorySSATest.cpp", "OrderedInstructionsTest.cpp", diff --git a/mlir/include/mlir/IR/Matchers.h b/mlir/include/mlir/IR/Matchers.h index 6321e88c9c109e..d9979b8467ee0d 100644 --- a/mlir/include/mlir/IR/Matchers.h +++ b/mlir/include/mlir/IR/Matchers.h @@ -93,9 +93,8 @@ struct constant_int_op_binder { return false; auto type = op->getResult(0).getType(); - if (type.isSignlessIntOrIndex()) { + if (type.isa() || type.isa()) return attr_value_binder(bind_value).match(attr); - } if (type.isa() || type.isa()) { if (auto splatAttr = attr.dyn_cast()) { return attr_value_binder(bind_value) diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index 25c0238946a938..d431d4ebabf4c0 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -339,6 +339,30 @@ def I16 : I<16>; def I32 : I<32>; def I64 : I<64>; +// Unsigned integer types. +// Any unsigned integer type irrespective of its width. +def AnyUnsignedInteger : Type< + CPred<"$_self.isUnsignedInteger()">, "unsigned integer">; + +// Unsigned integer type of a specific width. 
+class UI + : Type, + width # "-bit unsigned integer">, + BuildableType<"$_builder.getIntegerType(" # width # + ", /*isSigned=*/false)"> { + int bitwidth = width; +} + +class UnsignedIntOfWidths widths> : + AnyTypeOf), + StrJoinInt.result # "-bit unsigned integer">; + +def UI1 : UI<1>; +def UI8 : UI<8>; +def UI16 : UI<16>; +def UI32 : UI<32>; +def UI64 : UI<64>; + // Floating point types. // Any float type irrespective of its width. diff --git a/mlir/include/mlir/IR/StandardTypes.h b/mlir/include/mlir/IR/StandardTypes.h index 9bb9a8c06234d9..cd5ba07b689d7e 100644 --- a/mlir/include/mlir/IR/StandardTypes.h +++ b/mlir/include/mlir/IR/StandardTypes.h @@ -328,8 +328,9 @@ class TensorType : public ShapedType { // Note: Non standard/builtin types are allowed to exist within tensor // types. Dialects are expected to verify that tensor types have a valid // element type within that dialect. - return type.isSignlessIntOrFloat() || type.isa() || - type.isa() || type.isa() || + return type.isa() || type.isa() || + type.isa() || type.isa() || + type.isa() || (type.getKind() > Type::Kind::LAST_STANDARD_TYPE); } diff --git a/mlir/include/mlir/IR/Types.h b/mlir/include/mlir/IR/Types.h index 40f1d481876996..eccc90cdae0c61 100644 --- a/mlir/include/mlir/IR/Types.h +++ b/mlir/include/mlir/IR/Types.h @@ -169,6 +169,9 @@ class Type { /// Return true of this is a signless integer or a float type. bool isSignlessIntOrFloat(); + /// Return true of this is an integer(of any signedness) or a float type. + bool isIntOrFloat(); + /// Print the current type. 
void print(raw_ostream &os); void dump(); diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp index b76c0c0770a36f..14635a1447358f 100644 --- a/mlir/lib/Analysis/Utils.cpp +++ b/mlir/lib/Analysis/Utils.cpp @@ -314,7 +314,7 @@ static unsigned getMemRefEltSizeInBytes(MemRefType memRefType) { auto elementType = memRefType.getElementType(); unsigned sizeInBits; - if (elementType.isSignlessIntOrFloat()) { + if (elementType.isIntOrFloat()) { sizeInBits = elementType.getIntOrFloatBitWidth(); } else { auto vectorType = elementType.cast(); @@ -358,7 +358,7 @@ Optional mlir::getMemRefSizeInBytes(MemRefType memRefType) { if (!memRefType.hasStaticShape()) return None; auto elementType = memRefType.getElementType(); - if (!elementType.isSignlessIntOrFloat() && !elementType.isa()) + if (!elementType.isIntOrFloat() && !elementType.isa()) return None; uint64_t sizeInBytes = getMemRefEltSizeInBytes(memRefType); diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp index ed9400fc2ad06a..293d9351214718 100644 --- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp +++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp @@ -572,6 +572,7 @@ static LogicalResult processParallelLoop(ParallelOp parallelOp, gpu::LaunchOp launchOp, BlockAndValueMapping &cloningMap, SmallVectorImpl &worklist, + DenseMap &bounds, PatternRewriter &rewriter) { // TODO(herhut): Verify that this is a valid GPU mapping. // processor ids: 0-2 block [x/y/z], 3-5 -> thread [x/y/z], 6-> sequential @@ -631,22 +632,27 @@ static LogicalResult processParallelLoop(ParallelOp parallelOp, // conditional. If the lower-bound is constant or defined before the // launch, we can use it in the launch bounds. Otherwise fail. if (!launchIndependent(lowerBound) && - !isa(lowerBound.getDefiningOp())) + !isa_and_nonnull(lowerBound.getDefiningOp())) return failure(); // The step must also be constant or defined outside of the loop nest. 
- if (!launchIndependent(step) && !isa(step.getDefiningOp())) + if (!launchIndependent(step) && + !isa_and_nonnull(step.getDefiningOp())) return failure(); // If the upper-bound is constant or defined before the launch, we can // use it in the launch bounds directly. Otherwise try derive a bound. - bool boundIsPrecise = launchIndependent(upperBound) || - isa(upperBound.getDefiningOp()); + bool boundIsPrecise = + launchIndependent(upperBound) || + isa_and_nonnull(upperBound.getDefiningOp()); { PatternRewriter::InsertionGuard guard(rewriter); rewriter.setInsertionPoint(launchOp); if (!boundIsPrecise) { upperBound = deriveStaticUpperBound(upperBound, rewriter); - if (!upperBound) - return failure(); + if (!upperBound) { + return parallelOp.emitOpError() + << "cannot derive loop-invariant upper bound for number " + "of iterations"; + } } // Compute the number of iterations needed. We compute this as an // affine expression ceilDiv (upperBound - lowerBound) step. We use @@ -654,8 +660,8 @@ static LogicalResult processParallelLoop(ParallelOp parallelOp, AffineMap stepMap = AffineMap::get(0, 3, ((rewriter.getAffineSymbolExpr(0) - - rewriter.getAffineSymbolExpr(1)).ceilDiv( - rewriter.getAffineSymbolExpr(2)))); + rewriter.getAffineSymbolExpr(1)) + .ceilDiv(rewriter.getAffineSymbolExpr(2)))); Value launchBound = rewriter.create( loc, annotation.boundMap.compose(stepMap), ValueRange{ @@ -664,7 +670,12 @@ static LogicalResult processParallelLoop(ParallelOp parallelOp, ensureLaunchIndependent( cloningMap.lookupOrDefault(lowerBound)), ensureLaunchIndependent(cloningMap.lookupOrDefault(step))}); - launchOp.setOperand(annotation.processor, launchBound); + if (bounds.find(annotation.processor) != bounds.end()) { + return parallelOp.emitOpError() + << "cannot redefine the bound for processor " + << annotation.processor; + } + bounds[annotation.processor] = launchBound; } if (!boundIsPrecise) { // We are using an approximation, create a surrounding conditional. 
@@ -746,9 +757,10 @@ ParallelToGpuLaunchLowering::matchAndRewrite(ParallelOp parallelOp, rewriter.setInsertionPointToStart(&launchOp.body().front()); BlockAndValueMapping cloningMap; + llvm::DenseMap launchBounds; SmallVector worklist; if (failed(processParallelLoop(parallelOp, launchOp, cloningMap, worklist, - rewriter))) + launchBounds, rewriter))) return matchFailure(); // Whether we have seen any side-effects. Reset when leaving an inner scope. @@ -770,8 +782,9 @@ ParallelToGpuLaunchLowering::matchAndRewrite(ParallelOp parallelOp, // A nested loop.parallel needs insertion of code to compute indices. // Insert that now. This will also update the worklist with the loops // body. - processParallelLoop(nestedParallel, launchOp, cloningMap, worklist, - rewriter); + if (failed(processParallelLoop(nestedParallel, launchOp, cloningMap, + worklist, launchBounds, rewriter))) + return matchFailure(); } else if (op == launchOp.getOperation()) { // Found our sentinel value. We have finished the operations from one // nesting level, pop one level back up. @@ -791,6 +804,11 @@ ParallelToGpuLaunchLowering::matchAndRewrite(ParallelOp parallelOp, } } + // Now that we succeeded creating the launch operation, also update the + // bounds. + for (auto bound : launchBounds) + launchOp.setOperand(std::get<0>(bound), std::get<1>(bound)); + rewriter.eraseOp(parallelOp); return matchSuccess(); } diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index 140f533e0b1558..ac2648846b2436 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -1372,17 +1372,18 @@ void ModulePrinter::printAttribute(Attribute attr, /// Print the integer element of the given DenseElementsAttr at 'index'. static void printDenseIntElement(DenseElementsAttr attr, raw_ostream &os, - unsigned index) { + unsigned index, bool isSigned) { APInt value = *std::next(attr.int_value_begin(), index); if (value.getBitWidth() == 1) os << (value.getBoolValue() ? 
"true" : "false"); else - value.print(os, /*isSigned=*/true); + value.print(os, isSigned); } /// Print the float element of the given DenseElementsAttr at 'index'. static void printDenseFloatElement(DenseElementsAttr attr, raw_ostream &os, - unsigned index) { + unsigned index, bool isSigned) { + assert(isSigned && "floating point values are always signed"); APFloat value = *std::next(attr.float_value_begin(), index); printFloatValue(value, os); } @@ -1392,6 +1393,7 @@ void ModulePrinter::printDenseElementsAttr(DenseElementsAttr attr, auto type = attr.getType(); auto shape = type.getShape(); auto rank = type.getRank(); + bool isSigned = !type.getElementType().isUnsignedInteger(); // The function used to print elements of this attribute. auto printEltFn = type.getElementType().isa() @@ -1400,7 +1402,7 @@ void ModulePrinter::printDenseElementsAttr(DenseElementsAttr attr, // Special case for 0-d and splat tensors. if (attr.isSplat()) { - printEltFn(attr, os, 0); + printEltFn(attr, os, 0, isSigned); return; } @@ -1452,7 +1454,7 @@ void ModulePrinter::printDenseElementsAttr(DenseElementsAttr attr, while (openBrackets++ < rank) os << '['; openBrackets = rank; - printEltFn(attr, os, idx); + printEltFn(attr, os, idx, isSigned); bumpCounter(); } while (openBrackets-- > 0) diff --git a/mlir/lib/IR/Attributes.cpp b/mlir/lib/IR/Attributes.cpp index 5beb12a59940bc..4526d7dc10be79 100644 --- a/mlir/lib/IR/Attributes.cpp +++ b/mlir/lib/IR/Attributes.cpp @@ -608,7 +608,7 @@ DenseElementsAttr::FloatElementIterator::FloatElementIterator( DenseElementsAttr DenseElementsAttr::get(ShapedType type, ArrayRef values) { - assert(type.getElementType().isSignlessIntOrFloat() && + assert(type.getElementType().isIntOrFloat() && "expected int or float element type"); assert(hasSameElementsOrSplat(type, values)); diff --git a/mlir/lib/IR/StandardTypes.cpp b/mlir/lib/IR/StandardTypes.cpp index 30d5bbcc7b3ced..774f80a46de3a8 100644 --- a/mlir/lib/IR/StandardTypes.cpp +++ 
b/mlir/lib/IR/StandardTypes.cpp @@ -84,6 +84,8 @@ bool Type::isSignlessIntOrFloat() { return isSignlessInteger() || isa(); } +bool Type::isIntOrFloat() { return isa() || isa(); } + //===----------------------------------------------------------------------===// // Integer Type //===----------------------------------------------------------------------===// @@ -147,13 +149,10 @@ const llvm::fltSemantics &FloatType::getFloatSemantics() { } unsigned Type::getIntOrFloatBitWidth() { - assert(isSignlessIntOrFloat() && "only ints and floats have a bitwidth"); - if (auto intType = dyn_cast()) { + assert(isIntOrFloat() && "only integers and floats have a bitwidth"); + if (auto intType = dyn_cast()) return intType.getWidth(); - } - - auto floatType = cast(); - return floatType.getWidth(); + return cast().getWidth(); } //===----------------------------------------------------------------------===// @@ -202,7 +201,7 @@ int64_t ShapedType::getSizeInBits() const { "cannot get the bit size of an aggregate with a dynamic shape"); auto elementType = getElementType(); - if (elementType.isSignlessIntOrFloat()) + if (elementType.isIntOrFloat()) return elementType.getIntOrFloatBitWidth() * getNumElements(); // Tensors can have vectors and other tensors as elements, other shaped types @@ -373,7 +372,7 @@ MemRefType MemRefType::getImpl(ArrayRef shape, Type elementType, auto *context = elementType.getContext(); // Check that memref is formed from allowed types. - if (!elementType.isSignlessIntOrFloat() && !elementType.isa() && + if (!elementType.isIntOrFloat() && !elementType.isa() && !elementType.isa()) return emitOptionalError(location, "invalid memref element type"), MemRefType(); @@ -451,7 +450,7 @@ LogicalResult UnrankedMemRefType::verifyConstructionInvariants(Location loc, Type elementType, unsigned memorySpace) { // Check that memref is formed from allowed types. 
- if (!elementType.isSignlessIntOrFloat() && !elementType.isa() && + if (!elementType.isIntOrFloat() && !elementType.isa() && !elementType.isa()) return emitError(loc, "invalid memref element type"); return success(); diff --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp index 668fb694d8fd52..661bddf8107a0d 100644 --- a/mlir/lib/Parser/Parser.cpp +++ b/mlir/lib/Parser/Parser.cpp @@ -1102,7 +1102,7 @@ Type Parser::parseMemRefType() { return nullptr; // Check that memref is formed from allowed types. - if (!elementType.isSignlessIntOrFloat() && !elementType.isa() && + if (!elementType.isIntOrFloat() && !elementType.isa() && !elementType.isa()) return emitError(typeLoc, "invalid memref element type"), nullptr; diff --git a/mlir/lib/Transforms/DialectConversion.cpp b/mlir/lib/Transforms/DialectConversion.cpp index 8e1a9cc942bdcf..ed81b588875ca8 100644 --- a/mlir/lib/Transforms/DialectConversion.cpp +++ b/mlir/lib/Transforms/DialectConversion.cpp @@ -51,9 +51,11 @@ computeConversionSet(iterator_range region, : Optional(); if (legalityInfo && legalityInfo->isRecursivelyLegal) continue; - for (auto ®ion : op.getRegions()) - computeConversionSet(region.getBlocks(), region.getLoc(), toConvert, - target); + for (auto ®ion : op.getRegions()) { + if (failed(computeConversionSet(region.getBlocks(), region.getLoc(), + toConvert, target))) + return failure(); + } } // Recurse to children that haven't been visited. 
diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp index ef1af5d71aa840..bcb0c16ba77ba3 100644 --- a/mlir/lib/Transforms/LoopFusion.cpp +++ b/mlir/lib/Transforms/LoopFusion.cpp @@ -869,7 +869,7 @@ static unsigned getMemRefEltSizeInBytes(MemRefType memRefType) { auto elementType = memRefType.getElementType(); unsigned sizeInBits; - if (elementType.isSignlessIntOrFloat()) { + if (elementType.isIntOrFloat()) { sizeInBits = elementType.getIntOrFloatBitWidth(); } else { auto vectorType = elementType.cast(); diff --git a/mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir b/mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir index 2a440a4456ba72..24ea0320f0ac36 100644 --- a/mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir +++ b/mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -convert-parallel-loops-to-gpu -split-input-file %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -convert-parallel-loops-to-gpu -split-input-file -verify-diagnostics %s | FileCheck %s -dump-input-on-failure // 2-d parallel loop mapped to block.y and block.x @@ -299,3 +299,55 @@ module { // CHECK: return // CHECK: } // CHECK: } + +// ----- + +// Mapping to the same processor twice. + +func @parallel_double_map(%arg0 : index, %arg1 : index, %arg2 : index, + %arg3 : index, + %buf : memref, + %res : memref) { + %four = constant 4 : index + // expected-error@+2 {{cannot redefine the bound for processor 1}} + // expected-error@+1 {{failed to legalize operation 'loop.parallel'}} + loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) + step (%four, %four) { + } { mapping = [ + {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, + {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>} + ] } + return +} + +// ----- + +// Loop with loop-variant upper bound. 
+ +func @parallel_loop_loop_variant_bound(%arg0 : index, %arg1 : index, %arg2 : index, + %arg3 : index, + %buf : memref, + %res : memref) { + %zero = constant 0 : index + %one = constant 1 : index + %four = constant 4 : index + // expected-error@+1 {{failed to legalize operation 'loop.parallel'}} + loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) + step (%four, %four) { + // expected-error@+1 {{cannot derive loop-invariant upper bound}} + loop.parallel (%si0, %si1) = (%zero, %zero) to (%i0, %i1) + step (%one, %one) { + %idx0 = addi %i0, %si0 : index + %idx1 = addi %i1, %si1 : index + %val = load %buf[%idx0, %idx1] : memref + store %val, %res[%idx1, %idx0] : memref + } { mapping = [ + {processor = 4, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, + {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>} + ] } + } { mapping = [ + {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, + {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>} + ] } + return +} diff --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir index bec1fbd4aca6fe..3baf0642e8b05c 100644 --- a/mlir/test/IR/parser.mlir +++ b/mlir/test/IR/parser.mlir @@ -616,6 +616,9 @@ func @splattensorattr() -> () { // CHECK: "splatBoolTensor"() {bar = dense : tensor} : () -> () "splatBoolTensor"(){bar = dense : tensor} : () -> () + // CHECK: "splatUIntTensor"() {bar = dense<222> : tensor<2x1x4xui8>} : () -> () + "splatUIntTensor"(){bar = dense<222> : tensor<2x1x4xui8>} : () -> () + // CHECK: "splatIntTensor"() {bar = dense<5> : tensor<2x1x4xi32>} : () -> () "splatIntTensor"(){bar = dense<5> : tensor<2x1x4xi32>} : () -> () diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp index 3e1b52718e2ee4..3b9efd6ecbdf4f 100644 --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -37,7 +37,7 @@ std::mutex *TrlTblMtx; HostPtrToTableMapTy *HostPtrToTableMap; 
std::mutex *TblMapMtx; -__attribute__((constructor(0))) void init() { +__attribute__((constructor(101))) void init() { DP("Init target library!\n"); RTLs = new RTLsTy(); RTLsMtx = new std::mutex(); @@ -47,7 +47,7 @@ __attribute__((constructor(0))) void init() { TblMapMtx = new std::mutex(); } -__attribute__((destructor(0))) void deinit() { +__attribute__((destructor(101))) void deinit() { DP("Deinit target library!\n"); delete RTLs; delete RTLsMtx;