diff --git a/clang-tools-extra/clang-tidy/abseil/AbseilMatcher.h b/clang-tools-extra/clang-tidy/abseil/AbseilMatcher.h index f58ff5bc44b214..335c333573f43b 100644 --- a/clang-tools-extra/clang-tidy/abseil/AbseilMatcher.h +++ b/clang-tools-extra/clang-tidy/abseil/AbseilMatcher.h @@ -47,14 +47,18 @@ AST_POLYMORPHIC_MATCHER( if (PrefixPosition == StringRef::npos) return false; Path = Path.drop_front(PrefixPosition + AbslPrefix.size()); - static const char *AbseilLibraries[] = { - "algorithm", "base", "container", "debugging", "flags", - "hash", "iterator", "memory", "meta", "numeric", - "random", "strings", "synchronization", "status", "time", - "types", "utility"}; - return std::any_of( - std::begin(AbseilLibraries), std::end(AbseilLibraries), - [&](const char *Library) { return Path.startswith(Library); }); + static const char *AbseilLibraries[] = {"algorithm", "base", + "container", "debugging", + "flags", "hash", + "iterator", "memory", + "meta", "numeric", + "random", "status", + "strings", "synchronization", + "time", "types", + "utility"}; + return llvm::any_of(AbseilLibraries, [&](const char *Library) { + return Path.startswith(Library); + }); } } // namespace ast_matchers diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index 74ab21a5f7788f..d204e87c143b42 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -173,7 +173,8 @@ ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB, Callbacks *Callbacks) : ConfigProvider(Opts.ConfigProvider), TFS(TFS), DynamicIdx(Opts.BuildDynamicSymbolIndex - ? new FileIndex(Opts.HeavyweightDynamicSymbolIndex) + ? 
new FileIndex(Opts.HeavyweightDynamicSymbolIndex, + Opts.CollectMainFileRefs) : nullptr), GetClangTidyOptions(Opts.GetClangTidyOptions), SuggestMissingIncludes(Opts.SuggestMissingIncludes), diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index 1bc7d70eebaddc..7068cd5eb42179 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -111,6 +111,9 @@ class ClangdServer { /// on background threads. The index is stored in the project root. bool BackgroundIndex = false; + /// Store refs to main-file symbols in the index. + bool CollectMainFileRefs = false; + /// If set, use this index to augment code completion results. SymbolIndex *StaticIndex = nullptr; diff --git a/clang-tools-extra/clangd/FindTarget.cpp b/clang-tools-extra/clangd/FindTarget.cpp index f73a6e58497274..9db814368a024d 100644 --- a/clang-tools-extra/clangd/FindTarget.cpp +++ b/clang-tools-extra/clangd/FindTarget.cpp @@ -100,7 +100,7 @@ CXXRecordDecl *resolveTypeToRecordDecl(const Type *T) { std::vector getMembersReferencedViaDependentName( const Type *T, llvm::function_ref NameFactory, - bool IsNonstaticMember) { + llvm::function_ref Filter) { if (!T) return {}; if (auto *ET = T->getAs()) { @@ -113,17 +113,22 @@ std::vector getMembersReferencedViaDependentName( return {}; RD = RD->getDefinition(); DeclarationName Name = NameFactory(RD->getASTContext()); - return RD->lookupDependentName(Name, [=](const NamedDecl *D) { - return IsNonstaticMember ? D->isCXXInstanceMember() - : !D->isCXXInstanceMember(); - }); + return RD->lookupDependentName(Name, Filter); } return {}; } -// Given the type T of a dependent expression that appears of the LHS of a "->", -// heuristically find a corresponding pointee type in whose scope we could look -// up the name appearing on the RHS. 
+const auto NonStaticFilter = [](const NamedDecl *D) { + return D->isCXXInstanceMember(); +}; +const auto StaticFilter = [](const NamedDecl *D) { + return !D->isCXXInstanceMember(); +}; +const auto ValueFilter = [](const NamedDecl *D) { return isa(D); }; + +// Given the type T of a dependent expression that appears of the LHS of a +// "->", heuristically find a corresponding pointee type in whose scope we +// could look up the name appearing on the RHS. const Type *getPointeeType(const Type *T) { if (!T) return nullptr; @@ -141,7 +146,7 @@ const Type *getPointeeType(const Type *T) { [](ASTContext &Ctx) { return Ctx.DeclarationNames.getCXXOperatorName(OO_Arrow); }, - /*IsNonStaticMember=*/true); + NonStaticFilter); if (ArrowOps.empty()) return nullptr; @@ -187,13 +192,12 @@ std::vector resolveExprToDecls(const Expr *E) { } return getMembersReferencedViaDependentName( BaseType, [ME](ASTContext &) { return ME->getMember(); }, - /*IsNonstaticMember=*/true); + NonStaticFilter); } if (const auto *RE = dyn_cast(E)) { return getMembersReferencedViaDependentName( RE->getQualifier()->getAsType(), - [RE](ASTContext &) { return RE->getDeclName(); }, - /*IsNonstaticMember=*/false); + [RE](ASTContext &) { return RE->getDeclName(); }, StaticFilter); } if (const auto *CE = dyn_cast(E)) { const auto *CalleeType = resolveExprToType(CE->getCallee()); @@ -291,7 +295,6 @@ const NamedDecl *getTemplatePattern(const NamedDecl *D) { // CXXDependentScopeMemberExpr, but some other constructs remain to be handled: // - DependentTemplateSpecializationType, // - DependentNameType -// - UnresolvedUsingValueDecl // - UnresolvedUsingTypenameDecl struct TargetFinder { using RelSet = DeclRelationSet; @@ -345,6 +348,15 @@ struct TargetFinder { } else if (const auto *NAD = dyn_cast(D)) { add(NAD->getUnderlyingDecl(), Flags | Rel::Underlying); Flags |= Rel::Alias; // continue with the alias + } else if (const UnresolvedUsingValueDecl *UUVD = + dyn_cast(D)) { + for (const NamedDecl *Target : 
getMembersReferencedViaDependentName( + UUVD->getQualifier()->getAsType(), + [UUVD](ASTContext &) { return UUVD->getNameInfo().getName(); }, + ValueFilter)) { + add(Target, Flags | Rel::Underlying); + } + Flags |= Rel::Alias; // continue with the alias } else if (const UsingShadowDecl *USD = dyn_cast(D)) { // Include the using decl, but don't traverse it. This may end up // including *all* shadows, which we don't want. diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp index 9936c67cb6e5b2..031a9c7bf5da31 100644 --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -345,7 +345,7 @@ locateASTReferent(SourceLocation CurLoc, const syntax::Token *TouchedIdentifier, // Give the underlying decl if navigation is triggered on a non-renaming // alias. - if (llvm::isa(D)) { + if (llvm::isa(D) || llvm::isa(D)) { // FIXME: address more complicated cases. TargetDecl(... Underlying) gives // all overload candidates, we only want the targeted one if the cursor is // on an using-alias usage, workround it with getDeclAtPosition. diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp index 18037d694c11ed..2bac6ec39d308b 100644 --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -95,6 +95,7 @@ BackgroundIndex::BackgroundIndex( BackgroundIndexStorage::Factory IndexStorageFactory, Options Opts) : SwapIndex(std::make_unique()), TFS(TFS), CDB(CDB), ContextProvider(std::move(Opts.ContextProvider)), + CollectMainFileRefs(Opts.CollectMainFileRefs), Rebuilder(this, &IndexedSymbols, Opts.ThreadPoolSize), IndexStorageFactory(std::move(IndexStorageFactory)), Queue(std::move(Opts.OnProgress)), @@ -301,6 +302,7 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) { return false; // Skip files that haven't changed, without errors. 
return true; }; + IndexOpts.CollectMainFileRefs = CollectMainFileRefs; IndexFileIn Index; auto Action = createStaticIndexingAction( diff --git a/clang-tools-extra/clangd/index/Background.h b/clang-tools-extra/clangd/index/Background.h index 72fe84466959fe..472603013a53ac 100644 --- a/clang-tools-extra/clangd/index/Background.h +++ b/clang-tools-extra/clangd/index/Background.h @@ -137,6 +137,8 @@ class BackgroundIndex : public SwapIndex { // file. Called with the empty string for other tasks. // (When called, the context from BackgroundIndex construction is active). std::function ContextProvider = nullptr; + // Whether to collect references to main-file-only symbols. + bool CollectMainFileRefs = false; }; /// Creates a new background index and starts its threads. @@ -188,6 +190,7 @@ class BackgroundIndex : public SwapIndex { const ThreadsafeFS &TFS; const GlobalCompilationDatabase &CDB; std::function ContextProvider; + bool CollectMainFileRefs; llvm::Error index(tooling::CompileCommand); diff --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp index 5f84545d7c73d1..dafec6742c2ca0 100644 --- a/clang-tools-extra/clangd/index/FileIndex.cpp +++ b/clang-tools-extra/clangd/index/FileIndex.cpp @@ -47,12 +47,13 @@ SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr PP, llvm::ArrayRef DeclsToIndex, const MainFileMacros *MacroRefsToIndex, const CanonicalIncludes &Includes, bool IsIndexMainAST, - llvm::StringRef Version) { + llvm::StringRef Version, bool CollectMainFileRefs) { SymbolCollector::Options CollectorOpts; CollectorOpts.CollectIncludePath = true; CollectorOpts.Includes = &Includes; CollectorOpts.CountReferences = false; CollectorOpts.Origin = SymbolOrigin::Dynamic; + CollectorOpts.CollectMainFileRefs = CollectMainFileRefs; index::IndexingOptions IndexOpts; // We only need declarations, because we don't count references. 
@@ -205,11 +206,11 @@ FileShardedIndex::getShard(llvm::StringRef Uri) const { return std::move(IF); } -SlabTuple indexMainDecls(ParsedAST &AST) { - return indexSymbols(AST.getASTContext(), AST.getPreprocessorPtr(), - AST.getLocalTopLevelDecls(), &AST.getMacros(), - AST.getCanonicalIncludes(), - /*IsIndexMainAST=*/true, AST.version()); +SlabTuple indexMainDecls(ParsedAST &AST, bool CollectMainFileRefs) { + return indexSymbols( + AST.getASTContext(), AST.getPreprocessorPtr(), + AST.getLocalTopLevelDecls(), &AST.getMacros(), AST.getCanonicalIncludes(), + /*IsIndexMainAST=*/true, AST.version(), CollectMainFileRefs); } SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST, @@ -220,7 +221,8 @@ SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST, AST.getTranslationUnitDecl()->decls().end()); return indexSymbols(AST, std::move(PP), DeclsToIndex, /*MainFileMacros=*/nullptr, Includes, - /*IsIndexMainAST=*/false, Version); + /*IsIndexMainAST=*/false, Version, + /*CollectMainFileRefs=*/false); } void FileSymbols::update(llvm::StringRef Key, @@ -371,8 +373,9 @@ FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle, llvm_unreachable("Unknown clangd::IndexType"); } -FileIndex::FileIndex(bool UseDex) +FileIndex::FileIndex(bool UseDex, bool CollectMainFileRefs) : MergedIndex(&MainFileIndex, &PreambleIndex), UseDex(UseDex), + CollectMainFileRefs(CollectMainFileRefs), PreambleIndex(std::make_unique()), MainFileIndex(std::make_unique()) {} @@ -415,7 +418,7 @@ void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version, } void FileIndex::updateMain(PathRef Path, ParsedAST &AST) { - auto Contents = indexMainDecls(AST); + auto Contents = indexMainDecls(AST, CollectMainFileRefs); MainFileSymbols.update( Path, std::make_unique(std::move(std::get<0>(Contents))), std::make_unique(std::move(std::get<1>(Contents))), diff --git a/clang-tools-extra/clangd/index/FileIndex.h b/clang-tools-extra/clangd/index/FileIndex.h index 
e6f8d1ef9e3d71..c7bc855bcb8e56 100644 --- a/clang-tools-extra/clangd/index/FileIndex.h +++ b/clang-tools-extra/clangd/index/FileIndex.h @@ -104,7 +104,7 @@ class FileSymbols { /// FIXME: Expose an interface to remove files that are closed. class FileIndex : public MergedIndex { public: - FileIndex(bool UseDex = true); + FileIndex(bool UseDex = true, bool CollectMainFileRefs = false); /// Update preamble symbols of file \p Path with all declarations in \p AST /// and macros in \p PP. @@ -118,6 +118,7 @@ class FileIndex : public MergedIndex { private: bool UseDex; // FIXME: this should be always on. + bool CollectMainFileRefs; // Contains information from each file's preamble only. Symbols and relations // are sharded per declaration file to deduplicate multiple symbols and reduce @@ -152,7 +153,7 @@ using SlabTuple = std::tuple; /// Retrieves symbols and refs of local top level decls in \p AST (i.e. /// `AST.getLocalTopLevelDecls()`). /// Exposed to assist in unit tests. -SlabTuple indexMainDecls(ParsedAST &AST); +SlabTuple indexMainDecls(ParsedAST &AST, bool CollectMainFileRefs = false); /// Index declarations from \p AST and macros from \p PP that are declared in /// included headers. diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp index a3ceaa388cf9db..2e1f261ab18aee 100644 --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -334,12 +334,13 @@ bool SymbolCollector::handleDeclOccurrence( if (IsOnlyRef && !CollectRef) return true; - // Do not store references to main-file symbols. // Unlike other fields, e.g. Symbols (which use spelling locations), we use // file locations for references (as it aligns the behavior of clangd's // AST-based xref). // FIXME: we should try to use the file locations for other fields. 
- if (CollectRef && (!IsMainFileOnly || ND->isExternallyVisible()) && + if (CollectRef && + (!IsMainFileOnly || Opts.CollectMainFileRefs || + ND->isExternallyVisible()) && !isa(ND) && (Opts.RefsInHeaders || SM.getFileID(SM.getFileLoc(Loc)) == SM.getMainFileID())) diff --git a/clang-tools-extra/clangd/index/SymbolCollector.h b/clang-tools-extra/clangd/index/SymbolCollector.h index f66a71c2d59b10..9b30aeba95383c 100644 --- a/clang-tools-extra/clangd/index/SymbolCollector.h +++ b/clang-tools-extra/clangd/index/SymbolCollector.h @@ -78,6 +78,8 @@ class SymbolCollector : public index::IndexDataConsumer { /// Collect symbols local to main-files, such as static functions /// and symbols inside an anonymous namespace. bool CollectMainFileSymbols = true; + /// Collect references to main-file symbols. + bool CollectMainFileRefs = false; /// If set to true, SymbolCollector will collect doc for all symbols. /// Note that documents of symbols being indexed for completion will always /// be collected regardless of this option. diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 3d83f3652f3003..57dac600014d5e 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -450,6 +450,13 @@ opt EnableConfig{ init(true), }; +opt CollectMainFileRefs{ + "collect-main-file-refs", + cat(Misc), + desc("Store references to main-file-only symbols in the index"), + init(false), +}; + #if CLANGD_ENABLE_REMOTE opt RemoteIndexAddress{ "remote-index-address", @@ -682,6 +689,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var if (!ResourceDir.empty()) Opts.ResourceDir = ResourceDir; Opts.BuildDynamicSymbolIndex = EnableIndex; + Opts.CollectMainFileRefs = CollectMainFileRefs; std::unique_ptr StaticIdx; std::future AsyncIndexLoad; // Block exit while loading the index. 
if (EnableIndex && !IndexFile.empty()) { diff --git a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp index 06614872363f54..f9f584e8895f52 100644 --- a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp @@ -229,6 +229,61 @@ TEST_F(BackgroundIndexTest, IndexTwoFiles) { FileURI("unittest:///root/B.cc")})); } +TEST_F(BackgroundIndexTest, MainFileRefs) { + MockFS FS; + FS.Files[testPath("root/A.h")] = R"cpp( + void header_sym(); + )cpp"; + FS.Files[testPath("root/A.cc")] = + "#include \"A.h\"\nstatic void main_sym() { (void)header_sym; }"; + + // Check the behaviour with CollectMainFileRefs = false (the default). + { + llvm::StringMap Storage; + size_t CacheHits = 0; + MemoryShardStorage MSS(Storage, CacheHits); + OverlayCDB CDB(/*Base=*/nullptr); + BackgroundIndex Idx(FS, CDB, [&](llvm::StringRef) { return &MSS; }, + /*Opts=*/{}); + + tooling::CompileCommand Cmd; + Cmd.Filename = testPath("root/A.cc"); + Cmd.Directory = testPath("root"); + Cmd.CommandLine = {"clang++", testPath("root/A.cc")}; + CDB.setCompileCommand(testPath("root/A.cc"), Cmd); + + ASSERT_TRUE(Idx.blockUntilIdleForTest()); + EXPECT_THAT( + runFuzzyFind(Idx, ""), + UnorderedElementsAre(AllOf(Named("header_sym"), NumReferences(1U)), + AllOf(Named("main_sym"), NumReferences(0U)))); + } + + // Check the behaviour with CollectMainFileRefs = true. 
+ { + llvm::StringMap Storage; + size_t CacheHits = 0; + MemoryShardStorage MSS(Storage, CacheHits); + OverlayCDB CDB(/*Base=*/nullptr); + BackgroundIndex::Options Opts; + Opts.CollectMainFileRefs = true; + BackgroundIndex Idx( + FS, CDB, [&](llvm::StringRef) { return &MSS; }, Opts); + + tooling::CompileCommand Cmd; + Cmd.Filename = testPath("root/A.cc"); + Cmd.Directory = testPath("root"); + Cmd.CommandLine = {"clang++", testPath("root/A.cc")}; + CDB.setCompileCommand(testPath("root/A.cc"), Cmd); + + ASSERT_TRUE(Idx.blockUntilIdleForTest()); + EXPECT_THAT( + runFuzzyFind(Idx, ""), + UnorderedElementsAre(AllOf(Named("header_sym"), NumReferences(1U)), + AllOf(Named("main_sym"), NumReferences(1U)))); + } +} + TEST_F(BackgroundIndexTest, ShardStorageTest) { MockFS FS; FS.Files[testPath("root/A.h")] = R"cpp( diff --git a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp index 4c655c3338d203..5bfdaaf6c3434c 100644 --- a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp +++ b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp @@ -207,6 +207,19 @@ TEST_F(TargetDeclTest, UsingDecl) { )cpp"; EXPECT_DECLS("MemberExpr", {"using X::foo", Rel::Alias}, {"int foo()", Rel::Underlying}); + + Code = R"cpp( + template + struct Base { + void waldo() {} + }; + template + struct Derived : Base { + using Base::[[waldo]]; + }; + )cpp"; + EXPECT_DECLS("UnresolvedUsingValueDecl", {"using Base::waldo", Rel::Alias}, + {"void waldo()", Rel::Underlying}); } TEST_F(TargetDeclTest, ConstructorInitList) { @@ -442,6 +455,28 @@ TEST_F(TargetDeclTest, Concept) { )cpp"; EXPECT_DECLS("ConceptSpecializationExpr", {"template concept Fooable = true;"}); + + // constrained-parameter + Code = R"cpp( + template + concept Fooable = true; + + template <[[Fooable]] T> + void bar(T t); + )cpp"; + EXPECT_DECLS("ConceptSpecializationExpr", + {"template concept Fooable = true;"}); + + // partial-concept-id + Code = R"cpp( + template + concept 
Fooable = true; + + template <[[Fooable]] T> + void bar(T t); + )cpp"; + EXPECT_DECLS("ConceptSpecializationExpr", + {"template concept Fooable = true;"}); } TEST_F(TargetDeclTest, FunctionTemplate) { diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp index 70a8e6832d02f0..d89db8f015cea0 100644 --- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp +++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp @@ -714,7 +714,6 @@ TEST_F(SymbolCollectorTest, Refs) { EXPECT_THAT(Refs, Not(Contains(Pair(findSymbol(Symbols, "NS").ID, _)))); EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "MACRO").ID, HaveRanges(Main.ranges("macro"))))); - // Symbols *only* in the main file: // - (a, b) externally visible and should have refs. // - (c, FUNC) externally invisible and had no refs collected. auto MainSymbols = @@ -723,6 +722,20 @@ TEST_F(SymbolCollectorTest, Refs) { EXPECT_THAT(Refs, Contains(Pair(findSymbol(MainSymbols, "b").ID, _))); EXPECT_THAT(Refs, Not(Contains(Pair(findSymbol(MainSymbols, "c").ID, _)))); EXPECT_THAT(Refs, Not(Contains(Pair(findSymbol(MainSymbols, "FUNC").ID, _)))); + + // Run the collector again with CollectMainFileRefs = true. + // We need to recreate InMemoryFileSystem because runSymbolCollector() + // calls MemoryBuffer::getMemBuffer(), which makes the buffers unusable + // after runSymbolCollector() exits. + InMemoryFileSystem = new llvm::vfs::InMemoryFileSystem(); + CollectorOpts.CollectMainFileRefs = true; + runSymbolCollector(Header.code(), + (Main.code() + SymbolsOnlyInMainCode.code()).str()); + EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "a").ID, _))); + EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "b").ID, _))); + EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "c").ID, _))); + // However, references to main-file macros are not collected. 
+ EXPECT_THAT(Refs, Not(Contains(Pair(findSymbol(Symbols, "FUNC").ID, _)))); } TEST_F(SymbolCollectorTest, MacroRefInHeader) { @@ -908,8 +921,9 @@ TEST_F(SymbolCollectorTest, HeaderAsMainFile) { $Foo[[Foo]] fo; } )"); - // The main file is normal .cpp file, we should collect the refs - // for externally visible symbols. + // We should collect refs to main-file symbols in all cases: + + // 1. The main file is normal .cpp file. TestFileName = testPath("foo.cpp"); runSymbolCollector("", Header.code()); EXPECT_THAT(Refs, @@ -918,7 +932,7 @@ TEST_F(SymbolCollectorTest, HeaderAsMainFile) { Pair(findSymbol(Symbols, "Func").ID, HaveRanges(Header.ranges("Func"))))); - // Run the .h file as main file, we should collect the refs. + // 2. Run the .h file as main file. TestFileName = testPath("foo.h"); runSymbolCollector("", Header.code(), /*ExtraArgs=*/{"-xobjective-c++-header"}); @@ -929,8 +943,7 @@ TEST_F(SymbolCollectorTest, HeaderAsMainFile) { Pair(findSymbol(Symbols, "Func").ID, HaveRanges(Header.ranges("Func"))))); - // Run the .hh file as main file (without "-x c++-header"), we should collect - // the refs as well. + // 3. Run the .hh file as main file (without "-x c++-header"). TestFileName = testPath("foo.hh"); runSymbolCollector("", Header.code()); EXPECT_THAT(Symbols, UnorderedElementsAre(QName("Foo"), QName("Func"))); diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp index 63e8c96daab842..d2337dcbd7b318 100644 --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -1087,66 +1087,78 @@ TEST(LocateSymbol, TextualDependent) { TEST(LocateSymbol, Alias) { const char *Tests[] = { - R"cpp( + R"cpp( template struct function {}; template using [[callback]] = function; c^allback foo; )cpp", - // triggered on non-definition of a renaming alias: should not give any - // underlying decls. 
- R"cpp( + // triggered on non-definition of a renaming alias: should not give any + // underlying decls. + R"cpp( class Foo {}; typedef Foo [[Bar]]; B^ar b; )cpp", - R"cpp( + R"cpp( class Foo {}; using [[Bar]] = Foo; // definition Ba^r b; )cpp", - // triggered on the underlying decl of a renaming alias. - R"cpp( + // triggered on the underlying decl of a renaming alias. + R"cpp( class [[Foo]]; using Bar = Fo^o; )cpp", - // triggered on definition of a non-renaming alias: should give underlying - // decls. - R"cpp( + // triggered on definition of a non-renaming alias: should give underlying + // decls. + R"cpp( namespace ns { class [[Foo]] {}; } using ns::F^oo; )cpp", - R"cpp( + R"cpp( namespace ns { int [[x]](char); int [[x]](double); } using ns::^x; )cpp", - R"cpp( + R"cpp( namespace ns { int [[x]](char); int x(double); } using ns::x; int y = ^x('a'); )cpp", - R"cpp( + R"cpp( namespace ns { class [[Foo]] {}; } using ns::Foo; F^oo f; )cpp", - // other cases that don't matter much. - R"cpp( + // other cases that don't matter much. 
+ R"cpp( class Foo {}; typedef Foo [[Ba^r]]; )cpp", - R"cpp( + R"cpp( class Foo {}; using [[B^ar]] = Foo; )cpp", + + // Member of dependent base + R"cpp( + template + struct Base { + void [[waldo]]() {} + }; + template + struct Derived : Base { + using Base::w^aldo; + }; + )cpp", }; for (const auto* Case : Tests) { diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 3dcfc9fee629ac..6f07b92f253230 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -1777,8 +1777,17 @@ DEF_TRAVERSE_DECL(TemplateTypeParmDecl, { // D is the "T" in something like "template class vector;" if (D->getTypeForDecl()) TRY_TO(TraverseType(QualType(D->getTypeForDecl(), 0))); - if (const auto *TC = D->getTypeConstraint()) - TRY_TO(TraverseConceptReference(*TC)); + if (const auto *TC = D->getTypeConstraint()) { + if (Expr *IDC = TC->getImmediatelyDeclaredConstraint()) { + TRY_TO(TraverseStmt(IDC)); + } else { + // Avoid traversing the ConceptReference in the TypeCosntraint + // if we have an immediately-declared-constraint, otherwise + // we'll end up visiting the concept and the arguments in + // the TC twice. + TRY_TO(TraverseConceptReference(*TC)); + } + } if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) TRY_TO(TraverseTypeLoc(D->getDefaultArgumentInfo()->getTypeLoc())); }) diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index b79ed41284ace8..73c60780041572 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -298,6 +298,13 @@ BUILTIN(__builtin_altivec_vrldmi, "V2ULLiV2ULLiV2ULLiV2ULLi", "") BUILTIN(__builtin_altivec_vrlwnm, "V4UiV4UiV4Ui", "") BUILTIN(__builtin_altivec_vrldnm, "V2ULLiV2ULLiV2ULLi", "") +// P10 Vector Extract with Mask built-ins. 
+BUILTIN(__builtin_altivec_vextractbm, "UiV16Uc", "") +BUILTIN(__builtin_altivec_vextracthm, "UiV8Us", "") +BUILTIN(__builtin_altivec_vextractwm, "UiV4Ui", "") +BUILTIN(__builtin_altivec_vextractdm, "UiV2ULLi", "") +BUILTIN(__builtin_altivec_vextractqm, "UiV1ULLLi", "") + // P10 Vector Parallel Bits built-ins. BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "") BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "") diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index fd21285b1f7929..a63fae5b5f726c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6038,9 +6038,8 @@ def err_func_def_incomplete_result : Error< def err_atomic_specifier_bad_type : Error<"_Atomic cannot be applied to " "%select{incomplete |array |function |reference |atomic |qualified " - "|sizeless ||integer |integer }0type " - "%1 %select{|||||||which is not trivially copyable|with less than " - "1 byte of precision|with a non power of 2 precision}0">; + "|sizeless ||integer }0type " + "%1 %select{|||||||which is not trivially copyable|}0">; // Expressions. 
def ext_sizeof_alignof_function_type : Extension< @@ -7967,6 +7966,8 @@ def err_atomic_exclusive_builtin_pointer_size : Error< " 1,2,4 or 8 byte type (%0 invalid)">; def err_atomic_builtin_ext_int_size : Error< "Atomic memory operand must have a power-of-two size">; +def err_atomic_builtin_ext_int_prohibit : Error< + "argument to atomic builtin of type '_ExtInt' is not supported">; def err_atomic_op_needs_atomic : Error< "address argument to atomic operation must be a pointer to _Atomic " "type (%0 invalid)">; diff --git a/clang/include/clang/Basic/TargetOptions.h b/clang/include/clang/Basic/TargetOptions.h index 4a5d469b8e547c..d1cc024957dae4 100644 --- a/clang/include/clang/Basic/TargetOptions.h +++ b/clang/include/clang/Basic/TargetOptions.h @@ -35,6 +35,9 @@ class TargetOptions { /// If given, the name of the target CPU to generate code for. std::string CPU; + /// If given, the name of the target CPU to tune code for. + std::string TuneCPU; + /// If given, the unit to use for floating point math. std::string FPMath; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 9d36dc6cc3934a..6827c877acf8a2 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2715,7 +2715,7 @@ def module_file_info : Flag<["-"], "module-file-info">, Flags<[DriverOption,CC1O HelpText<"Provide information about a particular module file">; def mthumb : Flag<["-"], "mthumb">, Group; def mtune_EQ : Joined<["-"], "mtune=">, Group, - HelpText<"Accepted for compatibility with GCC. Currently has no effect.">; + HelpText<"Only supported on X86. 
Otherwise accepted for compatibility with GCC.">; def multi__module : Flag<["-"], "multi_module">; def multiply__defined__unused : Separate<["-"], "multiply_defined_unused">; def multiply__defined : Separate<["-"], "multiply_defined">; @@ -3490,6 +3490,8 @@ let Flags = [CC1Option, CC1AsOption, NoDriverOption] in { def target_cpu : Separate<["-"], "target-cpu">, HelpText<"Target a specific cpu type">; +def tune_cpu : Separate<["-"], "tune-cpu">, + HelpText<"Tune for a specific cpu type">; def target_feature : Separate<["-"], "target-feature">, HelpText<"Target specific attributes">; def triple : Separate<["-"], "triple">, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 1d12551a8ad212..19d58b889ef75e 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12192,7 +12192,6 @@ class Sema final { bool CheckX86BuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall); bool CheckX86BuiltinTileArgumentsRange(CallExpr *TheCall, ArrayRef ArgNums); - bool CheckX86BuiltinTileArgumentsRange(CallExpr *TheCall, int ArgNum); bool CheckX86BuiltinTileDuplicate(CallExpr *TheCall, ArrayRef ArgNums); bool CheckX86BuiltinTileRangeAndDuplicate(CallExpr *TheCall, ArrayRef ArgNums); diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index e4456ea7fa0f85..50a3b0e83a56fb 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -391,6 +391,8 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple, switch (os) { case llvm::Triple::FreeBSD: return new FreeBSDTargetInfo(Triple, Opts); + case llvm::Triple::OpenBSD: + return new OpenBSDTargetInfo(Triple, Opts); case llvm::Triple::Fuchsia: return new FuchsiaTargetInfo(Triple, Opts); case llvm::Triple::Linux: @@ -652,6 +654,16 @@ TargetInfo::CreateTargetInfo(DiagnosticsEngine &Diags, return nullptr; } + // Check the TuneCPU name if specified. 
+ if (!Opts->TuneCPU.empty() && !Target->isValidCPUName(Opts->TuneCPU)) { + Diags.Report(diag::err_target_unknown_cpu) << Opts->TuneCPU; + SmallVector ValidList; + Target->fillValidCPUList(ValidList); + if (!ValidList.empty()) + Diags.Report(diag::note_valid_options) << llvm::join(ValidList, ", "); + return nullptr; + } + // Set the target ABI if specified. if (!Opts->ABI.empty() && !Target->setABI(Opts->ABI)) { Diags.Report(diag::err_target_unknown_abi) << Opts->ABI; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 9d225b23e3c335..98ba1efc20de6f 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2520,6 +2520,9 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // bytes). if (ArrTy->getSizeModifier() == ArrayType::Static) { QualType ETy = ArrTy->getElementType(); + llvm::Align Alignment = + CGM.getNaturalTypeAlignment(ETy).getAsAlign(); + AI->addAttrs(llvm::AttrBuilder().addAlignmentAttr(Alignment)); uint64_t ArrSize = ArrTy->getSize().getZExtValue(); if (!ETy->isIncompleteType() && ETy->isConstantSizeType() && ArrSize) { @@ -2539,10 +2542,15 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // For C99 VLAs with the static keyword, we don't know the size so // we can't use the dereferenceable attribute, but in addrspace(0) // we know that it must be nonnull. 
- if (ArrTy->getSizeModifier() == VariableArrayType::Static && - !getContext().getTargetAddressSpace(ArrTy->getElementType()) && - !CGM.getCodeGenOpts().NullPointerIsValid) - AI->addAttr(llvm::Attribute::NonNull); + if (ArrTy->getSizeModifier() == VariableArrayType::Static) { + QualType ETy = ArrTy->getElementType(); + llvm::Align Alignment = + CGM.getNaturalTypeAlignment(ETy).getAsAlign(); + AI->addAttrs(llvm::AttrBuilder().addAlignmentAttr(Alignment)); + if (!getContext().getTargetAddressSpace(ETy) && + !CGM.getCodeGenOpts().NullPointerIsValid) + AI->addAttr(llvm::Attribute::NonNull); + } } // Set `align` attribute if any. diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index ff35d94626d1dc..23d35f68e141b3 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1749,6 +1749,7 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, // we have a decl for the function and it has a target attribute then // parse that and add it to the feature set. StringRef TargetCPU = getTarget().getTargetOpts().CPU; + StringRef TuneCPU = getTarget().getTargetOpts().TuneCPU; std::vector Features; const auto *FD = dyn_cast_or_null(GD.getDecl()); FD = FD ? 
FD->getMostRecentDecl() : FD; @@ -1783,6 +1784,10 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, Attrs.addAttribute("target-cpu", TargetCPU); AddedAttr = true; } + if (TuneCPU != "") { + Attrs.addAttribute("tune-cpu", TuneCPU); + AddedAttr = true; + } if (!Features.empty()) { llvm::sort(Features); Attrs.addAttribute("target-features", llvm::join(Features, ",")); diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index e6e1b211193591..8277804d27c0ec 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -44,7 +44,8 @@ CoverageMappingModuleGen::setUpCoverageCallbacks(Preprocessor &PP) { PP.setTokenWatcher([CoverageInfo](clang::Token Tok) { // Update previous token location. CoverageInfo->PrevTokLoc = Tok.getLocation(); - CoverageInfo->updateNextTokLoc(Tok.getLocation()); + if (Tok.getKind() != clang::tok::eod) + CoverageInfo->updateNextTokLoc(Tok.getLocation()); }); return CoverageInfo; } @@ -305,20 +306,24 @@ class CoverageMappingBuilder { /// non-comment token. If shrinking the skipped range would make it empty, /// this returns None. Optional adjustSkippedRange(SourceManager &SM, - SpellingRegion SR, + SourceLocation LocStart, + SourceLocation LocEnd, SourceLocation PrevTokLoc, SourceLocation NextTokLoc) { + SpellingRegion SR{SM, LocStart, LocEnd}; // If Range begin location is invalid, it's not a comment region. 
if (PrevTokLoc.isInvalid()) return SR; unsigned PrevTokLine = SM.getSpellingLineNumber(PrevTokLoc); unsigned NextTokLine = SM.getSpellingLineNumber(NextTokLoc); SpellingRegion newSR(SR); - if (SR.LineStart == PrevTokLine) { + if (SM.isWrittenInSameFile(LocStart, PrevTokLoc) && + SR.LineStart == PrevTokLine) { newSR.LineStart = SR.LineStart + 1; newSR.ColumnStart = 1; } - if (SR.LineEnd == NextTokLine) { + if (SM.isWrittenInSameFile(LocEnd, NextTokLoc) && + SR.LineEnd == NextTokLine) { newSR.LineEnd = SR.LineEnd - 1; newSR.ColumnEnd = SR.ColumnStart + 1; } @@ -354,14 +359,13 @@ class CoverageMappingBuilder { auto CovFileID = getCoverageFileID(LocStart); if (!CovFileID) continue; - SpellingRegion SR{SM, LocStart, LocEnd}; - if (Optional res = - adjustSkippedRange(SM, SR, I.PrevTokLoc, I.NextTokLoc)) - SR = res.getValue(); - else + Optional SR = + adjustSkippedRange(SM, LocStart, LocEnd, I.PrevTokLoc, I.NextTokLoc); + if (!SR.hasValue()) continue; auto Region = CounterMappingRegion::makeSkipped( - *CovFileID, SR.LineStart, SR.ColumnStart, SR.LineEnd, SR.ColumnEnd); + *CovFileID, SR->LineStart, SR->ColumnStart, SR->LineEnd, + SR->ColumnEnd); // Make sure that we only collect the regions that are inside // the source code of this function. if (Region.LineStart >= FileLineRanges[*CovFileID].first && diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 82cf2538338f04..5350fb505ff379 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Compression.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Host.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/TargetParser.h" @@ -2071,6 +2072,18 @@ void Clang::AddX86TargetArgs(const ArgList &Args, CmdArgs.push_back("soft"); CmdArgs.push_back("-mstack-alignment=4"); } + + // Handle -mtune. 
+ // FIXME: We should default to "generic" unless -march is set to match gcc. + if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) { + StringRef Name = A->getValue(); + + if (Name == "native") + Name = llvm::sys::getHostCPUName(); + + CmdArgs.push_back("-tune-cpu"); + CmdArgs.push_back(Args.MakeArgString(Name)); + } } void Clang::AddHexagonTargetArgs(const ArgList &Args, diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 3b69eef12b90e3..477959f04c414c 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3658,6 +3658,7 @@ static void ParseTargetArgs(TargetOptions &Opts, ArgList &Args, Opts.EABIVersion = EABIVersion; } Opts.CPU = std::string(Args.getLastArgValue(OPT_target_cpu)); + Opts.TuneCPU = std::string(Args.getLastArgValue(OPT_tune_cpu)); Opts.FPMath = std::string(Args.getLastArgValue(OPT_mfpmath)); Opts.FeaturesAsWritten = Args.getAllArgValues(OPT_target_feature); Opts.LinkerVersion = diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index ac4182613cdda3..b1e70f6c41bbc6 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -16815,6 +16815,34 @@ static vector signed char __ATTRS_o_ai vec_nabs(vector signed char __a) { } #ifdef __POWER10_VECTOR__ + +/* vec_extractm */ + +static __inline__ unsigned int __ATTRS_o_ai +vec_extractm(vector unsigned char __a) { + return __builtin_altivec_vextractbm(__a); +} + +static __inline__ unsigned int __ATTRS_o_ai +vec_extractm(vector unsigned short __a) { + return __builtin_altivec_vextracthm(__a); +} + +static __inline__ unsigned int __ATTRS_o_ai +vec_extractm(vector unsigned int __a) { + return __builtin_altivec_vextractwm(__a); +} + +static __inline__ unsigned int __ATTRS_o_ai +vec_extractm(vector unsigned long long __a) { + return __builtin_altivec_vextractdm(__a); +} + +static __inline__ unsigned int __ATTRS_o_ai +vec_extractm(vector unsigned 
__int128 __a) { + return __builtin_altivec_vextractqm(__a); +} + /* vec_pdep */ static __inline__ vector unsigned long long __ATTRS_o_ai diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 4efd62f58d2e6d..deceffdb0ba50c 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3705,7 +3705,7 @@ bool Sema::CheckX86BuiltinGatherScatterScale(unsigned BuiltinID, enum { TileRegLow = 0, TileRegHigh = 7 }; bool Sema::CheckX86BuiltinTileArgumentsRange(CallExpr *TheCall, - ArrayRef ArgNums) { + ArrayRef ArgNums) { for (int ArgNum : ArgNums) { if (SemaBuiltinConstantArgRange(TheCall, ArgNum, TileRegLow, TileRegHigh)) return true; @@ -3713,19 +3713,20 @@ bool Sema::CheckX86BuiltinTileArgumentsRange(CallExpr *TheCall, return false; } -bool Sema::CheckX86BuiltinTileArgumentsRange(CallExpr *TheCall, int ArgNum) { - return SemaBuiltinConstantArgRange(TheCall, ArgNum, TileRegLow, TileRegHigh); -} - bool Sema::CheckX86BuiltinTileDuplicate(CallExpr *TheCall, ArrayRef ArgNums) { // Because the max number of tile register is TileRegHigh + 1, so here we use // each bit to represent the usage of them in bitset. std::bitset ArgValues; for (int ArgNum : ArgNums) { - llvm::APSInt Arg; - SemaBuiltinConstantArg(TheCall, ArgNum, Arg); - int ArgExtValue = Arg.getExtValue(); + Expr *Arg = TheCall->getArg(ArgNum); + if (Arg->isTypeDependent() || Arg->isValueDependent()) + continue; + + llvm::APSInt Result; + if (SemaBuiltinConstantArg(TheCall, ArgNum, Result)) + return true; + int ArgExtValue = Result.getExtValue(); assert((ArgExtValue >= TileRegLow || ArgExtValue <= TileRegHigh) && "Incorrect tile register num."); if (ArgValues.test(ArgExtValue)) @@ -5050,6 +5051,11 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, ? 
0 : 1); + if (ValType->isExtIntType()) { + Diag(Ptr->getExprLoc(), diag::err_atomic_builtin_ext_int_prohibit); + return ExprError(); + } + return AE; } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index a493f3114dc299..53917ef98acdff 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -70,12 +70,15 @@ class DSAStackTy { const Expr *RefExpr = nullptr; DeclRefExpr *PrivateCopy = nullptr; SourceLocation ImplicitDSALoc; + bool AppliedToPointee = false; DSAVarData() = default; DSAVarData(OpenMPDirectiveKind DKind, OpenMPClauseKind CKind, const Expr *RefExpr, DeclRefExpr *PrivateCopy, - SourceLocation ImplicitDSALoc, unsigned Modifier) + SourceLocation ImplicitDSALoc, unsigned Modifier, + bool AppliedToPointee) : DKind(DKind), CKind(CKind), Modifier(Modifier), RefExpr(RefExpr), - PrivateCopy(PrivateCopy), ImplicitDSALoc(ImplicitDSALoc) {} + PrivateCopy(PrivateCopy), ImplicitDSALoc(ImplicitDSALoc), + AppliedToPointee(AppliedToPointee) {} }; using OperatorOffsetTy = llvm::SmallVector, 4>; @@ -99,6 +102,9 @@ class DSAStackTy { /// variable is marked as lastprivate(true) or not (false). llvm::PointerIntPair RefExpr; DeclRefExpr *PrivateCopy = nullptr; + /// true if the attribute is applied to the pointee, not the variable + /// itself. + bool AppliedToPointee = false; }; using DeclSAMapTy = llvm::SmallDenseMap; using UsedRefMapTy = llvm::SmallDenseMap; @@ -511,7 +517,8 @@ class DSAStackTy { /// Adds explicit data sharing attribute to the specified declaration. void addDSA(const ValueDecl *D, const Expr *E, OpenMPClauseKind A, - DeclRefExpr *PrivateCopy = nullptr, unsigned Modifier = 0); + DeclRefExpr *PrivateCopy = nullptr, unsigned Modifier = 0, + bool AppliedToPointee = false); /// Adds additional information for the reduction items with the reduction id /// represented as an operator. 
@@ -563,7 +570,8 @@ class DSAStackTy { /// match specified \a CPred predicate in any directive which matches \a DPred /// predicate. const DSAVarData - hasDSA(ValueDecl *D, const llvm::function_ref CPred, + hasDSA(ValueDecl *D, + const llvm::function_ref CPred, const llvm::function_ref DPred, bool FromParent) const; /// Checks if the specified variables has data-sharing attributes which @@ -571,15 +579,16 @@ class DSAStackTy { /// matches \a DPred predicate. const DSAVarData hasInnermostDSA(ValueDecl *D, - const llvm::function_ref CPred, + const llvm::function_ref CPred, const llvm::function_ref DPred, bool FromParent) const; /// Checks if the specified variables has explicit data-sharing /// attributes which match specified \a CPred predicate at the specified /// OpenMP region. - bool hasExplicitDSA(const ValueDecl *D, - const llvm::function_ref CPred, - unsigned Level, bool NotLastprivate = false) const; + bool + hasExplicitDSA(const ValueDecl *D, + const llvm::function_ref CPred, + unsigned Level, bool NotLastprivate = false) const; /// Returns true if the directive at level \Level matches in the /// specified \a DPred predicate. 
@@ -1185,6 +1194,7 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(const_iterator &Iter, DVar.CKind = Data.Attributes; DVar.ImplicitDSALoc = Iter->DefaultAttrLoc; DVar.Modifier = Data.Modifier; + DVar.AppliedToPointee = Data.AppliedToPointee; return DVar; } @@ -1341,7 +1351,8 @@ const ValueDecl *DSAStackTy::getParentLoopControlVariable(unsigned I) const { } void DSAStackTy::addDSA(const ValueDecl *D, const Expr *E, OpenMPClauseKind A, - DeclRefExpr *PrivateCopy, unsigned Modifier) { + DeclRefExpr *PrivateCopy, unsigned Modifier, + bool AppliedToPointee) { D = getCanonicalDecl(D); if (A == OMPC_threadprivate) { DSAInfo &Data = Threadprivates[D]; @@ -1365,12 +1376,14 @@ void DSAStackTy::addDSA(const ValueDecl *D, const Expr *E, OpenMPClauseKind A, Data.Attributes = A; Data.RefExpr.setPointerAndInt(E, IsLastprivate); Data.PrivateCopy = PrivateCopy; + Data.AppliedToPointee = AppliedToPointee; if (PrivateCopy) { DSAInfo &Data = getTopOfStack().SharingMap[PrivateCopy->getDecl()]; Data.Modifier = Modifier; Data.Attributes = A; Data.RefExpr.setPointerAndInt(PrivateCopy, IsLastprivate); Data.PrivateCopy = nullptr; + Data.AppliedToPointee = AppliedToPointee; } } } @@ -1480,7 +1493,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData( "set."); TaskgroupDescriptor = I->TaskgroupReductionRef; return DSAVarData(I->Directive, OMPC_reduction, Data.RefExpr.getPointer(), - Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task); + Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task, + /*AppliedToPointee=*/false); } return DSAVarData(); } @@ -1506,7 +1520,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData( "set."); TaskgroupDescriptor = I->TaskgroupReductionRef; return DSAVarData(I->Directive, OMPC_reduction, Data.RefExpr.getPointer(), - Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task); + Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task, + /*AppliedToPointee=*/false); } return DSAVarData(); } @@ -1675,6 
+1690,7 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, DVar.ImplicitDSALoc = I->DefaultAttrLoc; DVar.DKind = I->Directive; DVar.Modifier = Data.Modifier; + DVar.AppliedToPointee = Data.AppliedToPointee; return DVar; } } @@ -1696,7 +1712,7 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, // listed in a firstprivate clause, even if they are static data members. DSAVarData DVarTemp = hasInnermostDSA( D, - [](OpenMPClauseKind C) { + [](OpenMPClauseKind C, bool) { return C == OMPC_firstprivate || C == OMPC_shared; }, MatchesAlways, FromParent); @@ -1725,6 +1741,7 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, DVar.ImplicitDSALoc = I->DefaultAttrLoc; DVar.DKind = I->Directive; DVar.Modifier = Data.Modifier; + DVar.AppliedToPointee = Data.AppliedToPointee; } return DVar; @@ -1755,7 +1772,7 @@ const DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(ValueDecl *D, const DSAStackTy::DSAVarData DSAStackTy::hasDSA(ValueDecl *D, - const llvm::function_ref CPred, + const llvm::function_ref CPred, const llvm::function_ref DPred, bool FromParent) const { if (isStackEmpty()) @@ -1771,14 +1788,14 @@ DSAStackTy::hasDSA(ValueDecl *D, continue; const_iterator NewI = I; DSAVarData DVar = getDSA(NewI, D); - if (I == NewI && CPred(DVar.CKind)) + if (I == NewI && CPred(DVar.CKind, DVar.AppliedToPointee)) return DVar; } return {}; } const DSAStackTy::DSAVarData DSAStackTy::hasInnermostDSA( - ValueDecl *D, const llvm::function_ref CPred, + ValueDecl *D, const llvm::function_ref CPred, const llvm::function_ref DPred, bool FromParent) const { if (isStackEmpty()) @@ -1792,26 +1809,28 @@ const DSAStackTy::DSAVarData DSAStackTy::hasInnermostDSA( return {}; const_iterator NewI = StartI; DSAVarData DVar = getDSA(NewI, D); - return (NewI == StartI && CPred(DVar.CKind)) ? DVar : DSAVarData(); + return (NewI == StartI && CPred(DVar.CKind, DVar.AppliedToPointee)) + ? 
DVar + : DSAVarData(); } bool DSAStackTy::hasExplicitDSA( - const ValueDecl *D, const llvm::function_ref CPred, + const ValueDecl *D, + const llvm::function_ref CPred, unsigned Level, bool NotLastprivate) const { if (getStackSize() <= Level) return false; D = getCanonicalDecl(D); const SharingMapTy &StackElem = getStackElemAtLevel(Level); auto I = StackElem.SharingMap.find(D); - if (I != StackElem.SharingMap.end() && - I->getSecond().RefExpr.getPointer() && - CPred(I->getSecond().Attributes) && + if (I != StackElem.SharingMap.end() && I->getSecond().RefExpr.getPointer() && + CPred(I->getSecond().Attributes, I->getSecond().AppliedToPointee) && (!NotLastprivate || !I->getSecond().RefExpr.getInt())) return true; // Check predetermined rules for the loop control variables. auto LI = StackElem.LCVMap.find(D); if (LI != StackElem.LCVMap.end()) - return CPred(OMPC_private); + return CPred(OMPC_private, /*AppliedToPointee=*/false); return false; } @@ -2057,14 +2076,17 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, // By default, all the data that has a scalar type is mapped by copy // (except for reduction variables). 
// Defaultmap scalar is mutual exclusive to defaultmap pointer - IsByRef = - (DSAStack->isForceCaptureByReferenceInTargetExecutable() && - !Ty->isAnyPointerType()) || - !Ty->isScalarType() || - DSAStack->isDefaultmapCapturedByRef( - Level, getVariableCategoryFromDecl(LangOpts, D)) || - DSAStack->hasExplicitDSA( - D, [](OpenMPClauseKind K) { return K == OMPC_reduction; }, Level); + IsByRef = (DSAStack->isForceCaptureByReferenceInTargetExecutable() && + !Ty->isAnyPointerType()) || + !Ty->isScalarType() || + DSAStack->isDefaultmapCapturedByRef( + Level, getVariableCategoryFromDecl(LangOpts, D)) || + DSAStack->hasExplicitDSA( + D, + [](OpenMPClauseKind K, bool AppliedToPointee) { + return K == OMPC_reduction && !AppliedToPointee; + }, + Level); } } @@ -2075,8 +2097,9 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, OMPD_target) || !(DSAStack->hasExplicitDSA( D, - [](OpenMPClauseKind K) -> bool { - return K == OMPC_firstprivate; + [](OpenMPClauseKind K, bool AppliedToPointee) -> bool { + return K == OMPC_firstprivate || + (K == OMPC_reduction && AppliedToPointee); }, Level, /*NotLastprivate=*/true) || DSAStack->isUsesAllocatorsDecl(Level, D))) && @@ -2088,7 +2111,8 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, // copy !(DSAStack->getDefaultDSA() == DSA_firstprivate && !DSAStack->hasExplicitDSA( - D, [](OpenMPClauseKind K) { return K != OMPC_unknown; }, Level) && + D, [](OpenMPClauseKind K, bool) { return K != OMPC_unknown; }, + Level) && !DSAStack->isLoopControlVariable(D, Level).first); } @@ -2151,7 +2175,8 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) checkDeclIsAllowedInOpenMPTarget(nullptr, VD); return nullptr; - } else if (isInOpenMPTargetExecutionDirective()) { + } + if (isInOpenMPTargetExecutionDirective()) { // If the declaration is enclosed in a 'declare target' directive, // then it should not be captured. 
// @@ -2204,7 +2229,8 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, return VD ? VD : Info.second; DSAStackTy::DSAVarData DVarTop = DSAStack->getTopDSA(D, DSAStack->isClauseParsingMode()); - if (DVarTop.CKind != OMPC_unknown && isOpenMPPrivate(DVarTop.CKind)) + if (DVarTop.CKind != OMPC_unknown && isOpenMPPrivate(DVarTop.CKind) && + (!VD || VD->hasLocalStorage() || !DVarTop.AppliedToPointee)) return VD ? VD : cast(DVarTop.PrivateCopy->getDecl()); // Threadprivate variables must not be captured. if (isOpenMPThreadPrivate(DVarTop.CKind)) @@ -2212,7 +2238,11 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, // The variable is not private or it is the variable in the directive with // default(none) clause and not used in any clause. DSAStackTy::DSAVarData DVarPrivate = DSAStack->hasDSA( - D, isOpenMPPrivate, [](OpenMPDirectiveKind) { return true; }, + D, + [](OpenMPClauseKind C, bool AppliedToPointee) { + return isOpenMPPrivate(C) && !AppliedToPointee; + }, + [](OpenMPDirectiveKind) { return true; }, DSAStack->isClauseParsingMode()); // Global shared must not be captured. 
if (VD && !VD->hasLocalStorage() && DVarPrivate.CKind == OMPC_unknown && @@ -2266,7 +2296,8 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, (IsTriviallyCopyable || !isOpenMPTaskLoopDirective(CaptureRegions[CapLevel]))) { if (DSAStack->hasExplicitDSA( - D, [](OpenMPClauseKind K) { return K == OMPC_firstprivate; }, + D, + [](OpenMPClauseKind K, bool) { return K == OMPC_firstprivate; }, Level, /*NotLastprivate=*/true)) return OMPC_firstprivate; DSAStackTy::DSAVarData DVar = DSAStack->getImplicitDSA(D, Level); @@ -2287,7 +2318,8 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, if ((DSAStack->getPossiblyLoopCunter() == D->getCanonicalDecl() || DSAStack->isLoopControlVariable(D).first) && !DSAStack->hasExplicitDSA( - D, [](OpenMPClauseKind K) { return K != OMPC_private; }, Level) && + D, [](OpenMPClauseKind K, bool) { return K != OMPC_private; }, + Level) && !isOpenMPSimdDirective(DSAStack->getCurrentDirective())) return OMPC_private; } @@ -2295,7 +2327,8 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, if (DSAStack->isThreadPrivate(const_cast(VD)) && DSAStack->isForceVarCapturing() && !DSAStack->hasExplicitDSA( - D, [](OpenMPClauseKind K) { return K == OMPC_copyin; }, Level)) + D, [](OpenMPClauseKind K, bool) { return K == OMPC_copyin; }, + Level)) return OMPC_private; } // User-defined allocators are private since they must be defined in the @@ -2306,7 +2339,8 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, DSAStackTy::UsesAllocatorsDeclKind::UserDefinedAllocator) return OMPC_private; return (DSAStack->hasExplicitDSA( - D, [](OpenMPClauseKind K) { return K == OMPC_private; }, Level) || + D, [](OpenMPClauseKind K, bool) { return K == OMPC_private; }, + Level) || (DSAStack->isClauseParsingMode() && DSAStack->getClauseParsingMode() == OMPC_private) || // Consider taskgroup reduction descriptor variable a private @@ -2331,15 +2365,16 @@ void 
Sema::setOpenMPCaptureKind(FieldDecl *FD, const ValueDecl *D, OpenMPClauseKind OMPC = OMPC_unknown; for (unsigned I = DSAStack->getNestingLevel() + 1; I > Level; --I) { const unsigned NewLevel = I - 1; - if (DSAStack->hasExplicitDSA(D, - [&OMPC](const OpenMPClauseKind K) { - if (isOpenMPPrivate(K)) { - OMPC = K; - return true; - } - return false; - }, - NewLevel)) + if (DSAStack->hasExplicitDSA( + D, + [&OMPC](const OpenMPClauseKind K, bool AppliedToPointee) { + if (isOpenMPPrivate(K) && !AppliedToPointee) { + OMPC = K; + return true; + } + return false; + }, + NewLevel)) break; if (DSAStack->checkMappableExprComponentListsForDeclAtLevel( D, NewLevel, @@ -3474,7 +3509,10 @@ class DSAAttrChecker final : public StmtVisitor { // enclosing worksharing or parallel construct may not be accessed in an // explicit task. DVar = Stack->hasInnermostDSA( - VD, [](OpenMPClauseKind C) { return C == OMPC_reduction; }, + VD, + [](OpenMPClauseKind C, bool AppliedToPointee) { + return C == OMPC_reduction && !AppliedToPointee; + }, [](OpenMPDirectiveKind K) { return isOpenMPParallelDirective(K) || isOpenMPWorksharingDirective(K) || isOpenMPTeamsDirective(K); @@ -3559,7 +3597,10 @@ class DSAAttrChecker final : public StmtVisitor { // enclosing worksharing or parallel construct may not be accessed in // an explicit task. DVar = Stack->hasInnermostDSA( - FD, [](OpenMPClauseKind C) { return C == OMPC_reduction; }, + FD, + [](OpenMPClauseKind C, bool AppliedToPointee) { + return C == OMPC_reduction && !AppliedToPointee; + }, [](OpenMPDirectiveKind K) { return isOpenMPParallelDirective(K) || isOpenMPWorksharingDirective(K) || isOpenMPTeamsDirective(K); @@ -14044,7 +14085,10 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, // from the worksharing construct. 
if (isOpenMPTaskingDirective(CurrDir)) { DVar = DSAStack->hasInnermostDSA( - D, [](OpenMPClauseKind C) { return C == OMPC_reduction; }, + D, + [](OpenMPClauseKind C, bool AppliedToPointee) { + return C == OMPC_reduction && !AppliedToPointee; + }, [](OpenMPDirectiveKind K) { return isOpenMPParallelDirective(K) || isOpenMPWorksharingDirective(K) || @@ -14435,7 +14479,11 @@ class DSARefChecker : public StmtVisitor { if (DVar.CKind != OMPC_unknown) return true; DSAStackTy::DSAVarData DVarPrivate = Stack->hasDSA( - VD, isOpenMPPrivate, [](OpenMPDirectiveKind) { return true; }, + VD, + [](OpenMPClauseKind C, bool AppliedToPointee) { + return isOpenMPPrivate(C) && !AppliedToPointee; + }, + [](OpenMPDirectiveKind) { return true; }, /*FromParent=*/true); return DVarPrivate.CKind != OMPC_unknown; } @@ -15513,7 +15561,8 @@ static bool actOnOMPReductionKindClause( // correct analysis of in_reduction clauses. if (CurrDir == OMPD_taskgroup && ClauseKind == OMPC_task_reduction) Modifier = OMPC_REDUCTION_task; - Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref, Modifier); + Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref, Modifier, + ASE || OASE); if (Modifier == OMPC_REDUCTION_task && (CurrDir == OMPD_taskgroup || ((isOpenMPParallelDirective(CurrDir) || diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index b2be31ac09904e..4ab5cc5fd8b989 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -8963,11 +8963,8 @@ QualType Sema::BuildAtomicType(QualType T, SourceLocation Loc) { else if (!T.isTriviallyCopyableType(Context)) // Some other non-trivially-copyable type (probably a C++ class) DisallowedKind = 7; - else if (auto *ExtTy = T->getAs()) { - if (ExtTy->getNumBits() < 8) + else if (T->isExtIntType()) { DisallowedKind = 8; - else if (!llvm::isPowerOf2_32(ExtTy->getNumBits())) - DisallowedKind = 9; } if (DisallowedKind != -1) { diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp 
b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp index 38a680eb04c008..802bc934cfb067 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp @@ -602,11 +602,11 @@ void ExprEngine::handleConstructor(const Expr *E, *Call, *this); ExplodedNodeSet DstEvaluated; - StmtNodeBuilder Bldr(DstPreCall, DstEvaluated, *currBldrCtx); if (CE && CE->getConstructor()->isTrivial() && CE->getConstructor()->isCopyOrMoveConstructor() && !CallOpts.IsArrayCtorOrDtor) { + StmtNodeBuilder Bldr(DstPreCall, DstEvaluated, *currBldrCtx); // FIXME: Handle other kinds of trivial constructors as well. for (ExplodedNodeSet::iterator I = DstPreCall.begin(), E = DstPreCall.end(); I != E; ++I) @@ -626,6 +626,8 @@ void ExprEngine::handleConstructor(const Expr *E, // in the CFG, would be called at the end of the full expression or // later (for life-time extended temporaries) -- but avoids infeasible // paths when no-return temporary destructors are used for assertions. 
+ ExplodedNodeSet DstEvaluatedPostProcessed; + StmtNodeBuilder Bldr(DstEvaluated, DstEvaluatedPostProcessed, *currBldrCtx); const AnalysisDeclContext *ADC = LCtx->getAnalysisDeclContext(); if (!ADC->getCFGBuildOptions().AddTemporaryDtors) { if (llvm::isa_and_nonnull(TargetRegion) && @@ -655,7 +657,7 @@ void ExprEngine::handleConstructor(const Expr *E, } ExplodedNodeSet DstPostArgumentCleanup; - for (ExplodedNode *I : DstEvaluated) + for (ExplodedNode *I : DstEvaluatedPostProcessed) finishArgumentConstruction(DstPostArgumentCleanup, I, *Call); // If there were other constructors called for object-type arguments diff --git a/clang/test/Analysis/smart-ptr-text-output.cpp b/clang/test/Analysis/smart-ptr-text-output.cpp index 5280d0021884d8..1132a37fa66795 100644 --- a/clang/test/Analysis/smart-ptr-text-output.cpp +++ b/clang/test/Analysis/smart-ptr-text-output.cpp @@ -36,14 +36,15 @@ void derefAfterCtrWithNullVariable() { } void derefAfterRelease() { - std::unique_ptr P(new A()); + std::unique_ptr P(new A()); // expected-note {{Smart pointer 'P' is constructed}} + // FIXME: should mark region as uninteresting after release, so above note will not be there P.release(); // expected-note {{Smart pointer 'P' is released and set to null}} P->foo(); // expected-warning {{Dereference of null smart pointer 'P' [alpha.cplusplus.SmartPtr]}} // expected-note@-1{{Dereference of null smart pointer 'P'}} } void derefAfterReset() { - std::unique_ptr P(new A()); + std::unique_ptr P(new A()); // expected-note {{Smart pointer 'P' is constructed}} P.reset(); // expected-note {{Smart pointer 'P' reset using a null value}} P->foo(); // expected-warning {{Dereference of null smart pointer 'P' [alpha.cplusplus.SmartPtr]}} // expected-note@-1{{Dereference of null smart pointer 'P'}} @@ -51,7 +52,7 @@ void derefAfterReset() { void derefAfterResetWithNull() { A *NullInnerPtr = nullptr; // expected-note {{'NullInnerPtr' initialized to a null pointer value}} - std::unique_ptr P(new A()); + 
std::unique_ptr P(new A()); // expected-note {{Smart pointer 'P' is constructed}} P.reset(NullInnerPtr); // expected-note {{Smart pointer 'P' reset using a null value}} P->foo(); // expected-warning {{Dereference of null smart pointer 'P' [alpha.cplusplus.SmartPtr]}} // expected-note@-1{{Dereference of null smart pointer 'P'}} @@ -67,7 +68,7 @@ void derefOnReleasedNullRawPtr() { } void derefOnSwappedNullPtr() { - std::unique_ptr P(new A()); + std::unique_ptr P(new A()); // expected-note {{Smart pointer 'P' is constructed}} std::unique_ptr PNull; // expected-note {{Default constructed smart pointer 'PNull' is null}} P.swap(PNull); // expected-note {{Swapped null smart pointer 'PNull' with smart pointer 'P'}} PNull->foo(); // No warning. @@ -77,13 +78,11 @@ void derefOnSwappedNullPtr() { // FIXME: Fix this test when "std::swap" is modeled seperately. void derefOnStdSwappedNullPtr() { - std::unique_ptr P; + std::unique_ptr P; // expected-note {{Default constructed smart pointer 'P' is null}} std::unique_ptr PNull; // expected-note {{Default constructed smart pointer 'PNull' is null}} std::swap(P, PNull); // expected-note@Inputs/system-header-simulator-cxx.h:978 {{Swapped null smart pointer 'PNull' with smart pointer 'P'}} // expected-note@-1 {{Calling 'swap'}} // expected-note@-2 {{Returning from 'swap'}} - PNull->foo(); // expected-warning {{Dereference of null smart pointer 'PNull' [alpha.cplusplus.SmartPtr]}} - // expected-note@-1{{Dereference of null smart pointer 'PNull'}} P->foo(); // expected-warning {{Dereference of null smart pointer 'P' [alpha.cplusplus.SmartPtr]}} // expected-note@-1{{Dereference of null smart pointer 'P'}} } diff --git a/clang/test/Analysis/smart-ptr.cpp b/clang/test/Analysis/smart-ptr.cpp index f72a918aee203e..bcf1e569d690ab 100644 --- a/clang/test/Analysis/smart-ptr.cpp +++ b/clang/test/Analysis/smart-ptr.cpp @@ -41,6 +41,7 @@ A *return_null() { void derefAfterValidCtr() { std::unique_ptr P(new A()); + clang_analyzer_numTimesReached(); 
// expected-warning {{1}} P->foo(); // No warning. } @@ -50,17 +51,20 @@ void derefOfUnknown(std::unique_ptr P) { void derefAfterDefaultCtr() { std::unique_ptr P; + clang_analyzer_numTimesReached(); // expected-warning {{1}} P->foo(); // expected-warning {{Dereference of null smart pointer 'P' [alpha.cplusplus.SmartPtr]}} } void derefAfterCtrWithNull() { std::unique_ptr P(nullptr); + clang_analyzer_numTimesReached(); // expected-warning {{1}} *P; // expected-warning {{Dereference of null smart pointer 'P' [alpha.cplusplus.SmartPtr]}} } void derefAfterCtrWithNullVariable() { A *InnerPtr = nullptr; std::unique_ptr P(InnerPtr); + clang_analyzer_numTimesReached(); // expected-warning {{1}} P->foo(); // expected-warning {{Dereference of null smart pointer 'P' [alpha.cplusplus.SmartPtr]}} } @@ -87,6 +91,7 @@ void derefAfterResetWithNull() { void derefAfterResetWithNonNull() { std::unique_ptr P; P.reset(new A()); + clang_analyzer_numTimesReached(); // expected-warning {{1}} P->foo(); // No warning. 
} @@ -116,37 +121,40 @@ void pass_smart_ptr_by_const_rvalue_ref(const std::unique_ptr &&a); void pass_smart_ptr_by_ptr(std::unique_ptr *a); void pass_smart_ptr_by_const_ptr(const std::unique_ptr *a); -void regioninvalidationTest() { - { - std::unique_ptr P; - pass_smart_ptr_by_ref(P); - P->foo(); // no-warning - } - { - std::unique_ptr P; - pass_smart_ptr_by_const_ref(P); - P->foo(); // expected-warning {{Dereference of null smart pointer 'P' [alpha.cplusplus.SmartPtr]}} - } - { - std::unique_ptr P; - pass_smart_ptr_by_rvalue_ref(std::move(P)); - P->foo(); // no-warning - } - { - std::unique_ptr P; - pass_smart_ptr_by_const_rvalue_ref(std::move(P)); - P->foo(); // expected-warning {{Dereference of null smart pointer 'P' [alpha.cplusplus.SmartPtr]}} - } - { - std::unique_ptr P; - pass_smart_ptr_by_ptr(&P); - P->foo(); - } - { - std::unique_ptr P; - pass_smart_ptr_by_const_ptr(&P); - P->foo(); // expected-warning {{Dereference of null smart pointer 'P' [alpha.cplusplus.SmartPtr]}} - } +void regioninvalidationWithPassByRef() { + std::unique_ptr P; + pass_smart_ptr_by_ref(P); + P->foo(); // no-warning +} + +void regioninvalidationWithPassByCostRef() { + std::unique_ptr P; + pass_smart_ptr_by_const_ref(P); + P->foo(); // expected-warning {{Dereference of null smart pointer 'P' [alpha.cplusplus.SmartPtr]}} +} + +void regioninvalidationWithPassByRValueRef() { + std::unique_ptr P; + pass_smart_ptr_by_rvalue_ref(std::move(P)); + P->foo(); // no-warning +} + +void regioninvalidationWithPassByConstRValueRef() { + std::unique_ptr P; + pass_smart_ptr_by_const_rvalue_ref(std::move(P)); + P->foo(); // expected-warning {{Dereference of null smart pointer 'P' [alpha.cplusplus.SmartPtr]}} +} + +void regioninvalidationWithPassByPtr() { + std::unique_ptr P; + pass_smart_ptr_by_ptr(&P); + P->foo(); +} + +void regioninvalidationWithPassByConstPtr() { + std::unique_ptr P; + pass_smart_ptr_by_const_ptr(&P); + P->foo(); // expected-warning {{Dereference of null smart pointer 'P' 
[alpha.cplusplus.SmartPtr]}} } struct StructWithSmartPtr { @@ -160,37 +168,40 @@ void pass_struct_with_smart_ptr_by_const_rvalue_ref(const StructWithSmartPtr &&a void pass_struct_with_smart_ptr_by_ptr(StructWithSmartPtr *a); void pass_struct_with_smart_ptr_by_const_ptr(const StructWithSmartPtr *a); -void regioninvalidationTestWithinStruct() { - { - StructWithSmartPtr S; - pass_struct_with_smart_ptr_by_ref(S); - S.P->foo(); // no-warning - } - { - StructWithSmartPtr S; - pass_struct_with_smart_ptr_by_const_ref(S); - S.P->foo(); // expected-warning {{Dereference of null smart pointer 'S.P' [alpha.cplusplus.SmartPtr]}} - } - { - StructWithSmartPtr S; - pass_struct_with_smart_ptr_by_rvalue_ref(std::move(S)); - S.P->foo(); // no-warning - } - { - StructWithSmartPtr S; - pass_struct_with_smart_ptr_by_const_rvalue_ref(std::move(S)); - S.P->foo(); // expected-warning {{Dereference of null smart pointer 'S.P' [alpha.cplusplus.SmartPtr]}} - } - { - StructWithSmartPtr S; - pass_struct_with_smart_ptr_by_ptr(&S); - S.P->foo(); - } - { - StructWithSmartPtr S; - pass_struct_with_smart_ptr_by_const_ptr(&S); - S.P->foo(); // expected-warning {{Dereference of null smart pointer 'S.P' [alpha.cplusplus.SmartPtr]}} - } +void regioninvalidationWithinStructPassByRef() { + StructWithSmartPtr S; + pass_struct_with_smart_ptr_by_ref(S); + S.P->foo(); // no-warning +} + +void regioninvalidationWithinStructPassByConstRef() { + StructWithSmartPtr S; + pass_struct_with_smart_ptr_by_const_ref(S); + S.P->foo(); // expected-warning {{Dereference of null smart pointer 'S.P' [alpha.cplusplus.SmartPtr]}} +} + +void regioninvalidationWithinStructPassByRValueRef() { + StructWithSmartPtr S; + pass_struct_with_smart_ptr_by_rvalue_ref(std::move(S)); + S.P->foo(); // no-warning +} + +void regioninvalidationWithinStructPassByConstRValueRef() { + StructWithSmartPtr S; + pass_struct_with_smart_ptr_by_const_rvalue_ref(std::move(S)); + S.P->foo(); // expected-warning {{Dereference of null smart pointer 'S.P' 
[alpha.cplusplus.SmartPtr]}} +} + +void regioninvalidationWithinStructPassByPtr() { + StructWithSmartPtr S; + pass_struct_with_smart_ptr_by_ptr(&S); + S.P->foo(); // no-warning +} + +void regioninvalidationWithinStructPassByConstPtr() { + StructWithSmartPtr S; + pass_struct_with_smart_ptr_by_const_ptr(&S); + S.P->foo(); // expected-warning {{Dereference of null smart pointer 'S.P' [alpha.cplusplus.SmartPtr]}} } void derefAfterAssignment() { @@ -217,14 +228,20 @@ void derefOnSwappedNullPtr() { (*P).foo(); // expected-warning {{Dereference of null smart pointer 'P' [alpha.cplusplus.SmartPtr]}} } -void derefOnStdSwappedNullPtr() { +void derefOnFirstStdSwappedNullPtr() { std::unique_ptr P; std::unique_ptr PNull; std::swap(P, PNull); - PNull->foo(); // expected-warning {{Dereference of null smart pointer 'PNull' [alpha.cplusplus.SmartPtr]}} P->foo(); // expected-warning {{Dereference of null smart pointer 'P' [alpha.cplusplus.SmartPtr]}} } +void derefOnSecondStdSwappedNullPtr() { + std::unique_ptr P; + std::unique_ptr PNull; + std::swap(P, PNull); + PNull->foo(); // expected-warning {{Dereference of null smart pointer 'PNull' [alpha.cplusplus.SmartPtr]}} +} + void derefOnSwappedValidPtr() { std::unique_ptr P(new A()); std::unique_ptr PValid(new A()); diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c index a575f5a924c5e6..fe3e678a579485 100644 --- a/clang/test/CodeGen/builtins-ppc-p10vector.c +++ b/clang/test/CodeGen/builtins-ppc-p10vector.c @@ -97,6 +97,36 @@ vector unsigned long long test_vpextd(void) { return vec_pext(vulla, vullb); } +unsigned int test_vec_extractm_uc(void) { + // CHECK: @llvm.ppc.altivec.vextractbm(<16 x i8> %{{.+}}) + // CHECK-NEXT: ret i32 + return vec_extractm(vuca); +} + +unsigned int test_vec_extractm_us(void) { + // CHECK: @llvm.ppc.altivec.vextracthm(<8 x i16> %{{.+}}) + // CHECK-NEXT: ret i32 + return vec_extractm(vusa); +} + +unsigned int test_vec_extractm_ui(void) { + // CHECK: 
@llvm.ppc.altivec.vextractwm(<4 x i32> %{{.+}}) + // CHECK-NEXT: ret i32 + return vec_extractm(vuia); +} + +unsigned int test_vec_extractm_ull(void) { + // CHECK: @llvm.ppc.altivec.vextractdm(<2 x i64> %{{.+}}) + // CHECK-NEXT: ret i32 + return vec_extractm(vulla); +} + +unsigned int test_vec_extractm_u128(void) { + // CHECK: @llvm.ppc.altivec.vextractqm(<1 x i128> %{{.+}}) + // CHECK-NEXT: ret i32 + return vec_extractm(vui128a); +} + vector unsigned long long test_vcfuged(void) { // CHECK: @llvm.ppc.altivec.vcfuged(<2 x i64> // CHECK-NEXT: ret <2 x i64> diff --git a/clang/test/CodeGen/vla.c b/clang/test/CodeGen/vla.c index 16b82f4acc7d38..3142050149aaab 100644 --- a/clang/test/CodeGen/vla.c +++ b/clang/test/CodeGen/vla.c @@ -200,13 +200,13 @@ void test7(int a[b(0)]) { // Make sure we emit dereferenceable or nonnull when the static keyword is // provided. void test8(int a[static 3]) { } -// CHECK: define void @test8(i32* dereferenceable(12) %a) +// CHECK: define void @test8(i32* align 4 dereferenceable(12) %a) void test9(int n, int a[static n]) { } -// NULL-INVALID: define void @test9(i32 %n, i32* nonnull %a) -// NULL-VALID: define void @test9(i32 %n, i32* %a) +// NULL-INVALID: define void @test9(i32 %n, i32* nonnull align 4 %a) +// NULL-VALID: define void @test9(i32 %n, i32* align 4 %a) // Make sure a zero-sized static array extent is still required to be nonnull. 
void test10(int a[static 0]) {} -// NULL-INVALID: define void @test10(i32* nonnull %a) -// NULL-VALID: define void @test10(i32* %a) +// NULL-INVALID: define void @test10(i32* nonnull align 4 %a) +// NULL-VALID: define void @test10(i32* align 4 %a) diff --git a/clang/test/CoverageMapping/Inputs/comment.h b/clang/test/CoverageMapping/Inputs/comment.h new file mode 100644 index 00000000000000..eec5833c2bd0bc --- /dev/null +++ b/clang/test/CoverageMapping/Inputs/comment.h @@ -0,0 +1,6 @@ + + + + + +x = 0; diff --git a/clang/test/CoverageMapping/comment.cpp b/clang/test/CoverageMapping/comment.cpp new file mode 100644 index 00000000000000..f8e4b4912e182f --- /dev/null +++ b/clang/test/CoverageMapping/comment.cpp @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s | FileCheck %s + +int f() { + int x = 0; +#include "Inputs/comment.h" /* + */ + return x; +} + +// CHECK: File 0, 3:9 -> 8:2 = #0 +// CHECK-NEXT: Expansion,File 0, 5:10 -> 5:28 = #0 +// CHECK-NEXT: Skipped,File 0, 6:1 -> 6:7 = 0 +// CHECK-NEXT: File 1, 1:1 -> 7:1 = #0 diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index bf6eaefe0b3caf..2cad62e2e245f0 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -8,6 +8,11 @@ // AARCH64: note: valid target CPU values are: // AARCH64-SAME: cortex-a35, +// RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64 +// TUNE_AARCH64: error: unknown target CPU 'not-a-cpu' +// TUNE_AARCH64: note: valid target CPU values are: +// TUNE_AARCH64-SAME: cortex-a35, + // RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86 // X86: error: unknown target CPU 'not-a-cpu' // X86: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, @@ -32,6 +37,30 @@ // X86_64-SAME: 
athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, // X86_64-SAME: btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, x86-64 +// RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86 +// TUNE_X86: error: unknown target CPU 'not-a-cpu' +// TUNE_X86: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, +// TUNE_X86-SAME: i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, +// TUNE_X86-SAME: pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, +// TUNE_X86-SAME: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, +// TUNE_X86-SAME: nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, +// TUNE_X86-SAME: core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, +// TUNE_X86-SAME: skx, cascadelake, cooperlake, cannonlake, icelake-client, icelake-server, tigerlake, knl, knm, lakemont, k6, k6-2, k6-3, +// TUNE_X86-SAME: athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, +// TUNE_X86-SAME: athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, +// TUNE_X86-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, +// TUNE_X86-SAME: x86-64, geode + +// RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64 +// TUNE_X86_64: error: unknown target CPU 'not-a-cpu' +// TUNE_X86_64: note: valid target CPU values are: nocona, core2, penryn, bonnell, +// TUNE_X86_64-SAME: atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, +// TUNE_X86_64-SAME: sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, +// TUNE_X86_64-SAME: core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, +// TUNE_X86_64-SAME: icelake-client, icelake-server, tigerlake, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, +// TUNE_X86_64-SAME: 
athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, +// TUNE_X86_64-SAME: btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, x86-64 + // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX // NVPTX: error: unknown target CPU 'not-a-cpu' // NVPTX: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp index 971e9be8534ba6..995ded43db3d84 100644 --- a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp @@ -20,9 +20,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: alloca i32, // CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, // CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], @@ -124,7 +124,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/for_reduction_task_codegen.cpp b/clang/test/OpenMP/for_reduction_task_codegen.cpp index ea8fc55d9cb2f9..0018e109aaed9a 100644 --- a/clang/test/OpenMP/for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_task_codegen.cpp @@ -124,7 +124,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git 
a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp index b4f4f83ec95549..fcee3d645b4ae1 100644 --- a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp @@ -19,9 +19,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: alloca i32, // CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, // CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], @@ -123,7 +123,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp index 0f8366fa95e338..ab76987a59c931 100644 --- 
a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp @@ -19,9 +19,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, // CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], // CHECK: [[TG:%.+]] = alloca i8*, @@ -122,7 +122,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp index 5e04aa8c1ec287..c64ffb50800648 100644 --- a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp @@ -19,9 +19,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, 
void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, // CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], // CHECK: [[TG:%.+]] = alloca i8*, @@ -122,7 +122,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp index 867eb45a1332b6..5481f0b2daa4b3 100644 --- a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp @@ -19,9 +19,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: alloca i32, // CHECK: alloca i32, // CHECK: alloca i32, @@ -127,7 +127,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/sections_reduction_task_codegen.cpp b/clang/test/OpenMP/sections_reduction_task_codegen.cpp index be67a2a174004f..1c0be118a03ca4 100644 --- a/clang/test/OpenMP/sections_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/sections_reduction_task_codegen.cpp @@ -128,7 +128,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] 
= getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp index 5c5ea6b90d5297..66a20141df0394 100644 --- a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp @@ -19,9 +19,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: alloca i32, // CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, // CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], @@ -123,7 +123,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp 
b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp index 2fc49d44c1e904..e42e372ea67a72 100644 --- a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp @@ -19,9 +19,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, // CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], // CHECK: [[TG:%.+]] = alloca i8*, @@ -122,7 +122,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp index 06c0f8744e8cca..fbd990699d8327 100644 --- 
a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -19,9 +19,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: alloca i32, // CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, // CHECK: [[TR:%.+]] = alloca [2 x [[TASKRED_TY:%struct.kmp_taskred_input_t.*]]], @@ -123,7 +123,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp index 194999f8cbb05c..26ca2352cc9ac5 100644 --- 
a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -20,9 +20,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: alloca i32, // CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, // CHECK: [[TR:%.+]] = alloca [2 x [[TASKRED_TY:%struct.kmp_taskred_input_t.*]]], @@ -124,7 +124,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c index 65928b08be8e08..63a96f20de88b2 100644 --- a/clang/test/Preprocessor/init.c +++ b/clang/test/Preprocessor/init.c @@ -1607,6 +1607,7 @@ // RUN: 
%clang_cc1 -E -dM -ffreestanding -triple=mips64-unknown-openbsd6.1 < /dev/null | FileCheck -match-full-lines -check-prefix OPENBSD %s // RUN: %clang_cc1 -E -dM -ffreestanding -triple=mips64el-unknown-openbsd6.1 < /dev/null | FileCheck -match-full-lines -check-prefix OPENBSD %s // RUN: %clang_cc1 -E -dM -ffreestanding -triple=sparc64-unknown-openbsd6.1 < /dev/null | FileCheck -match-full-lines -check-prefix OPENBSD %s +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=riscv64-unknown-openbsd6.1 < /dev/null | FileCheck -match-full-lines -check-prefix OPENBSD %s // OPENBSD:#define __ELF__ 1 // OPENBSD:#define __INT16_TYPE__ short // OPENBSD:#define __INT32_TYPE__ int diff --git a/clang/test/Sema/builtins.c b/clang/test/Sema/builtins.c index 4b445724f712a1..e4093edb5f0063 100644 --- a/clang/test/Sema/builtins.c +++ b/clang/test/Sema/builtins.c @@ -285,12 +285,16 @@ void test_ei_i42i(_ExtInt(42) *ptr, int value) { __sync_fetch_and_add(ptr, value); // expected-error {{Atomic memory operand must have a power-of-two size}} // expected-warning@+1 {{the semantics of this intrinsic changed with GCC version 4.4 - the newer semantics are provided here}} __sync_nand_and_fetch(ptr, value); // expected-error {{Atomic memory operand must have a power-of-two size}} + + __atomic_fetch_add(ptr, 1, 0); // expected-error {{argument to atomic builtin of type '_ExtInt' is not supported}} } void test_ei_i64i(_ExtInt(64) *ptr, int value) { __sync_fetch_and_add(ptr, value); // expect success // expected-warning@+1 {{the semantics of this intrinsic changed with GCC version 4.4 - the newer semantics are provided here}} __sync_nand_and_fetch(ptr, value); // expect success + + __atomic_fetch_add(ptr, 1, 0); // expected-error {{argument to atomic builtin of type '_ExtInt' is not supported}} } void test_ei_ii42(int *ptr, _ExtInt(42) value) { diff --git a/clang/test/SemaCXX/ext-int.cpp b/clang/test/SemaCXX/ext-int.cpp index 0f2a3b89be1f1c..a619cd2eb5de1a 100644 --- a/clang/test/SemaCXX/ext-int.cpp 
+++ b/clang/test/SemaCXX/ext-int.cpp @@ -91,10 +91,11 @@ typedef _ExtInt(32) __attribute__((vector_size(16))) VecTy; _Complex _ExtInt(3) Cmplx; // Reject cases of _Atomic: -// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(4)' with less than 1 byte of precision}} +// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(4)'}} _Atomic _ExtInt(4) TooSmallAtomic; -// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(9)' with a non power of 2 precision}} +// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(9)'}} _Atomic _ExtInt(9) NotPow2Atomic; +// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(128)'}} _Atomic _ExtInt(128) JustRightAtomic; // Test result types of Unary/Bitwise/Binary Operations: diff --git a/clang/unittests/Tooling/CMakeLists.txt b/clang/unittests/Tooling/CMakeLists.txt index f290c3d2bedee8..9de330ab73d425 100644 --- a/clang/unittests/Tooling/CMakeLists.txt +++ b/clang/unittests/Tooling/CMakeLists.txt @@ -22,6 +22,7 @@ add_clang_unittest(ToolingTests RecursiveASTVisitorTests/Attr.cpp RecursiveASTVisitorTests/Callbacks.cpp RecursiveASTVisitorTests/Class.cpp + RecursiveASTVisitorTests/Concept.cpp RecursiveASTVisitorTests/ConstructExpr.cpp RecursiveASTVisitorTests/CXXBoolLiteralExpr.cpp RecursiveASTVisitorTests/CXXMemberCall.cpp diff --git a/clang/unittests/Tooling/RecursiveASTVisitorTests/Concept.cpp b/clang/unittests/Tooling/RecursiveASTVisitorTests/Concept.cpp new file mode 100644 index 00000000000000..f0f700204dd5a9 --- /dev/null +++ b/clang/unittests/Tooling/RecursiveASTVisitorTests/Concept.cpp @@ -0,0 +1,45 @@ +//===- unittest/Tooling/RecursiveASTVisitorTests/Concept.cpp----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TestVisitor.h" +#include "clang/AST/ExprConcepts.h" + +using namespace clang; + +namespace { + +struct ConceptVisitor : ExpectedLocationVisitor { + bool VisitConceptSpecializationExpr(ConceptSpecializationExpr *E) { + ++ConceptSpecializationExprsVisited; + return true; + } + bool TraverseConceptReference(const ConceptReference &R) { + ++ConceptReferencesTraversed; + return true; + } + + int ConceptSpecializationExprsVisited = 0; + int ConceptReferencesTraversed = 0; +}; + +TEST(RecursiveASTVisitor, ConstrainedParameter) { + ConceptVisitor Visitor; + EXPECT_TRUE(Visitor.runOver("template concept Fooable = true;\n" + "template void bar(T);", + ConceptVisitor::Lang_CXX2a)); + // Check that we visit the "Fooable T" template parameter's TypeConstraint's + // ImmediatelyDeclaredConstraint, which is a ConceptSpecializationExpr. + EXPECT_EQ(1, Visitor.ConceptSpecializationExprsVisited); + // There are two ConceptReference objects in the AST: the base subobject + // of the ConceptSpecializationExpr, and the base subobject of the + // TypeConstraint itself. To avoid traversing the concept and arguments + // multiple times, we only traverse one. 
+ EXPECT_EQ(1, Visitor.ConceptReferencesTraversed); +} + +} // end anonymous namespace diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp index 211e8b1ae901ff..fd858dfba91fe1 100644 --- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -437,672 +437,355 @@ void test() { } TEST_P(SyntaxTreeTest, UnqualifiedId_Identifier) { - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test(int a) { - a; + [[a]]; } )cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-int - | | `-SimpleDeclarator - | | `-a - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-IdExpression - | | `-UnqualifiedId - | | `-a - | `-; - `-} -)txt")); + {R"txt( +IdExpression +`-UnqualifiedId + `-a +)txt"})); } TEST_P(SyntaxTreeTest, UnqualifiedId_OperatorFunctionId) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { friend X operator+(const X&, const X&); }; void test(X x) { - operator+(x, x); -} -)cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-UnknownDeclaration -| | `-SimpleDeclaration -| | |-friend -| | |-X -| | |-SimpleDeclarator -| | | |-operator -| | | |-+ -| | | `-ParametersAndQualifiers -| | | |-( -| | | |-SimpleDeclaration -| | | | |-const -| | | | |-X -| | | | `-SimpleDeclarator -| | | | `-& -| | | |-, -| | | |-SimpleDeclaration -| | | | |-const -| | | | |-X -| | | | `-SimpleDeclarator -| | | | `-& -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-UnknownExpression - | | |-IdExpression - | 
| | `-UnqualifiedId - | | | |-operator - | | | `-+ - | | |-( - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-x - | | |-, - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-x - | | `-) - | `-; - `-} -)txt")); + [[operator+(x, x)]]; +} +)cpp", + {R"txt( +UnknownExpression +|-IdExpression +| `-UnqualifiedId +| |-operator +| `-+ +|-( +|-IdExpression +| `-UnqualifiedId +| `-x +|-, +|-IdExpression +| `-UnqualifiedId +| `-x +`-) +)txt"})); } TEST_P(SyntaxTreeTest, UnqualifiedId_ConversionFunctionId) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { operator int(); }; void test(X x) { // TODO: Expose `id-expression` from `MemberExpr` - x.operator int(); + [[x.operator int()]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-SimpleDeclaration -| | |-SimpleDeclarator -| | | |-operator -| | | |-int -| | | `-ParametersAndQualifiers -| | | |-( -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-UnknownExpression - | | |-UnknownExpression - | | | |-IdExpression - | | | | `-UnqualifiedId - | | | | `-x - | | | |-. - | | | |-operator - | | | `-int - | | |-( - | | `-) - | `-; - `-} -)txt")); + {R"txt( +UnknownExpression +|-UnknownExpression +| |-IdExpression +| | `-UnqualifiedId +| | `-x +| |-. 
+| |-operator +| `-int +|-( +`-) +)txt"})); } TEST_P(SyntaxTreeTest, UnqualifiedId_LiteralOperatorId) { if (!GetParam().isCXX11OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( unsigned operator "" _w(char); void test() { - operator "" _w('1'); + [[operator "" _w('1')]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-unsigned -| |-SimpleDeclarator -| | |-operator -| | |-"" -| | |-_w -| | `-ParametersAndQualifiers -| | |-( -| | |-SimpleDeclaration -| | | `-char -| | `-) -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-UnknownExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | |-operator - | | | |-"" - | | | `-_w - | | |-( - | | |-CharacterLiteralExpression - | | | `-'1' - | | `-) - | `-; - `-} -)txt")); + {R"txt( +UnknownExpression +|-IdExpression +| `-UnqualifiedId +| |-operator +| |-"" +| `-_w +|-( +|-CharacterLiteralExpression +| `-'1' +`-) +)txt"})); } TEST_P(SyntaxTreeTest, UnqualifiedId_Destructor) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { }; void test(X x) { // TODO: Expose `id-expression` from `MemberExpr` - x.~X(); + [[x.~X()]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-UnknownExpression - | | |-UnknownExpression - | | | |-IdExpression - | | | | `-UnqualifiedId - | | | | `-x - | | | |-. - | | | |-~ - | | | `-X - | | |-( - | | `-) - | `-; - `-} -)txt")); + {R"txt( +UnknownExpression +|-UnknownExpression +| |-IdExpression +| | `-UnqualifiedId +| | `-x +| |-. 
+| |-~ +| `-X +|-( +`-) +)txt"})); } TEST_P(SyntaxTreeTest, UnqualifiedId_DecltypeDestructor) { if (!GetParam().isCXX11OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { }; void test(X x) { // TODO: Expose `id-expression` from `MemberExpr` - x.~decltype(x)(); -} -)cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-UnknownExpression - | | |-UnknownExpression - | | | |-IdExpression - | | | | `-UnqualifiedId - | | | | `-x - | | | |-. - | | | `-~ - | | |-decltype - | | |-( - | | |-x - | | |-) - | | |-( - | | `-) - | `-; - `-} -)txt")); + [[x.~decltype(x)()]]; +} +)cpp", + {R"txt( +UnknownExpression +|-UnknownExpression +| |-IdExpression +| | `-UnqualifiedId +| | `-x +| |-. 
+| `-~ +|-decltype +|-( +|-x +|-) +|-( +`-) +)txt"})); } TEST_P(SyntaxTreeTest, UnqualifiedId_TemplateId) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( template T f(); void test() { - f(); + [[f()]]; } )cpp", - R"txt( -*: TranslationUnit -|-TemplateDeclaration -| |-template -| |-< -| |-UnknownDeclaration -| | |-typename -| | `-T -| |-> -| `-SimpleDeclaration -| |-T -| |-SimpleDeclarator -| | |-f -| | `-ParametersAndQualifiers -| | |-( -| | `-) -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-UnknownExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | |-f - | | | |-< - | | | |-int - | | | `-> - | | |-( - | | `-) - | `-; - `-} -)txt")); + {R"txt( +UnknownExpression +|-IdExpression +| `-UnqualifiedId +| |-f +| |-< +| |-int +| `-> +|-( +`-) +)txt"})); } TEST_P(SyntaxTreeTest, QualifiedId_NamespaceSpecifier) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( namespace n { struct S { }; } void test() { - ::n::S s1; - n::S s2; + [[::n::S s1]]; + [[n::S s2]]; } )cpp", - R"txt( -*: TranslationUnit -|-NamespaceDefinition -| |-namespace -| |-n -| |-{ -| |-SimpleDeclaration -| | |-struct -| | |-S -| | |-{ -| | |-} -| | `-; -| `-} -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-DeclarationStatement - | |-SimpleDeclaration - | | |-NestedNameSpecifier - | | | |-:: - | | | |-IdentifierNameSpecifier - | | | | `-n - | | | `-:: - | | |-S - | | `-SimpleDeclarator - | | `-UnknownExpression - | | `-s1 - | `-; - |-DeclarationStatement - | |-SimpleDeclaration - | | |-NestedNameSpecifier - | | | |-IdentifierNameSpecifier - | | | | `-n - | | | `-:: - | | |-S - | | `-SimpleDeclarator - | | `-UnknownExpression - | | `-s2 - | `-; - `-} 
-)txt")); + {R"txt( +SimpleDeclaration +|-NestedNameSpecifier +| |-:: +| |-IdentifierNameSpecifier +| | `-n +| `-:: +|-S +`-SimpleDeclarator + `-UnknownExpression + `-s1 +)txt", + R"txt( +SimpleDeclaration +|-NestedNameSpecifier +| |-IdentifierNameSpecifier +| | `-n +| `-:: +|-S +`-SimpleDeclarator + `-UnknownExpression + `-s2 +)txt"})); } TEST_P(SyntaxTreeTest, QualifiedId_TemplateSpecifier) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( template struct ST { struct S { }; }; void test() { - ::template ST::S s1; - ::ST::S s2; + [[::template ST::S s1]]; + [[::ST::S s2]]; } )cpp", - R"txt( -*: TranslationUnit -|-TemplateDeclaration -| |-template -| |-< -| |-UnknownDeclaration -| | |-typename -| | `-T -| |-> -| `-SimpleDeclaration -| |-struct -| |-ST -| |-{ -| |-SimpleDeclaration -| | |-struct -| | |-S -| | |-{ -| | |-} -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-DeclarationStatement - | |-SimpleDeclaration - | | |-NestedNameSpecifier - | | | |-:: - | | | |-SimpleTemplateNameSpecifier - | | | | |-template - | | | | |-ST - | | | | |-< - | | | | |-int - | | | | `-> - | | | `-:: - | | |-S - | | `-SimpleDeclarator - | | `-UnknownExpression - | | `-s1 - | `-; - |-DeclarationStatement - | |-SimpleDeclaration - | | |-NestedNameSpecifier - | | | |-:: - | | | |-SimpleTemplateNameSpecifier - | | | | |-ST - | | | | |-< - | | | | |-int - | | | | `-> - | | | `-:: - | | |-S - | | `-SimpleDeclarator - | | `-UnknownExpression - | | `-s2 - | `-; - `-} -)txt")); + {R"txt( +SimpleDeclaration +|-NestedNameSpecifier +| |-:: +| |-SimpleTemplateNameSpecifier +| | |-template +| | |-ST +| | |-< +| | |-int +| | `-> +| `-:: +|-S +`-SimpleDeclarator + `-UnknownExpression + `-s1 +)txt", + R"txt( +SimpleDeclaration +|-NestedNameSpecifier +| |-:: +| |-SimpleTemplateNameSpecifier +| | |-ST +| | |-< +| | |-int +| | 
`-> +| `-:: +|-S +`-SimpleDeclarator + `-UnknownExpression + `-s2 +)txt"})); } TEST_P(SyntaxTreeTest, QualifiedId_DecltypeSpecifier) { if (!GetParam().isCXX11OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct S { static void f(){} }; void test(S s) { - decltype(s)::f(); + [[decltype(s)::f()]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-S -| |-{ -| |-SimpleDeclaration -| | |-static -| | |-void -| | |-SimpleDeclarator -| | | |-f -| | | `-ParametersAndQualifiers -| | | |-( -| | | `-) -| | `-CompoundStatement -| | |-{ -| | `-} -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-S - | | `-SimpleDeclarator - | | `-s - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-UnknownExpression - | | |-IdExpression - | | | |-NestedNameSpecifier - | | | | |-DecltypeNameSpecifier - | | | | | |-decltype - | | | | | |-( - | | | | | |-IdExpression - | | | | | | `-UnqualifiedId - | | | | | | `-s - | | | | | `-) - | | | | `-:: - | | | `-UnqualifiedId - | | | `-f - | | |-( - | | `-) - | `-; - `-} -)txt")); + {R"txt( +UnknownExpression +|-IdExpression +| |-NestedNameSpecifier +| | |-DecltypeNameSpecifier +| | | |-decltype +| | | |-( +| | | |-IdExpression +| | | | `-UnqualifiedId +| | | | `-s +| | | `-) +| | `-:: +| `-UnqualifiedId +| `-f +|-( +`-) +)txt"})); } TEST_P(SyntaxTreeTest, QualifiedId_OptionalTemplateKw) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct S { template static U f(); }; void test() { - S::f(); - S::template f(); -} -)cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-S -| |-{ -| |-TemplateDeclaration -| | |-template -| | |-< -| | |-UnknownDeclaration -| | | |-typename -| | | `-U -| | |-> -| | `-SimpleDeclaration -| | |-static -| | |-U -| | |-SimpleDeclarator -| | | |-f -| | | 
`-ParametersAndQualifiers -| | | |-( -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-UnknownExpression - | | |-IdExpression - | | | |-NestedNameSpecifier - | | | | |-IdentifierNameSpecifier - | | | | | `-S - | | | | `-:: - | | | `-UnqualifiedId - | | | |-f - | | | |-< - | | | |-int - | | | `-> - | | |-( - | | `-) - | `-; - |-ExpressionStatement - | |-UnknownExpression - | | |-IdExpression - | | | |-NestedNameSpecifier - | | | | |-IdentifierNameSpecifier - | | | | | `-S - | | | | `-:: - | | | |-template - | | | `-UnqualifiedId - | | | |-f - | | | |-< - | | | |-int - | | | `-> - | | |-( - | | `-) - | `-; - `-} -)txt")); + [[S::f()]]; + [[S::template f()]]; +} +)cpp", + {R"txt( +UnknownExpression +|-IdExpression +| |-NestedNameSpecifier +| | |-IdentifierNameSpecifier +| | | `-S +| | `-:: +| `-UnqualifiedId +| |-f +| |-< +| |-int +| `-> +|-( +`-) +)txt", + R"txt( +UnknownExpression +|-IdExpression +| |-NestedNameSpecifier +| | |-IdentifierNameSpecifier +| | | `-S +| | `-:: +| |-template +| `-UnqualifiedId +| |-f +| |-< +| |-int +| `-> +|-( +`-) +)txt"})); } TEST_P(SyntaxTreeTest, QualifiedId_Complex) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( namespace n { template @@ -1112,80 +795,33 @@ namespace n { }; } void test() { - ::n::template ST::template f(); -} -)cpp", - R"txt( -*: TranslationUnit -|-NamespaceDefinition -| |-namespace -| |-n -| |-{ -| |-TemplateDeclaration -| | |-template -| | |-< -| | |-UnknownDeclaration -| | | |-typename -| | | `-T -| | |-> -| | `-SimpleDeclaration -| | |-struct -| | |-ST -| | |-{ -| | |-TemplateDeclaration -| | | |-template -| | | |-< -| | | |-UnknownDeclaration -| | | | |-typename -| | | | `-U -| | | |-> -| | | `-SimpleDeclaration -| | | |-static -| | | |-U -| | | |-SimpleDeclarator -| | | | |-f -| | | | 
`-ParametersAndQualifiers -| | | | |-( -| | | | `-) -| | | `-; -| | |-} -| | `-; -| `-} -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-UnknownExpression - | | |-IdExpression - | | | |-NestedNameSpecifier - | | | | |-:: - | | | | |-IdentifierNameSpecifier - | | | | | `-n - | | | | |-:: - | | | | |-SimpleTemplateNameSpecifier - | | | | | |-template - | | | | | |-ST - | | | | | |-< - | | | | | |-int - | | | | | `-> - | | | | `-:: - | | | |-template - | | | `-UnqualifiedId - | | | |-f - | | | |-< - | | | |-int - | | | `-> - | | |-( - | | `-) - | `-; - `-} -)txt")); + [[::n::template ST::template f()]]; +} +)cpp", + {R"txt( +UnknownExpression +|-IdExpression +| |-NestedNameSpecifier +| | |-:: +| | |-IdentifierNameSpecifier +| | | `-n +| | |-:: +| | |-SimpleTemplateNameSpecifier +| | | |-template +| | | |-ST +| | | |-< +| | | |-int +| | | `-> +| | `-:: +| |-template +| `-UnqualifiedId +| |-f +| |-< +| |-int +| `-> +|-( +`-) +)txt"})); } TEST_P(SyntaxTreeTest, QualifiedId_DependentType) { @@ -1197,255 +833,152 @@ TEST_P(SyntaxTreeTest, QualifiedId_DependentType) { // tree when `-fdelayed-template-parsing` is active. 
return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( template void test() { - T::template U::f(); - T::U::f(); - T::template f<0>(); + [[T::template U::f()]]; + [[T::U::f()]]; + [[T::template f<0>()]]; } )cpp", - R"txt( -*: TranslationUnit -`-TemplateDeclaration - |-template - |-< - |-UnknownDeclaration - | |-typename - | `-T - |-> - `-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-UnknownExpression - | | |-IdExpression - | | | |-NestedNameSpecifier - | | | | |-IdentifierNameSpecifier - | | | | | `-T - | | | | |-:: - | | | | |-SimpleTemplateNameSpecifier - | | | | | |-template - | | | | | |-U - | | | | | |-< - | | | | | |-int - | | | | | `-> - | | | | `-:: - | | | `-UnqualifiedId - | | | `-f - | | |-( - | | `-) - | `-; - |-ExpressionStatement - | |-UnknownExpression - | | |-IdExpression - | | | |-NestedNameSpecifier - | | | | |-IdentifierNameSpecifier - | | | | | `-T - | | | | |-:: - | | | | |-IdentifierNameSpecifier - | | | | | `-U - | | | | `-:: - | | | `-UnqualifiedId - | | | `-f - | | |-( - | | `-) - | `-; - |-ExpressionStatement - | |-UnknownExpression - | | |-IdExpression - | | | |-NestedNameSpecifier - | | | | |-IdentifierNameSpecifier - | | | | | `-T - | | | | `-:: - | | | |-template - | | | `-UnqualifiedId - | | | |-f - | | | |-< - | | | |-IntegerLiteralExpression - | | | | `-0 - | | | `-> - | | |-( - | | `-) - | `-; - `-} -)txt")); + {R"txt( +UnknownExpression +|-IdExpression +| |-NestedNameSpecifier +| | |-IdentifierNameSpecifier +| | | `-T +| | |-:: +| | |-SimpleTemplateNameSpecifier +| | | |-template +| | | |-U +| | | |-< +| | | |-int +| | | `-> +| | `-:: +| `-UnqualifiedId +| `-f +|-( +`-) +)txt", + R"txt( +UnknownExpression +|-IdExpression +| |-NestedNameSpecifier +| | |-IdentifierNameSpecifier +| | | `-T +| | |-:: +| | |-IdentifierNameSpecifier +| | | `-U +| | `-:: +| `-UnqualifiedId +| `-f +|-( +`-) 
+)txt", + R"txt( +UnknownExpression +|-IdExpression +| |-NestedNameSpecifier +| | |-IdentifierNameSpecifier +| | | `-T +| | `-:: +| |-template +| `-UnqualifiedId +| |-f +| |-< +| |-IntegerLiteralExpression +| | `-0 +| `-> +|-( +`-) +)txt"})); } TEST_P(SyntaxTreeTest, ParenExpr) { - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { - (1); - ((1)); - (1 + (2)); -} -)cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-ParenExpression - | | |-( - | | |-IntegerLiteralExpression - | | | `-1 - | | `-) - | `-; - |-ExpressionStatement - | |-ParenExpression - | | |-( - | | |-ParenExpression - | | | |-( - | | | |-IntegerLiteralExpression - | | | | `-1 - | | | `-) - | | `-) - | `-; - |-ExpressionStatement - | |-ParenExpression - | | |-( - | | |-BinaryOperatorExpression - | | | |-IntegerLiteralExpression - | | | | `-1 - | | | |-+ - | | | `-ParenExpression - | | | |-( - | | | |-IntegerLiteralExpression - | | | | `-2 - | | | `-) - | | `-) - | `-; - `-} -)txt")); + [[(1)]]; + [[((1))]]; + [[(1 + (2))]]; +} +)cpp", + {R"txt( +ParenExpression +|-( +|-IntegerLiteralExpression +| `-1 +`-) +)txt", + R"txt( +ParenExpression +|-( +|-ParenExpression +| |-( +| |-IntegerLiteralExpression +| | `-1 +| `-) +`-) +)txt", + R"txt( +ParenExpression +|-( +|-BinaryOperatorExpression +| |-IntegerLiteralExpression +| | `-1 +| |-+ +| `-ParenExpression +| |-( +| |-IntegerLiteralExpression +| | `-2 +| `-) +`-) +)txt"})); } TEST_P(SyntaxTreeTest, UserDefinedLiteral_Char) { if (!GetParam().isCXX11OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( unsigned operator "" _c(char); void test() { - '2'_c; + [['2'_c]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-unsigned -| |-SimpleDeclarator -| | |-operator -| | |-"" -| | |-_c -| | `-ParametersAndQualifiers 
-| | |-( -| | |-SimpleDeclaration -| | | `-char -| | `-) -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-CharUserDefinedLiteralExpression - | | `-'2'_c - | `-; - `-} -)txt")); + {R"txt( +CharUserDefinedLiteralExpression +`-'2'_c +)txt"})); } TEST_P(SyntaxTreeTest, UserDefinedLiteral_String) { if (!GetParam().isCXX11OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( typedef decltype(sizeof(void *)) size_t; unsigned operator "" _s(const char*, size_t); void test() { - "12"_s; + [["12"_s]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-typedef -| |-decltype -| |-( -| |-UnknownExpression -| | |-sizeof -| | |-( -| | |-void -| | |-* -| | `-) -| |-) -| |-SimpleDeclarator -| | `-size_t -| `-; -|-SimpleDeclaration -| |-unsigned -| |-SimpleDeclarator -| | |-operator -| | |-"" -| | |-_s -| | `-ParametersAndQualifiers -| | |-( -| | |-SimpleDeclaration -| | | |-const -| | | |-char -| | | `-SimpleDeclarator -| | | `-* -| | |-, -| | |-SimpleDeclaration -| | | `-size_t -| | `-) -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-StringUserDefinedLiteralExpression - | | `-"12"_s - | `-; - `-} -)txt")); + {R"txt( +StringUserDefinedLiteralExpression +`-"12"_s +)txt"})); } TEST_P(SyntaxTreeTest, UserDefinedLiteral_Integer) { if (!GetParam().isCXX11OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( unsigned operator "" _i(unsigned long long); unsigned operator "" _r(const char*); @@ -1453,89 +986,30 @@ template unsigned operator "" _t(); void test() { - 12_i; - 12_r; - 12_t; + [[12_i]]; + [[12_r]]; + [[12_t]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-unsigned -| |-SimpleDeclarator -| | |-operator -| | |-"" 
-| | |-_i -| | `-ParametersAndQualifiers -| | |-( -| | |-SimpleDeclaration -| | | |-unsigned -| | | |-long -| | | `-long -| | `-) -| `-; -|-SimpleDeclaration -| |-unsigned -| |-SimpleDeclarator -| | |-operator -| | |-"" -| | |-_r -| | `-ParametersAndQualifiers -| | |-( -| | |-SimpleDeclaration -| | | |-const -| | | |-char -| | | `-SimpleDeclarator -| | | `-* -| | `-) -| `-; -|-TemplateDeclaration -| |-template -| |-< -| |-SimpleDeclaration -| | `-char -| |-... -| |-> -| `-SimpleDeclaration -| |-unsigned -| |-SimpleDeclarator -| | |-operator -| | |-"" -| | |-_t -| | `-ParametersAndQualifiers -| | |-( -| | `-) -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-IntegerUserDefinedLiteralExpression - | | `-12_i - | `-; - |-ExpressionStatement - | |-IntegerUserDefinedLiteralExpression - | | `-12_r - | `-; - |-ExpressionStatement - | |-IntegerUserDefinedLiteralExpression - | | `-12_t - | `-; - `-} -)txt")); + {R"txt( +IntegerUserDefinedLiteralExpression +`-12_i +)txt", + R"txt( +IntegerUserDefinedLiteralExpression +`-12_r +)txt", + R"txt( +IntegerUserDefinedLiteralExpression +`-12_t +)txt"})); } TEST_P(SyntaxTreeTest, UserDefinedLiteral_Float) { if (!GetParam().isCXX11OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( unsigned operator "" _f(long double); unsigned operator "" _r(const char*); @@ -1543,457 +1017,269 @@ template unsigned operator "" _t(); void test() { - 1.2_f; // call: operator "" _f(1.2L) | kind: float - 1.2_r; // call: operator "" _i("1.2") | kind: float - 1.2_t; // call: operator<'1', '2'> "" _x() | kind: float + [[1.2_f]]; + [[1.2_r]]; + [[1.2_t]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-unsigned -| |-SimpleDeclarator -| | |-operator -| | |-"" -| | |-_f -| | `-ParametersAndQualifiers -| | |-( -| | |-SimpleDeclaration -| | | |-long -| | | `-double -| | `-) -| 
`-; -|-SimpleDeclaration -| |-unsigned -| |-SimpleDeclarator -| | |-operator -| | |-"" -| | |-_r -| | `-ParametersAndQualifiers -| | |-( -| | |-SimpleDeclaration -| | | |-const -| | | |-char -| | | `-SimpleDeclarator -| | | `-* -| | `-) -| `-; -|-TemplateDeclaration -| |-template -| |-< -| |-SimpleDeclaration -| | `-char -| |-... -| |-> -| `-SimpleDeclaration -| |-unsigned -| |-SimpleDeclarator -| | |-operator -| | |-"" -| | |-_t -| | `-ParametersAndQualifiers -| | |-( -| | `-) -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-FloatUserDefinedLiteralExpression - | | `-1.2_f - | `-; - |-ExpressionStatement - | |-FloatUserDefinedLiteralExpression - | | `-1.2_r - | `-; - |-ExpressionStatement - | |-FloatUserDefinedLiteralExpression - | | `-1.2_t - | `-; - `-} -)txt")); + {R"txt( +FloatUserDefinedLiteralExpression +`-1.2_f +)txt", + R"txt( +FloatUserDefinedLiteralExpression +`-1.2_r +)txt", + R"txt( +FloatUserDefinedLiteralExpression +`-1.2_t +)txt"})); } TEST_P(SyntaxTreeTest, IntegerLiteral_LongLong) { if (!GetParam().isCXX11OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { - 12ll; - 12ull; + [[12ll]]; + [[12ull]]; } )cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-IntegerLiteralExpression - | | `-12ll - | `-; - |-ExpressionStatement - | |-IntegerLiteralExpression - | | `-12ull - | `-; - `-} -)txt")); + {R"txt( +IntegerLiteralExpression +`-12ll +)txt", + R"txt( +IntegerLiteralExpression +`-12ull +)txt"})); } TEST_P(SyntaxTreeTest, IntegerLiteral_Binary) { if (!GetParam().isCXX14OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { - 0b1100; + [[0b1100]]; } )cpp", - R"txt( 
-*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-IntegerLiteralExpression - | | `-0b1100 - | `-; - `-} -)txt")); + {R"txt( +IntegerLiteralExpression +`-0b1100 +)txt"})); } TEST_P(SyntaxTreeTest, IntegerLiteral_WithDigitSeparators) { if (!GetParam().isCXX14OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { - 1'2'0ull; + [[1'2'0ull]]; } )cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-IntegerLiteralExpression - | | `-1'2'0ull - | `-; - `-} -)txt")); + {R"txt( +IntegerLiteralExpression +`-1'2'0ull +)txt"})); } TEST_P(SyntaxTreeTest, CharacterLiteral) { - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { - 'a'; - '\n'; - '\x20'; - '\0'; - L'a'; - L'α'; -} -)cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-CharacterLiteralExpression - | | `-'a' - | `-; - |-ExpressionStatement - | |-CharacterLiteralExpression - | | `-'\n' - | `-; - |-ExpressionStatement - | |-CharacterLiteralExpression - | | `-'\x20' - | `-; - |-ExpressionStatement - | |-CharacterLiteralExpression - | | `-'\0' - | `-; - |-ExpressionStatement - | |-CharacterLiteralExpression - | | `-L'a' - | `-; - |-ExpressionStatement - | |-CharacterLiteralExpression - | | `-L'α' - | `-; - `-} -)txt")); + [['a']]; + [['\n']]; + [['\x20']]; + [['\0']]; + [[L'a']]; + [[L'α']]; +} +)cpp", + {R"txt( +CharacterLiteralExpression +`-'a' +)txt", + R"txt( +CharacterLiteralExpression +`-'\n' +)txt", + R"txt( +CharacterLiteralExpression +`-'\x20' +)txt", + R"txt( +CharacterLiteralExpression 
+`-'\0' +)txt", + R"txt( +CharacterLiteralExpression +`-L'a' +)txt", + R"txt( +CharacterLiteralExpression +`-L'α' +)txt"})); } TEST_P(SyntaxTreeTest, CharacterLiteral_Utf) { if (!GetParam().isCXX11OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { - u'a'; - u'構'; - U'a'; - U'🌲'; -} -)cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-CharacterLiteralExpression - | | `-u'a' - | `-; - |-ExpressionStatement - | |-CharacterLiteralExpression - | | `-u'構' - | `-; - |-ExpressionStatement - | |-CharacterLiteralExpression - | | `-U'a' - | `-; - |-ExpressionStatement - | |-CharacterLiteralExpression - | | `-U'🌲' - | `-; - `-} -)txt")); + [[u'a']]; + [[u'構']]; + [[U'a']]; + [[U'🌲']]; +} +)cpp", + {R"txt( +CharacterLiteralExpression +`-u'a' +)txt", + R"txt( +CharacterLiteralExpression +`-u'構' +)txt", + R"txt( +CharacterLiteralExpression +`-U'a' +)txt", + R"txt( +CharacterLiteralExpression +`-U'🌲' +)txt"})); } TEST_P(SyntaxTreeTest, CharacterLiteral_Utf8) { if (!GetParam().isCXX17OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { - u8'a'; - u8'\x7f'; + [[u8'a']]; + [[u8'\x7f']]; } )cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-CharacterLiteralExpression - | | `-u8'a' - | `-; - |-ExpressionStatement - | |-CharacterLiteralExpression - | | `-u8'\x7f' - | `-; - `-} -)txt")); + {R"txt( +CharacterLiteralExpression +`-u8'a' +)txt", + R"txt( +CharacterLiteralExpression +`-u8'\x7f' +)txt"})); } TEST_P(SyntaxTreeTest, FloatingLiteral) { - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { - 1e-2; - 2.; - .2; - 2.f; -} 
-)cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-FloatingLiteralExpression - | | `-1e-2 - | `-; - |-ExpressionStatement - | |-FloatingLiteralExpression - | | `-2. - | `-; - |-ExpressionStatement - | |-FloatingLiteralExpression - | | `-.2 - | `-; - |-ExpressionStatement - | |-FloatingLiteralExpression - | | `-2.f - | `-; - `-} -)txt")); + [[1e-2]]; + [[2.]]; + [[.2]]; + [[2.f]]; +} +)cpp", + {R"txt( +FloatingLiteralExpression +`-1e-2 +)txt", + R"txt( +FloatingLiteralExpression +`-2. +)txt", + R"txt( +FloatingLiteralExpression +`-.2 +)txt", + R"txt( +FloatingLiteralExpression +`-2.f +)txt"})); } TEST_P(SyntaxTreeTest, FloatingLiteral_Hexadecimal) { if (!GetParam().isCXX17OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +void test() { + [[0xfp1]]; + [[0xf.p1]]; + [[0x.fp1]]; + [[0xf.fp1f]]; +} +)cpp", + {R"txt( +FloatingLiteralExpression +`-0xfp1 +)txt", + R"txt( +FloatingLiteralExpression +`-0xf.p1 +)txt", + R"txt( +FloatingLiteralExpression +`-0x.fp1 +)txt", + R"txt( +FloatingLiteralExpression +`-0xf.fp1f +)txt"})); +} + +TEST_P(SyntaxTreeTest, StringLiteral) { + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { - 0xfp1; - 0xf.p1; - 0x.fp1; - 0xf.fp1f; + [["a\n\0\x20"]]; + [[L"αβ"]]; } )cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-FloatingLiteralExpression - | | `-0xfp1 - | `-; - |-ExpressionStatement - | |-FloatingLiteralExpression - | | `-0xf.p1 - | `-; - |-ExpressionStatement - | |-FloatingLiteralExpression - | | `-0x.fp1 - | `-; - |-ExpressionStatement - | |-FloatingLiteralExpression - | | `-0xf.fp1f - | `-; - `-} -)txt")); -} - -TEST_P(SyntaxTreeTest, StringLiteral) { - 
EXPECT_TRUE(treeDumpEqual( - R"cpp( -void test() { - "a\n\0\x20"; - L"αβ"; -} -)cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-StringLiteralExpression - | | `-"a\n\0\x20" - | `-; - |-ExpressionStatement - | |-StringLiteralExpression - | | `-L"αβ" - | `-; - `-} -)txt")); + {R"txt( +StringLiteralExpression +`-"a\n\0\x20" +)txt", + R"txt( +StringLiteralExpression +`-L"αβ" +)txt"})); } TEST_P(SyntaxTreeTest, StringLiteral_Utf) { if (!GetParam().isCXX11OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { - u8"a\x1f\x05"; - u"C++抽象構文木"; - U"📖🌲\n"; + [[u8"a\x1f\x05"]]; + [[u"C++抽象構文木"]]; + [[U"📖🌲\n"]]; } )cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-StringLiteralExpression - | | `-u8"a\x1f\x05" - | `-; - |-ExpressionStatement - | |-StringLiteralExpression - | | `-u"C++抽象構文木" - | `-; - |-ExpressionStatement - | |-StringLiteralExpression - | | `-U"📖🌲\n" - | `-; - `-} -)txt")); + {R"txt( +StringLiteralExpression +`-u8"a\x1f\x05" +)txt", + R"txt( +StringLiteralExpression +`-u"C++抽象構文木" +)txt", + R"txt( +StringLiteralExpression +`-U"📖🌲\n" +)txt"})); } TEST_P(SyntaxTreeTest, StringLiteral_Raw) { @@ -2033,1221 +1319,646 @@ TEST_P(SyntaxTreeTest, BoolLiteral) { if (GetParam().isC()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { - true; - false; + [[true]]; + [[false]]; } )cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-BoolLiteralExpression - | | `-true - | `-; - |-ExpressionStatement - | 
|-BoolLiteralExpression - | | `-false - | `-; - `-} -)txt")); + {R"txt( +BoolLiteralExpression +`-true +)txt", + R"txt( +BoolLiteralExpression +`-false +)txt"})); } TEST_P(SyntaxTreeTest, CxxNullPtrLiteral) { if (!GetParam().isCXX11OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { - nullptr; + [[nullptr]]; } )cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-CxxNullPtrExpression - | | `-nullptr - | `-; - `-} -)txt")); + {R"txt( +CxxNullPtrExpression +`-nullptr +)txt"})); } TEST_P(SyntaxTreeTest, PostfixUnaryOperator) { - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test(int a) { - a++; - a--; -} -)cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-int - | | `-SimpleDeclarator - | | `-a - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-PostfixUnaryOperatorExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-a - | | `-++ - | `-; - |-ExpressionStatement - | |-PostfixUnaryOperatorExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-a - | | `--- - | `-; - `-} -)txt")); + [[a++]]; + [[a--]]; +} +)cpp", + {R"txt( +PostfixUnaryOperatorExpression +|-IdExpression +| `-UnqualifiedId +| `-a +`-++ +)txt", + R"txt( +PostfixUnaryOperatorExpression +|-IdExpression +| `-UnqualifiedId +| `-a +`--- +)txt"})); } TEST_P(SyntaxTreeTest, PrefixUnaryOperator) { - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test(int a, int *ap) { - --a; ++a; - ~a; - -a; - +a; - &a; - *ap; - !a; - __real a; __imag a; -} -)cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | 
|-SimpleDeclaration - | | |-int - | | `-SimpleDeclarator - | | `-a - | |-, - | |-SimpleDeclaration - | | |-int - | | `-SimpleDeclarator - | | |-* - | | `-ap - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |--- - | | `-IdExpression - | | `-UnqualifiedId - | | `-a - | `-; - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-++ - | | `-IdExpression - | | `-UnqualifiedId - | | `-a - | `-; - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-~ - | | `-IdExpression - | | `-UnqualifiedId - | | `-a - | `-; - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-- - | | `-IdExpression - | | `-UnqualifiedId - | | `-a - | `-; - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-+ - | | `-IdExpression - | | `-UnqualifiedId - | | `-a - | `-; - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-& - | | `-IdExpression - | | `-UnqualifiedId - | | `-a - | `-; - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-* - | | `-IdExpression - | | `-UnqualifiedId - | | `-ap - | `-; - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-! 
- | | `-IdExpression - | | `-UnqualifiedId - | | `-a - | `-; - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-__real - | | `-IdExpression - | | `-UnqualifiedId - | | `-a - | `-; - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-__imag - | | `-IdExpression - | | `-UnqualifiedId - | | `-a - | `-; - `-} -)txt")); + [[--a]]; [[++a]]; + [[~a]]; + [[-a]]; + [[+a]]; + [[&a]]; + [[*ap]]; + [[!a]]; + [[__real a]]; [[__imag a]]; +} +)cpp", + {R"txt( +PrefixUnaryOperatorExpression +|--- +`-IdExpression + `-UnqualifiedId + `-a +)txt", + R"txt( +PrefixUnaryOperatorExpression +|-++ +`-IdExpression + `-UnqualifiedId + `-a +)txt", + R"txt( +PrefixUnaryOperatorExpression +|-~ +`-IdExpression + `-UnqualifiedId + `-a +)txt", + R"txt( +PrefixUnaryOperatorExpression +|-- +`-IdExpression + `-UnqualifiedId + `-a +)txt", + R"txt( +PrefixUnaryOperatorExpression +|-+ +`-IdExpression + `-UnqualifiedId + `-a +)txt", + R"txt( +PrefixUnaryOperatorExpression +|-& +`-IdExpression + `-UnqualifiedId + `-a +)txt", + R"txt( +PrefixUnaryOperatorExpression +|-* +`-IdExpression + `-UnqualifiedId + `-ap +)txt", + R"txt( +PrefixUnaryOperatorExpression +|-! 
+`-IdExpression + `-UnqualifiedId + `-a +)txt", + R"txt( +PrefixUnaryOperatorExpression +|-__real +`-IdExpression + `-UnqualifiedId + `-a +)txt", + R"txt( +PrefixUnaryOperatorExpression +|-__imag +`-IdExpression + `-UnqualifiedId + `-a +)txt"})); } TEST_P(SyntaxTreeTest, PrefixUnaryOperatorCxx) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test(int a, bool b) { - compl a; - not b; -} -)cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-int - | | `-SimpleDeclarator - | | `-a - | |-, - | |-SimpleDeclaration - | | |-bool - | | `-SimpleDeclarator - | | `-b - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-compl - | | `-IdExpression - | | `-UnqualifiedId - | | `-a - | `-; - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-not - | | `-IdExpression - | | `-UnqualifiedId - | | `-b - | `-; - `-} -)txt")); + [[compl a]]; + [[not b]]; +} +)cpp", + {R"txt( +PrefixUnaryOperatorExpression +|-compl +`-IdExpression + `-UnqualifiedId + `-a +)txt", + R"txt( +PrefixUnaryOperatorExpression +|-not +`-IdExpression + `-UnqualifiedId + `-b +)txt"})); } TEST_P(SyntaxTreeTest, BinaryOperator) { - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test(int a) { - 1 - 2; - 1 == 2; - a = 1; - a <<= 1; - 1 || 0; - 1 & 2; - a ^= 3; -} -)cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-int - | | `-SimpleDeclarator - | | `-a - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-IntegerLiteralExpression - | | | `-1 - | | |-- - | | `-IntegerLiteralExpression - | | `-2 - | `-; - |-ExpressionStatement - | |-BinaryOperatorExpression - | | 
|-IntegerLiteralExpression - | | | `-1 - | | |-== - | | `-IntegerLiteralExpression - | | `-2 - | `-; - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-a - | | |-= - | | `-IntegerLiteralExpression - | | `-1 - | `-; - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-a - | | |-<<= - | | `-IntegerLiteralExpression - | | `-1 - | `-; - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-IntegerLiteralExpression - | | | `-1 - | | |-|| - | | `-IntegerLiteralExpression - | | `-0 - | `-; - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-IntegerLiteralExpression - | | | `-1 - | | |-& - | | `-IntegerLiteralExpression - | | `-2 - | `-; - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-a - | | |-^= - | | `-IntegerLiteralExpression - | | `-3 - | `-; - `-} -)txt")); + [[1 - 2]]; + [[1 == 2]]; + [[a = 1]]; + [[a <<= 1]]; + [[1 || 0]]; + [[1 & 2]]; + [[a != 3]]; +} +)cpp", + {R"txt( +BinaryOperatorExpression +|-IntegerLiteralExpression +| `-1 +|-- +`-IntegerLiteralExpression + `-2 +)txt", + R"txt( +BinaryOperatorExpression +|-IntegerLiteralExpression +| `-1 +|-== +`-IntegerLiteralExpression + `-2 +)txt", + R"txt( +BinaryOperatorExpression +|-IdExpression +| `-UnqualifiedId +| `-a +|-= +`-IntegerLiteralExpression + `-1 +)txt", + R"txt( +BinaryOperatorExpression +|-IdExpression +| `-UnqualifiedId +| `-a +|-<<= +`-IntegerLiteralExpression + `-1 +)txt", + R"txt( +BinaryOperatorExpression +|-IntegerLiteralExpression +| `-1 +|-|| +`-IntegerLiteralExpression + `-0 +)txt", + R"txt( +BinaryOperatorExpression +|-IntegerLiteralExpression +| `-1 +|-& +`-IntegerLiteralExpression + `-2 +)txt", + R"txt( +BinaryOperatorExpression +|-IdExpression +| `-UnqualifiedId +| `-a +|-!= +`-IntegerLiteralExpression + `-3 +)txt"})); } TEST_P(SyntaxTreeTest, BinaryOperatorCxx) { if (!GetParam().isCXX()) { return; 
} - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test(int a) { - true || false; - true or false; - 1 bitand 2; - a xor_eq 3; -} -)cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-int - | | `-SimpleDeclarator - | | `-a - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-BoolLiteralExpression - | | | `-true - | | |-|| - | | `-BoolLiteralExpression - | | `-false - | `-; - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-BoolLiteralExpression - | | | `-true - | | |-or - | | `-BoolLiteralExpression - | | `-false - | `-; - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-IntegerLiteralExpression - | | | `-1 - | | |-bitand - | | `-IntegerLiteralExpression - | | `-2 - | `-; - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-a - | | |-xor_eq - | | `-IntegerLiteralExpression - | | `-3 - | `-; - `-} -)txt")); + [[true || false]]; + [[true or false]]; + [[1 bitand 2]]; + [[a xor_eq 3]]; +} +)cpp", + {R"txt( +BinaryOperatorExpression +|-BoolLiteralExpression +| `-true +|-|| +`-BoolLiteralExpression + `-false +)txt", + R"txt( +BinaryOperatorExpression +|-BoolLiteralExpression +| `-true +|-or +`-BoolLiteralExpression + `-false +)txt", + R"txt( +BinaryOperatorExpression +|-IntegerLiteralExpression +| `-1 +|-bitand +`-IntegerLiteralExpression + `-2 +)txt", + R"txt( +BinaryOperatorExpression +|-IdExpression +| `-UnqualifiedId +| `-a +|-xor_eq +`-IntegerLiteralExpression + `-3 +)txt"})); } TEST_P(SyntaxTreeTest, BinaryOperator_NestedWithParenthesis) { - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { - (1 + 2) * (4 / 2); + [[(1 + 2) * (4 / 2)]]; } )cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | 
`-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-ParenExpression - | | | |-( - | | | |-BinaryOperatorExpression - | | | | |-IntegerLiteralExpression - | | | | | `-1 - | | | | |-+ - | | | | `-IntegerLiteralExpression - | | | | `-2 - | | | `-) - | | |-* - | | `-ParenExpression - | | |-( - | | |-BinaryOperatorExpression - | | | |-IntegerLiteralExpression - | | | | `-4 - | | | |-/ - | | | `-IntegerLiteralExpression - | | | `-2 - | | `-) - | `-; - `-} -)txt")); + {R"txt( +BinaryOperatorExpression +|-ParenExpression +| |-( +| |-BinaryOperatorExpression +| | |-IntegerLiteralExpression +| | | `-1 +| | |-+ +| | `-IntegerLiteralExpression +| | `-2 +| `-) +|-* +`-ParenExpression + |-( + |-BinaryOperatorExpression + | |-IntegerLiteralExpression + | | `-4 + | |-/ + | `-IntegerLiteralExpression + | `-2 + `-) +)txt"})); } TEST_P(SyntaxTreeTest, BinaryOperator_Associativity) { - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test(int a, int b) { - a + b + 42; - a = b = 42; -} -)cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-int - | | `-SimpleDeclarator - | | `-a - | |-, - | |-SimpleDeclaration - | | |-int - | | `-SimpleDeclarator - | | `-b - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-BinaryOperatorExpression - | | | |-IdExpression - | | | | `-UnqualifiedId - | | | | `-a - | | | |-+ - | | | `-IdExpression - | | | `-UnqualifiedId - | | | `-b - | | |-+ - | | `-IntegerLiteralExpression - | | `-42 - | `-; - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-a - | | |-= - | | `-BinaryOperatorExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-b - | | |-= - | | `-IntegerLiteralExpression - | | `-42 - | `-; - `-} -)txt")); + [[a + b 
+ 42]]; + [[a = b = 42]]; +} +)cpp", + {R"txt( +BinaryOperatorExpression +|-BinaryOperatorExpression +| |-IdExpression +| | `-UnqualifiedId +| | `-a +| |-+ +| `-IdExpression +| `-UnqualifiedId +| `-b +|-+ +`-IntegerLiteralExpression + `-42 +)txt", + R"txt( +BinaryOperatorExpression +|-IdExpression +| `-UnqualifiedId +| `-a +|-= +`-BinaryOperatorExpression + |-IdExpression + | `-UnqualifiedId + | `-b + |-= + `-IntegerLiteralExpression + `-42 +)txt"})); } TEST_P(SyntaxTreeTest, BinaryOperator_Precedence) { - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( void test() { - 1 + 2 * 3 + 4; - 1 % 2 + 3 * 4; -} -)cpp", - R"txt( -*: TranslationUnit -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-BinaryOperatorExpression - | | | |-IntegerLiteralExpression - | | | | `-1 - | | | |-+ - | | | `-BinaryOperatorExpression - | | | |-IntegerLiteralExpression - | | | | `-2 - | | | |-* - | | | `-IntegerLiteralExpression - | | | `-3 - | | |-+ - | | `-IntegerLiteralExpression - | | `-4 - | `-; - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-BinaryOperatorExpression - | | | |-IntegerLiteralExpression - | | | | `-1 - | | | |-% - | | | `-IntegerLiteralExpression - | | | `-2 - | | |-+ - | | `-BinaryOperatorExpression - | | |-IntegerLiteralExpression - | | | `-3 - | | |-* - | | `-IntegerLiteralExpression - | | `-4 - | `-; - `-} -)txt")); + [[1 + 2 * 3 + 4]]; + [[1 % 2 + 3 * 4]]; +} +)cpp", + {R"txt( +BinaryOperatorExpression +|-BinaryOperatorExpression +| |-IntegerLiteralExpression +| | `-1 +| |-+ +| `-BinaryOperatorExpression +| |-IntegerLiteralExpression +| | `-2 +| |-* +| `-IntegerLiteralExpression +| `-3 +|-+ +`-IntegerLiteralExpression + `-4 +)txt", + R"txt( +BinaryOperatorExpression +|-BinaryOperatorExpression +| |-IntegerLiteralExpression +| | `-1 +| |-% +| `-IntegerLiteralExpression +| `-2 +|-+ 
+`-BinaryOperatorExpression + |-IntegerLiteralExpression + | `-3 + |-* + `-IntegerLiteralExpression + `-4 +)txt"})); } TEST_P(SyntaxTreeTest, OverloadedOperator_Assignment) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { X& operator=(const X&); }; void test(X x, X y) { - x = y; + [[x = y]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-SimpleDeclaration -| | |-X -| | |-SimpleDeclarator -| | | |-& -| | | |-operator -| | | |-= -| | | `-ParametersAndQualifiers -| | | |-( -| | | |-SimpleDeclaration -| | | | |-const -| | | | |-X -| | | | `-SimpleDeclarator -| | | | `-& -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | |-, - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-y - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-x - | | |-= - | | `-IdExpression - | | `-UnqualifiedId - | | `-y - | `-; - `-} -)txt")); + {R"txt( +BinaryOperatorExpression +|-IdExpression +| `-UnqualifiedId +| `-x +|-= +`-IdExpression + `-UnqualifiedId + `-y +)txt"})); } TEST_P(SyntaxTreeTest, OverloadedOperator_Plus) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { friend X operator+(X, const X&); }; void test(X x, X y) { - x + y; + [[x + y]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-UnknownDeclaration -| | `-SimpleDeclaration -| | |-friend -| | |-X -| | |-SimpleDeclarator -| | | |-operator -| | | |-+ -| | | `-ParametersAndQualifiers -| | | |-( -| | | |-SimpleDeclaration -| | | | `-X -| | | |-, -| | | |-SimpleDeclaration -| | | | |-const -| | | | |-X -| | | | `-SimpleDeclarator -| | | 
| `-& -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | |-, - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-y - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-UnknownExpression - | | | `-IdExpression - | | | `-UnqualifiedId - | | | `-x - | | |-+ - | | `-IdExpression - | | `-UnqualifiedId - | | `-y - | `-; - `-} -)txt")); + {R"txt( +BinaryOperatorExpression +|-UnknownExpression +| `-IdExpression +| `-UnqualifiedId +| `-x +|-+ +`-IdExpression + `-UnqualifiedId + `-y +)txt"})); } TEST_P(SyntaxTreeTest, OverloadedOperator_Less) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { friend bool operator<(const X&, const X&); }; void test(X x, X y) { - x < y; + [[x < y]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-UnknownDeclaration -| | `-SimpleDeclaration -| | |-friend -| | |-bool -| | |-SimpleDeclarator -| | | |-operator -| | | |-< -| | | `-ParametersAndQualifiers -| | | |-( -| | | |-SimpleDeclaration -| | | | |-const -| | | | |-X -| | | | `-SimpleDeclarator -| | | | `-& -| | | |-, -| | | |-SimpleDeclaration -| | | | |-const -| | | | |-X -| | | | `-SimpleDeclarator -| | | | `-& -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | |-, - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-y - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-x - | | |-< - | | `-IdExpression - | | `-UnqualifiedId - | | `-y - | `-; - `-} -)txt")); + {R"txt( +BinaryOperatorExpression 
+|-IdExpression +| `-UnqualifiedId +| `-x +|-< +`-IdExpression + `-UnqualifiedId + `-y +)txt"})); } TEST_P(SyntaxTreeTest, OverloadedOperator_LeftShift) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { friend X operator<<(X&, const X&); }; void test(X x, X y) { - x << y; + [[x << y]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-UnknownDeclaration -| | `-SimpleDeclaration -| | |-friend -| | |-X -| | |-SimpleDeclarator -| | | |-operator -| | | |-<< -| | | `-ParametersAndQualifiers -| | | |-( -| | | |-SimpleDeclaration -| | | | |-X -| | | | `-SimpleDeclarator -| | | | `-& -| | | |-, -| | | |-SimpleDeclaration -| | | | |-const -| | | | |-X -| | | | `-SimpleDeclarator -| | | | `-& -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | |-, - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-y - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-x - | | |-<< - | | `-IdExpression - | | `-UnqualifiedId - | | `-y - | `-; - `-} -)txt")); + {R"txt( +BinaryOperatorExpression +|-IdExpression +| `-UnqualifiedId +| `-x +|-<< +`-IdExpression + `-UnqualifiedId + `-y +)txt"})); } TEST_P(SyntaxTreeTest, OverloadedOperator_Comma) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { X operator,(X&); }; void test(X x, X y) { - x, y; + [[x, y]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-SimpleDeclaration -| | |-X -| | |-SimpleDeclarator -| | | |-operator -| | | |-, -| | | `-ParametersAndQualifiers -| | | |-( -| | | |-SimpleDeclaration -| | | | |-X -| | | | `-SimpleDeclarator -| | | | `-& -| 
| | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | |-, - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-y - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-x - | | |-, - | | `-IdExpression - | | `-UnqualifiedId - | | `-y - | `-; - `-} -)txt")); + {R"txt( +BinaryOperatorExpression +|-IdExpression +| `-UnqualifiedId +| `-x +|-, +`-IdExpression + `-UnqualifiedId + `-y +)txt"})); } TEST_P(SyntaxTreeTest, OverloadedOperator_PointerToMember) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { X operator->*(int); }; void test(X* xp, int X::* pmi) { - xp->*pmi; + [[xp->*pmi]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-SimpleDeclaration -| | |-X -| | |-SimpleDeclarator -| | | |-operator -| | | |-->* -| | | `-ParametersAndQualifiers -| | | |-( -| | | |-SimpleDeclaration -| | | | `-int -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | |-* - | | `-xp - | |-, - | |-SimpleDeclaration - | | |-int - | | `-SimpleDeclarator - | | |-MemberPointer - | | | |-X - | | | |-:: - | | | `-* - | | `-pmi - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-xp - | | |-->* - | | `-IdExpression - | | `-UnqualifiedId - | | `-pmi - | `-; - `-} -)txt")); + {R"txt( +BinaryOperatorExpression +|-IdExpression +| `-UnqualifiedId +| `-xp +|-->* +`-IdExpression + `-UnqualifiedId + `-pmi +)txt"})); } TEST_P(SyntaxTreeTest, OverloadedOperator_Negation) { if (!GetParam().isCXX()) { 
return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { bool operator!(); }; void test(X x) { - !x; + [[!x]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-SimpleDeclaration -| | |-bool -| | |-SimpleDeclarator -| | | |-operator -| | | |-! -| | | `-ParametersAndQualifiers -| | | |-( -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-! - | | `-IdExpression - | | `-UnqualifiedId - | | `-x - | `-; - `-} -)txt")); + {R"txt( +PrefixUnaryOperatorExpression +|-! +`-IdExpression + `-UnqualifiedId + `-x +)txt"})); } TEST_P(SyntaxTreeTest, OverloadedOperator_AddressOf) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { X* operator&(); }; void test(X x) { - &x; + [[&x]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-SimpleDeclaration -| | |-X -| | |-SimpleDeclarator -| | | |-* -| | | |-operator -| | | |-& -| | | `-ParametersAndQualifiers -| | | |-( -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-& - | | `-IdExpression - | | `-UnqualifiedId - | | `-x - | `-; - `-} -)txt")); + {R"txt( +PrefixUnaryOperatorExpression +|-& +`-IdExpression + `-UnqualifiedId + `-x +)txt"})); } TEST_P(SyntaxTreeTest, OverloadedOperator_PrefixIncrement) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { X 
operator++(); }; void test(X x) { - ++x; + [[++x]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-SimpleDeclaration -| | |-X -| | |-SimpleDeclarator -| | | |-operator -| | | |-++ -| | | `-ParametersAndQualifiers -| | | |-( -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-++ - | | `-IdExpression - | | `-UnqualifiedId - | | `-x - | `-; - `-} -)txt")); + {R"txt( +PrefixUnaryOperatorExpression +|-++ +`-IdExpression + `-UnqualifiedId + `-x +)txt"})); } TEST_P(SyntaxTreeTest, OverloadedOperator_PostfixIncrement) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { X operator++(int); }; void test(X x) { - x++; + [[x++]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-SimpleDeclaration -| | |-X -| | |-SimpleDeclarator -| | | |-operator -| | | |-++ -| | | `-ParametersAndQualifiers -| | | |-( -| | | |-SimpleDeclaration -| | | | `-int -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-PostfixUnaryOperatorExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-x - | | `-++ - | `-; - `-} -)txt")); + {R"txt( +PostfixUnaryOperatorExpression +|-IdExpression +| `-UnqualifiedId +| `-x +`-++ +)txt"})); } TEST_P(SyntaxTreeTest, MultipleDeclaratorsGrouping) { @@ -3343,6 +2054,33 @@ void foo() { )txt")); } +TEST_P(SyntaxTreeTest, SizeTTypedef) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +typedef 
decltype(sizeof(void *)) size_t; + )cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-typedef + |-decltype + |-( + |-UnknownExpression + | |-sizeof + | |-( + | |-void + | |-* + | `-) + |-) + |-SimpleDeclarator + | `-size_t + `-; +)txt")); +} + TEST_P(SyntaxTreeTest, Namespaces) { if (!GetParam().isCXX()) { return; @@ -3496,68 +2234,318 @@ struct {} *a1; )txt")); } -TEST_P(SyntaxTreeTest, Templates) { +TEST_P(SyntaxTreeTest, StaticMemberFunction) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +struct S { + static void f(){} +}; +)cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-struct + |-S + |-{ + |-SimpleDeclaration + | |-static + | |-void + | |-SimpleDeclarator + | | |-f + | | `-ParametersAndQualifiers + | | |-( + | | `-) + | `-CompoundStatement + | |-{ + | `-} + |-} + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, ConversionMemberFunction) { if (!GetParam().isCXX()) { return; } - if (GetParam().hasDelayedTemplateParsing()) { - // FIXME: Make this test work on Windows by generating the expected syntax - // tree when `-fdelayed-template-parsing` is active. 
+ EXPECT_TRUE(treeDumpEqual( + R"cpp( +struct X { + operator int(); +}; +)cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-struct + |-X + |-{ + |-SimpleDeclaration + | |-SimpleDeclarator + | | |-operator + | | |-int + | | `-ParametersAndQualifiers + | | |-( + | | `-) + | `-; + |-} + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, LiteralOperatorDeclaration) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +unsigned operator "" _c(char); + )cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-unsigned + |-SimpleDeclarator + | |-operator + | |-"" + | |-_c + | `-ParametersAndQualifiers + | |-( + | |-SimpleDeclaration + | | `-char + | `-) + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, NumericLiteralOperatorTemplateDeclaration) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +template +unsigned operator "" _t(); + )cpp", + R"txt( +*: TranslationUnit +`-TemplateDeclaration + |-template + |-< + |-SimpleDeclaration + | `-char + |-... 
+ |-> + `-SimpleDeclaration + |-unsigned + |-SimpleDeclarator + | |-operator + | |-"" + | |-_t + | `-ParametersAndQualifiers + | |-( + | `-) + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, OverloadedOperatorDeclaration) { + if (!GetParam().isCXX()) { return; } EXPECT_TRUE(treeDumpEqual( R"cpp( -template struct cls {}; -template int var = 10; -template int fun() {} +struct X { + X& operator=(const X&); +}; +)cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-struct + |-X + |-{ + |-SimpleDeclaration + | |-X + | |-SimpleDeclarator + | | |-& + | | |-operator + | | |-= + | | `-ParametersAndQualifiers + | | |-( + | | |-SimpleDeclaration + | | | |-const + | | | |-X + | | | `-SimpleDeclarator + | | | `-& + | | `-) + | `-; + |-} + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, OverloadedOperatorFriendDeclarataion) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +struct X { + friend X operator+(X, const X&); +}; +)cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-struct + |-X + |-{ + |-UnknownDeclaration + | `-SimpleDeclaration + | |-friend + | |-X + | |-SimpleDeclarator + | | |-operator + | | |-+ + | | `-ParametersAndQualifiers + | | |-( + | | |-SimpleDeclaration + | | | `-X + | | |-, + | | |-SimpleDeclaration + | | | |-const + | | | |-X + | | | `-SimpleDeclarator + | | | `-& + | | `-) + | `-; + |-} + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, ClassTemplateDeclaration) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +template +struct ST {}; )cpp", R"txt( *: TranslationUnit -|-TemplateDeclaration -| |-template -| |-< -| |-UnknownDeclaration -| | |-class -| | `-T -| |-> -| `-SimpleDeclaration -| |-struct -| |-cls -| |-{ -| |-} -| `-; -|-TemplateDeclaration -| |-template -| |-< -| |-UnknownDeclaration -| | |-class -| | `-T -| |-> -| `-SimpleDeclaration -| |-int -| |-SimpleDeclarator -| | |-var -| | |-= -| | `-IntegerLiteralExpression -| | `-10 -| `-; `-TemplateDeclaration |-template |-< 
|-UnknownDeclaration - | |-class + | |-typename + | `-T + |-> + `-SimpleDeclaration + |-struct + |-ST + |-{ + |-} + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, FunctionTemplateDeclaration) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +template +T f(); +)cpp", + R"txt( +*: TranslationUnit +`-TemplateDeclaration + |-template + |-< + |-UnknownDeclaration + | |-typename | `-T |-> `-SimpleDeclaration - |-int + |-T |-SimpleDeclarator - | |-fun + | |-f | `-ParametersAndQualifiers | |-( | `-) - `-CompoundStatement - |-{ - `-} + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, VariableTemplateDeclaration) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +template T var = 10; +)cpp", + R"txt( +*: TranslationUnit +`-TemplateDeclaration + |-template + |-< + |-UnknownDeclaration + | |-class + | `-T + |-> + `-SimpleDeclaration + |-T + |-SimpleDeclarator + | |-var + | |-= + | `-IntegerLiteralExpression + | `-10 + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, StaticMemberFunctionTemplate) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +struct S { + template + static U f(); +}; +)cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-struct + |-S + |-{ + |-TemplateDeclaration + | |-template + | |-< + | |-UnknownDeclaration + | | |-typename + | | `-U + | |-> + | `-SimpleDeclaration + | |-static + | |-U + | |-SimpleDeclarator + | | |-f + | | `-ParametersAndQualifiers + | | |-( + | | `-) + | `-; + |-} + `-; )txt")); } @@ -3606,6 +2594,59 @@ struct X { )txt")); } +TEST_P(SyntaxTreeTest, NestedTemplatesInNamespace) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +namespace n { + template + struct ST { + template + static U f(); + }; +} +)cpp", + R"txt( +*: TranslationUnit +`-NamespaceDefinition + |-namespace + |-n + |-{ + |-TemplateDeclaration + | |-template + | |-< + | |-UnknownDeclaration + | | |-typename + | | `-T + | |-> + | `-SimpleDeclaration + 
| |-struct + | |-ST + | |-{ + | |-TemplateDeclaration + | | |-template + | | |-< + | | |-UnknownDeclaration + | | | |-typename + | | | `-U + | | |-> + | | `-SimpleDeclaration + | | |-static + | | |-U + | | |-SimpleDeclarator + | | | |-f + | | | `-ParametersAndQualifiers + | | | |-( + | | | `-) + | | `-; + | |-} + | `-; + `-} +)txt")); +} + TEST_P(SyntaxTreeTest, Templates2) { if (!GetParam().isCXX()) { return; diff --git a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp index 6d2efeaaa8ebf9..c5dbb770c53879 100644 --- a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp @@ -171,7 +171,7 @@ ::testing::AssertionResult SyntaxTreeTest::treeDumpEqual(StringRef Code, << "Source file has syntax errors, they were printed to the test " "log"; } - std::string Actual = std::string(StringRef(Root->dump(*Arena)).trim()); + auto Actual = StringRef(Root->dump(*Arena)).trim().str(); // EXPECT_EQ shows the diff between the two strings if they are different. 
EXPECT_EQ(Tree.trim().str(), Actual); if (Actual != Tree.trim().str()) { @@ -180,6 +180,43 @@ ::testing::AssertionResult SyntaxTreeTest::treeDumpEqual(StringRef Code, return ::testing::AssertionSuccess(); } +::testing::AssertionResult +SyntaxTreeTest::treeDumpEqualOnAnnotations(StringRef CodeWithAnnotations, + ArrayRef TreeDumps) { + SCOPED_TRACE(llvm::join(GetParam().getCommandLineArgs(), " ")); + + auto AnnotatedCode = llvm::Annotations(CodeWithAnnotations); + auto *Root = buildTree(AnnotatedCode.code(), GetParam()); + + if (Diags->getClient()->getNumErrors() != 0) { + return ::testing::AssertionFailure() + << "Source file has syntax errors, they were printed to the test " + "log"; + } + + auto AnnotatedRanges = AnnotatedCode.ranges(); + if (AnnotatedRanges.size() != TreeDumps.size()) { + return ::testing::AssertionFailure() + << "The number of annotated ranges in the source code is different " + "to the number of their corresponding tree dumps."; + } + bool Failed = false; + for (unsigned i = 0; i < AnnotatedRanges.size(); i++) { + auto *AnnotatedNode = nodeByRange(AnnotatedRanges[i], Root); + assert(AnnotatedNode); + auto AnnotatedNodeDump = + StringRef(AnnotatedNode->dump(*Arena)).trim().str(); + // EXPECT_EQ shows the diff between the two strings if they are different. + EXPECT_EQ(TreeDumps[i].trim().str(), AnnotatedNodeDump) + << "Dumps diverged for the code:\n" + << AnnotatedCode.code().slice(AnnotatedRanges[i].Begin, + AnnotatedRanges[i].End); + if (AnnotatedNodeDump != TreeDumps[i].trim().str()) + Failed = true; + } + return Failed ? 
::testing::AssertionFailure() : ::testing::AssertionSuccess(); +} + syntax::Node *SyntaxTreeTest::nodeByRange(llvm::Annotations::Range R, syntax::Node *Root) { ArrayRef Toks = tokens(Root); diff --git a/clang/unittests/Tooling/Syntax/TreeTestBase.h b/clang/unittests/Tooling/Syntax/TreeTestBase.h index bfa6ecd7909f8f..c282bbf45fd390 100644 --- a/clang/unittests/Tooling/Syntax/TreeTestBase.h +++ b/clang/unittests/Tooling/Syntax/TreeTestBase.h @@ -34,6 +34,9 @@ class SyntaxTreeTest : public ::testing::Test, ::testing::AssertionResult treeDumpEqual(StringRef Code, StringRef Tree); + ::testing::AssertionResult + treeDumpEqualOnAnnotations(StringRef CodeWithAnnotations, + ArrayRef TreeDumps); /// Finds the deepest node in the tree that covers exactly \p R. /// FIXME: implement this efficiently and move to public syntax tree API. syntax::Node *nodeByRange(llvm::Annotations::Range R, syntax::Node *Root); diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index 5f9e868de5fd81..c9d0da2fc08936 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -133,17 +133,16 @@ check_library_exists(pthread pthread_create "" COMPILER_RT_HAS_LIBPTHREAD) check_library_exists(execinfo backtrace "" COMPILER_RT_HAS_LIBEXECINFO) # Look for terminfo library, used in unittests that depend on LLVMSupport. 
-if(LLVM_ENABLE_TERMINFO) - foreach(library terminfo tinfo curses ncurses ncursesw) - string(TOUPPER ${library} library_suffix) - check_library_exists( - ${library} setupterm "" COMPILER_RT_HAS_TERMINFO_${library_suffix}) - if(COMPILER_RT_HAS_TERMINFO_${library_suffix}) - set(COMPILER_RT_HAS_TERMINFO TRUE) - set(COMPILER_RT_TERMINFO_LIB "${library}") - break() - endif() - endforeach() +if(LLVM_ENABLE_TERMINFO STREQUAL FORCE_ON) + set(MAYBE_REQUIRED REQUIRED) +else() + set(MAYBE_REQUIRED) +endif() +find_library(COMPILER_RT_TERMINFO_LIB NAMES terminfo tinfo curses ncurses ncursesw ${MAYBE_REQUIRED}) +if(COMPILER_RT_TERMINFO_LIB) + set(LLVM_ENABLE_TERMINFO 1) +else() + set(LLVM_ENABLE_TERMINFO 0) endif() if (ANDROID AND COMPILER_RT_HAS_LIBDL) diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp index 29541eac5dc60b..df9ada45bb0391 100644 --- a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp @@ -425,26 +425,26 @@ size_t MutationDispatcher::Mutate_CrossOver(uint8_t *Data, size_t Size, if (!CrossOverWith) return 0; const Unit &O = *CrossOverWith; if (O.empty()) return 0; - MutateInPlaceHere.resize(MaxSize); - auto &U = MutateInPlaceHere; size_t NewSize = 0; switch(Rand(3)) { case 0: - NewSize = CrossOver(Data, Size, O.data(), O.size(), U.data(), U.size()); + MutateInPlaceHere.resize(MaxSize); + NewSize = CrossOver(Data, Size, O.data(), O.size(), + MutateInPlaceHere.data(), MaxSize); + memcpy(Data, MutateInPlaceHere.data(), NewSize); break; case 1: - NewSize = InsertPartOf(O.data(), O.size(), U.data(), U.size(), MaxSize); + NewSize = InsertPartOf(O.data(), O.size(), Data, Size, MaxSize); if (!NewSize) - NewSize = CopyPartOf(O.data(), O.size(), U.data(), U.size()); + NewSize = CopyPartOf(O.data(), O.size(), Data, Size); break; case 2: - NewSize = CopyPartOf(O.data(), O.size(), U.data(), U.size()); + NewSize = CopyPartOf(O.data(), O.size(), Data, Size); break; default: assert(0); } 
assert(NewSize > 0 && "CrossOver returned empty unit"); assert(NewSize <= MaxSize && "CrossOver returned overisized unit"); - memcpy(Data, U.data(), NewSize); return NewSize; } diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp index 292ea5fbb23931..5dacd3256abc9a 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp @@ -219,9 +219,6 @@ static void invoke_and_release_block(void *param) { DISPATCH_INTERCEPT(dispatch, false) DISPATCH_INTERCEPT(dispatch_barrier, true) -DISPATCH_INTERCEPT_SYNC_F(dispatch_async_and_wait_f, false) -DISPATCH_INTERCEPT_SYNC_B(dispatch_async_and_wait, false) - DECLARE_REAL(void, dispatch_after_f, dispatch_time_t when, dispatch_queue_t queue, void *context, dispatch_function_t work) @@ -749,8 +746,6 @@ void InitializeLibdispatchInterceptors() { INTERCEPT_FUNCTION(dispatch_barrier_async_f); INTERCEPT_FUNCTION(dispatch_barrier_sync); INTERCEPT_FUNCTION(dispatch_barrier_sync_f); - INTERCEPT_FUNCTION(dispatch_async_and_wait); - INTERCEPT_FUNCTION(dispatch_async_and_wait_f); INTERCEPT_FUNCTION(dispatch_after); INTERCEPT_FUNCTION(dispatch_after_f); INTERCEPT_FUNCTION(dispatch_once); diff --git a/compiler-rt/lib/xray/tests/CMakeLists.txt b/compiler-rt/lib/xray/tests/CMakeLists.txt index a1fbccaeb6d268..96a9db1ef87773 100644 --- a/compiler-rt/lib/xray/tests/CMakeLists.txt +++ b/compiler-rt/lib/xray/tests/CMakeLists.txt @@ -55,7 +55,7 @@ set(XRAY_UNITTEST_LINK_FLAGS if (NOT APPLE) # Needed by LLVMSupport. 
append_list_if( - COMPILER_RT_HAS_TERMINFO + LLVM_ENABLE_TERMINFO -l${COMPILER_RT_TERMINFO_LIB} XRAY_UNITTEST_LINK_FLAGS) if (COMPILER_RT_STANDALONE_BUILD) diff --git a/compiler-rt/test/fuzzer/CrossOverTest.cpp b/compiler-rt/test/fuzzer/CrossOverTest.cpp index a7643570a92b25..b4506f665dc762 100644 --- a/compiler-rt/test/fuzzer/CrossOverTest.cpp +++ b/compiler-rt/test/fuzzer/CrossOverTest.cpp @@ -4,10 +4,11 @@ // Test for a fuzzer. The fuzzer must find the string // ABCDEFGHIJ -// We use it as a test for CrossOver functionality -// by passing two inputs to it: -// ABCDE00000 -// ZZZZZFGHIJ +// We use it as a test for each of CrossOver functionalities +// by passing the following sets of two inputs to it: +// {ABCDE00000, ZZZZZFGHIJ} +// {ABCDEHIJ, ZFG} to specifically test InsertPartOf +// {ABCDE00HIJ, ZFG} to specifically test CopyPartOf // #include #include @@ -42,13 +43,11 @@ static const uint32_t ExpectedHash = 0xe1677acb; extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { // fprintf(stderr, "ExpectedHash: %x\n", ExpectedHash); - if (Size != 10) return 0; + if (Size == 10 && ExpectedHash == simple_hash(Data, Size)) + *NullPtr = 0; if (*Data == 'A') Sink++; if (*Data == 'Z') Sink--; - if (ExpectedHash == simple_hash(Data, Size)) - *NullPtr = 0; return 0; } - diff --git a/compiler-rt/test/fuzzer/cross_over.test b/compiler-rt/test/fuzzer/cross_over.test index 058b5eb2c85cd0..64e06e8cd3667b 100644 --- a/compiler-rt/test/fuzzer/cross_over.test +++ b/compiler-rt/test/fuzzer/cross_over.test @@ -12,7 +12,7 @@ RUN: echo -n ABCDE00000 > %t-corpus/A RUN: echo -n ZZZZZFGHIJ > %t-corpus/B -RUN: not %run %t-CrossOverTest -max_len=10 -seed=1 -runs=10000000 %t-corpus +RUN: not %run %t-CrossOverTest -max_len=10 -reduce_inputs=0 -seed=1 -runs=10000000 %t-corpus # Test the same thing but using -seed_inputs instead of passing the corpus dir. 
-RUN: not %run %t-CrossOverTest -max_len=10 -seed=1 -runs=10000000 -seed_inputs=%t-corpus/A,%t-corpus/B +RUN: not %run %t-CrossOverTest -max_len=10 -reduce_inputs=0 -seed=1 -runs=10000000 -seed_inputs=%t-corpus/A,%t-corpus/B diff --git a/compiler-rt/test/fuzzer/cross_over_copy.test b/compiler-rt/test/fuzzer/cross_over_copy.test new file mode 100644 index 00000000000000..24b2f9b3b11325 --- /dev/null +++ b/compiler-rt/test/fuzzer/cross_over_copy.test @@ -0,0 +1,20 @@ +# Tests CrossOver CopyPartOf. +# We want to make sure that the test can find the input +# ABCDEFGHIJ when given two other inputs in the seed corpus: +# ABCDE00HIJ and +# (Z) FG +# +RUN: %cpp_compiler %S/CrossOverTest.cpp -o %t-CrossOverTest + +RUN: rm -rf %t-corpus +RUN: mkdir %t-corpus +RUN: echo -n ABCDE00HIJ > %t-corpus/A +RUN: echo -n ZFG > %t-corpus/B + + +RUN: not %run %t-CrossOverTest -mutate_depth=1 -max_len=1024 -reduce_inputs=0 -seed=1 -runs=10000000 %t-corpus 2>&1 | FileCheck %s + +# Test the same thing but using -seed_inputs instead of passing the corpus dir. +RUN: not %run %t-CrossOverTest -mutate_depth=1 -max_len=1024 -reduce_inputs=0 -seed=1 -runs=10000000 -seed_inputs=%t-corpus/A,%t-corpus/B 2>&1 | FileCheck %s + +CHECK: MS: 1 CrossOver- diff --git a/compiler-rt/test/fuzzer/cross_over_insert.test b/compiler-rt/test/fuzzer/cross_over_insert.test new file mode 100644 index 00000000000000..cb7d4fab81ef7e --- /dev/null +++ b/compiler-rt/test/fuzzer/cross_over_insert.test @@ -0,0 +1,20 @@ +# Tests CrossOver InsertPartOf. 
+# We want to make sure that the test can find the input +# ABCDEFGHIJ when given two other inputs in the seed corpus: +# ABCDE HIJ and +# (Z) FG +# +RUN: %cpp_compiler %S/CrossOverTest.cpp -o %t-CrossOverTest + +RUN: rm -rf %t-corpus +RUN: mkdir %t-corpus +RUN: echo -n ABCDEHIJ > %t-corpus/A +RUN: echo -n ZFG > %t-corpus/B + + +RUN: not %run %t-CrossOverTest -mutate_depth=1 -max_len=1024 -reduce_inputs=0 -seed=1 -runs=10000000 %t-corpus 2>&1 | FileCheck %s + +# Test the same thing but using -seed_inputs instead of passing the corpus dir. +RUN: not %run %t-CrossOverTest -mutate_depth=1 -max_len=1024 -reduce_inputs=0 -seed=1 -runs=10000000 -seed_inputs=%t-corpus/A,%t-corpus/B 2>&1 | FileCheck %s + +CHECK: MS: 1 CrossOver- diff --git a/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test b/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test index 0c7198e3c4e9eb..52b51e6269f532 100644 --- a/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test +++ b/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test @@ -10,6 +10,9 @@ RUN: %run %t.driver %t.target RUN: llvm-cov gcov instrprof-gcov-parallel.target.gcda RUN: FileCheck --input-file instrprof-gcov-parallel.target.c.gcov %s +# Bug 42535 +# XFAIL: sparc-target-arch + # Test if the .gcda file is correctly created from one of child processes # and counters of all processes are recorded correctly. 
# 707 = CHILDREN * COUNT diff --git a/compiler-rt/test/tsan/libdispatch/async_and_wait.c b/compiler-rt/test/tsan/libdispatch/async_and_wait.c deleted file mode 100644 index 5e63c118aef53d..00000000000000 --- a/compiler-rt/test/tsan/libdispatch/async_and_wait.c +++ /dev/null @@ -1,31 +0,0 @@ -// RUN: %clang_tsan %s -o %t -// RUN: %run %t 2>&1 | FileCheck %s --implicit-check-not='ThreadSanitizer' - -#include "dispatch/dispatch.h" - -#include - -long global; - -int main() { - dispatch_queue_t q = dispatch_queue_create("my.queue", DISPATCH_QUEUE_SERIAL); - dispatch_semaphore_t s = dispatch_semaphore_create(0); - - // Force queue to context switch onto separate thread. - dispatch_async(q, ^{ - dispatch_semaphore_wait(s, DISPATCH_TIME_FOREVER); - }); - dispatch_semaphore_signal(s); - - global++; - dispatch_async_and_wait(q, ^{ - // The queue continues to execute on separate thread. This would cause a - // race if we had used `dispatch_async()` without the `_and_wait` part. - global++; - }); - global++; - - fprintf(stderr, "Done.\n"); -} - -// CHECK: Done. diff --git a/compiler-rt/test/ubsan/TestCases/Float/cast-overflow.cpp b/compiler-rt/test/ubsan/TestCases/Float/cast-overflow.cpp index 479c39f28428ad..1c680259a2471e 100644 --- a/compiler-rt/test/ubsan/TestCases/Float/cast-overflow.cpp +++ b/compiler-rt/test/ubsan/TestCases/Float/cast-overflow.cpp @@ -11,6 +11,9 @@ // FIXME: not %run %t 8 2>&1 | FileCheck %s --check-prefix=CHECK-8 // RUN: not %run %t 9 2>&1 | FileCheck %s --check-prefix=CHECK-9 +// Bug 42535 +// XFAIL: sparc-target-arch + // This test assumes float and double are IEEE-754 single- and double-precision. #if defined(__APPLE__) diff --git a/flang/README.md b/flang/README.md index f7797ed55bd3ed..44573ae4b9b6b0 100644 --- a/flang/README.md +++ b/flang/README.md @@ -8,30 +8,30 @@ F18 was subsequently accepted into the LLVM project and rechristened as Flang. ## Getting Started -Read more about flang in the [documentation directory](documentation). 
-Start with the [compiler overview](documentation/Overview.md). +Read more about flang in the [docs directory](docs). +Start with the [compiler overview](docs/Overview.md). To better understand Fortran as a language and the specific grammar accepted by flang, -read [Fortran For C Programmers](documentation/FortranForCProgrammers.md) +read [Fortran For C Programmers](docs/FortranForCProgrammers.md) and -flang's specifications of the [Fortran grammar](documentation/f2018-grammar.txt) +flang's specifications of the [Fortran grammar](docs/f2018-grammar.txt) and -the [OpenMP grammar](documentation/OpenMP-4.5-grammar.txt). +the [OpenMP grammar](docs/OpenMP-4.5-grammar.txt). Treatment of language extensions is covered -in [this document](documentation/Extensions.md). +in [this document](docs/Extensions.md). To understand the compilers handling of intrinsics, -see the [discussion of intrinsics](documentation/Intrinsics.md). +see the [discussion of intrinsics](docs/Intrinsics.md). To understand how a flang program communicates with libraries at runtime, -see the discussion of [runtime descriptors](documentation/RuntimeDescriptor.md). +see the discussion of [runtime descriptors](docs/RuntimeDescriptor.md). If you're interested in contributing to the compiler, -read the [style guide](documentation/C++style.md) +read the [style guide](docs/C++style.md) and -also review [how flang uses modern C++ features](documentation/C++17.md). +also review [how flang uses modern C++ features](docs/C++17.md). 
## Supported C++ compilers diff --git a/flang/documentation/ArrayComposition.md b/flang/docs/ArrayComposition.md similarity index 99% rename from flang/documentation/ArrayComposition.md rename to flang/docs/ArrayComposition.md index 099909c5ef0d04..0f30af39f9e4bb 100644 --- a/flang/documentation/ArrayComposition.md +++ b/flang/docs/ArrayComposition.md @@ -1,4 +1,4 @@ - select Cond, AbsT, AbsF + if (isa(TVal) && isa(FVal)) { + CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal}); + CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal}); + return SelectInst::Create(Cond, AbsT, AbsF); + } + // fabs (select Cond, -FVal, FVal) --> fabs FVal + if (match(TVal, m_FNeg(m_Specific(FVal)))) + return replaceOperand(*II, 0, FVal); + // fabs (select Cond, TVal, -TVal) --> fabs TVal + if (match(FVal, m_FNeg(m_Specific(TVal)))) + return replaceOperand(*II, 0, TVal); } LLVM_FALLTHROUGH; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 45c6d804a57b65..a4b38e8082224b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -931,17 +931,24 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // Arbitrary predecessor count limit. static const int PredCountLimit = 64; - // Don't bother if there are too many predecessors. - if (UseBB->hasNPredecessorsOrMore(PredCountLimit + 1)) - return nullptr; + + // Cache the (non-uniqified!) list of predecessors in a vector, + // checking the limit at the same time for efficiency. + SmallVector Preds; // May have duplicates! + for (BasicBlock *Pred : predecessors(UseBB)) { + // Don't bother if there are too many predecessors. + if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once? 
+ return nullptr; + Preds.emplace_back(Pred); + } // For each predecessor, what is the source aggregate, // from which all the elements were originally extracted from? // Note that we want for the map to have stable iteration order! - SmallMapVector SourceAggregates; - for (BasicBlock *PredBB : predecessors(UseBB)) { + SmallDenseMap SourceAggregates; + for (BasicBlock *Pred : Preds) { std::pair IV = - SourceAggregates.insert({PredBB, nullptr}); + SourceAggregates.insert({Pred, nullptr}); // Did we already evaluate this predecessor? if (!IV.second) continue; @@ -949,7 +956,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // Let's hope that when coming from predecessor Pred, all elements of the // aggregate produced by OrigIVI must have been originally extracted from // the same aggregate. Is that so? Can we find said original aggregate? - SourceAggregate = FindCommonSourceAggregate(UseBB, PredBB); + SourceAggregate = FindCommonSourceAggregate(UseBB, Pred); if (Describe(SourceAggregate) != AggregateDescription::Found) return nullptr; // Give up. IV.first->second = *SourceAggregate; @@ -958,13 +965,14 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // All good! Now we just need to thread the source aggregates here. // Note that we have to insert the new PHI here, ourselves, because we can't // rely on InstCombinerImpl::run() inserting it into the right basic block. + // Note that the same block can be a predecessor more than once, + // and we need to preserve that invariant for the PHI node. 
BuilderTy::InsertPointGuard Guard(Builder); Builder.SetInsertPoint(UseBB->getFirstNonPHI()); - auto *PHI = Builder.CreatePHI(AggTy, SourceAggregates.size(), - OrigIVI.getName() + ".merged"); - for (const std::pair &SourceAggregate : - SourceAggregates) - PHI->addIncoming(SourceAggregate.second, SourceAggregate.first); + auto *PHI = + Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged"); + for (BasicBlock *Pred : Preds) + PHI->addIncoming(SourceAggregates[Pred], Pred); ++NumAggregateReconstructionsSimplified; OrigIVI.replaceAllUsesWith(PHI); diff --git a/llvm/test/Analysis/StackSafetyAnalysis/lifetime.ll b/llvm/test/Analysis/StackSafetyAnalysis/lifetime.ll index 470450a3a977de..d29029530b6cf7 100644 --- a/llvm/test/Analysis/StackSafetyAnalysis/lifetime.ll +++ b/llvm/test/Analysis/StackSafetyAnalysis/lifetime.ll @@ -699,6 +699,50 @@ l2: ; preds = %l2, %entry br label %l2 } +%struct.char_array = type { [500 x i8] } + +define dso_local void @gep_test(i32 %cond) { +; CHECK-LABEL: define dso_local void @gep_test +entry: +; CHECK: entry: +; CHECK-NEXT: Alive: <> + %a = alloca %struct.char_array, align 8 + %b = alloca %struct.char_array, align 8 + %tobool.not = icmp eq i32 %cond, 0 + br i1 %tobool.not, label %if.else, label %if.then + +if.then: ; preds = %entry +; CHECK: if.then: +; CHECK-NEXT: Alive: <> + %0 = getelementptr inbounds %struct.char_array, %struct.char_array* %a, i64 0, i32 0, i64 0 + call void @llvm.lifetime.start.p0i8(i64 500, i8* nonnull %0) +; CHECK: call void @llvm.lifetime.start.p0i8(i64 500, i8* nonnull %0) +; CHECK-NEXT: Alive: + tail call void @capture8(i8* %0) + call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %0) +; CHECK: call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %0) +; CHECK-NEXT: Alive: <> + br label %if.end + +if.else: ; preds = %entry +; CHECK: if.else: +; CHECK-NEXT: Alive: <> + %1 = getelementptr inbounds %struct.char_array, %struct.char_array* %b, i64 0, i32 0, i64 0 + call void 
@llvm.lifetime.start.p0i8(i64 500, i8* nonnull %1) +; CHECK: call void @llvm.lifetime.start.p0i8(i64 500, i8* nonnull %1) +; CHECK-NEXT: Alive: + tail call void @capture8(i8* %1) + call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %1) +; CHECK: call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %1) +; CHECK-NEXT: Alive: <> + br label %if.end + +if.end: ; preds = %if.else, %if.then +; CHECK: if.end: +; CHECK-NEXT: Alive: <> + ret void +} + define void @if_must(i1 %a) { ; CHECK-LABEL: define void @if_must entry: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll new file mode 100644 index 00000000000000..a9a93d1b7f7a93 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll @@ -0,0 +1,100 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc < %s -mtriple arm64-apple-darwin -global-isel -stop-after=irtranslator -verify-machineinstrs | FileCheck %s + +; Check that we don't try to tail-call with a non-forwarded sret parameter. +declare void @test_explicit_sret(i64* sret) + +; Forwarded explicit sret pointer => we can tail call. +define void @can_tail_call_forwarded_explicit_sret_ptr(i64* sret %arg) { + ; CHECK-LABEL: name: can_tail_call_forwarded_explicit_sret_ptr + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x8 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x8 + ; CHECK: $x8 = COPY [[COPY]](p0) + ; CHECK: TCRETURNdi @test_explicit_sret, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x8 + tail call void @test_explicit_sret(i64* %arg) + ret void +} + +; Not marked as tail, so don't tail call. 
+define void @test_call_explicit_sret(i64* sret %arg) { + ; CHECK-LABEL: name: test_call_explicit_sret + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x8 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x8 + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x8 = COPY [[COPY]](p0) + ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: RET_ReallyLR + call void @test_explicit_sret(i64* %arg) + ret void +} + +define void @dont_tail_call_explicit_sret_alloca_unused() { + ; CHECK-LABEL: name: dont_tail_call_explicit_sret_alloca_unused + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.l + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x8 = COPY [[FRAME_INDEX]](p0) + ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: RET_ReallyLR + %l = alloca i64, align 8 + tail call void @test_explicit_sret(i64* %l) + ret void +} + +define void @dont_tail_call_explicit_sret_alloca_dummyusers(i64* %ptr) { + ; CHECK-LABEL: name: dont_tail_call_explicit_sret_alloca_dummyusers + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.l + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.ptr) + ; CHECK: G_STORE [[LOAD]](s64), [[FRAME_INDEX]](p0) :: (store 8 into %ir.l) + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x8 = COPY [[FRAME_INDEX]](p0) + ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: RET_ReallyLR + %l = alloca i64, align 8 + %r = load i64, i64* %ptr, 
align 8 + store i64 %r, i64* %l, align 8 + tail call void @test_explicit_sret(i64* %l) + ret void +} + +define void @dont_tail_call_tailcall_explicit_sret_gep(i64* %ptr) { + ; CHECK-LABEL: name: dont_tail_call_tailcall_explicit_sret_gep + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x8 = COPY [[PTR_ADD]](p0) + ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: RET_ReallyLR + %ptr2 = getelementptr i64, i64* %ptr, i32 1 + tail call void @test_explicit_sret(i64* %ptr2) + ret void +} + +define i64 @dont_tail_call_sret_alloca_returned() { + ; CHECK-LABEL: name: dont_tail_call_sret_alloca_returned + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.l + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x8 = COPY [[FRAME_INDEX]](p0) + ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8 from %ir.l) + ; CHECK: $x0 = COPY [[LOAD]](s64) + ; CHECK: RET_ReallyLR implicit $x0 + %l = alloca i64, align 8 + tail call void @test_explicit_sret(i64* %l) + %r = load i64, i64* %l, align 8 + ret i64 %r +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll index ad38b2bb8b9c23..7eb21c21b86cc5 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll @@ -151,6 +151,42 @@ define void 
@test_abi_exts_call(i8* %addr) { ret void } +; CHECK-LABEL: name: test_zext_in_callee +; CHECK: bb.1 (%ir-block.0): +; CHECK: liveins: $x0 +; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load 1 from %ir.addr) +; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp +; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) +; CHECK: $w0 = COPY [[ZEXT]](s32) +; CHECK: BL @has_zext_param, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0 +; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp +; CHECK: RET_ReallyLR +declare void @has_zext_param(i8 zeroext) +define void @test_zext_in_callee(i8* %addr) { + %val = load i8, i8* %addr + call void @has_zext_param(i8 %val) + ret void +} + +; CHECK-LABEL: name: test_sext_in_callee +; CHECK: bb.1 (%ir-block.0): +; CHECK: liveins: $x0 +; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load 1 from %ir.addr) +; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp +; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) +; CHECK: $w0 = COPY [[SEXT]](s32) +; CHECK: BL @has_sext_param, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0 +; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp +; CHECK: RET_ReallyLR +declare void @has_sext_param(i8 signext) +define void @test_sext_in_callee(i8* %addr) { + %val = load i8, i8* %addr + call void @has_sext_param(i8 %val) + ret void +} + ; CHECK-LABEL: name: test_abi_sext_ret ; CHECK: [[VAL:%[0-9]+]]:_(s8) = G_LOAD ; CHECK: [[SVAL:%[0-9]+]]:_(s32) = G_SEXT [[VAL]](s8) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ashr-shl-to-sext-inreg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ashr-shl-to-sext-inreg.mir new file mode 100644 index 00000000000000..14bda863d2c289 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ashr-shl-to-sext-inreg.mir @@ -0,0 +1,90 @@ +# NOTE: 
Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +--- +name: ashr_shl_to_sext_inreg +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: ashr_shl_to_sext_inreg + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s16) = G_SEXT_INREG [[TRUNC]], 8 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXT_INREG]](s16) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1:_(s32) = COPY $w0 + %0:_(s16) = G_TRUNC %1(s32) + %2:_(s16) = G_CONSTANT i16 8 + %3:_(s16) = G_SHL %0, %2(s16) + %4:_(s16) = exact G_ASHR %3, %2(s16) + %5:_(s32) = G_ANYEXT %4(s16) + $w0 = COPY %5(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: different_shift_amts +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: different_shift_amts + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 12 + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; CHECK: [[ASHR:%[0-9]+]]:_(s16) = exact G_ASHR [[SHL]], [[C1]](s16) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1:_(s32) = COPY $w0 + %0:_(s16) = G_TRUNC %1(s32) + %2:_(s16) = G_CONSTANT i16 12 + %4:_(s16) = G_CONSTANT i16 8 + %3:_(s16) = G_SHL %0, %2(s16) + %5:_(s16) = exact G_ASHR %3, %4(s16) + %6:_(s32) = G_ANYEXT %5(s16) + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + +... 
+--- +name: ashr_shl_to_sext_inreg_vector +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$d0' } +body: | + bb.1: + liveins: $d0 + ; Currently don't support this for vectors just yet, this will need updating + ; when we do. + ; CHECK-LABEL: name: ashr_shl_to_sext_inreg_vector + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16) + ; CHECK: [[SHL:%[0-9]+]]:_(<4 x s16>) = G_SHL [[COPY]], [[BUILD_VECTOR]](<4 x s16>) + ; CHECK: [[ASHR:%[0-9]+]]:_(<4 x s16>) = exact G_ASHR [[SHL]], [[BUILD_VECTOR]](<4 x s16>) + ; CHECK: $d0 = COPY [[ASHR]](<4 x s16>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<4 x s16>) = COPY $d0 + %2:_(s16) = G_CONSTANT i16 8 + %1:_(<4 x s16>) = G_BUILD_VECTOR %2(s16), %2(s16), %2(s16), %2(s16) + %3:_(<4 x s16>) = G_SHL %0, %1(<4 x s16>) + %4:_(<4 x s16>) = exact G_ASHR %3, %1(<4 x s16>) + $d0 = COPY %4(<4 x s16>) + RET_ReallyLR implicit $d0 +... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-sextload-from-sextinreg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-sextload-from-sextinreg.mir new file mode 100644 index 00000000000000..a216c5b74b3561 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-sextload-from-sextinreg.mir @@ -0,0 +1,103 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +--- +name: sextload_from_inreg +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: sextload_from_inreg + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s16) = G_SEXTLOAD [[COPY]](p0) :: (load 1, align 2) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXTLOAD]](s16) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(p0) = COPY $x0 + %1:_(s16) = G_LOAD %0(p0) :: (load 2) + %2:_(s16) = G_SEXT_INREG %1, 8 + %3:_(s32) = G_ANYEXT %2(s16) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: non_pow_2_inreg +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: non_pow_2_inreg + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 24 + ; CHECK: $w0 = COPY [[SEXT_INREG]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(p0) = COPY $x0 + %1:_(s32) = G_LOAD %0(p0) :: (load 4) + %2:_(s32) = G_SEXT_INREG %1, 24 + $w0 = COPY %2(s32) + RET_ReallyLR implicit $w0 + +... 
+--- +name: atomic +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: atomic + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load acquire 2) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s16) = G_SEXT_INREG [[LOAD]], 8 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXT_INREG]](s16) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(p0) = COPY $x0 + %1:_(s16) = G_LOAD %0(p0) :: (load acquire 2) + %2:_(s16) = G_SEXT_INREG %1, 8 + %3:_(s32) = G_ANYEXT %2(s16) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: volatile +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: volatile + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (volatile load 2) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s16) = G_SEXT_INREG [[LOAD]], 8 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXT_INREG]](s16) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(p0) = COPY $x0 + %1:_(s16) = G_LOAD %0(p0) :: (volatile load 2) + %2:_(s16) = G_SEXT_INREG %1, 8 + %3:_(s32) = G_ANYEXT %2(s16) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll index 4a3e5b04681476..a4a1747b05af9b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll @@ -513,3 +513,67 @@ a: %error = load %swift_error*, %swift_error** %error_ptr ret %swift_error* %error } + +; foo takes a swifterror parameter. We should be able to see that even when +; it isn't explicitly on the call. 
+define float @swifterror_param_not_on_call(i8* %error_ref) { +; CHECK-LABEL: swifterror_param_not_on_call: +; CHECK: mov [[ID:x[0-9]+]], x0 +; CHECK: bl {{.*}}foo +; CHECK: mov x0, x21 +; CHECK: cbnz x21 +; Access part of the error object and save it to error_ref +; CHECK: ldrb [[CODE:w[0-9]+]], [x0, #8] +; CHECK: strb [[CODE]], [{{.*}}[[ID]]] +; CHECK: bl {{.*}}free + +entry: + %error_ptr_ref = alloca swifterror %swift_error* + store %swift_error* null, %swift_error** %error_ptr_ref + %call = call float @foo(%swift_error** %error_ptr_ref) + %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref + %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null + %tmp = bitcast %swift_error* %error_from_foo to i8* + br i1 %had_error_from_foo, label %handler, label %cont +cont: + %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1 + %t = load i8, i8* %v1 + store i8 %t, i8* %error_ref + br label %handler +handler: + call void @free(i8* %tmp) + ret float 1.0 +} + +; foo_sret takes an sret parameter and a swifterror parameter. We should be +; able to see that, even if it's not explicitly on the call. 
+define float @swifterror_param_not_on_call2(i8* %error_ref) { +; CHECK-LABEL: swifterror_param_not_on_call2: +; CHECK: mov [[ID:x[0-9]+]], x0 +; CHECK: mov [[ZERO:x[0-9]+]], xzr +; CHECK: bl {{.*}}foo_sret +; CHECK: mov x0, x21 +; CHECK: cbnz x21 +; Access part of the error object and save it to error_ref +; CHECK: ldrb [[CODE:w[0-9]+]], [x0, #8] +; CHECK: strb [[CODE]], [{{.*}}[[ID]]] +; CHECK: bl {{.*}}free + +entry: + %s = alloca %struct.S, align 8 + %error_ptr_ref = alloca swifterror %swift_error* + store %swift_error* null, %swift_error** %error_ptr_ref + call void @foo_sret(%struct.S* %s, i32 1, %swift_error** %error_ptr_ref) + %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref + %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null + %tmp = bitcast %swift_error* %error_from_foo to i8* + br i1 %had_error_from_foo, label %handler, label %cont +cont: + %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1 + %t = load i8, i8* %v1 + store i8 %t, i8* %error_ref + br label %handler +handler: + call void @free(i8* %tmp) + ret float 1.0 +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/swiftself.ll b/llvm/test/CodeGen/AArch64/GlobalISel/swiftself.ll index 8ed06f23383c4f..0f090d488cf109 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/swiftself.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/swiftself.ll @@ -60,3 +60,14 @@ entry: store i8* %3, i8** %0, align 8 ret void } + +; Check that x20 is used to pass a swiftself argument when the parameter is +; only in the declaration's arguments. 
+; CHECK-LABEL: _swiftself_not_on_call_params: +; CHECK: mov x20, x0 +; CHECK: bl {{_?}}swiftself_param +; CHECK: ret +define i8 *@swiftself_not_on_call_params(i8* %arg) { + %res = call i8 *@swiftself_param(i8* %arg) + ret i8 *%res +} diff --git a/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll b/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll index e821e8504d962c..b0ed3d0490cc04 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll @@ -213,3 +213,28 @@ entry: store i32 %add, i32* %arrayidx1, align 4 ret void } + +; FIXME - The SU(4) and SU(7) can be clustered even with +; different preds +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: cluster_with_different_preds:%bb.0 +; CHECK-NOT:Cluster ld/st SU(4) - SU(7) +; CHECK:SU(3): STRWui %2:gpr32, %0:gpr64common, 0 :: +; CHECK:SU(4): %3:gpr32 = LDRWui %1:gpr64common, 0 :: +; CHECK:Predecessors: +; CHECK: SU(3): Ord Latency=1 Memory +; CHECK:SU(6): STRBBui %4:gpr32, %1:gpr64common, 4 :: +; CHECK:SU(7): %5:gpr32 = LDRWui %1:gpr64common, 1 :: +; CHECK:Predecessors: +; CHECK:SU(6): Ord Latency=1 Memory +define i32 @cluster_with_different_preds(i32* %p, i32* %q) { +entry: + store i32 3, i32* %p, align 4 + %0 = load i32, i32* %q, align 4 + %add.ptr = getelementptr inbounds i32, i32* %q, i64 1 + %1 = bitcast i32* %add.ptr to i8* + store i8 5, i8* %1, align 1 + %2 = load i32, i32* %add.ptr, align 4 + %add = add nsw i32 %2, %0 + ret i32 %add +} diff --git a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll index d43dcda36231aa..bcff9e056f18c9 100644 --- a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll +++ b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll @@ -131,8 +131,7 @@ define @srem_i8( %a, %b) ; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h ; CHECK-NEXT: uzp1 z2.b, z3.b, z2.b ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mul z1.b, p0/m, z1.b, z2.b -; CHECK-NEXT: sub z0.b, z0.b, z1.b +; CHECK-NEXT: mls z0.b, 
p0/m, z2.b, z1.b ; CHECK-NEXT: ret %div = srem %a, %b ret %div @@ -151,8 +150,7 @@ define @srem_i16( %a, %b ; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z4.s ; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mul z1.h, p0/m, z1.h, z2.h -; CHECK-NEXT: sub z0.h, z0.h, z1.h +; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: ret %div = srem %a, %b ret %div @@ -164,8 +162,7 @@ define @srem_i32( %a, %b ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s -; CHECK-NEXT: mul z1.s, p0/m, z1.s, z2.s -; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s ; CHECK-NEXT: ret %div = srem %a, %b ret %div @@ -177,8 +174,7 @@ define @srem_i64( %a, %b ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d -; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d -; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d ; CHECK-NEXT: ret %div = srem %a, %b ret %div @@ -315,8 +311,7 @@ define @urem_i8( %a, %b) ; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h ; CHECK-NEXT: uzp1 z2.b, z3.b, z2.b ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mul z1.b, p0/m, z1.b, z2.b -; CHECK-NEXT: sub z0.b, z0.b, z1.b +; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: ret %div = urem %a, %b ret %div @@ -335,8 +330,7 @@ define @urem_i16( %a, %b ; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z4.s ; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mul z1.h, p0/m, z1.h, z2.h -; CHECK-NEXT: sub z0.h, z0.h, z1.h +; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: ret %div = urem %a, %b ret %div @@ -348,8 +342,7 @@ define @urem_i32( %a, %b ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z1.s -; CHECK-NEXT: mul z1.s, p0/m, z1.s, z2.s -; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s ; CHECK-NEXT: ret %div = urem %a, %b ret %div @@ -361,8 +354,7 @@ define @urem_i64( %a, %b ; CHECK-NEXT: ptrue p0.d ; 
CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d -; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d -; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d ; CHECK-NEXT: ret %div = urem %a, %b ret %div diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll new file mode 100644 index 00000000000000..a4d8f281705df1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll @@ -0,0 +1,654 @@ +; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -check-prefix=NO_SVE +; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256 +; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK +; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s 
-check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048 + +target triple = "aarch64-unknown-linux-gnu" + +; Don't use SVE when its registers are no bigger than NEON. +; NO_SVE-NOT: ptrue + +; +; DUP (integer) +; + +; Don't use SVE for 64-bit vectors. +define <8 x i8> @splat_v8i8(i8 %a) #0 { +; CHECK-LABEL: splat_v8i8: +; CHECK: dup v0.8b, w0 +; CHECK-NEXT: ret + %insert = insertelement <8 x i8> undef, i8 %a, i64 0 + %splat = shufflevector <8 x i8> %insert, <8 x i8> undef, <8 x i32> zeroinitializer + ret <8 x i8> %splat +} + +; Don't use SVE for 128-bit vectors. +define <16 x i8> @splat_v16i8(i8 %a) #0 { +; CHECK-LABEL: splat_v16i8: +; CHECK: dup v0.16b, w0 +; CHECK-NEXT: ret + %insert = insertelement <16 x i8> undef, i8 %a, i64 0 + %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %splat +} + +define void @splat_v32i8(i8 %a, <32 x i8>* %b) #0 { +; CHECK-LABEL: splat_v32i8: +; CHECK-DAG: mov [[RES:z[0-9]+]].b, w0 +; CHECK-DAG: ptrue [[PG:p[0-9]+]].b, vl32 +; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x1] +; CHECK-NEXT: ret + %insert = insertelement <32 x i8> undef, i8 %a, i64 0 + %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer + store <32 x i8> %splat, <32 x i8>* %b + ret void +} + +define void @splat_v64i8(i8 %a, <64 x i8>* %b) #0 { +; CHECK-LABEL: splat_v64i8: +; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].b, w0 +; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].b, vl64 +; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x1] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].b, w0 +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 +; VBITS_EQ_256-DAG: mov w[[OFFSET_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: st1b { [[RES]].b }, [[PG]], [x1] +; VBITS_EQ_256-DAG: st1b { [[RES]].b }, [[PG]], [x1, x[[OFFSET_HI]] +; VBITS_EQ_256-NEXT: ret + %insert = insertelement <64 x i8> undef, i8 %a, i64 0 + %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer + store <64 x i8> %splat, <64 x i8>* %b + ret void +} + +define void @splat_v128i8(i8 %a, <128 x i8>* %b) #0 { +; CHECK-LABEL: splat_v128i8: +; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].b, w0 +; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].b, vl128 +; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x1] +; VBITS_GE_1024-NEXT: ret + %insert = insertelement <128 x i8> undef, i8 %a, i64 0 + %splat = shufflevector <128 x i8> %insert, <128 x i8> undef, <128 x i32> zeroinitializer + store <128 x i8> %splat, <128 x i8>* %b + ret void +} + +define void @splat_v256i8(i8 %a, <256 x i8>* %b) #0 { +; CHECK-LABEL: splat_v256i8: +; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].b, w0 +; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].b, vl256 +; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x1] +; VBITS_GE_2048-NEXT: ret + %insert = insertelement <256 x i8> undef, i8 %a, i64 0 + %splat = shufflevector <256 x i8> %insert, <256 x i8> undef, <256 x i32> zeroinitializer + store <256 x i8> %splat, <256 x i8>* %b + ret void +} + +; Don't use SVE for 64-bit vectors. +define <4 x i16> @splat_v4i16(i16 %a) #0 { +; CHECK-LABEL: splat_v4i16: +; CHECK: dup v0.4h, w0 +; CHECK-NEXT: ret + %insert = insertelement <4 x i16> undef, i16 %a, i64 0 + %splat = shufflevector <4 x i16> %insert, <4 x i16> undef, <4 x i32> zeroinitializer + ret <4 x i16> %splat +} + +; Don't use SVE for 128-bit vectors. 
+define <8 x i16> @splat_v8i16(i16 %a) #0 { +; CHECK-LABEL: splat_v8i16: +; CHECK: dup v0.8h, w0 +; CHECK-NEXT: ret + %insert = insertelement <8 x i16> undef, i16 %a, i64 0 + %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %splat +} + +define void @splat_v16i16(i16 %a, <16 x i16>* %b) #0 { +; CHECK-LABEL: splat_v16i16: +; CHECK-DAG: mov [[RES:z[0-9]+]].h, w0 +; CHECK-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x1] +; CHECK-NEXT: ret + %insert = insertelement <16 x i16> undef, i16 %a, i64 0 + %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer + store <16 x i16> %splat, <16 x i16>* %b + ret void +} + +define void @splat_v32i16(i16 %a, <32 x i16>* %b) #0 { +; CHECK-LABEL: splat_v32i16: +; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].h, w0 +; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x1] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].h, w0 +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x1] +; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x[[B_HI]] +; VBITS_EQ_256-NEXT: ret + %insert = insertelement <32 x i16> undef, i16 %a, i64 0 + %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer + store <32 x i16> %splat, <32 x i16>* %b + ret void +} + +define void @splat_v64i16(i16 %a, <64 x i16>* %b) #0 { +; CHECK-LABEL: splat_v64i16: +; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].h, w0 +; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x1] +; VBITS_GE_1024-NEXT: ret + %insert = insertelement <64 x i16> undef, i16 %a, i64 0 + %splat = shufflevector <64 x i16> %insert, <64 x i16> undef, <64 x i32> zeroinitializer + store <64 x i16> %splat, <64 x i16>* %b + ret void +} + +define void @splat_v128i16(i16 %a, <128 x i16>* %b) #0 { +; CHECK-LABEL: splat_v128i16: +; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].h, w0 +; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].h, vl128 +; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x1] +; VBITS_GE_2048-NEXT: ret + %insert = insertelement <128 x i16> undef, i16 %a, i64 0 + %splat = shufflevector <128 x i16> %insert, <128 x i16> undef, <128 x i32> zeroinitializer + store <128 x i16> %splat, <128 x i16>* %b + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x i32> @splat_v2i32(i32 %a) #0 { +; CHECK-LABEL: splat_v2i32: +; CHECK: dup v0.2s, w0 +; CHECK-NEXT: ret + %insert = insertelement <2 x i32> undef, i32 %a, i64 0 + %splat = shufflevector <2 x i32> %insert, <2 x i32> undef, <2 x i32> zeroinitializer + ret <2 x i32> %splat +} + +; Don't use SVE for 128-bit vectors. 
+define <4 x i32> @splat_v4i32(i32 %a) #0 { +; CHECK-LABEL: splat_v4i32: +; CHECK: dup v0.4s, w0 +; CHECK-NEXT: ret + %insert = insertelement <4 x i32> undef, i32 %a, i64 0 + %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat +} + +define void @splat_v8i32(i32 %a, <8 x i32>* %b) #0 { +; CHECK-LABEL: splat_v8i32: +; CHECK-DAG: mov [[RES:z[0-9]+]].s, w0 +; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x1] +; CHECK-NEXT: ret + %insert = insertelement <8 x i32> undef, i32 %a, i64 0 + %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer + store <8 x i32> %splat, <8 x i32>* %b + ret void +} + +define void @splat_v16i32(i32 %a, <16 x i32>* %b) #0 { +; CHECK-LABEL: splat_v16i32: +; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].s, w0 +; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x1] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].s, w0 +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x1] +; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x[[B_HI]] +; VBITS_EQ_256-NEXT: ret + %insert = insertelement <16 x i32> undef, i32 %a, i64 0 + %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer + store <16 x i32> %splat, <16 x i32>* %b + ret void +} + +define void @splat_v32i32(i32 %a, <32 x i32>* %b) #0 { +; CHECK-LABEL: splat_v32i32: +; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].s, w0 +; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x1] +; VBITS_GE_1024-NEXT: ret + %insert = insertelement <32 x i32> undef, i32 %a, i64 0 + %splat = shufflevector <32 x i32> %insert, <32 x i32> undef, <32 x i32> zeroinitializer + store <32 x i32> %splat, <32 x i32>* %b + ret void +} + +define void @splat_v64i32(i32 %a, <64 x i32>* %b) #0 { +; CHECK-LABEL: splat_v64i32: +; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].s, w0 +; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x1] +; VBITS_GE_2048-NEXT: ret + %insert = insertelement <64 x i32> undef, i32 %a, i64 0 + %splat = shufflevector <64 x i32> %insert, <64 x i32> undef, <64 x i32> zeroinitializer + store <64 x i32> %splat, <64 x i32>* %b + ret void +} + +; Don't use SVE for 64-bit vectors. +define <1 x i64> @splat_v1i64(i64 %a) #0 { +; CHECK-LABEL: splat_v1i64: +; CHECK: fmov d0, x0 +; CHECK-NEXT: ret + %insert = insertelement <1 x i64> undef, i64 %a, i64 0 + %splat = shufflevector <1 x i64> %insert, <1 x i64> undef, <1 x i32> zeroinitializer + ret <1 x i64> %splat +} + +; Don't use SVE for 128-bit vectors. 
+define <2 x i64> @splat_v2i64(i64 %a) #0 { +; CHECK-LABEL: splat_v2i64: +; CHECK: dup v0.2d, x0 +; CHECK-NEXT: ret + %insert = insertelement <2 x i64> undef, i64 %a, i64 0 + %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat +} + +define void @splat_v4i64(i64 %a, <4 x i64>* %b) #0 { +; CHECK-LABEL: splat_v4i64: +; CHECK-DAG: mov [[RES:z[0-9]+]].d, x0 +; CHECK-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1] +; CHECK-NEXT: ret + %insert = insertelement <4 x i64> undef, i64 %a, i64 0 + %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer + store <4 x i64> %splat, <4 x i64>* %b + ret void +} + +define void @splat_v8i64(i64 %a, <8 x i64>* %b) #0 { +; CHECK-LABEL: splat_v8i64: +; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].d, x0 +; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x1] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].d, x0 +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x1] +; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x[[B_HI]] +; VBITS_EQ_256-NEXT: ret + %insert = insertelement <8 x i64> undef, i64 %a, i64 0 + %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer + store <8 x i64> %splat, <8 x i64>* %b + ret void +} + +define void @splat_v16i64(i64 %a, <16 x i64>* %b) #0 { +; CHECK-LABEL: splat_v16i64: +; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].d, x0 +; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x1] +; VBITS_GE_1024-NEXT: ret + %insert = insertelement <16 x i64> undef, i64 %a, i64 0 + %splat = shufflevector <16 x i64> %insert, <16 x i64> undef, <16 x i32> zeroinitializer + store <16 x i64> %splat, <16 x i64>* %b + ret void +} + +define void @splat_v32i64(i64 %a, <32 x i64>* %b) #0 { +; CHECK-LABEL: splat_v32i64: +; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].d, x0 +; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x1] +; VBITS_GE_2048-NEXT: ret + %insert = insertelement <32 x i64> undef, i64 %a, i64 0 + %splat = shufflevector <32 x i64> %insert, <32 x i64> undef, <32 x i32> zeroinitializer + store <32 x i64> %splat, <32 x i64>* %b + ret void +} + +; +; DUP (floating-point) +; + +; Don't use SVE for 64-bit vectors. +define <4 x half> @splat_v4f16(half %a) #0 { +; CHECK-LABEL: splat_v4f16: +; CHECK: dup v0.4h, v0.h[0] +; CHECK-NEXT: ret + %insert = insertelement <4 x half> undef, half %a, i64 0 + %splat = shufflevector <4 x half> %insert, <4 x half> undef, <4 x i32> zeroinitializer + ret <4 x half> %splat +} + +; Don't use SVE for 128-bit vectors. 
+define <8 x half> @splat_v8f16(half %a) #0 { +; CHECK-LABEL: splat_v8f16: +; CHECK: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %insert = insertelement <8 x half> undef, half %a, i64 0 + %splat = shufflevector <8 x half> %insert, <8 x half> undef, <8 x i32> zeroinitializer + ret <8 x half> %splat +} + +define void @splat_v16f16(half %a, <16 x half>* %b) #0 { +; CHECK-LABEL: splat_v16f16: +; CHECK-DAG: mov [[RES:z[0-9]+]].h, h0 +; CHECK-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK-NEXT: ret + %insert = insertelement <16 x half> undef, half %a, i64 0 + %splat = shufflevector <16 x half> %insert, <16 x half> undef, <16 x i32> zeroinitializer + store <16 x half> %splat, <16 x half>* %b + ret void +} + +define void @splat_v32f16(half %a, <32 x half>* %b) #0 { +; CHECK-LABEL: splat_v32f16: +; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].h, h0 +; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].h, h0 +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x[[B_HI]] +; VBITS_EQ_256-NEXT: ret + %insert = insertelement <32 x half> undef, half %a, i64 0 + %splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer + store <32 x half> %splat, <32 x half>* %b + ret void +} + +define void @splat_v64f16(half %a, <64 x half>* %b) #0 { +; CHECK-LABEL: splat_v64f16: +; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].h, h0 +; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %insert = insertelement <64 x half> undef, half %a, i64 0 + %splat = shufflevector <64 x half> %insert, <64 x half> undef, <64 x i32> zeroinitializer + store <64 x half> %splat, <64 x half>* %b + ret void +} + +define void @splat_v128f16(half %a, <128 x half>* %b) #0 { +; CHECK-LABEL: splat_v128f16: +; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].h, h0 +; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].h, vl128 +; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %insert = insertelement <128 x half> undef, half %a, i64 0 + %splat = shufflevector <128 x half> %insert, <128 x half> undef, <128 x i32> zeroinitializer + store <128 x half> %splat, <128 x half>* %b + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) #0 { +; CHECK-LABEL: splat_v2f32: +; CHECK: dup v0.2s, v0.s[0] +; CHECK-NEXT: ret + %insert = insertelement <2 x float> undef, float %a, i64 0 + %splat = shufflevector <2 x float> %insert, <2 x float> undef, <2 x i32> zeroinitializer + ret <2 x float> %splat +} + +; Don't use SVE for 128-bit vectors. 
+define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) #0 { +; CHECK-LABEL: splat_v4f32: +; CHECK: dup v0.4s, v0.s[0] +; CHECK-NEXT: ret + %insert = insertelement <4 x float> undef, float %a, i64 0 + %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %splat +} + +define void @splat_v8f32(float %a, <8 x float>* %b) #0 { +; CHECK-LABEL: splat_v8f32: +; CHECK-DAG: mov [[RES:z[0-9]+]].s, s0 +; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK-NEXT: ret + %insert = insertelement <8 x float> undef, float %a, i64 0 + %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer + store <8 x float> %splat, <8 x float>* %b + ret void +} + +define void @splat_v16f32(float %a, <16 x float>* %b) #0 { +; CHECK-LABEL: splat_v16f32: +; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].s, s0 +; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].s, s0 +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x[[B_HI]] +; VBITS_EQ_256-NEXT: ret + %insert = insertelement <16 x float> undef, float %a, i64 0 + %splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer + store <16 x float> %splat, <16 x float>* %b + ret void +} + +define void @splat_v32f32(float %a, <32 x float>* %b) #0 { +; CHECK-LABEL: splat_v32f32: +; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].s, s0 +; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %insert = insertelement <32 x float> undef, float %a, i64 0 + %splat = shufflevector <32 x float> %insert, <32 x float> undef, <32 x i32> zeroinitializer + store <32 x float> %splat, <32 x float>* %b + ret void +} + +define void @splat_v64f32(float %a, <64 x float>* %b) #0 { +; CHECK-LABEL: splat_v64f32: +; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].s, s0 +; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %insert = insertelement <64 x float> undef, float %a, i64 0 + %splat = shufflevector <64 x float> %insert, <64 x float> undef, <64 x i32> zeroinitializer + store <64 x float> %splat, <64 x float>* %b + ret void +} + +; Don't use SVE for 64-bit vectors. +define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) #0 { +; CHECK-LABEL: splat_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %insert = insertelement <1 x double> undef, double %a, i64 0 + %splat = shufflevector <1 x double> %insert, <1 x double> undef, <1 x i32> zeroinitializer + ret <1 x double> %splat +} + +; Don't use SVE for 128-bit vectors. 
+define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) #0 { +; CHECK-LABEL: splat_v2f64: +; CHECK: dup v0.2d, v0.d[0] +; CHECK-NEXT: ret + %insert = insertelement <2 x double> undef, double %a, i64 0 + %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer + ret <2 x double> %splat +} + +define void @splat_v4f64(double %a, <4 x double>* %b) #0 { +; CHECK-LABEL: splat_v4f64: +; CHECK-DAG: mov [[RES:z[0-9]+]].d, d0 +; CHECK-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK-NEXT: ret + %insert = insertelement <4 x double> undef, double %a, i64 0 + %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer + store <4 x double> %splat, <4 x double>* %b + ret void +} + +define void @splat_v8f64(double %a, <8 x double>* %b) #0 { +; CHECK-LABEL: splat_v8f64: +; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].d, d0 +; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].d, d0 +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x[[B_HI]] +; VBITS_EQ_256-NEXT: ret + %insert = insertelement <8 x double> undef, double %a, i64 0 + %splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer + store <8 x double> %splat, <8 x double>* %b + ret void +} + +define void @splat_v16f64(double %a, <16 x double>* %b) #0 { +; CHECK-LABEL: splat_v16f64: +; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].d, d0 +; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %insert = insertelement <16 x double> undef, double %a, i64 0 + %splat = shufflevector <16 x double> %insert, <16 x double> undef, <16 x i32> zeroinitializer + store <16 x double> %splat, <16 x double>* %b + ret void +} + +define void @splat_v32f64(double %a, <32 x double>* %b) #0 { +; CHECK-LABEL: splat_v32f64: +; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].d, d0 +; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %insert = insertelement <32 x double> undef, double %a, i64 0 + %splat = shufflevector <32 x double> %insert, <32 x double> undef, <32 x i32> zeroinitializer + store <32 x double> %splat, <32 x double>* %b + ret void +} + +; +; DUP (integer immediate) +; + +define void @splat_imm_v64i8(<64 x i8>* %a) #0 { +; CHECK-LABEL: splat_imm_v64i8: +; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].b, #1 +; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].b, vl64 +; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + %insert = insertelement <64 x i8> undef, i8 1, i64 0 + %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer + store <64 x i8> %splat, <64 x i8>* %a + ret void +} + 
+define void @splat_imm_v32i16(<32 x i16>* %a) #0 { +; CHECK-LABEL: splat_imm_v32i16: +; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].h, #2 +; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + %insert = insertelement <32 x i16> undef, i16 2, i64 0 + %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer + store <32 x i16> %splat, <32 x i16>* %a + ret void +} + +define void @splat_imm_v16i32(<16 x i32>* %a) #0 { +; CHECK-LABEL: splat_imm_v16i32: +; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].s, #3 +; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + %insert = insertelement <16 x i32> undef, i32 3, i64 0 + %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer + store <16 x i32> %splat, <16 x i32>* %a + ret void +} + +define void @splat_imm_v8i64(<8 x i64>* %a) #0 { +; CHECK-LABEL: splat_imm_v8i64: +; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].d, #4 +; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + %insert = insertelement <8 x i64> undef, i64 4, i64 0 + %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer + store <8 x i64> %splat, <8 x i64>* %a + ret void +} + +; +; DUP (floating-point immediate) +; + +define void @splat_imm_v32f16(<32 x half>* %a) #0 { +; CHECK-LABEL: splat_imm_v32f16: +; VBITS_GE_512-DAG: fmov [[RES:z[0-9]+]].h, #5.00000000 +; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + %insert = insertelement <32 x half> undef, half 5.0, i64 0 + %splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer + store <32 x half> %splat, <32 x half>* %a + ret void +} + +define void @splat_imm_v16f32(<16 x float>* %a) #0 { +; CHECK-LABEL: 
splat_imm_v16f32: +; VBITS_GE_512-DAG: fmov [[RES:z[0-9]+]].s, #6.00000000 +; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + %insert = insertelement <16 x float> undef, float 6.0, i64 0 + %splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer + store <16 x float> %splat, <16 x float>* %a + ret void +} + +define void @splat_imm_v8f64(<8 x double>* %a) #0 { +; CHECK-LABEL: splat_imm_v8f64: +; VBITS_GE_512-DAG: fmov [[RES:z[0-9]+]].d, #7.00000000 +; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + %insert = insertelement <8 x double> undef, double 7.0, i64 0 + %splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer + store <8 x double> %splat, <8 x double>* %a + ret void +} +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll index 0d6b56e5b28281..857a2f6ff204ed 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll @@ -93,7 +93,7 @@ define <8 x i1> @no_warn_dropped_scalable(<8 x i32>* %in) #0 { ; CHECK-LABEL: no_warn_dropped_scalable: ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8 ; CHECK: ld1w { [[A:z[0-9]+]].s }, [[PG]]/z, [x0] -; CHECK: cmpgt p{{[0-9]}}.s, [[PG]]/z, [[A]].s, z{{[0-9]+}}.s +; CHECK: cmpgt p{{[0-9]}}.s, [[PG]]/z, [[A]].s, #0 ; CHECK: ret %a = load <8 x i32>, <8 x i32>* %in br label %bb1 diff --git a/llvm/test/CodeGen/AArch64/sve-gep.ll b/llvm/test/CodeGen/AArch64/sve-gep.ll index 4230a7fa287160..8f68a38e2cd20d 100644 --- a/llvm/test/CodeGen/AArch64/sve-gep.ll +++ b/llvm/test/CodeGen/AArch64/sve-gep.ll @@ -105,10 +105,11 @@ define *> @scalable_of_scalable_1( insertelement ( undef, i64 1, i32 0), zeroinitializer, zeroinitializer %d = 
getelementptr , * %base, %idx @@ -119,9 +120,10 @@ define *> @scalable_of_scalable_2( insertelement ( undef, i64 1, i32 0), zeroinitializer, zeroinitializer %d = getelementptr , *> %base, %idx @@ -135,8 +137,7 @@ define *> @scalable_of_scalable_3(, *> %base, %idx ret *> %d diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-int-arith.ll index d70e817085500f..bcd94d2d019334 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-arith.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t @@ -5,200 +6,262 @@ ; WARN-NOT: warning define @add_i64( %a, %b) { -; CHECK-LABEL: add_i64 -; CHECK: add z0.d, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: add_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: ret %res = add %a, %b ret %res } define @add_i32( %a, %b) { -; CHECK-LABEL: add_i32 -; CHECK: add z0.s, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: ret %res = add %a, %b ret %res } define @add_i16( %a, %b) { -; CHECK-LABEL: add_i16 -; CHECK: add z0.h, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: add_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: ret %res = add %a, %b ret %res } define @add_i8( %a, %b) { -; CHECK-LABEL: add_i8 -; CHECK: add z0.b, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: add_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: ret %res = add %a, %b ret %res } define @sub_i64( %a, %b) { -; CHECK-LABEL: sub_i64 -; CHECK: sub z0.d, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: sub_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: ret %res = sub %a, %b ret %res } define @sub_i32( %a, %b) { -; CHECK-LABEL: sub_i32 -; CHECK: sub 
z0.s, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: sub_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: ret %res = sub %a, %b ret %res } define @sub_i16( %a, %b) { -; CHECK-LABEL: sub_i16 -; CHECK: sub z0.h, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: sub_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.h, z0.h, z1.h +; CHECK-NEXT: ret %res = sub %a, %b ret %res } define @sub_i8( %a, %b) { -; CHECK-LABEL: sub_i8 -; CHECK: sub z0.b, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: sub_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.b, z0.b, z1.b +; CHECK-NEXT: ret %res = sub %a, %b ret %res } define @sqadd_i64( %a, %b) { -; CHECK-LABEL: sqadd_i64 -; CHECK: sqadd z0.d, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: sqadd_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.d, z0.d, z1.d +; CHECK-NEXT: ret %res = call @llvm.sadd.sat.nxv2i64( %a, %b) ret %res } define @sqadd_i32( %a, %b) { -; CHECK-LABEL: sqadd_i32 -; CHECK: sqadd z0.s, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: sqadd_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.s, z0.s, z1.s +; CHECK-NEXT: ret %res = call @llvm.sadd.sat.nxv4i32( %a, %b) ret %res } define @sqadd_i16( %a, %b) { -; CHECK-LABEL: sqadd_i16 -; CHECK: sqadd z0.h, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: sqadd_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.h, z0.h, z1.h +; CHECK-NEXT: ret %res = call @llvm.sadd.sat.nxv8i16( %a, %b) ret %res } define @sqadd_i8( %a, %b) { -; CHECK-LABEL: sqadd_i8 -; CHECK: sqadd z0.b, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: sqadd_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.b, z0.b, z1.b +; CHECK-NEXT: ret %res = call @llvm.sadd.sat.nxv16i8( %a, %b) ret %res } define @sqsub_i64( %a, %b) { -; CHECK-LABEL: sqsub_i64 -; CHECK: sqsub z0.d, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: sqsub_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.d, z0.d, z1.d +; CHECK-NEXT: ret %res = call @llvm.ssub.sat.nxv2i64( %a, %b) ret %res } define @sqsub_i32( %a, %b) { -; CHECK-LABEL: 
sqsub_i32 -; CHECK: sqsub z0.s, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: sqsub_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.s, z0.s, z1.s +; CHECK-NEXT: ret %res = call @llvm.ssub.sat.nxv4i32( %a, %b) ret %res } define @sqsub_i16( %a, %b) { -; CHECK-LABEL: sqsub_i16 -; CHECK: sqsub z0.h, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: sqsub_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.h, z0.h, z1.h +; CHECK-NEXT: ret %res = call @llvm.ssub.sat.nxv8i16( %a, %b) ret %res } define @sqsub_i8( %a, %b) { -; CHECK-LABEL: sqsub_i8 -; CHECK: sqsub z0.b, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: sqsub_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.b, z0.b, z1.b +; CHECK-NEXT: ret %res = call @llvm.ssub.sat.nxv16i8( %a, %b) ret %res } define @uqadd_i64( %a, %b) { -; CHECK-LABEL: uqadd_i64 -; CHECK: uqadd z0.d, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: uqadd_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.d, z0.d, z1.d +; CHECK-NEXT: ret %res = call @llvm.uadd.sat.nxv2i64( %a, %b) ret %res } define @uqadd_i32( %a, %b) { -; CHECK-LABEL: uqadd_i32 -; CHECK: uqadd z0.s, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: uqadd_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.s, z0.s, z1.s +; CHECK-NEXT: ret %res = call @llvm.uadd.sat.nxv4i32( %a, %b) ret %res } define @uqadd_i16( %a, %b) { -; CHECK-LABEL: uqadd_i16 -; CHECK: uqadd z0.h, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: uqadd_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.h, z0.h, z1.h +; CHECK-NEXT: ret %res = call @llvm.uadd.sat.nxv8i16( %a, %b) ret %res } define @uqadd_i8( %a, %b) { -; CHECK-LABEL: uqadd_i8 -; CHECK: uqadd z0.b, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: uqadd_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.b, z0.b, z1.b +; CHECK-NEXT: ret %res = call @llvm.uadd.sat.nxv16i8( %a, %b) ret %res } define @uqsub_i64( %a, %b) { -; CHECK-LABEL: uqsub_i64 -; CHECK: uqsub z0.d, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: uqsub_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.d, z0.d, 
z1.d +; CHECK-NEXT: ret %res = call @llvm.usub.sat.nxv2i64( %a, %b) ret %res } define @uqsub_i32( %a, %b) { -; CHECK-LABEL: uqsub_i32 -; CHECK: uqsub z0.s, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: uqsub_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.s, z0.s, z1.s +; CHECK-NEXT: ret %res = call @llvm.usub.sat.nxv4i32( %a, %b) ret %res } define @uqsub_i16( %a, %b) { -; CHECK-LABEL: uqsub_i16 -; CHECK: uqsub z0.h, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: uqsub_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.h, z0.h, z1.h +; CHECK-NEXT: ret %res = call @llvm.usub.sat.nxv8i16( %a, %b) ret %res } define @uqsub_i8( %a, %b) { -; CHECK-LABEL: uqsub_i8 -; CHECK: uqsub z0.b, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: uqsub_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.b, z0.b, z1.b +; CHECK-NEXT: ret %res = call @llvm.usub.sat.nxv16i8( %a, %b) ret %res } +define @mla_i8( %a, %b, %c) { +; CHECK-LABEL: mla_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mla z2.b, p0/m, z0.b, z1.b +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %prod = mul %a, %b + %res = add %c, %prod + ret %res +} + +define @mla_i8_multiuse( %a, %b, %c, * %p) { +; CHECK-LABEL: mla_i8_multiuse: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mul z1.b, p0/m, z1.b, z0.b +; CHECK-NEXT: add z0.b, z2.b, z1.b +; CHECK-NEXT: st1b { z1.b }, p0, [x0] +; CHECK-NEXT: ret + %prod = mul %a, %b + store %prod, * %p + %res = add %c, %prod + ret %res +} + +define @mls_i8( %a, %b, %c) { +; CHECK-LABEL: mls_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mls z2.b, p0/m, z0.b, z1.b +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %prod = mul %a, %b + %res = sub %c, %prod + ret %res +} + declare @llvm.sadd.sat.nxv16i8(, ) declare @llvm.sadd.sat.nxv8i16(, ) declare @llvm.sadd.sat.nxv4i32(, ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll index 0a6842921cbe3d..c70686d3447c85 100644 
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t @@ -8,8 +9,9 @@ define @smax_i8( %a) { ; CHECK-LABEL: smax_i8: -; CHECK: smax z0.b, z0.b, #-128 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smax z0.b, z0.b, #-128 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 -128, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -21,8 +23,9 @@ define @smax_i8( %a) { define @smax_i16( %a) { ; CHECK-LABEL: smax_i16: -; CHECK: smax z0.h, z0.h, #127 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smax z0.h, z0.h, #127 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 127, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -34,8 +37,9 @@ define @smax_i16( %a) { define @smax_i32( %a) { ; CHECK-LABEL: smax_i32: -; CHECK: smax z0.s, z0.s, #-128 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smax z0.s, z0.s, #-128 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 -128, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -47,8 +51,9 @@ define @smax_i32( %a) { define @smax_i64( %a) { ; CHECK-LABEL: smax_i64: -; CHECK: smax z0.d, z0.d, #127 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smax z0.d, z0.d, #127 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 127, i64 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -62,8 +67,9 @@ define @smax_i64( %a) { define @smin_i8( %a) { ; CHECK-LABEL: smin_i8: -; CHECK: smin z0.b, z0.b, #127 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smin z0.b, z0.b, #127 +; CHECK-NEXT: 
ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 127, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -75,8 +81,9 @@ define @smin_i8( %a) { define @smin_i16( %a) { ; CHECK-LABEL: smin_i16: -; CHECK: smin z0.h, z0.h, #-128 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smin z0.h, z0.h, #-128 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 -128, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -88,8 +95,9 @@ define @smin_i16( %a) { define @smin_i32( %a) { ; CHECK-LABEL: smin_i32: -; CHECK: smin z0.s, z0.s, #127 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smin z0.s, z0.s, #127 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 127, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -101,8 +109,9 @@ define @smin_i32( %a) { define @smin_i64( %a) { ; CHECK-LABEL: smin_i64: -; CHECK: smin z0.d, z0.d, #-128 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smin z0.d, z0.d, #-128 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 -128, i64 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -116,8 +125,9 @@ define @smin_i64( %a) { define @umax_i8( %a) { ; CHECK-LABEL: umax_i8: -; CHECK: umax z0.b, z0.b, #0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umax z0.b, z0.b, #0 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 0, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -129,8 +139,9 @@ define @umax_i8( %a) { define @umax_i16( %a) { ; CHECK-LABEL: umax_i16: -; CHECK: umax z0.h, z0.h, #255 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umax z0.h, z0.h, #255 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -142,8 +153,9 @@ define 
@umax_i16( %a) { define @umax_i32( %a) { ; CHECK-LABEL: umax_i32: -; CHECK: umax z0.s, z0.s, #0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umax z0.s, z0.s, #0 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 0, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -155,8 +167,9 @@ define @umax_i32( %a) { define @umax_i64( %a) { ; CHECK-LABEL: umax_i64: -; CHECK: umax z0.d, z0.d, #255 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umax z0.d, z0.d, #255 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 255, i64 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -170,8 +183,9 @@ define @umax_i64( %a) { define @umin_i8( %a) { ; CHECK-LABEL: umin_i8: -; CHECK: umin z0.b, z0.b, #255 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umin z0.b, z0.b, #255 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -183,8 +197,9 @@ define @umin_i8( %a) { define @umin_i16( %a) { ; CHECK-LABEL: umin_i16: -; CHECK: umin z0.h, z0.h, #0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umin z0.h, z0.h, #0 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 0, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -196,8 +211,9 @@ define @umin_i16( %a) { define @umin_i32( %a) { ; CHECK-LABEL: umin_i32: -; CHECK: umin z0.s, z0.s, #255 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umin z0.s, z0.s, #255 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -209,8 +225,9 @@ define @umin_i32( %a) { define @umin_i64( %a) { ; CHECK-LABEL: umin_i64: -; CHECK: umin z0.d, z0.d, #0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umin z0.d, z0.d, #0 +; CHECK-NEXT: ret 
%pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 0, i64 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -224,8 +241,9 @@ define @umin_i64( %a) { define @sqadd_b_lowimm( %a) { ; CHECK-LABEL: sqadd_b_lowimm: -; CHECK: sqadd z0.b, z0.b, #27 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.b, z0.b, #27 // =0x1b +; CHECK-NEXT: ret %elt = insertelement undef, i8 27, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqadd.x.nxv16i8( %a, @@ -235,8 +253,9 @@ define @sqadd_b_lowimm( %a) { define @sqadd_h_lowimm( %a) { ; CHECK-LABEL: sqadd_h_lowimm: -; CHECK: sqadd z0.h, z0.h, #43 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.h, z0.h, #43 // =0x2b +; CHECK-NEXT: ret %elt = insertelement undef, i16 43, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqadd.x.nxv8i16( %a, @@ -246,8 +265,9 @@ define @sqadd_h_lowimm( %a) { define @sqadd_h_highimm( %a) { ; CHECK-LABEL: sqadd_h_highimm: -; CHECK: sqadd z0.h, z0.h, #2048 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.h, z0.h, #2048 // =0x800 +; CHECK-NEXT: ret %elt = insertelement undef, i16 2048, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqadd.x.nxv8i16( %a, @@ -257,8 +277,9 @@ define @sqadd_h_highimm( %a) { define @sqadd_s_lowimm( %a) { ; CHECK-LABEL: sqadd_s_lowimm: -; CHECK: sqadd z0.s, z0.s, #1 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.s, z0.s, #1 // =0x1 +; CHECK-NEXT: ret %elt = insertelement undef, i32 1, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqadd.x.nxv4i32( %a, @@ -268,8 +289,9 @@ define @sqadd_s_lowimm( %a) { define @sqadd_s_highimm( %a) { ; CHECK-LABEL: sqadd_s_highimm: -; CHECK: sqadd z0.s, z0.s, #8192 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.s, z0.s, #8192 // =0x2000 +; CHECK-NEXT: ret %elt = insertelement undef, i32 
8192, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqadd.x.nxv4i32( %a, @@ -279,8 +301,9 @@ define @sqadd_s_highimm( %a) { define @sqadd_d_lowimm( %a) { ; CHECK-LABEL: sqadd_d_lowimm: -; CHECK: sqadd z0.d, z0.d, #255 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.d, z0.d, #255 // =0xff +; CHECK-NEXT: ret %elt = insertelement undef, i64 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqadd.x.nxv2i64( %a, @@ -290,8 +313,9 @@ define @sqadd_d_lowimm( %a) { define @sqadd_d_highimm( %a) { ; CHECK-LABEL: sqadd_d_highimm: -; CHECK: sqadd z0.d, z0.d, #65280 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.d, z0.d, #65280 // =0xff00 +; CHECK-NEXT: ret %elt = insertelement undef, i64 65280, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqadd.x.nxv2i64( %a, @@ -303,8 +327,9 @@ define @sqadd_d_highimm( %a) { define @sqsub_b_lowimm( %a) { ; CHECK-LABEL: sqsub_b_lowimm: -; CHECK: sqsub z0.b, z0.b, #27 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.b, z0.b, #27 // =0x1b +; CHECK-NEXT: ret %elt = insertelement undef, i8 27, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqsub.x.nxv16i8( %a, @@ -314,8 +339,9 @@ define @sqsub_b_lowimm( %a) { define @sqsub_h_lowimm( %a) { ; CHECK-LABEL: sqsub_h_lowimm: -; CHECK: sqsub z0.h, z0.h, #43 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.h, z0.h, #43 // =0x2b +; CHECK-NEXT: ret %elt = insertelement undef, i16 43, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqsub.x.nxv8i16( %a, @@ -325,8 +351,9 @@ define @sqsub_h_lowimm( %a) { define @sqsub_h_highimm( %a) { ; CHECK-LABEL: sqsub_h_highimm: -; CHECK: sqsub z0.h, z0.h, #2048 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.h, z0.h, #2048 // =0x800 +; CHECK-NEXT: ret %elt = insertelement undef, i16 2048, i32 0 
%splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqsub.x.nxv8i16( %a, @@ -336,8 +363,9 @@ define @sqsub_h_highimm( %a) { define @sqsub_s_lowimm( %a) { ; CHECK-LABEL: sqsub_s_lowimm: -; CHECK: sqsub z0.s, z0.s, #1 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.s, z0.s, #1 // =0x1 +; CHECK-NEXT: ret %elt = insertelement undef, i32 1, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqsub.x.nxv4i32( %a, @@ -347,8 +375,9 @@ define @sqsub_s_lowimm( %a) { define @sqsub_s_highimm( %a) { ; CHECK-LABEL: sqsub_s_highimm: -; CHECK: sqsub z0.s, z0.s, #8192 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.s, z0.s, #8192 // =0x2000 +; CHECK-NEXT: ret %elt = insertelement undef, i32 8192, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqsub.x.nxv4i32( %a, @@ -358,8 +387,9 @@ define @sqsub_s_highimm( %a) { define @sqsub_d_lowimm( %a) { ; CHECK-LABEL: sqsub_d_lowimm: -; CHECK: sqsub z0.d, z0.d, #255 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.d, z0.d, #255 // =0xff +; CHECK-NEXT: ret %elt = insertelement undef, i64 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqsub.x.nxv2i64( %a, @@ -369,8 +399,9 @@ define @sqsub_d_lowimm( %a) { define @sqsub_d_highimm( %a) { ; CHECK-LABEL: sqsub_d_highimm: -; CHECK: sqsub z0.d, z0.d, #65280 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.d, z0.d, #65280 // =0xff00 +; CHECK-NEXT: ret %elt = insertelement undef, i64 65280, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqsub.x.nxv2i64( %a, @@ -382,8 +413,9 @@ define @sqsub_d_highimm( %a) { define @uqadd_b_lowimm( %a) { ; CHECK-LABEL: uqadd_b_lowimm: -; CHECK: uqadd z0.b, z0.b, #27 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.b, z0.b, #27 // =0x1b +; CHECK-NEXT: ret %elt = insertelement undef, i8 27, i32 0 %splat = 
shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqadd.x.nxv16i8( %a, @@ -393,8 +425,9 @@ define @uqadd_b_lowimm( %a) { define @uqadd_h_lowimm( %a) { ; CHECK-LABEL: uqadd_h_lowimm: -; CHECK: uqadd z0.h, z0.h, #43 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.h, z0.h, #43 // =0x2b +; CHECK-NEXT: ret %elt = insertelement undef, i16 43, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqadd.x.nxv8i16( %a, @@ -404,8 +437,9 @@ define @uqadd_h_lowimm( %a) { define @uqadd_h_highimm( %a) { ; CHECK-LABEL: uqadd_h_highimm: -; CHECK: uqadd z0.h, z0.h, #2048 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.h, z0.h, #2048 // =0x800 +; CHECK-NEXT: ret %elt = insertelement undef, i16 2048, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqadd.x.nxv8i16( %a, @@ -415,8 +449,9 @@ define @uqadd_h_highimm( %a) { define @uqadd_s_lowimm( %a) { ; CHECK-LABEL: uqadd_s_lowimm: -; CHECK: uqadd z0.s, z0.s, #1 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.s, z0.s, #1 // =0x1 +; CHECK-NEXT: ret %elt = insertelement undef, i32 1, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqadd.x.nxv4i32( %a, @@ -428,8 +463,9 @@ define @uqadd_s_lowimm( %a) { define @uqsub_b_lowimm( %a) { ; CHECK-LABEL: uqsub_b_lowimm: -; CHECK: uqsub z0.b, z0.b, #27 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.b, z0.b, #27 // =0x1b +; CHECK-NEXT: ret %elt = insertelement undef, i8 27, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqsub.x.nxv16i8( %a, @@ -439,8 +475,9 @@ define @uqsub_b_lowimm( %a) { define @uqsub_h_lowimm( %a) { ; CHECK-LABEL: uqsub_h_lowimm: -; CHECK: uqsub z0.h, z0.h, #43 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.h, z0.h, #43 // =0x2b +; CHECK-NEXT: ret %elt = insertelement undef, i16 43, i32 0 %splat = shufflevector %elt, undef, 
zeroinitializer %out = call @llvm.aarch64.sve.uqsub.x.nxv8i16( %a, @@ -450,8 +487,9 @@ define @uqsub_h_lowimm( %a) { define @uqsub_h_highimm( %a) { ; CHECK-LABEL: uqsub_h_highimm: -; CHECK: uqsub z0.h, z0.h, #2048 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.h, z0.h, #2048 // =0x800 +; CHECK-NEXT: ret %elt = insertelement undef, i16 2048, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqsub.x.nxv8i16( %a, @@ -461,8 +499,9 @@ define @uqsub_h_highimm( %a) { define @uqsub_s_lowimm( %a) { ; CHECK-LABEL: uqsub_s_lowimm: -; CHECK: uqsub z0.s, z0.s, #1 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.s, z0.s, #1 // =0x1 +; CHECK-NEXT: ret %elt = insertelement undef, i32 1, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqsub.x.nxv4i32( %a, @@ -472,8 +511,9 @@ define @uqsub_s_lowimm( %a) { define @uqsub_s_highimm( %a) { ; CHECK-LABEL: uqsub_s_highimm: -; CHECK: uqsub z0.s, z0.s, #8192 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.s, z0.s, #8192 // =0x2000 +; CHECK-NEXT: ret %elt = insertelement undef, i32 8192, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqsub.x.nxv4i32( %a, @@ -483,8 +523,9 @@ define @uqsub_s_highimm( %a) { define @uqsub_d_lowimm( %a) { ; CHECK-LABEL: uqsub_d_lowimm: -; CHECK: uqsub z0.d, z0.d, #255 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.d, z0.d, #255 // =0xff +; CHECK-NEXT: ret %elt = insertelement undef, i64 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqsub.x.nxv2i64( %a, @@ -494,8 +535,9 @@ define @uqsub_d_lowimm( %a) { define @uqsub_d_highimm( %a) { ; CHECK-LABEL: uqsub_d_highimm: -; CHECK: uqsub z0.d, z0.d, #65280 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.d, z0.d, #65280 // =0xff00 +; CHECK-NEXT: ret %elt = insertelement undef, i64 65280, i32 0 %splat = shufflevector %elt, undef, 
zeroinitializer %out = call @llvm.aarch64.sve.uqsub.x.nxv2i64( %a, @@ -506,8 +548,9 @@ define @uqsub_d_highimm( %a) { define @uqadd_s_highimm( %a) { ; CHECK-LABEL: uqadd_s_highimm: -; CHECK: uqadd z0.s, z0.s, #8192 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.s, z0.s, #8192 // =0x2000 +; CHECK-NEXT: ret %elt = insertelement undef, i32 8192, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqadd.x.nxv4i32( %a, @@ -517,8 +560,9 @@ define @uqadd_s_highimm( %a) { define @uqadd_d_lowimm( %a) { ; CHECK-LABEL: uqadd_d_lowimm: -; CHECK: uqadd z0.d, z0.d, #255 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.d, z0.d, #255 // =0xff +; CHECK-NEXT: ret %elt = insertelement undef, i64 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqadd.x.nxv2i64( %a, @@ -528,8 +572,9 @@ define @uqadd_d_lowimm( %a) { define @uqadd_d_highimm( %a) { ; CHECK-LABEL: uqadd_d_highimm: -; CHECK: uqadd z0.d, z0.d, #65280 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.d, z0.d, #65280 // =0xff00 +; CHECK-NEXT: ret %elt = insertelement undef, i64 65280, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqadd.x.nxv2i64( %a, @@ -539,10 +584,24 @@ define @uqadd_d_highimm( %a) { ; ASR -define @asr_i8( %a) { +define @asr_i8( %pg, %a) { ; CHECK-LABEL: asr_i8: -; CHECK: asr z0.b, z0.b, #8 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.b, p0/m, z0.b, #8 +; CHECK-NEXT: ret + %elt = insertelement undef, i8 9, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.asr.nxv16i8( %pg, + %a, + %splat) + ret %out +} + +define @asr_i8_all_active( %a) { +; CHECK-LABEL: asr_i8_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.b, z0.b, #8 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 8, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ 
-552,10 +611,37 @@ define @asr_i8( %a) { ret %out } -define @asr_i16( %a) { +; Ensure we don't match a right shift by zero to the immediate form. +define @asr_i8_too_small( %pg, %a) { +; CHECK-LABEL: asr_i8_too_small: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.b, #0 // =0x0 +; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.nxv16i8( %pg, + %a, + zeroinitializer) + ret %out +} + +define @asr_i16( %pg, %a) { ; CHECK-LABEL: asr_i16: -; CHECK: asr z0.h, z0.h, #16 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #16 +; CHECK-NEXT: ret + %elt = insertelement undef, i16 17, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.asr.nxv8i16( %pg, + %a, + %splat) + ret %out +} + +define @asr_i16_all_active( %a) { +; CHECK-LABEL: asr_i16_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.h, z0.h, #16 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 16, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -565,10 +651,37 @@ define @asr_i16( %a) { ret %out } -define @asr_i32( %a) { +; Ensure we don't match a right shift by zero to the immediate form. 
+define @asr_i16_too_small( %pg, %a) { +; CHECK-LABEL: asr_i16_too_small: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.h, #0 // =0x0 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.nxv8i16( %pg, + %a, + zeroinitializer) + ret %out +} + +define @asr_i32( %pg, %a) { ; CHECK-LABEL: asr_i32: -; CHECK: asr z0.s, z0.s, #32 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #32 +; CHECK-NEXT: ret + %elt = insertelement undef, i32 33, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.asr.nxv4i32( %pg, + %a, + %splat) + ret %out +} + +define @asr_i32_all_active( %a) { +; CHECK-LABEL: asr_i32_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.s, z0.s, #32 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 32, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -578,10 +691,37 @@ define @asr_i32( %a) { ret %out } -define @asr_i64( %a) { +; Ensure we don't match a right shift by zero to the immediate form. 
+define @asr_i32_too_small( %pg, %a) { +; CHECK-LABEL: asr_i32_too_small: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #0 // =0x0 +; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.nxv4i32( %pg, + %a, + zeroinitializer) + ret %out +} + +define @asr_i64( %pg, %a) { ; CHECK-LABEL: asr_i64: -; CHECK: asr z0.d, z0.d, #64 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.d, p0/m, z0.d, #64 +; CHECK-NEXT: ret + %elt = insertelement undef, i64 65, i64 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.asr.nxv2i64( %pg, + %a, + %splat) + ret %out +} + +define @asr_i64_all_active( %a) { +; CHECK-LABEL: asr_i64_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.d, z0.d, #64 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 64, i64 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -591,12 +731,39 @@ define @asr_i64( %a) { ret %out } +; Ensure we don't match a right shift by zero to the immediate form. 
+define @asr_i64_too_small( %pg, %a) { +; CHECK-LABEL: asr_i64_too_small: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, #0 // =0x0 +; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.nxv2i64( %pg, + %a, + zeroinitializer) + ret %out +} + ; LSL -define @lsl_i8( %a) { +define @lsl_i8( %pg, %a) { ; CHECK-LABEL: lsl_i8: -; CHECK: lsl z0.b, z0.b, #7 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: ret + %elt = insertelement undef, i8 7, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i8_all_active( %a) { +; CHECK-LABEL: lsl_i8_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.b, z0.b, #7 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 7, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -606,10 +773,50 @@ define @lsl_i8( %a) { ret %out } -define @lsl_i16( %a) { +; Ensure we don't match a left shift bigger than its bitwidth to the immediate form. 
+define @lsl_i8_too_big( %pg, %a) { +; CHECK-LABEL: lsl_i8_too_big: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.b, #8 // =0x8 +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %elt = insertelement undef, i8 8, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i8_zero( %pg, %a) { +; CHECK-LABEL: lsl_i8_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, + %a, + zeroinitializer) + ret %out +} + +define @lsl_i16( %pg, %a) { ; CHECK-LABEL: lsl_i16: -; CHECK: lsl z0.h, z0.h, #15 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: ret + %elt = insertelement undef, i16 15, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv8i16( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i16_all_active( %a) { +; CHECK-LABEL: lsl_i16_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.h, z0.h, #15 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 15, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -619,10 +826,50 @@ define @lsl_i16( %a) { ret %out } -define @lsl_i32( %a) { +; Ensure we don't match a left shift bigger than its bitwidth to the immediate form. 
+define @lsl_i16_too_big( %pg, %a) { +; CHECK-LABEL: lsl_i16_too_big: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.h, #16 // =0x10 +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %elt = insertelement undef, i16 16, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv8i16( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i16_zero( %pg, %a) { +; CHECK-LABEL: lsl_i16_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv8i16( %pg, + %a, + zeroinitializer) + ret %out +} + +define @lsl_i32( %pg, %a) { ; CHECK-LABEL: lsl_i32: -; CHECK: lsl z0.s, z0.s, #31 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: ret + %elt = insertelement undef, i32 31, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv4i32( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i32_all_active( %a) { +; CHECK-LABEL: lsl_i32_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #31 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 31, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -632,10 +879,50 @@ define @lsl_i32( %a) { ret %out } -define @lsl_i64( %a) { +; Ensure we don't match a left shift bigger than its bitwidth to the immediate form. 
+define @lsl_i32_too_big( %pg, %a) { +; CHECK-LABEL: lsl_i32_too_big: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #32 // =0x20 +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %elt = insertelement undef, i32 32, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv4i32( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i32_zero( %pg, %a) { +; CHECK-LABEL: lsl_i32_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv4i32( %pg, + %a, + zeroinitializer) + ret %out +} + +define @lsl_i64( %pg, %a) { ; CHECK-LABEL: lsl_i64: -; CHECK: lsl z0.d, z0.d, #63 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: ret + %elt = insertelement undef, i64 63, i64 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv2i64( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i64_all_active( %a) { +; CHECK-LABEL: lsl_i64_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.d, z0.d, #63 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 63, i64 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -645,12 +932,52 @@ define @lsl_i64( %a) { ret %out } +; Ensure we don't match a left shift bigger than its bitwidth to the immediate form. 
+define @lsl_i64_too_big( %pg, %a) { +; CHECK-LABEL: lsl_i64_too_big: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, #64 // =0x40 +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %elt = insertelement undef, i64 64, i64 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv2i64( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i64_zero( %pg, %a) { +; CHECK-LABEL: lsl_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv2i64( %pg, + %a, + zeroinitializer) + ret %out +} + ; LSR -define @lsr_i8( %a) { +define @lsr_i8( %pg, %a) { ; CHECK-LABEL: lsr_i8: -; CHECK: lsr z0.b, z0.b, #8 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.b, p0/m, z0.b, #8 +; CHECK-NEXT: ret + %elt = insertelement undef, i8 9, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsr.nxv16i8( %pg, + %a, + %splat) + ret %out +} + +define @lsr_i8_all_active( %a) { +; CHECK-LABEL: lsr_i8_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.b, z0.b, #8 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 8, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -660,10 +987,37 @@ define @lsr_i8( %a) { ret %out } -define @lsr_i16( %a) { +; Ensure we don't match a right shift by zero to the immediate form. 
+define @lsr_i8_too_small( %pg, %a) { +; CHECK-LABEL: lsr_i8_too_small: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.b, #0 // =0x0 +; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv16i8( %pg, + %a, + zeroinitializer) + ret %out +} + +define @lsr_i16( %pg, %a) { ; CHECK-LABEL: lsr_i16: -; CHECK: lsr z0.h, z0.h, #16 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #16 +; CHECK-NEXT: ret + %elt = insertelement undef, i16 17, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsr.nxv8i16( %pg, + %a, + %splat) + ret %out +} + +define @lsr_i16_all_active( %a) { +; CHECK-LABEL: lsr_i16_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.h, z0.h, #16 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 16, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -673,10 +1027,37 @@ define @lsr_i16( %a) { ret %out } -define @lsr_i32( %a) { +; Ensure we don't match a right shift by zero to the immediate form. 
+define @lsr_i16_too_small( %pg, %a) { +; CHECK-LABEL: lsr_i16_too_small: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.h, #0 // =0x0 +; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv8i16( %pg, + %a, + zeroinitializer) + ret %out +} + +define @lsr_i32( %pg, %a) { ; CHECK-LABEL: lsr_i32: -; CHECK: lsr z0.s, z0.s, #32 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #32 +; CHECK-NEXT: ret + %elt = insertelement undef, i32 33, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsr.nxv4i32( %pg, + %a, + %splat) + ret %out +} + +define @lsr_i32_all_active( %a) { +; CHECK-LABEL: lsr_i32_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.s, z0.s, #32 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 32, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -686,10 +1067,37 @@ define @lsr_i32( %a) { ret %out } -define @lsr_i64( %a) { +; Ensure we don't match a right shift by zero to the immediate form. 
+define @lsr_i32_too_small( %pg, %a) { +; CHECK-LABEL: lsr_i32_too_small: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #0 // =0x0 +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv4i32( %pg, + %a, + zeroinitializer) + ret %out +} + +define @lsr_i64( %pg, %a) { ; CHECK-LABEL: lsr_i64: -; CHECK: lsr z0.d, z0.d, #64 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.d, p0/m, z0.d, #64 +; CHECK-NEXT: ret + %elt = insertelement undef, i64 65, i64 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsr.nxv2i64( %pg, + %a, + %splat) + ret %out +} + +define @lsr_i64_all_active( %a) { +; CHECK-LABEL: lsr_i64_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.d, z0.d, #64 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 64, i64 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -699,6 +1107,19 @@ define @lsr_i64( %a) { ret %out } +; Ensure we don't match a right shift by zero to the immediate form. 
+define @lsr_i64_too_small( %pg, %a) { +; CHECK-LABEL: lsr_i64_too_small: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, #0 // =0x0 +; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv2i64( %pg, + %a, + zeroinitializer) + ret %out +} + declare @llvm.aarch64.sve.sqadd.x.nxv16i8(, ) declare @llvm.aarch64.sve.sqadd.x.nxv8i16(, ) declare @llvm.aarch64.sve.sqadd.x.nxv4i32(, ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll index 92877233b2c9c1..f3fcddbaa2fcf0 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll @@ -108,6 +108,17 @@ define void @st2d_f64( %v0, %v1, %v0, %v1, %pred, i8** %addr) { +; CHECK-LABEL: st2d_ptr: +; CHECK: st2d { z0.d, z1.d }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st2.nxv2p0i8( %v0, + %v1, + %pred, + i8** %addr) + ret void +} + ; ; ST3B ; @@ -220,6 +231,18 @@ define void @st3d_f64( %v0, %v1, %v0, %v1, %v2, %pred, i8** %addr) { +; CHECK-LABEL: st3d_ptr: +; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st3.nxv2p0i8( %v0, + %v1, + %v2, + %pred, + i8** %addr) + ret void +} + ; ; ST4B ; @@ -340,6 +363,18 @@ define void @st4d_f64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, i8** %addr) { +; CHECK-LABEL: st4d_ptr: +; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st4.nxv2p0i8( %v0, + %v1, + %v2, + %v3, + %pred, + i8** %addr) + ret void +} ; ; STNT1B ; @@ -508,6 +543,7 @@ declare void @llvm.aarch64.sve.st2.nxv8f16(, , , , bfloat*) declare void @llvm.aarch64.sve.st2.nxv4f32(, , , float*) declare void @llvm.aarch64.sve.st2.nxv2f64(, , , double*) +declare void @llvm.aarch64.sve.st2.nxv2p0i8(, , , i8** nocapture) declare void @llvm.aarch64.sve.st3.nxv16i8(, , , , i8*) declare void @llvm.aarch64.sve.st3.nxv8i16(, , , , i16*) @@ -517,6 +553,7 @@ declare void 
@llvm.aarch64.sve.st3.nxv8f16(, , , , , bfloat*) declare void @llvm.aarch64.sve.st3.nxv4f32(, , , , float*) declare void @llvm.aarch64.sve.st3.nxv2f64(, , , , double*) +declare void @llvm.aarch64.sve.st3.nxv2p0i8(, , , , i8** nocapture) declare void @llvm.aarch64.sve.st4.nxv16i8(, , , , , i8*) declare void @llvm.aarch64.sve.st4.nxv8i16(, , , , , i16*) @@ -526,6 +563,7 @@ declare void @llvm.aarch64.sve.st4.nxv8f16(, , , , , , bfloat*) declare void @llvm.aarch64.sve.st4.nxv4f32(, , , , , float*) declare void @llvm.aarch64.sve.st4.nxv2f64(, , , , , double*) +declare void @llvm.aarch64.sve.st4.nxv2p0i8(, , , , , i8** nocapture) declare void @llvm.aarch64.sve.stnt1.nxv16i8(, , i8*) declare void @llvm.aarch64.sve.stnt1.nxv8i16(, , i16*) diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll index 7a765002ac9f8f..be8ec87e7a56bf 100644 --- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll @@ -303,7 +303,7 @@ define @splat_nxv2f64_zero() { define @splat_nxv8f16_imm() { ; CHECK-LABEL: splat_nxv8f16_imm: -; CHECK: mov z0.h, #1.0 +; CHECK: fmov z0.h, #1.00000000 ; CHECK-NEXT: ret %1 = insertelement undef, half 1.0, i32 0 %2 = shufflevector %1, undef, zeroinitializer @@ -312,7 +312,7 @@ define @splat_nxv8f16_imm() { define @splat_nxv4f16_imm() { ; CHECK-LABEL: splat_nxv4f16_imm: -; CHECK: mov z0.h, #1.0 +; CHECK: fmov z0.h, #1.00000000 ; CHECK-NEXT: ret %1 = insertelement undef, half 1.0, i32 0 %2 = shufflevector %1, undef, zeroinitializer @@ -321,7 +321,7 @@ define @splat_nxv4f16_imm() { define @splat_nxv2f16_imm() { ; CHECK-LABEL: splat_nxv2f16_imm: -; CHECK: mov z0.h, #1.0 +; CHECK: fmov z0.h, #1.00000000 ; CHECK-NEXT: ret %1 = insertelement undef, half 1.0, i32 0 %2 = shufflevector %1, undef, zeroinitializer @@ -330,7 +330,7 @@ define @splat_nxv2f16_imm() { define @splat_nxv4f32_imm() { ; CHECK-LABEL: splat_nxv4f32_imm: -; CHECK: mov z0.s, #1.0 +; CHECK: fmov z0.s, 
#1.00000000 ; CHECK-NEXT: ret %1 = insertelement undef, float 1.0, i32 0 %2 = shufflevector %1, undef, zeroinitializer @@ -339,7 +339,7 @@ define @splat_nxv4f32_imm() { define @splat_nxv2f32_imm() { ; CHECK-LABEL: splat_nxv2f32_imm: -; CHECK: mov z0.s, #1.0 +; CHECK: fmov z0.s, #1.00000000 ; CHECK-NEXT: ret %1 = insertelement undef, float 1.0, i32 0 %2 = shufflevector %1, undef, zeroinitializer @@ -348,7 +348,7 @@ define @splat_nxv2f32_imm() { define @splat_nxv2f64_imm() { ; CHECK-LABEL: splat_nxv2f64_imm: -; CHECK: mov z0.d, #1.0 +; CHECK: fmov z0.d, #1.00000000 ; CHECK-NEXT: ret %1 = insertelement undef, double 1.0, i32 0 %2 = shufflevector %1, undef, zeroinitializer diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll index 1f9c3bc60876e2..7564251c755d93 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll @@ -135,24 +135,24 @@ define void @constrained_if_register_class() { ; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_cmp_lg_u32 s4, 0 -; CHECK-NEXT: s_cselect_b32 s5, 1, 0 -; CHECK-NEXT: s_xor_b32 s5, s5, -1 -; CHECK-NEXT: s_and_b32 s5, s5, 1 -; CHECK-NEXT: s_mov_b32 s4, -1 -; CHECK-NEXT: s_cmp_lg_u32 s5, 0 +; CHECK-NEXT: s_cselect_b32 s4, 1, 0 +; CHECK-NEXT: s_xor_b32 s4, s4, -1 +; CHECK-NEXT: s_and_b32 s4, s4, 1 +; CHECK-NEXT: s_cmp_lg_u32 s4, 0 ; CHECK-NEXT: s_cbranch_scc0 BB4_6 ; CHECK-NEXT: ; %bb.1: ; %bb2 -; CHECK-NEXT: s_getpc_b64 s[6:7] -; CHECK-NEXT: s_add_u32 s6, s6, const.ptr@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s7, s7, const.ptr@gotpcrel32@hi+4 -; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 +; CHECK-NEXT: s_getpc_b64 s[4:5] +; CHECK-NEXT: s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4 +; CHECK-NEXT: s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+4 +; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; CHECK-NEXT: v_cmp_ne_u32_e64 
s[6:7], 0, 1 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 +; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v0, s6 -; CHECK-NEXT: v_mov_b32_e32 v1, s7 +; CHECK-NEXT: v_mov_b32_e32 v0, s4 +; CHECK-NEXT: v_mov_b32_e32 v1, s5 ; CHECK-NEXT: flat_load_dword v0, v[0:1] -; CHECK-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, 1 +; CHECK-NEXT: s_mov_b32 s4, -1 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0 ; CHECK-NEXT: s_xor_b64 s[8:9], vcc, s[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll new file mode 100644 index 00000000000000..5d1468eba04ea7 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll @@ -0,0 +1,137 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, <64 x i32> addrspace(1)* %ptr.out) #0 { +; GCN-LABEL: v_insert_v64i32_37: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-NEXT: v_lshlrev_b64 v[0:1], 8, v[0:1] +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v3, s1 +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_add_co_u32_e32 v8, vcc, v2, v0 +; GCN-NEXT: s_mov_b32 s1, 0 +; GCN-NEXT: v_addc_co_u32_e32 v9, vcc, v3, v1, vcc +; GCN-NEXT: s_movk_i32 s0, 0x80 +; GCN-NEXT: v_mov_b32_e32 v3, s1 +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_add_co_u32_e32 v12, vcc, v8, v2 +; GCN-NEXT: s_movk_i32 s0, 0xc0 +; GCN-NEXT: v_mov_b32_e32 v65, s1 +; GCN-NEXT: v_mov_b32_e32 v5, s3 +; GCN-NEXT: v_mov_b32_e32 v64, s0 +; GCN-NEXT: s_movk_i32 s0, 0x50 +; GCN-NEXT: v_mov_b32_e32 v69, s1 +; GCN-NEXT: v_addc_co_u32_e32 v13, 
vcc, v9, v3, vcc +; GCN-NEXT: v_mov_b32_e32 v4, s2 +; GCN-NEXT: v_add_co_u32_e32 v66, vcc, v4, v0 +; GCN-NEXT: v_mov_b32_e32 v68, s0 +; GCN-NEXT: s_movk_i32 s0, 0x60 +; GCN-NEXT: v_mov_b32_e32 v71, s1 +; GCN-NEXT: v_addc_co_u32_e32 v67, vcc, v5, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v70, s0 +; GCN-NEXT: s_movk_i32 s0, 0x70 +; GCN-NEXT: v_mov_b32_e32 v73, s1 +; GCN-NEXT: v_add_co_u32_e32 v74, vcc, v66, v2 +; GCN-NEXT: v_mov_b32_e32 v72, s0 +; GCN-NEXT: s_movk_i32 s0, 0x90 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_addc_co_u32_e32 v75, vcc, v67, v3, vcc +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: v_add_co_u32_e32 v76, vcc, v66, v0 +; GCN-NEXT: v_addc_co_u32_e32 v77, vcc, v67, v1, vcc +; GCN-NEXT: global_load_dwordx4 v[4:7], v[12:13], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[0:3], v[12:13], off +; GCN-NEXT: v_add_co_u32_e32 v10, vcc, 64, v8 +; GCN-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v9, vcc +; GCN-NEXT: v_add_co_u32_e32 v28, vcc, v8, v64 +; GCN-NEXT: v_addc_co_u32_e32 v29, vcc, v9, v65, vcc +; GCN-NEXT: global_load_dwordx4 v[32:35], v[8:9], off +; GCN-NEXT: global_load_dwordx4 v[36:39], v[8:9], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[40:43], v[8:9], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[44:47], v[8:9], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[48:51], v[10:11], off +; GCN-NEXT: global_load_dwordx4 v[52:55], v[10:11], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[56:59], v[10:11], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[60:63], v[10:11], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[8:11], v[12:13], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[12:15], v[12:13], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[16:19], v[28:29], off +; GCN-NEXT: global_load_dwordx4 v[20:23], v[28:29], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[24:27], v[28:29], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[28:31], v[28:29], off offset:48 +; GCN-NEXT: s_movk_i32 s0, 0xa0 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: 
v_mov_b32_e32 v5, 0x3e7 +; GCN-NEXT: s_waitcnt vmcnt(14) +; GCN-NEXT: global_store_dwordx4 v[74:75], v[0:3], off +; GCN-NEXT: global_store_dwordx4 v[76:77], v[4:7], off +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, v66, v0 +; GCN-NEXT: s_movk_i32 s0, 0xb0 +; GCN-NEXT: v_mov_b32_e32 v3, s1 +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v67, v1, vcc +; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v66, v2 +; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v67, v3, vcc +; GCN-NEXT: s_waitcnt vmcnt(7) +; GCN-NEXT: global_store_dwordx4 v[0:1], v[8:11], off +; GCN-NEXT: s_waitcnt vmcnt(7) +; GCN-NEXT: global_store_dwordx4 v[2:3], v[12:15], off +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, v66, v64 +; GCN-NEXT: s_movk_i32 s0, 0xd0 +; GCN-NEXT: v_mov_b32_e32 v3, s1 +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v67, v65, vcc +; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v66, v2 +; GCN-NEXT: s_movk_i32 s0, 0xe0 +; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v67, v3, vcc +; GCN-NEXT: s_waitcnt vmcnt(7) +; GCN-NEXT: global_store_dwordx4 v[0:1], v[16:19], off +; GCN-NEXT: s_waitcnt vmcnt(7) +; GCN-NEXT: global_store_dwordx4 v[2:3], v[20:23], off +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, v66, v0 +; GCN-NEXT: s_movk_i32 s0, 0xf0 +; GCN-NEXT: v_mov_b32_e32 v3, s1 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v67, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v66, v2 +; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v67, v3, vcc +; GCN-NEXT: s_waitcnt vmcnt(7) +; GCN-NEXT: global_store_dwordx4 v[0:1], v[24:27], off +; GCN-NEXT: s_waitcnt vmcnt(7) +; GCN-NEXT: global_store_dwordx4 v[2:3], v[28:31], off +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, 64, v66 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v67, vcc +; GCN-NEXT: global_store_dwordx4 v[0:1], v[36:39], off offset:-48 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[40:43], off 
offset:-32 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[44:47], off offset:-16 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[48:51], off +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, v66, v68 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v67, v69, vcc +; GCN-NEXT: global_store_dwordx4 v[66:67], v[32:35], off +; GCN-NEXT: global_store_dwordx4 v[0:1], v[52:55], off +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, v66, v70 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v67, v71, vcc +; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v66, v72 +; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v67, v73, vcc +; GCN-NEXT: global_store_dwordx4 v[0:1], v[56:59], off +; GCN-NEXT: global_store_dwordx4 v[2:3], v[60:63], off +; GCN-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.workitem.id.x() + %gep.in = getelementptr <64 x i32>, <64 x i32> addrspace(1)* %ptr.in, i32 %id + %vec = load <64 x i32>, <64 x i32> addrspace(1)* %gep.in + %insert = insertelement <64 x i32> %vec, i32 999, i32 37 + %gep.out = getelementptr <64 x i32>, <64 x i32> addrspace(1)* %ptr.out, i32 %id + store <64 x i32> %insert, <64 x i32> addrspace(1)* %gep.out + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +attributes #0 = { "amdgpu-waves-per-eu"="1,10" } +attributes #1 = { nounwind readnone speculatable willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir new file mode 100644 index 00000000000000..4e45fe689dd74f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir @@ -0,0 +1,46 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSAPAL %s +# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSAPAL %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d 
-mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MESA %s + +--- +name: groupstaticsize_v +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + ldsSize: 4096 + +body: | + bb.0: + + ; HSAPAL-LABEL: name: groupstaticsize_v + ; HSAPAL: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; HSAPAL: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; MESA-LABEL: name: groupstaticsize_v + ; MESA: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @llvm.amdgcn.groupstaticsize, implicit $exec + ; MESA: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + %0:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + S_ENDPGM 0, implicit %0 +... + +--- +name: groupstaticsize_s +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + ldsSize: 1024 + +body: | + bb.0: + + ; HSAPAL-LABEL: name: groupstaticsize_s + ; HSAPAL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; HSAPAL: S_ENDPGM 0, implicit [[S_MOV_B32_]] + ; MESA-LABEL: name: groupstaticsize_s + ; MESA: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @llvm.amdgcn.groupstaticsize + ; MESA: S_ENDPGM 0, implicit [[S_MOV_B32_]] + %0:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + S_ENDPGM 0, implicit %0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir index 81437acbbbc53b..7907608432ff19 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir @@ -420,34 +420,35 @@ regBankSelected: true tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr0 ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 - ; WAVE64: liveins: $vgpr0 + ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B64_]] ; WAVE64: S_ENDPGM 0, implicit [[COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 - ; WAVE32: liveins: $vgpr0 + ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc 
+ ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_1]] ; WAVE32: S_ENDPGM 0, implicit [[COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) - %2:sgpr(s1) = G_CONSTANT i1 true + %sgpr0:sgpr(s32) = COPY $sgpr0 + %2:sgpr(s1) = G_TRUNC %sgpr0 %6:sgpr(s32) = G_CONSTANT i32 0 %7:sgpr(p1) = G_IMPLICIT_DEF %9:vcc(s1) = COPY %0(s1) @@ -466,34 +467,35 @@ regBankSelected: true tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr0 ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 - ; WAVE64: liveins: $vgpr0 + ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_AND_B64_]] ; WAVE64: S_ENDPGM 0, implicit [[COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 - ; WAVE32: liveins: $vgpr0 + ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_AND_B32_1]] ; WAVE32: S_ENDPGM 0, implicit [[COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) - %2:sgpr(s1) = G_CONSTANT i1 true + %sgpr0:sgpr(s32) = COPY $sgpr0 + %2:sgpr(s1) = G_TRUNC %sgpr0 %6:sgpr(s32) = G_CONSTANT i32 0 %7:sgpr(p1) = G_IMPLICIT_DEF %9:vcc(s1) = COPY %0(s1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir index c8762c0d578eb0..20b886ebdadfa3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=WAVE64 +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=WAVE32 --- name: constant_v_s32 @@ -9,13 +10,21 @@ tracksRegLiveness: true body: | bb.0: - ; GCN-LABEL: name: constant_v_s32 - ; GCN: 
[[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GCN: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec - ; GCN: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + ; WAVE64-LABEL: name: constant_v_s32 + ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + ; WAVE32-LABEL: name: constant_v_s32 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] %0:vgpr(s32) = G_CONSTANT i32 0 %1:vgpr(s32) = G_CONSTANT i32 1 %2:vgpr(s32) = G_CONSTANT i32 -1 @@ -32,13 +41,21 @@ tracksRegLiveness: true body: | bb.0: - ; 
GCN-LABEL: name: constant_s_s32 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 - ; GCN: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 - ; GCN: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + ; WAVE64-LABEL: name: constant_s_s32 + ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE64: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE64: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 + ; WAVE64: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 + ; WAVE64: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + ; WAVE32-LABEL: name: constant_s_s32 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE32: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE32: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 + ; WAVE32: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 + ; WAVE32: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] %0:sgpr(s32) = G_CONSTANT i32 0 %1:sgpr(s32) = G_CONSTANT i32 1 %2:sgpr(s32) = G_CONSTANT i32 -1 @@ -47,22 +64,67 @@ body: | S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 ... 
-# FIXME -# --- -# name: constant_v_s16 -# legalized: true -# regBankSelected: true -# tracksRegLiveness: true - -# body: | -# bb.0: -# %0:vgpry(s16) = G_CONSTANT i16 0 -# %1:vgpr(s16) = G_CONSTANT i16 1 -# %2:vgpr(s16) = G_CONSTANT i16 -1 -# %3:vgpr(s16) = G_CONSTANT i16 -54 -# %4:vgpr(s16) = G_CONSTANT i16 27 -# S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 -# ... +--- +name: constant_v_s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + ; WAVE64-LABEL: name: constant_v_s16 + ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + ; WAVE32-LABEL: name: constant_v_s16 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + %0:vgpr(s16) = G_CONSTANT i16 0 + %1:vgpr(s16) = G_CONSTANT i16 1 + %2:vgpr(s16) = G_CONSTANT i16 -1 + %3:vgpr(s16) = G_CONSTANT i16 -54 + %4:vgpr(s16) = G_CONSTANT i16 27 + S_ENDPGM 0, implicit %0 , 
implicit %1 , implicit %2, implicit %3, implicit %4 +... + +--- +name: constant_s_s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + ; WAVE64-LABEL: name: constant_s_s16 + ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE64: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE64: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 + ; WAVE64: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 + ; WAVE64: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + ; WAVE32-LABEL: name: constant_s_s16 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE32: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE32: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 + ; WAVE32: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 + ; WAVE32: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + %0:sgpr(s16) = G_CONSTANT i16 0 + %1:sgpr(s16) = G_CONSTANT i16 1 + %2:sgpr(s16) = G_CONSTANT i16 -1 + %3:sgpr(s16) = G_CONSTANT i16 -54 + %4:sgpr(s16) = G_CONSTANT i16 27 + S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 +... 
--- name: constant_v_s64 @@ -72,32 +134,59 @@ tracksRegLiveness: true body: | bb.0: - ; GCN-LABEL: name: constant_v_s64 - ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1 - ; GCN: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec - ; GCN: [[V_MOV_B32_e32_5:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GCN: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_4]], %subreg.sub0, [[V_MOV_B32_e32_5]], %subreg.sub1 - ; GCN: [[V_MOV_B32_e32_6:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967242, implicit $exec - ; GCN: [[V_MOV_B32_e32_7:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GCN: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1 - ; GCN: [[V_MOV_B32_e32_8:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec - ; GCN: [[V_MOV_B32_e32_9:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_8]], %subreg.sub0, [[V_MOV_B32_e32_9]], %subreg.sub1 - ; GCN: [[V_MOV_B32_e32_10:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec - ; GCN: [[V_MOV_B32_e32_11:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_10]], %subreg.sub0, [[V_MOV_B32_e32_11]], %subreg.sub1 - ; GCN: [[V_MOV_B32_e32_12:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[V_MOV_B32_e32_13:%[0-9]+]]:vgpr_32 = 
V_MOV_B32_e32 1, implicit $exec - ; GCN: [[REG_SEQUENCE6:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_12]], %subreg.sub0, [[V_MOV_B32_e32_13]], %subreg.sub1 - ; GCN: [[V_MOV_B32_e32_14:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec - ; GCN: [[V_MOV_B32_e32_15:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec - ; GCN: [[REG_SEQUENCE7:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_14]], %subreg.sub0, [[V_MOV_B32_e32_15]], %subreg.sub1 - ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]], implicit [[REG_SEQUENCE4]], implicit [[REG_SEQUENCE5]], implicit [[REG_SEQUENCE6]], implicit [[REG_SEQUENCE7]] + ; WAVE64-LABEL: name: constant_v_s64 + ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; WAVE64: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1 + ; WAVE64: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_5:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE64: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_4]], %subreg.sub0, [[V_MOV_B32_e32_5]], %subreg.sub1 + ; WAVE64: [[V_MOV_B32_e32_6:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967242, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_7:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE64: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1 + ; WAVE64: [[V_MOV_B32_e32_8:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, 
implicit $exec + ; WAVE64: [[V_MOV_B32_e32_9:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_8]], %subreg.sub0, [[V_MOV_B32_e32_9]], %subreg.sub1 + ; WAVE64: [[V_MOV_B32_e32_10:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_11:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_10]], %subreg.sub0, [[V_MOV_B32_e32_11]], %subreg.sub1 + ; WAVE64: [[V_MOV_B32_e32_12:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_13:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE64: [[REG_SEQUENCE6:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_12]], %subreg.sub0, [[V_MOV_B32_e32_13]], %subreg.sub1 + ; WAVE64: [[V_MOV_B32_e32_14:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_15:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec + ; WAVE64: [[REG_SEQUENCE7:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_14]], %subreg.sub0, [[V_MOV_B32_e32_15]], %subreg.sub1 + ; WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]], implicit [[REG_SEQUENCE4]], implicit [[REG_SEQUENCE5]], implicit [[REG_SEQUENCE6]], implicit [[REG_SEQUENCE7]] + ; WAVE32-LABEL: name: constant_v_s64 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; WAVE32: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], 
%subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1 + ; WAVE32: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_5:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE32: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_4]], %subreg.sub0, [[V_MOV_B32_e32_5]], %subreg.sub1 + ; WAVE32: [[V_MOV_B32_e32_6:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967242, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_7:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE32: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1 + ; WAVE32: [[V_MOV_B32_e32_8:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_9:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_8]], %subreg.sub0, [[V_MOV_B32_e32_9]], %subreg.sub1 + ; WAVE32: [[V_MOV_B32_e32_10:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_11:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_10]], %subreg.sub0, [[V_MOV_B32_e32_11]], %subreg.sub1 + ; WAVE32: [[V_MOV_B32_e32_12:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_13:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE32: [[REG_SEQUENCE6:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_12]], %subreg.sub0, [[V_MOV_B32_e32_13]], %subreg.sub1 + ; WAVE32: [[V_MOV_B32_e32_14:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_15:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec + ; WAVE32: [[REG_SEQUENCE7:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_14]], %subreg.sub0, [[V_MOV_B32_e32_15]], %subreg.sub1 + ; WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]], 
implicit [[REG_SEQUENCE4]], implicit [[REG_SEQUENCE5]], implicit [[REG_SEQUENCE6]], implicit [[REG_SEQUENCE7]] %0:vgpr(s64) = G_CONSTANT i64 0 %1:vgpr(s64) = G_CONSTANT i64 1 %2:vgpr(s64) = G_CONSTANT i64 -1 @@ -117,24 +206,43 @@ tracksRegLiveness: true body: | bb.0: - ; GCN-LABEL: name: constant_s_s64 - ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GCN: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1 - ; GCN: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967242 - ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GCN: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27 - ; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 - ; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1 - ; GCN: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GCN: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_4]], %subreg.sub0, [[S_MOV_B32_5]], %subreg.sub1 - ; GCN: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 23255 - ; GCN: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 -16 - ; GCN: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_6]], %subreg.sub0, [[S_MOV_B32_7]], %subreg.sub1 - ; GCN: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_3]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]] + ; WAVE64-LABEL: name: constant_s_s64 + ; WAVE64: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; WAVE64: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1 + ; WAVE64: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967242 + ; WAVE64: 
[[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; WAVE64: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27 + ; WAVE64: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; WAVE64: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE64: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1 + ; WAVE64: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE64: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE64: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_4]], %subreg.sub0, [[S_MOV_B32_5]], %subreg.sub1 + ; WAVE64: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 23255 + ; WAVE64: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 -16 + ; WAVE64: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_6]], %subreg.sub0, [[S_MOV_B32_7]], %subreg.sub1 + ; WAVE64: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_3]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]] + ; WAVE32-LABEL: name: constant_s_s64 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; WAVE32: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1 + ; WAVE32: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967242 + ; WAVE32: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; WAVE32: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27 + ; WAVE32: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; WAVE32: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE32: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1 + ; 
WAVE32: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE32: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE32: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_4]], %subreg.sub0, [[S_MOV_B32_5]], %subreg.sub1 + ; WAVE32: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 23255 + ; WAVE32: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 -16 + ; WAVE32: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_6]], %subreg.sub0, [[S_MOV_B32_7]], %subreg.sub1 + ; WAVE32: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_3]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]] %0:sgpr(s64) = G_CONSTANT i64 0 %1:sgpr(s64) = G_CONSTANT i64 1 %2:sgpr(s64) = G_CONSTANT i64 -1 @@ -145,3 +253,27 @@ body: | %7:sgpr(s64) = G_CONSTANT i64 18446744004990098135 S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7 ... + +--- + +name: constant_i1_vcc +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + ; WAVE64-LABEL: name: constant_i1_vcc + ; WAVE64: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; WAVE64: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; WAVE64: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]] + ; WAVE32-LABEL: name: constant_i1_vcc + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE32: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]] + %0:vcc(s1) = G_CONSTANT i1 true + %1:vcc(s1) = G_CONSTANT i1 false + S_ENDPGM 0 , implicit %0 , implicit %1 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir index 9afa4b08c0ecb8..96e65617e33608 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir @@ -14,12 +14,16 @@ body: | ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1090519040, implicit $exec ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec ; GCN: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1090519040, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]] + ; GCN: $vgpr0 = COPY [[V_MOV_B32_e32_]] + ; GCN: $vgpr1 = COPY [[V_MOV_B32_e32_1]] + ; GCN: S_ENDPGM 0, implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]] %0:vgpr(s32) = G_FCONSTANT float 1.0 %1:vgpr(s32) = G_FCONSTANT float 8.0 %2:vgpr(s32) = G_FCONSTANT float 1.0 %3:vgpr(s32) = G_FCONSTANT float 8.0 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2 , implicit %3 + $vgpr0 = COPY %0 + $vgpr1 = COPY %1 + S_ENDPGM 0, implicit %2 , implicit %3 ... --- @@ -37,14 +41,14 @@ body: | ; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 3238002688 ; GCN: $sgpr0 = COPY [[S_MOV_B32_]] ; GCN: $sgpr1 = COPY [[S_MOV_B32_1]] - ; GCN: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]] + ; GCN: S_ENDPGM 0, implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]] %0:sgpr(s32) = G_FCONSTANT float 1.0 %1:sgpr(s32) = G_FCONSTANT float 8.0 %2:sgpr(s32) = G_FCONSTANT float -1.0 %3:sgpr(s32) = G_FCONSTANT float -8.0 $sgpr0 = COPY %0 $sgpr1 = COPY %1 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2 , implicit %3 + S_ENDPGM 0, implicit %2 , implicit %3 ... 
@@ -71,14 +75,14 @@ body: | ; GCN: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1 ; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]] ; GCN: $vgpr2_vgpr3 = COPY [[REG_SEQUENCE1]] - ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]] + ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]] %0:vgpr(s64) = G_FCONSTANT double 1.0 %1:vgpr(s64) = G_FCONSTANT double 8.0 %2:vgpr(s64) = G_FCONSTANT double -2.0 %3:vgpr(s64) = G_FCONSTANT double 10.0 $vgpr0_vgpr1 = COPY %0 $vgpr2_vgpr3 = COPY %1 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2 , implicit %3 + S_ENDPGM 0, implicit %2 , implicit %3 ... @@ -122,14 +126,22 @@ body: | ; GCN-LABEL: name: fconstant_v_s16 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 15360, implicit $exec ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 18432, implicit $exec + ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 15360, implicit $exec + ; GCN: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 18432, implicit $exec ; GCN: $vgpr0 = COPY [[V_MOV_B32_e32_]] ; GCN: $vgpr1 = COPY [[V_MOV_B32_e32_1]] + ; GCN: S_ENDPGM 0, implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]] %0:vgpr(s16) = G_FCONSTANT half 1.0 %1:vgpr(s16) = G_FCONSTANT half 8.0 %2:vgpr(s32) = G_ANYEXT %0 %3:vgpr(s32) = G_ANYEXT %1 + + ; Test without already assigned register class + %4:vgpr(s16) = G_FCONSTANT half 1.0 + %5:vgpr(s16) = G_FCONSTANT half 8.0 $vgpr0 = COPY %2 $vgpr1 = COPY %3 + S_ENDPGM 0, implicit %4, implicit %5 ... 
@@ -146,14 +158,21 @@ body: | ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 18432 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 15360 + ; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 18432 ; GCN: $sgpr0 = COPY [[COPY]] ; GCN: $sgpr1 = COPY [[COPY1]] + ; GCN: S_ENDPGM 0, implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]] %0:sgpr(s16) = G_FCONSTANT half 1.0 %1:sgpr(s16) = G_FCONSTANT half 8.0 %2:vgpr(s32) = G_ANYEXT %0 %3:vgpr(s32) = G_ANYEXT %1 + + ; Test without already assigned register class + %4:sgpr(s16) = G_FCONSTANT half 1.0 + %5:sgpr(s16) = G_FCONSTANT half 8.0 $sgpr0 = COPY %2 $sgpr1 = COPY %3 + S_ENDPGM 0, implicit %4, implicit %5 ... - diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir index 7f1f52d2c522ac..966bb8c629500a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir @@ -420,34 +420,35 @@ regBankSelected: true tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr0 ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 - ; WAVE64: liveins: $vgpr0 + ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = 
S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B64_]] ; WAVE64: S_ENDPGM 0, implicit [[COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 - ; WAVE32: liveins: $vgpr0 + ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]] ; WAVE32: S_ENDPGM 0, implicit [[COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) - %2:sgpr(s1) = G_CONSTANT i1 true + %sgpr0:sgpr(s32) = COPY $sgpr0 + %2:sgpr(s1) = G_TRUNC %sgpr0 %6:sgpr(s32) = G_CONSTANT i32 0 %7:sgpr(p1) = G_IMPLICIT_DEF %9:vcc(s1) = COPY %0(s1) @@ -466,34 +467,35 @@ regBankSelected: true tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr0 ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 - ; WAVE64: liveins: $vgpr0 + ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, 
[[V_AND_B32_e32_]], implicit $exec - ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B64_]] ; WAVE64: S_ENDPGM 0, implicit [[COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 - ; WAVE32: liveins: $vgpr0 + ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B32_]] ; WAVE32: S_ENDPGM 0, implicit [[COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) - %2:sgpr(s1) = G_CONSTANT i1 true + %sgpr0:sgpr(s32) = COPY $sgpr0 + %2:sgpr(s1) = G_TRUNC %sgpr0 %6:sgpr(s32) = G_CONSTANT i32 0 %7:sgpr(p1) = G_IMPLICIT_DEF %9:vcc(s1) = COPY %0(s1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir index f923a4c9f02b81..0364cb736c601a 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir @@ -421,34 +421,35 @@ regBankSelected: true tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr0 ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 - ; WAVE64: liveins: $vgpr0 + ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B64_]] ; WAVE64: S_ENDPGM 0, implicit [[COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 - ; WAVE32: liveins: $vgpr0 + ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], 
implicit $exec ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]] ; WAVE32: S_ENDPGM 0, implicit [[COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) - %2:sgpr(s1) = G_CONSTANT i1 true + %sgpr0:sgpr(s32) = COPY $sgpr0 + %2:sgpr(s1) = G_TRUNC %sgpr0 %6:sgpr(s32) = G_CONSTANT i32 0 %7:sgpr(p1) = G_IMPLICIT_DEF %9:vcc(s1) = COPY %0(s1) @@ -467,34 +468,35 @@ regBankSelected: true tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr0 ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 - ; WAVE64: liveins: $vgpr0 + ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_XOR_B64_]] ; WAVE64: S_ENDPGM 0, implicit [[COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 - ; WAVE32: liveins: $vgpr0 + ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32: 
[[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_XOR_B32_]] ; WAVE32: S_ENDPGM 0, implicit [[COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) - %2:sgpr(s1) = G_CONSTANT i1 true + %sgpr0:sgpr(s32) = COPY $sgpr0 + %2:sgpr(s1) = G_TRUNC %sgpr0 %6:sgpr(s32) = G_CONSTANT i32 0 %7:sgpr(p1) = G_IMPLICIT_DEF %9:vcc(s1) = COPY %0(s1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll index d53cfe688f53cf..f244a840476daf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll @@ -49,9 +49,12 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C5]](s32) ; GCN: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN: $vgpr0 = COPY [[FRAME_INDEX1]](p5) - ; GCN: $vgpr1 = COPY [[FRAME_INDEX]](p5) - ; GCN: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) + ; GCN: [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg + ; GCN: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32) + ; GCN: G_STORE [[FRAME_INDEX]](p5), [[PTR_ADD2]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; GCN: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x 
s32>) ; GCN: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; GCN: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; GCN: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) @@ -60,11 +63,11 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN: $sgpr13 = COPY [[COPY16]](s32) ; GCN: $sgpr14 = COPY [[COPY17]](s32) ; GCN: $vgpr31 = COPY [[OR1]](s32) - ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 - ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32) + ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN: ADJCALLSTACKDOWN 0, 8, implicit-def $scc + ; GCN: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32) ; GCN: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load 1 from %ir.out.gep02, addrspace 5) - ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (dereferenceable load 4 from %ir.out.gep1, addrspace 5) + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (dereferenceable load 4 from %ir.out.gep1, addrspace 5) ; GCN: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1) ; GCN: G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir index 3f3ec6216585b2..6d0d24e2373eaa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -190,712 +190,94 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_64_65_v64s32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load 64, align 4, addrspace 4) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 64 + 64, align 4, addrspace 4) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 64 + 128, align 4, addrspace 4) - ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 64 + 192, align 4, addrspace 4) - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), 
[[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) - ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>) - ; CHECK: G_STORE [[UV]](s32), [[FRAME_INDEX]](p5) :: (store 4 into %stack.1, align 256, addrspace 5) - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[PTR_ADD3]](p5) - ; CHECK: G_STORE [[UV1]](s32), [[COPY1]](p5) :: (store 4 into %stack.1 + 4, align 256, addrspace 5) - ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(p5) = COPY [[PTR_ADD4]](p5) - ; CHECK: G_STORE [[UV2]](s32), [[COPY2]](p5) :: (store 4 into %stack.1 + 8, align 256, addrspace 5) - ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32) - ; CHECK: 
[[COPY3:%[0-9]+]]:_(p5) = COPY [[PTR_ADD5]](p5) - ; CHECK: G_STORE [[UV3]](s32), [[COPY3]](p5) :: (store 4 into %stack.1 + 12, align 256, addrspace 5) - ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(p5) = COPY [[PTR_ADD6]](p5) - ; CHECK: G_STORE [[UV4]](s32), [[COPY4]](p5) :: (store 4 into %stack.1 + 16, align 256, addrspace 5) - ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(p5) = COPY [[PTR_ADD7]](p5) - ; CHECK: G_STORE [[UV5]](s32), [[COPY5]](p5) :: (store 4 into %stack.1 + 20, align 256, addrspace 5) - ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(p5) = COPY [[PTR_ADD8]](p5) - ; CHECK: G_STORE [[UV6]](s32), [[COPY6]](p5) :: (store 4 into %stack.1 + 24, align 256, addrspace 5) - ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(p5) = COPY [[PTR_ADD9]](p5) - ; CHECK: G_STORE [[UV7]](s32), [[COPY7]](p5) :: (store 4 into %stack.1 + 28, align 256, addrspace 5) - ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(p5) = COPY [[PTR_ADD10]](p5) - ; CHECK: G_STORE [[UV8]](s32), [[COPY8]](p5) :: (store 4 into %stack.1 + 32, align 256, addrspace 5) - ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 - ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:_(p5) = COPY [[PTR_ADD11]](p5) - ; CHECK: G_STORE [[UV9]](s32), [[COPY9]](p5) :: (store 4 into %stack.1 + 36, align 256, addrspace 5) - ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CHECK: 
[[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(p5) = COPY [[PTR_ADD12]](p5) - ; CHECK: G_STORE [[UV10]](s32), [[COPY10]](p5) :: (store 4 into %stack.1 + 40, align 256, addrspace 5) - ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:_(p5) = COPY [[PTR_ADD13]](p5) - ; CHECK: G_STORE [[UV11]](s32), [[COPY11]](p5) :: (store 4 into %stack.1 + 44, align 256, addrspace 5) - ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:_(p5) = COPY [[PTR_ADD14]](p5) - ; CHECK: G_STORE [[UV12]](s32), [[COPY12]](p5) :: (store 4 into %stack.1 + 48, align 256, addrspace 5) - ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32) - ; CHECK: [[COPY13:%[0-9]+]]:_(p5) = COPY [[PTR_ADD15]](p5) - ; CHECK: G_STORE [[UV13]](s32), [[COPY13]](p5) :: (store 4 into %stack.1 + 52, align 256, addrspace 5) - ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32) - ; CHECK: [[COPY14:%[0-9]+]]:_(p5) = COPY [[PTR_ADD16]](p5) - ; CHECK: G_STORE [[UV14]](s32), [[COPY14]](p5) :: (store 4 into %stack.1 + 56, align 256, addrspace 5) - ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32) - ; CHECK: [[COPY15:%[0-9]+]]:_(p5) = COPY [[PTR_ADD17]](p5) - ; CHECK: G_STORE [[UV15]](s32), [[COPY15]](p5) :: (store 4 into %stack.1 + 60, align 256, addrspace 5) - ; CHECK: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s32) - ; CHECK: [[COPY16:%[0-9]+]]:_(p5) = COPY [[PTR_ADD18]](p5) - ; CHECK: G_STORE [[UV16]](s32), [[COPY16]](p5) :: (store 4 into %stack.1 + 64, align 256, addrspace 5) - ; CHECK: 
[[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 68 - ; CHECK: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32) - ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY [[PTR_ADD19]](p5) - ; CHECK: G_STORE [[UV17]](s32), [[COPY17]](p5) :: (store 4 into %stack.1 + 68, align 256, addrspace 5) - ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 72 - ; CHECK: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32) - ; CHECK: [[COPY18:%[0-9]+]]:_(p5) = COPY [[PTR_ADD20]](p5) - ; CHECK: G_STORE [[UV18]](s32), [[COPY18]](p5) :: (store 4 into %stack.1 + 72, align 256, addrspace 5) - ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 - ; CHECK: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32) - ; CHECK: [[COPY19:%[0-9]+]]:_(p5) = COPY [[PTR_ADD21]](p5) - ; CHECK: G_STORE [[UV19]](s32), [[COPY19]](p5) :: (store 4 into %stack.1 + 76, align 256, addrspace 5) - ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 80 - ; CHECK: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32) - ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY [[PTR_ADD22]](p5) - ; CHECK: G_STORE [[UV20]](s32), [[COPY20]](p5) :: (store 4 into %stack.1 + 80, align 256, addrspace 5) - ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 84 - ; CHECK: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32) - ; CHECK: [[COPY21:%[0-9]+]]:_(p5) = COPY [[PTR_ADD23]](p5) - ; CHECK: G_STORE [[UV21]](s32), [[COPY21]](p5) :: (store 4 into %stack.1 + 84, align 256, addrspace 5) - ; CHECK: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 88 - ; CHECK: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32) - ; CHECK: [[COPY22:%[0-9]+]]:_(p5) = COPY [[PTR_ADD24]](p5) - ; CHECK: G_STORE [[UV22]](s32), [[COPY22]](p5) :: (store 4 into %stack.1 + 88, align 256, addrspace 5) - ; CHECK: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 92 - ; CHECK: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32) - ; CHECK: [[COPY23:%[0-9]+]]:_(p5) = COPY [[PTR_ADD25]](p5) - ; 
CHECK: G_STORE [[UV23]](s32), [[COPY23]](p5) :: (store 4 into %stack.1 + 92, align 256, addrspace 5) - ; CHECK: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 96 - ; CHECK: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32) - ; CHECK: [[COPY24:%[0-9]+]]:_(p5) = COPY [[PTR_ADD26]](p5) - ; CHECK: G_STORE [[UV24]](s32), [[COPY24]](p5) :: (store 4 into %stack.1 + 96, align 256, addrspace 5) - ; CHECK: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; CHECK: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32) - ; CHECK: [[COPY25:%[0-9]+]]:_(p5) = COPY [[PTR_ADD27]](p5) - ; CHECK: G_STORE [[UV25]](s32), [[COPY25]](p5) :: (store 4 into %stack.1 + 100, align 256, addrspace 5) - ; CHECK: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 104 - ; CHECK: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32) - ; CHECK: [[COPY26:%[0-9]+]]:_(p5) = COPY [[PTR_ADD28]](p5) - ; CHECK: G_STORE [[UV26]](s32), [[COPY26]](p5) :: (store 4 into %stack.1 + 104, align 256, addrspace 5) - ; CHECK: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 108 - ; CHECK: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32) - ; CHECK: [[COPY27:%[0-9]+]]:_(p5) = COPY [[PTR_ADD29]](p5) - ; CHECK: G_STORE [[UV27]](s32), [[COPY27]](p5) :: (store 4 into %stack.1 + 108, align 256, addrspace 5) - ; CHECK: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 112 - ; CHECK: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32) - ; CHECK: [[COPY28:%[0-9]+]]:_(p5) = COPY [[PTR_ADD30]](p5) - ; CHECK: G_STORE [[UV28]](s32), [[COPY28]](p5) :: (store 4 into %stack.1 + 112, align 256, addrspace 5) - ; CHECK: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 116 - ; CHECK: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32) - ; CHECK: [[COPY29:%[0-9]+]]:_(p5) = COPY [[PTR_ADD31]](p5) - ; CHECK: G_STORE [[UV29]](s32), [[COPY29]](p5) :: (store 4 into %stack.1 + 116, align 256, addrspace 5) - ; CHECK: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 120 - ; CHECK: 
[[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32) - ; CHECK: [[COPY30:%[0-9]+]]:_(p5) = COPY [[PTR_ADD32]](p5) - ; CHECK: G_STORE [[UV30]](s32), [[COPY30]](p5) :: (store 4 into %stack.1 + 120, align 256, addrspace 5) - ; CHECK: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 124 - ; CHECK: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32) - ; CHECK: [[COPY31:%[0-9]+]]:_(p5) = COPY [[PTR_ADD33]](p5) - ; CHECK: G_STORE [[UV31]](s32), [[COPY31]](p5) :: (store 4 into %stack.1 + 124, align 256, addrspace 5) - ; CHECK: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32) - ; CHECK: [[COPY32:%[0-9]+]]:_(p5) = COPY [[PTR_ADD34]](p5) - ; CHECK: G_STORE [[UV32]](s32), [[COPY32]](p5) :: (store 4 into %stack.1 + 128, align 256, addrspace 5) - ; CHECK: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 132 - ; CHECK: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32) - ; CHECK: [[COPY33:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5) - ; CHECK: G_STORE [[UV33]](s32), [[COPY33]](p5) :: (store 4 into %stack.1 + 132, align 256, addrspace 5) - ; CHECK: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32) - ; CHECK: [[COPY34:%[0-9]+]]:_(p5) = COPY [[PTR_ADD36]](p5) - ; CHECK: G_STORE [[UV34]](s32), [[COPY34]](p5) :: (store 4 into %stack.1 + 136, align 256, addrspace 5) - ; CHECK: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 140 - ; CHECK: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32) - ; CHECK: [[COPY35:%[0-9]+]]:_(p5) = COPY [[PTR_ADD37]](p5) - ; CHECK: G_STORE [[UV35]](s32), [[COPY35]](p5) :: (store 4 into %stack.1 + 140, align 256, addrspace 5) - ; CHECK: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32) - ; CHECK: [[COPY36:%[0-9]+]]:_(p5) = COPY [[PTR_ADD38]](p5) - ; CHECK: G_STORE [[UV36]](s32), [[COPY36]](p5) 
:: (store 4 into %stack.1 + 144, align 256, addrspace 5) - ; CHECK: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 148 - ; CHECK: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32) - ; CHECK: [[COPY37:%[0-9]+]]:_(p5) = COPY [[PTR_ADD39]](p5) - ; CHECK: G_STORE [[UV37]](s32), [[COPY37]](p5) :: (store 4 into %stack.1 + 148, align 256, addrspace 5) - ; CHECK: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 152 - ; CHECK: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32) - ; CHECK: [[COPY38:%[0-9]+]]:_(p5) = COPY [[PTR_ADD40]](p5) - ; CHECK: G_STORE [[UV38]](s32), [[COPY38]](p5) :: (store 4 into %stack.1 + 152, align 256, addrspace 5) - ; CHECK: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 156 - ; CHECK: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32) - ; CHECK: [[COPY39:%[0-9]+]]:_(p5) = COPY [[PTR_ADD41]](p5) - ; CHECK: G_STORE [[UV39]](s32), [[COPY39]](p5) :: (store 4 into %stack.1 + 156, align 256, addrspace 5) - ; CHECK: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 160 - ; CHECK: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32) - ; CHECK: [[COPY40:%[0-9]+]]:_(p5) = COPY [[PTR_ADD42]](p5) - ; CHECK: G_STORE [[UV40]](s32), [[COPY40]](p5) :: (store 4 into %stack.1 + 160, align 256, addrspace 5) - ; CHECK: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 164 - ; CHECK: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32) - ; CHECK: [[COPY41:%[0-9]+]]:_(p5) = COPY [[PTR_ADD43]](p5) - ; CHECK: G_STORE [[UV41]](s32), [[COPY41]](p5) :: (store 4 into %stack.1 + 164, align 256, addrspace 5) - ; CHECK: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 168 - ; CHECK: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32) - ; CHECK: [[COPY42:%[0-9]+]]:_(p5) = COPY [[PTR_ADD44]](p5) - ; CHECK: G_STORE [[UV42]](s32), [[COPY42]](p5) :: (store 4 into %stack.1 + 168, align 256, addrspace 5) - ; CHECK: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 172 - ; CHECK: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], 
[[C46]](s32) - ; CHECK: [[COPY43:%[0-9]+]]:_(p5) = COPY [[PTR_ADD45]](p5) - ; CHECK: G_STORE [[UV43]](s32), [[COPY43]](p5) :: (store 4 into %stack.1 + 172, align 256, addrspace 5) - ; CHECK: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 - ; CHECK: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32) - ; CHECK: [[COPY44:%[0-9]+]]:_(p5) = COPY [[PTR_ADD46]](p5) - ; CHECK: G_STORE [[UV44]](s32), [[COPY44]](p5) :: (store 4 into %stack.1 + 176, align 256, addrspace 5) - ; CHECK: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 180 - ; CHECK: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32) - ; CHECK: [[COPY45:%[0-9]+]]:_(p5) = COPY [[PTR_ADD47]](p5) - ; CHECK: G_STORE [[UV45]](s32), [[COPY45]](p5) :: (store 4 into %stack.1 + 180, align 256, addrspace 5) - ; CHECK: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 184 - ; CHECK: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32) - ; CHECK: [[COPY46:%[0-9]+]]:_(p5) = COPY [[PTR_ADD48]](p5) - ; CHECK: G_STORE [[UV46]](s32), [[COPY46]](p5) :: (store 4 into %stack.1 + 184, align 256, addrspace 5) - ; CHECK: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 188 - ; CHECK: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32) - ; CHECK: [[COPY47:%[0-9]+]]:_(p5) = COPY [[PTR_ADD49]](p5) - ; CHECK: G_STORE [[UV47]](s32), [[COPY47]](p5) :: (store 4 into %stack.1 + 188, align 256, addrspace 5) - ; CHECK: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 192 - ; CHECK: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32) - ; CHECK: [[COPY48:%[0-9]+]]:_(p5) = COPY [[PTR_ADD50]](p5) - ; CHECK: G_STORE [[UV48]](s32), [[COPY48]](p5) :: (store 4 into %stack.1 + 192, align 256, addrspace 5) - ; CHECK: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 196 - ; CHECK: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32) - ; CHECK: [[COPY49:%[0-9]+]]:_(p5) = COPY [[PTR_ADD51]](p5) - ; CHECK: G_STORE [[UV49]](s32), [[COPY49]](p5) :: (store 4 into %stack.1 + 196, align 256, addrspace 5) - 
; CHECK: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 - ; CHECK: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32) - ; CHECK: [[COPY50:%[0-9]+]]:_(p5) = COPY [[PTR_ADD52]](p5) - ; CHECK: G_STORE [[UV50]](s32), [[COPY50]](p5) :: (store 4 into %stack.1 + 200, align 256, addrspace 5) - ; CHECK: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 204 - ; CHECK: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32) - ; CHECK: [[COPY51:%[0-9]+]]:_(p5) = COPY [[PTR_ADD53]](p5) - ; CHECK: G_STORE [[UV51]](s32), [[COPY51]](p5) :: (store 4 into %stack.1 + 204, align 256, addrspace 5) - ; CHECK: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 208 - ; CHECK: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32) - ; CHECK: [[COPY52:%[0-9]+]]:_(p5) = COPY [[PTR_ADD54]](p5) - ; CHECK: G_STORE [[UV52]](s32), [[COPY52]](p5) :: (store 4 into %stack.1 + 208, align 256, addrspace 5) - ; CHECK: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 212 - ; CHECK: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32) - ; CHECK: [[COPY53:%[0-9]+]]:_(p5) = COPY [[PTR_ADD55]](p5) - ; CHECK: G_STORE [[UV53]](s32), [[COPY53]](p5) :: (store 4 into %stack.1 + 212, align 256, addrspace 5) - ; CHECK: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 216 - ; CHECK: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32) - ; CHECK: [[COPY54:%[0-9]+]]:_(p5) = COPY [[PTR_ADD56]](p5) - ; CHECK: G_STORE [[UV54]](s32), [[COPY54]](p5) :: (store 4 into %stack.1 + 216, align 256, addrspace 5) - ; CHECK: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 220 - ; CHECK: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32) - ; CHECK: [[COPY55:%[0-9]+]]:_(p5) = COPY [[PTR_ADD57]](p5) - ; CHECK: G_STORE [[UV55]](s32), [[COPY55]](p5) :: (store 4 into %stack.1 + 220, align 256, addrspace 5) - ; CHECK: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 224 - ; CHECK: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32) - ; CHECK: [[COPY56:%[0-9]+]]:_(p5) = COPY 
[[PTR_ADD58]](p5) - ; CHECK: G_STORE [[UV56]](s32), [[COPY56]](p5) :: (store 4 into %stack.1 + 224, align 256, addrspace 5) - ; CHECK: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 228 - ; CHECK: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32) - ; CHECK: [[COPY57:%[0-9]+]]:_(p5) = COPY [[PTR_ADD59]](p5) - ; CHECK: G_STORE [[UV57]](s32), [[COPY57]](p5) :: (store 4 into %stack.1 + 228, align 256, addrspace 5) - ; CHECK: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 232 - ; CHECK: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32) - ; CHECK: [[COPY58:%[0-9]+]]:_(p5) = COPY [[PTR_ADD60]](p5) - ; CHECK: G_STORE [[UV58]](s32), [[COPY58]](p5) :: (store 4 into %stack.1 + 232, align 256, addrspace 5) - ; CHECK: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 236 - ; CHECK: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32) - ; CHECK: [[COPY59:%[0-9]+]]:_(p5) = COPY [[PTR_ADD61]](p5) - ; CHECK: G_STORE [[UV59]](s32), [[COPY59]](p5) :: (store 4 into %stack.1 + 236, align 256, addrspace 5) - ; CHECK: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 240 - ; CHECK: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32) - ; CHECK: [[COPY60:%[0-9]+]]:_(p5) = COPY [[PTR_ADD62]](p5) - ; CHECK: G_STORE [[UV60]](s32), [[COPY60]](p5) :: (store 4 into %stack.1 + 240, align 256, addrspace 5) - ; CHECK: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 244 - ; CHECK: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32) - ; CHECK: [[COPY61:%[0-9]+]]:_(p5) = COPY [[PTR_ADD63]](p5) - ; CHECK: G_STORE [[UV61]](s32), [[COPY61]](p5) :: (store 4 into %stack.1 + 244, align 256, addrspace 5) - ; CHECK: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 248 - ; CHECK: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32) - ; CHECK: [[COPY62:%[0-9]+]]:_(p5) = COPY [[PTR_ADD64]](p5) - ; CHECK: G_STORE [[UV62]](s32), [[COPY62]](p5) :: (store 4 into %stack.1 + 248, align 256, addrspace 5) - ; CHECK: [[C66:%[0-9]+]]:_(s32) = G_CONSTANT i32 252 - 
; CHECK: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C66]](s32) - ; CHECK: [[COPY63:%[0-9]+]]:_(p5) = COPY [[PTR_ADD65]](p5) - ; CHECK: G_STORE [[UV63]](s32), [[COPY63]](p5) :: (store 4 into %stack.1 + 252, align 256, addrspace 5) - ; CHECK: [[C67:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK: [[PTR_ADD66:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C67]](s32) - ; CHECK: G_STORE [[C4]](s32), [[PTR_ADD66]](p5) :: (store 4 into %stack.1 + 256, align 256, addrspace 5) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load 4 from %stack.1 + 256, align 256, addrspace 5) - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from %stack.1 + 260, align 256, addrspace 5) - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from %stack.1 + 264, align 256, addrspace 5) - ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 4 from %stack.1 + 268, align 256, addrspace 5) - ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 4 from %stack.1 + 272, align 256, addrspace 5) - ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 4 from %stack.1 + 276, align 256, addrspace 5) - ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 4 from %stack.1 + 280, align 256, addrspace 5) - ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 4 from %stack.1 + 284, align 256, addrspace 5) - ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 4 from %stack.1 + 288, align 256, addrspace 5) - ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 4 from %stack.1 + 292, align 256, addrspace 5) - ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 4 from %stack.1 + 296, align 256, addrspace 5) - ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 4 from %stack.1 + 300, align 256, addrspace 5) - ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 4 from 
%stack.1 + 304, align 256, addrspace 5) - ; CHECK: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load 4 from %stack.1 + 308, align 256, addrspace 5) - ; CHECK: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load 4 from %stack.1 + 312, align 256, addrspace 5) - ; CHECK: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load 4 from %stack.1 + 316, align 256, addrspace 5) - ; CHECK: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load 4 from %stack.1 + 320, align 256, addrspace 5) - ; CHECK: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load 4 from %stack.1 + 324, align 256, addrspace 5) - ; CHECK: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load 4 from %stack.1 + 328, align 256, addrspace 5) - ; CHECK: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load 4 from %stack.1 + 332, align 256, addrspace 5) - ; CHECK: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load 4 from %stack.1 + 336, align 256, addrspace 5) - ; CHECK: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p5) :: (load 4 from %stack.1 + 340, align 256, addrspace 5) - ; CHECK: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p5) :: (load 4 from %stack.1 + 344, align 256, addrspace 5) - ; CHECK: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p5) :: (load 4 from %stack.1 + 348, align 256, addrspace 5) - ; CHECK: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p5) :: (load 4 from %stack.1 + 352, align 256, addrspace 5) - ; CHECK: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p5) :: (load 4 from %stack.1 + 356, align 256, addrspace 5) - ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p5) :: (load 4 from %stack.1 + 360, align 256, addrspace 5) - ; CHECK: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p5) :: (load 4 from %stack.1 + 364, align 256, addrspace 5) - ; CHECK: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p5) :: (load 4 from %stack.1 + 368, align 256, addrspace 5) - ; CHECK: [[LOAD33:%[0-9]+]]:_(s32) = 
G_LOAD [[PTR_ADD31]](p5) :: (load 4 from %stack.1 + 372, align 256, addrspace 5) - ; CHECK: [[LOAD34:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD32]](p5) :: (load 4 from %stack.1 + 376, align 256, addrspace 5) - ; CHECK: [[LOAD35:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD33]](p5) :: (load 4 from %stack.1 + 380, align 256, addrspace 5) - ; CHECK: [[LOAD36:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD34]](p5) :: (load 4 from %stack.1 + 384, align 256, addrspace 5) - ; CHECK: [[LOAD37:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD35]](p5) :: (load 4 from %stack.1 + 388, align 256, addrspace 5) - ; CHECK: [[LOAD38:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD36]](p5) :: (load 4 from %stack.1 + 392, align 256, addrspace 5) - ; CHECK: [[LOAD39:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD37]](p5) :: (load 4 from %stack.1 + 396, align 256, addrspace 5) - ; CHECK: [[LOAD40:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD38]](p5) :: (load 4 from %stack.1 + 400, align 256, addrspace 5) - ; CHECK: [[LOAD41:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD39]](p5) :: (load 4 from %stack.1 + 404, align 256, addrspace 5) - ; CHECK: [[LOAD42:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD40]](p5) :: (load 4 from %stack.1 + 408, align 256, addrspace 5) - ; CHECK: [[LOAD43:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD41]](p5) :: (load 4 from %stack.1 + 412, align 256, addrspace 5) - ; CHECK: [[LOAD44:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD42]](p5) :: (load 4 from %stack.1 + 416, align 256, addrspace 5) - ; CHECK: [[LOAD45:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD43]](p5) :: (load 4 from %stack.1 + 420, align 256, addrspace 5) - ; CHECK: [[LOAD46:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD44]](p5) :: (load 4 from %stack.1 + 424, align 256, addrspace 5) - ; CHECK: [[LOAD47:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD45]](p5) :: (load 4 from %stack.1 + 428, align 256, addrspace 5) - ; CHECK: [[LOAD48:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD46]](p5) :: (load 4 from %stack.1 + 432, align 256, addrspace 5) - ; CHECK: [[LOAD49:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD47]](p5) :: (load 4 from %stack.1 + 436, align 256, addrspace 
5) - ; CHECK: [[LOAD50:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD48]](p5) :: (load 4 from %stack.1 + 440, align 256, addrspace 5) - ; CHECK: [[LOAD51:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD49]](p5) :: (load 4 from %stack.1 + 444, align 256, addrspace 5) - ; CHECK: [[LOAD52:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD50]](p5) :: (load 4 from %stack.1 + 448, align 256, addrspace 5) - ; CHECK: [[LOAD53:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD51]](p5) :: (load 4 from %stack.1 + 452, align 256, addrspace 5) - ; CHECK: [[LOAD54:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD52]](p5) :: (load 4 from %stack.1 + 456, align 256, addrspace 5) - ; CHECK: [[LOAD55:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD53]](p5) :: (load 4 from %stack.1 + 460, align 256, addrspace 5) - ; CHECK: [[LOAD56:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD54]](p5) :: (load 4 from %stack.1 + 464, align 256, addrspace 5) - ; CHECK: [[LOAD57:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD55]](p5) :: (load 4 from %stack.1 + 468, align 256, addrspace 5) - ; CHECK: [[LOAD58:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD56]](p5) :: (load 4 from %stack.1 + 472, align 256, addrspace 5) - ; CHECK: [[LOAD59:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD57]](p5) :: (load 4 from %stack.1 + 476, align 256, addrspace 5) - ; CHECK: [[LOAD60:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD58]](p5) :: (load 4 from %stack.1 + 480, align 256, addrspace 5) - ; CHECK: [[LOAD61:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD59]](p5) :: (load 4 from %stack.1 + 484, align 256, addrspace 5) - ; CHECK: [[LOAD62:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD60]](p5) :: (load 4 from %stack.1 + 488, align 256, addrspace 5) - ; CHECK: [[LOAD63:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD61]](p5) :: (load 4 from %stack.1 + 492, align 256, addrspace 5) - ; CHECK: [[LOAD64:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD62]](p5) :: (load 4 from %stack.1 + 496, align 256, addrspace 5) - ; CHECK: [[LOAD65:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD63]](p5) :: (load 4 from %stack.1 + 500, align 256, addrspace 5) - ; CHECK: [[LOAD66:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD64]](p5) :: (load 4 
from %stack.1 + 504, align 256, addrspace 5) - ; CHECK: [[LOAD67:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD65]](p5) :: (load 4 from %stack.1 + 508, align 256, addrspace 5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 - ; CHECK: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32), [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK: [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) - ; CHECK: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), 
[[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>) - ; CHECK: G_STORE [[UV64]](s32), [[FRAME_INDEX1]](p5) :: (store 4 into %stack.0, align 256, addrspace 5) - ; CHECK: [[PTR_ADD67:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C5]](s32) - ; CHECK: [[COPY64:%[0-9]+]]:_(p5) = COPY [[PTR_ADD67]](p5) - ; CHECK: G_STORE [[UV65]](s32), [[COPY64]](p5) :: (store 4 into %stack.0 + 4, align 256, addrspace 5) - ; CHECK: [[PTR_ADD68:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C6]](s32) - ; CHECK: [[COPY65:%[0-9]+]]:_(p5) = COPY [[PTR_ADD68]](p5) - ; CHECK: G_STORE [[UV66]](s32), [[COPY65]](p5) :: (store 4 into %stack.0 + 8, align 256, addrspace 5) - ; CHECK: [[PTR_ADD69:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C7]](s32) - ; CHECK: [[COPY66:%[0-9]+]]:_(p5) = COPY [[PTR_ADD69]](p5) - ; CHECK: G_STORE [[UV67]](s32), [[COPY66]](p5) :: (store 4 into %stack.0 + 12, align 256, addrspace 5) - ; CHECK: [[PTR_ADD70:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C8]](s32) - ; CHECK: [[COPY67:%[0-9]+]]:_(p5) = COPY [[PTR_ADD70]](p5) - ; CHECK: G_STORE [[UV68]](s32), [[COPY67]](p5) :: (store 4 into %stack.0 + 16, align 256, addrspace 5) - ; CHECK: [[PTR_ADD71:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C9]](s32) - ; CHECK: [[COPY68:%[0-9]+]]:_(p5) = COPY [[PTR_ADD71]](p5) - ; CHECK: G_STORE [[UV69]](s32), [[COPY68]](p5) :: (store 4 into %stack.0 + 20, align 256, addrspace 5) - ; CHECK: [[PTR_ADD72:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C10]](s32) - ; CHECK: [[COPY69:%[0-9]+]]:_(p5) = COPY [[PTR_ADD72]](p5) - ; CHECK: G_STORE [[UV70]](s32), [[COPY69]](p5) :: (store 4 into %stack.0 + 24, align 256, addrspace 5) - ; CHECK: [[PTR_ADD73:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C11]](s32) - ; CHECK: [[COPY70:%[0-9]+]]:_(p5) = COPY [[PTR_ADD73]](p5) - ; CHECK: G_STORE [[UV71]](s32), [[COPY70]](p5) :: (store 4 into %stack.0 + 28, align 256, addrspace 5) - ; CHECK: [[PTR_ADD74:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C12]](s32) - ; CHECK: 
[[COPY71:%[0-9]+]]:_(p5) = COPY [[PTR_ADD74]](p5) - ; CHECK: G_STORE [[UV72]](s32), [[COPY71]](p5) :: (store 4 into %stack.0 + 32, align 256, addrspace 5) - ; CHECK: [[PTR_ADD75:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C13]](s32) - ; CHECK: [[COPY72:%[0-9]+]]:_(p5) = COPY [[PTR_ADD75]](p5) - ; CHECK: G_STORE [[UV73]](s32), [[COPY72]](p5) :: (store 4 into %stack.0 + 36, align 256, addrspace 5) - ; CHECK: [[PTR_ADD76:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C14]](s32) - ; CHECK: [[COPY73:%[0-9]+]]:_(p5) = COPY [[PTR_ADD76]](p5) - ; CHECK: G_STORE [[UV74]](s32), [[COPY73]](p5) :: (store 4 into %stack.0 + 40, align 256, addrspace 5) - ; CHECK: [[PTR_ADD77:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C15]](s32) - ; CHECK: [[COPY74:%[0-9]+]]:_(p5) = COPY [[PTR_ADD77]](p5) - ; CHECK: G_STORE [[UV75]](s32), [[COPY74]](p5) :: (store 4 into %stack.0 + 44, align 256, addrspace 5) - ; CHECK: [[PTR_ADD78:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C16]](s32) - ; CHECK: [[COPY75:%[0-9]+]]:_(p5) = COPY [[PTR_ADD78]](p5) - ; CHECK: G_STORE [[UV76]](s32), [[COPY75]](p5) :: (store 4 into %stack.0 + 48, align 256, addrspace 5) - ; CHECK: [[PTR_ADD79:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C17]](s32) - ; CHECK: [[COPY76:%[0-9]+]]:_(p5) = COPY [[PTR_ADD79]](p5) - ; CHECK: G_STORE [[UV77]](s32), [[COPY76]](p5) :: (store 4 into %stack.0 + 52, align 256, addrspace 5) - ; CHECK: [[PTR_ADD80:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C18]](s32) - ; CHECK: [[COPY77:%[0-9]+]]:_(p5) = COPY [[PTR_ADD80]](p5) - ; CHECK: G_STORE [[UV78]](s32), [[COPY77]](p5) :: (store 4 into %stack.0 + 56, align 256, addrspace 5) - ; CHECK: [[PTR_ADD81:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C19]](s32) - ; CHECK: [[COPY78:%[0-9]+]]:_(p5) = COPY [[PTR_ADD81]](p5) - ; CHECK: G_STORE [[UV79]](s32), [[COPY78]](p5) :: (store 4 into %stack.0 + 60, align 256, addrspace 5) - ; CHECK: [[PTR_ADD82:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C]](s32) - ; CHECK: 
[[COPY79:%[0-9]+]]:_(p5) = COPY [[PTR_ADD82]](p5) - ; CHECK: G_STORE [[UV80]](s32), [[COPY79]](p5) :: (store 4 into %stack.0 + 64, align 256, addrspace 5) - ; CHECK: [[PTR_ADD83:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C20]](s32) - ; CHECK: [[COPY80:%[0-9]+]]:_(p5) = COPY [[PTR_ADD83]](p5) - ; CHECK: G_STORE [[UV81]](s32), [[COPY80]](p5) :: (store 4 into %stack.0 + 68, align 256, addrspace 5) - ; CHECK: [[PTR_ADD84:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C21]](s32) - ; CHECK: [[COPY81:%[0-9]+]]:_(p5) = COPY [[PTR_ADD84]](p5) - ; CHECK: G_STORE [[UV82]](s32), [[COPY81]](p5) :: (store 4 into %stack.0 + 72, align 256, addrspace 5) - ; CHECK: [[PTR_ADD85:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C22]](s32) - ; CHECK: [[COPY82:%[0-9]+]]:_(p5) = COPY [[PTR_ADD85]](p5) - ; CHECK: G_STORE [[UV83]](s32), [[COPY82]](p5) :: (store 4 into %stack.0 + 76, align 256, addrspace 5) - ; CHECK: [[PTR_ADD86:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C23]](s32) - ; CHECK: [[COPY83:%[0-9]+]]:_(p5) = COPY [[PTR_ADD86]](p5) - ; CHECK: G_STORE [[UV84]](s32), [[COPY83]](p5) :: (store 4 into %stack.0 + 80, align 256, addrspace 5) - ; CHECK: [[PTR_ADD87:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C24]](s32) - ; CHECK: [[COPY84:%[0-9]+]]:_(p5) = COPY [[PTR_ADD87]](p5) - ; CHECK: G_STORE [[UV85]](s32), [[COPY84]](p5) :: (store 4 into %stack.0 + 84, align 256, addrspace 5) - ; CHECK: [[PTR_ADD88:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C25]](s32) - ; CHECK: [[COPY85:%[0-9]+]]:_(p5) = COPY [[PTR_ADD88]](p5) - ; CHECK: G_STORE [[UV86]](s32), [[COPY85]](p5) :: (store 4 into %stack.0 + 88, align 256, addrspace 5) - ; CHECK: [[PTR_ADD89:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C26]](s32) - ; CHECK: [[COPY86:%[0-9]+]]:_(p5) = COPY [[PTR_ADD89]](p5) - ; CHECK: G_STORE [[UV87]](s32), [[COPY86]](p5) :: (store 4 into %stack.0 + 92, align 256, addrspace 5) - ; CHECK: [[PTR_ADD90:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C27]](s32) - ; CHECK: 
[[COPY87:%[0-9]+]]:_(p5) = COPY [[PTR_ADD90]](p5) - ; CHECK: G_STORE [[UV88]](s32), [[COPY87]](p5) :: (store 4 into %stack.0 + 96, align 256, addrspace 5) - ; CHECK: [[PTR_ADD91:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C28]](s32) - ; CHECK: [[COPY88:%[0-9]+]]:_(p5) = COPY [[PTR_ADD91]](p5) - ; CHECK: G_STORE [[UV89]](s32), [[COPY88]](p5) :: (store 4 into %stack.0 + 100, align 256, addrspace 5) - ; CHECK: [[PTR_ADD92:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C29]](s32) - ; CHECK: [[COPY89:%[0-9]+]]:_(p5) = COPY [[PTR_ADD92]](p5) - ; CHECK: G_STORE [[UV90]](s32), [[COPY89]](p5) :: (store 4 into %stack.0 + 104, align 256, addrspace 5) - ; CHECK: [[PTR_ADD93:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C30]](s32) - ; CHECK: [[COPY90:%[0-9]+]]:_(p5) = COPY [[PTR_ADD93]](p5) - ; CHECK: G_STORE [[UV91]](s32), [[COPY90]](p5) :: (store 4 into %stack.0 + 108, align 256, addrspace 5) - ; CHECK: [[PTR_ADD94:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C31]](s32) - ; CHECK: [[COPY91:%[0-9]+]]:_(p5) = COPY [[PTR_ADD94]](p5) - ; CHECK: G_STORE [[UV92]](s32), [[COPY91]](p5) :: (store 4 into %stack.0 + 112, align 256, addrspace 5) - ; CHECK: [[PTR_ADD95:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C32]](s32) - ; CHECK: [[COPY92:%[0-9]+]]:_(p5) = COPY [[PTR_ADD95]](p5) - ; CHECK: G_STORE [[UV93]](s32), [[COPY92]](p5) :: (store 4 into %stack.0 + 116, align 256, addrspace 5) - ; CHECK: [[PTR_ADD96:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C33]](s32) - ; CHECK: [[COPY93:%[0-9]+]]:_(p5) = COPY [[PTR_ADD96]](p5) - ; CHECK: G_STORE [[UV94]](s32), [[COPY93]](p5) :: (store 4 into %stack.0 + 120, align 256, addrspace 5) - ; CHECK: [[PTR_ADD97:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C34]](s32) - ; CHECK: [[COPY94:%[0-9]+]]:_(p5) = COPY [[PTR_ADD97]](p5) - ; CHECK: G_STORE [[UV95]](s32), [[COPY94]](p5) :: (store 4 into %stack.0 + 124, align 256, addrspace 5) - ; CHECK: [[PTR_ADD98:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C35]](s32) - ; CHECK: 
[[COPY95:%[0-9]+]]:_(p5) = COPY [[PTR_ADD98]](p5) - ; CHECK: G_STORE [[UV96]](s32), [[COPY95]](p5) :: (store 4 into %stack.0 + 128, align 256, addrspace 5) - ; CHECK: [[PTR_ADD99:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C36]](s32) - ; CHECK: [[COPY96:%[0-9]+]]:_(p5) = COPY [[PTR_ADD99]](p5) - ; CHECK: G_STORE [[UV97]](s32), [[COPY96]](p5) :: (store 4 into %stack.0 + 132, align 256, addrspace 5) - ; CHECK: [[PTR_ADD100:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C37]](s32) - ; CHECK: [[COPY97:%[0-9]+]]:_(p5) = COPY [[PTR_ADD100]](p5) - ; CHECK: G_STORE [[UV98]](s32), [[COPY97]](p5) :: (store 4 into %stack.0 + 136, align 256, addrspace 5) - ; CHECK: [[PTR_ADD101:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C38]](s32) - ; CHECK: [[COPY98:%[0-9]+]]:_(p5) = COPY [[PTR_ADD101]](p5) - ; CHECK: G_STORE [[UV99]](s32), [[COPY98]](p5) :: (store 4 into %stack.0 + 140, align 256, addrspace 5) - ; CHECK: [[PTR_ADD102:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C39]](s32) - ; CHECK: [[COPY99:%[0-9]+]]:_(p5) = COPY [[PTR_ADD102]](p5) - ; CHECK: G_STORE [[UV100]](s32), [[COPY99]](p5) :: (store 4 into %stack.0 + 144, align 256, addrspace 5) - ; CHECK: [[PTR_ADD103:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C40]](s32) - ; CHECK: [[COPY100:%[0-9]+]]:_(p5) = COPY [[PTR_ADD103]](p5) - ; CHECK: G_STORE [[UV101]](s32), [[COPY100]](p5) :: (store 4 into %stack.0 + 148, align 256, addrspace 5) - ; CHECK: [[PTR_ADD104:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C41]](s32) - ; CHECK: [[COPY101:%[0-9]+]]:_(p5) = COPY [[PTR_ADD104]](p5) - ; CHECK: G_STORE [[UV102]](s32), [[COPY101]](p5) :: (store 4 into %stack.0 + 152, align 256, addrspace 5) - ; CHECK: [[PTR_ADD105:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C42]](s32) - ; CHECK: [[COPY102:%[0-9]+]]:_(p5) = COPY [[PTR_ADD105]](p5) - ; CHECK: G_STORE [[UV103]](s32), [[COPY102]](p5) :: (store 4 into %stack.0 + 156, align 256, addrspace 5) - ; CHECK: [[PTR_ADD106:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], 
[[C43]](s32) - ; CHECK: [[COPY103:%[0-9]+]]:_(p5) = COPY [[PTR_ADD106]](p5) - ; CHECK: G_STORE [[UV104]](s32), [[COPY103]](p5) :: (store 4 into %stack.0 + 160, align 256, addrspace 5) - ; CHECK: [[PTR_ADD107:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C44]](s32) - ; CHECK: [[COPY104:%[0-9]+]]:_(p5) = COPY [[PTR_ADD107]](p5) - ; CHECK: G_STORE [[UV105]](s32), [[COPY104]](p5) :: (store 4 into %stack.0 + 164, align 256, addrspace 5) - ; CHECK: [[PTR_ADD108:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C45]](s32) - ; CHECK: [[COPY105:%[0-9]+]]:_(p5) = COPY [[PTR_ADD108]](p5) - ; CHECK: G_STORE [[UV106]](s32), [[COPY105]](p5) :: (store 4 into %stack.0 + 168, align 256, addrspace 5) - ; CHECK: [[PTR_ADD109:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C46]](s32) - ; CHECK: [[COPY106:%[0-9]+]]:_(p5) = COPY [[PTR_ADD109]](p5) - ; CHECK: G_STORE [[UV107]](s32), [[COPY106]](p5) :: (store 4 into %stack.0 + 172, align 256, addrspace 5) - ; CHECK: [[PTR_ADD110:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C47]](s32) - ; CHECK: [[COPY107:%[0-9]+]]:_(p5) = COPY [[PTR_ADD110]](p5) - ; CHECK: G_STORE [[UV108]](s32), [[COPY107]](p5) :: (store 4 into %stack.0 + 176, align 256, addrspace 5) - ; CHECK: [[PTR_ADD111:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C48]](s32) - ; CHECK: [[COPY108:%[0-9]+]]:_(p5) = COPY [[PTR_ADD111]](p5) - ; CHECK: G_STORE [[UV109]](s32), [[COPY108]](p5) :: (store 4 into %stack.0 + 180, align 256, addrspace 5) - ; CHECK: [[PTR_ADD112:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C49]](s32) - ; CHECK: [[COPY109:%[0-9]+]]:_(p5) = COPY [[PTR_ADD112]](p5) - ; CHECK: G_STORE [[UV110]](s32), [[COPY109]](p5) :: (store 4 into %stack.0 + 184, align 256, addrspace 5) - ; CHECK: [[PTR_ADD113:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C50]](s32) - ; CHECK: [[COPY110:%[0-9]+]]:_(p5) = COPY [[PTR_ADD113]](p5) - ; CHECK: G_STORE [[UV111]](s32), [[COPY110]](p5) :: (store 4 into %stack.0 + 188, align 256, addrspace 5) - ; CHECK: 
[[PTR_ADD114:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C51]](s32) - ; CHECK: [[COPY111:%[0-9]+]]:_(p5) = COPY [[PTR_ADD114]](p5) - ; CHECK: G_STORE [[UV112]](s32), [[COPY111]](p5) :: (store 4 into %stack.0 + 192, align 256, addrspace 5) - ; CHECK: [[PTR_ADD115:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C52]](s32) - ; CHECK: [[COPY112:%[0-9]+]]:_(p5) = COPY [[PTR_ADD115]](p5) - ; CHECK: G_STORE [[UV113]](s32), [[COPY112]](p5) :: (store 4 into %stack.0 + 196, align 256, addrspace 5) - ; CHECK: [[PTR_ADD116:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C53]](s32) - ; CHECK: [[COPY113:%[0-9]+]]:_(p5) = COPY [[PTR_ADD116]](p5) - ; CHECK: G_STORE [[UV114]](s32), [[COPY113]](p5) :: (store 4 into %stack.0 + 200, align 256, addrspace 5) - ; CHECK: [[PTR_ADD117:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C54]](s32) - ; CHECK: [[COPY114:%[0-9]+]]:_(p5) = COPY [[PTR_ADD117]](p5) - ; CHECK: G_STORE [[UV115]](s32), [[COPY114]](p5) :: (store 4 into %stack.0 + 204, align 256, addrspace 5) - ; CHECK: [[PTR_ADD118:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C55]](s32) - ; CHECK: [[COPY115:%[0-9]+]]:_(p5) = COPY [[PTR_ADD118]](p5) - ; CHECK: G_STORE [[UV116]](s32), [[COPY115]](p5) :: (store 4 into %stack.0 + 208, align 256, addrspace 5) - ; CHECK: [[PTR_ADD119:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C56]](s32) - ; CHECK: [[COPY116:%[0-9]+]]:_(p5) = COPY [[PTR_ADD119]](p5) - ; CHECK: G_STORE [[UV117]](s32), [[COPY116]](p5) :: (store 4 into %stack.0 + 212, align 256, addrspace 5) - ; CHECK: [[PTR_ADD120:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C57]](s32) - ; CHECK: [[COPY117:%[0-9]+]]:_(p5) = COPY [[PTR_ADD120]](p5) - ; CHECK: G_STORE [[UV118]](s32), [[COPY117]](p5) :: (store 4 into %stack.0 + 216, align 256, addrspace 5) - ; CHECK: [[PTR_ADD121:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C58]](s32) - ; CHECK: [[COPY118:%[0-9]+]]:_(p5) = COPY [[PTR_ADD121]](p5) - ; CHECK: G_STORE [[UV119]](s32), [[COPY118]](p5) :: (store 4 into %stack.0 + 220, 
align 256, addrspace 5) - ; CHECK: [[PTR_ADD122:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C59]](s32) - ; CHECK: [[COPY119:%[0-9]+]]:_(p5) = COPY [[PTR_ADD122]](p5) - ; CHECK: G_STORE [[UV120]](s32), [[COPY119]](p5) :: (store 4 into %stack.0 + 224, align 256, addrspace 5) - ; CHECK: [[PTR_ADD123:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C60]](s32) - ; CHECK: [[COPY120:%[0-9]+]]:_(p5) = COPY [[PTR_ADD123]](p5) - ; CHECK: G_STORE [[UV121]](s32), [[COPY120]](p5) :: (store 4 into %stack.0 + 228, align 256, addrspace 5) - ; CHECK: [[PTR_ADD124:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C61]](s32) - ; CHECK: [[COPY121:%[0-9]+]]:_(p5) = COPY [[PTR_ADD124]](p5) - ; CHECK: G_STORE [[UV122]](s32), [[COPY121]](p5) :: (store 4 into %stack.0 + 232, align 256, addrspace 5) - ; CHECK: [[PTR_ADD125:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C62]](s32) - ; CHECK: [[COPY122:%[0-9]+]]:_(p5) = COPY [[PTR_ADD125]](p5) - ; CHECK: G_STORE [[UV123]](s32), [[COPY122]](p5) :: (store 4 into %stack.0 + 236, align 256, addrspace 5) - ; CHECK: [[PTR_ADD126:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C63]](s32) - ; CHECK: [[COPY123:%[0-9]+]]:_(p5) = COPY [[PTR_ADD126]](p5) - ; CHECK: G_STORE [[UV124]](s32), [[COPY123]](p5) :: (store 4 into %stack.0 + 240, align 256, addrspace 5) - ; CHECK: [[PTR_ADD127:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C64]](s32) - ; CHECK: [[COPY124:%[0-9]+]]:_(p5) = COPY [[PTR_ADD127]](p5) - ; CHECK: G_STORE [[UV125]](s32), [[COPY124]](p5) :: (store 4 into %stack.0 + 244, align 256, addrspace 5) - ; CHECK: [[PTR_ADD128:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C65]](s32) - ; CHECK: [[COPY125:%[0-9]+]]:_(p5) = COPY [[PTR_ADD128]](p5) - ; CHECK: G_STORE [[UV126]](s32), [[COPY125]](p5) :: (store 4 into %stack.0 + 248, align 256, addrspace 5) - ; CHECK: [[PTR_ADD129:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C66]](s32) - ; CHECK: [[COPY126:%[0-9]+]]:_(p5) = COPY [[PTR_ADD129]](p5) - ; CHECK: G_STORE [[UV127]](s32), [[COPY126]](p5) 
:: (store 4 into %stack.0 + 252, align 256, addrspace 5) - ; CHECK: [[C68:%[0-9]+]]:_(s32) = G_CONSTANT i32 260 - ; CHECK: [[PTR_ADD130:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C68]](s32) - ; CHECK: G_STORE [[C4]](s32), [[PTR_ADD130]](p5) :: (store 4 into %stack.0 + 260, addrspace 5) - ; CHECK: [[LOAD68:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (load 4 from %stack.0 + 260, align 256, addrspace 5) - ; CHECK: [[LOAD69:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD67]](p5) :: (load 4 from %stack.0 + 264, align 256, addrspace 5) - ; CHECK: [[LOAD70:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD68]](p5) :: (load 4 from %stack.0 + 268, align 256, addrspace 5) - ; CHECK: [[LOAD71:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD69]](p5) :: (load 4 from %stack.0 + 272, align 256, addrspace 5) - ; CHECK: [[LOAD72:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD70]](p5) :: (load 4 from %stack.0 + 276, align 256, addrspace 5) - ; CHECK: [[LOAD73:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD71]](p5) :: (load 4 from %stack.0 + 280, align 256, addrspace 5) - ; CHECK: [[LOAD74:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD72]](p5) :: (load 4 from %stack.0 + 284, align 256, addrspace 5) - ; CHECK: [[LOAD75:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD73]](p5) :: (load 4 from %stack.0 + 288, align 256, addrspace 5) - ; CHECK: [[LOAD76:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD74]](p5) :: (load 4 from %stack.0 + 292, align 256, addrspace 5) - ; CHECK: [[LOAD77:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD75]](p5) :: (load 4 from %stack.0 + 296, align 256, addrspace 5) - ; CHECK: [[LOAD78:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD76]](p5) :: (load 4 from %stack.0 + 300, align 256, addrspace 5) - ; CHECK: [[LOAD79:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD77]](p5) :: (load 4 from %stack.0 + 304, align 256, addrspace 5) - ; CHECK: [[LOAD80:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD78]](p5) :: (load 4 from %stack.0 + 308, align 256, addrspace 5) - ; CHECK: [[LOAD81:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD79]](p5) :: (load 4 from %stack.0 + 312, align 256, addrspace 5) - ; CHECK: 
[[LOAD82:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD80]](p5) :: (load 4 from %stack.0 + 316, align 256, addrspace 5) - ; CHECK: [[LOAD83:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD81]](p5) :: (load 4 from %stack.0 + 320, align 256, addrspace 5) - ; CHECK: [[LOAD84:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD82]](p5) :: (load 4 from %stack.0 + 324, align 256, addrspace 5) - ; CHECK: [[LOAD85:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD83]](p5) :: (load 4 from %stack.0 + 328, align 256, addrspace 5) - ; CHECK: [[LOAD86:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD84]](p5) :: (load 4 from %stack.0 + 332, align 256, addrspace 5) - ; CHECK: [[LOAD87:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD85]](p5) :: (load 4 from %stack.0 + 336, align 256, addrspace 5) - ; CHECK: [[LOAD88:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD86]](p5) :: (load 4 from %stack.0 + 340, align 256, addrspace 5) - ; CHECK: [[LOAD89:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD87]](p5) :: (load 4 from %stack.0 + 344, align 256, addrspace 5) - ; CHECK: [[LOAD90:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD88]](p5) :: (load 4 from %stack.0 + 348, align 256, addrspace 5) - ; CHECK: [[LOAD91:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD89]](p5) :: (load 4 from %stack.0 + 352, align 256, addrspace 5) - ; CHECK: [[LOAD92:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD90]](p5) :: (load 4 from %stack.0 + 356, align 256, addrspace 5) - ; CHECK: [[LOAD93:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD91]](p5) :: (load 4 from %stack.0 + 360, align 256, addrspace 5) - ; CHECK: [[LOAD94:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD92]](p5) :: (load 4 from %stack.0 + 364, align 256, addrspace 5) - ; CHECK: [[LOAD95:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD93]](p5) :: (load 4 from %stack.0 + 368, align 256, addrspace 5) - ; CHECK: [[LOAD96:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD94]](p5) :: (load 4 from %stack.0 + 372, align 256, addrspace 5) - ; CHECK: [[LOAD97:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD95]](p5) :: (load 4 from %stack.0 + 376, align 256, addrspace 5) - ; CHECK: [[LOAD98:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD96]](p5) :: (load 4 from %stack.0 + 
380, align 256, addrspace 5) - ; CHECK: [[LOAD99:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD97]](p5) :: (load 4 from %stack.0 + 384, align 256, addrspace 5) - ; CHECK: [[LOAD100:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD98]](p5) :: (load 4 from %stack.0 + 388, align 256, addrspace 5) - ; CHECK: [[LOAD101:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD99]](p5) :: (load 4 from %stack.0 + 392, align 256, addrspace 5) - ; CHECK: [[LOAD102:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD100]](p5) :: (load 4 from %stack.0 + 396, align 256, addrspace 5) - ; CHECK: [[LOAD103:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD101]](p5) :: (load 4 from %stack.0 + 400, align 256, addrspace 5) - ; CHECK: [[LOAD104:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD102]](p5) :: (load 4 from %stack.0 + 404, align 256, addrspace 5) - ; CHECK: [[LOAD105:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD103]](p5) :: (load 4 from %stack.0 + 408, align 256, addrspace 5) - ; CHECK: [[LOAD106:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD104]](p5) :: (load 4 from %stack.0 + 412, align 256, addrspace 5) - ; CHECK: [[LOAD107:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD105]](p5) :: (load 4 from %stack.0 + 416, align 256, addrspace 5) - ; CHECK: [[LOAD108:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD106]](p5) :: (load 4 from %stack.0 + 420, align 256, addrspace 5) - ; CHECK: [[LOAD109:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD107]](p5) :: (load 4 from %stack.0 + 424, align 256, addrspace 5) - ; CHECK: [[LOAD110:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD108]](p5) :: (load 4 from %stack.0 + 428, align 256, addrspace 5) - ; CHECK: [[LOAD111:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD109]](p5) :: (load 4 from %stack.0 + 432, align 256, addrspace 5) - ; CHECK: [[LOAD112:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD110]](p5) :: (load 4 from %stack.0 + 436, align 256, addrspace 5) - ; CHECK: [[LOAD113:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD111]](p5) :: (load 4 from %stack.0 + 440, align 256, addrspace 5) - ; CHECK: [[LOAD114:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD112]](p5) :: (load 4 from %stack.0 + 444, align 256, addrspace 5) - ; CHECK: 
[[LOAD115:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD113]](p5) :: (load 4 from %stack.0 + 448, align 256, addrspace 5) - ; CHECK: [[LOAD116:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD114]](p5) :: (load 4 from %stack.0 + 452, align 256, addrspace 5) - ; CHECK: [[LOAD117:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD115]](p5) :: (load 4 from %stack.0 + 456, align 256, addrspace 5) - ; CHECK: [[LOAD118:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD116]](p5) :: (load 4 from %stack.0 + 460, align 256, addrspace 5) - ; CHECK: [[LOAD119:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD117]](p5) :: (load 4 from %stack.0 + 464, align 256, addrspace 5) - ; CHECK: [[LOAD120:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD118]](p5) :: (load 4 from %stack.0 + 468, align 256, addrspace 5) - ; CHECK: [[LOAD121:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD119]](p5) :: (load 4 from %stack.0 + 472, align 256, addrspace 5) - ; CHECK: [[LOAD122:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD120]](p5) :: (load 4 from %stack.0 + 476, align 256, addrspace 5) - ; CHECK: [[LOAD123:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD121]](p5) :: (load 4 from %stack.0 + 480, align 256, addrspace 5) - ; CHECK: [[LOAD124:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD122]](p5) :: (load 4 from %stack.0 + 484, align 256, addrspace 5) - ; CHECK: [[LOAD125:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD123]](p5) :: (load 4 from %stack.0 + 488, align 256, addrspace 5) - ; CHECK: [[LOAD126:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD124]](p5) :: (load 4 from %stack.0 + 492, align 256, addrspace 5) - ; CHECK: [[LOAD127:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD125]](p5) :: (load 4 from %stack.0 + 496, align 256, addrspace 5) - ; CHECK: [[LOAD128:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD126]](p5) :: (load 4 from %stack.0 + 500, align 256, addrspace 5) - ; CHECK: [[LOAD129:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD127]](p5) :: (load 4 from %stack.0 + 504, align 256, addrspace 5) - ; CHECK: [[LOAD130:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD128]](p5) :: (load 4 from %stack.0 + 508, align 256, addrspace 5) - ; CHECK: [[LOAD131:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD129]](p5) :: (load 4 from %stack.0 + 512, align 256, addrspace 5) - ; CHECK: [[COPY127:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY128:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD16]](s32), [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32) - ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32) - ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32) - ; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32) - ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD32]](s32), [[LOAD33]](s32), [[LOAD34]](s32), [[LOAD35]](s32) - ; CHECK: [[BUILD_VECTOR8:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD36]](s32), [[LOAD37]](s32), [[LOAD38]](s32), [[LOAD39]](s32) - ; CHECK: [[BUILD_VECTOR9:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD40]](s32), [[LOAD41]](s32), [[LOAD42]](s32), [[LOAD43]](s32) - ; CHECK: [[BUILD_VECTOR10:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD44]](s32), [[LOAD45]](s32), [[LOAD46]](s32), [[LOAD47]](s32) - ; CHECK: [[BUILD_VECTOR11:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD48]](s32), [[LOAD49]](s32), [[LOAD50]](s32), [[LOAD51]](s32) - ; CHECK: [[BUILD_VECTOR12:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD52]](s32), [[LOAD53]](s32), [[LOAD54]](s32), [[LOAD55]](s32) - ; CHECK: [[BUILD_VECTOR13:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR 
[[LOAD56]](s32), [[LOAD57]](s32), [[LOAD58]](s32), [[LOAD59]](s32) - ; CHECK: [[BUILD_VECTOR14:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD60]](s32), [[LOAD61]](s32), [[LOAD62]](s32), [[LOAD63]](s32) - ; CHECK: [[BUILD_VECTOR15:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD64]](s32), [[LOAD65]](s32), [[LOAD66]](s32), [[LOAD67]](s32) - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY127]](p1) :: (store 16, align 4, addrspace 1) - ; CHECK: [[C69:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD131:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C69]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD131]](p1) :: (store 16 + 16, align 4, addrspace 1) - ; CHECK: [[C70:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD132:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C70]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PTR_ADD132]](p1) :: (store 16 + 32, align 4, addrspace 1) - ; CHECK: [[C71:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD133:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C71]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR3]](<4 x s32>), [[PTR_ADD133]](p1) :: (store 16 + 48, align 4, addrspace 1) - ; CHECK: [[PTR_ADD134:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C1]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR4]](<4 x s32>), [[PTR_ADD134]](p1) :: (store 16 + 64, align 4, addrspace 1) - ; CHECK: [[C72:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 - ; CHECK: [[PTR_ADD135:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C72]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR5]](<4 x s32>), [[PTR_ADD135]](p1) :: (store 16 + 80, align 4, addrspace 1) - ; CHECK: [[C73:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 - ; CHECK: [[PTR_ADD136:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C73]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR6]](<4 x s32>), [[PTR_ADD136]](p1) :: (store 16 + 96, align 4, addrspace 1) - ; CHECK: [[C74:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 - ; CHECK: [[PTR_ADD137:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C74]](s64) - ; CHECK: G_STORE 
[[BUILD_VECTOR7]](<4 x s32>), [[PTR_ADD137]](p1) :: (store 16 + 112, align 4, addrspace 1) - ; CHECK: [[PTR_ADD138:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C2]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR8]](<4 x s32>), [[PTR_ADD138]](p1) :: (store 16 + 128, align 4, addrspace 1) - ; CHECK: [[C75:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 - ; CHECK: [[PTR_ADD139:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C75]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR9]](<4 x s32>), [[PTR_ADD139]](p1) :: (store 16 + 144, align 4, addrspace 1) - ; CHECK: [[C76:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 - ; CHECK: [[PTR_ADD140:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C76]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR10]](<4 x s32>), [[PTR_ADD140]](p1) :: (store 16 + 160, align 4, addrspace 1) - ; CHECK: [[C77:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 - ; CHECK: [[PTR_ADD141:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C77]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR11]](<4 x s32>), [[PTR_ADD141]](p1) :: (store 16 + 176, align 4, addrspace 1) - ; CHECK: [[PTR_ADD142:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C3]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR12]](<4 x s32>), [[PTR_ADD142]](p1) :: (store 16 + 192, align 4, addrspace 1) - ; CHECK: [[C78:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 - ; CHECK: [[PTR_ADD143:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C78]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR13]](<4 x s32>), [[PTR_ADD143]](p1) :: (store 16 + 208, align 4, addrspace 1) - ; CHECK: [[C79:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 - ; CHECK: [[PTR_ADD144:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C79]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR14]](<4 x s32>), [[PTR_ADD144]](p1) :: (store 16 + 224, align 4, addrspace 1) - ; CHECK: [[C80:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 - ; CHECK: [[PTR_ADD145:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C80]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR15]](<4 x s32>), [[PTR_ADD145]](p1) :: (store 16 + 240, align 4, addrspace 1) - ; CHECK: [[BUILD_VECTOR16:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR 
[[LOAD68]](s32), [[LOAD69]](s32), [[LOAD70]](s32), [[LOAD71]](s32) - ; CHECK: [[BUILD_VECTOR17:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD72]](s32), [[LOAD73]](s32), [[LOAD74]](s32), [[LOAD75]](s32) - ; CHECK: [[BUILD_VECTOR18:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD76]](s32), [[LOAD77]](s32), [[LOAD78]](s32), [[LOAD79]](s32) - ; CHECK: [[BUILD_VECTOR19:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD80]](s32), [[LOAD81]](s32), [[LOAD82]](s32), [[LOAD83]](s32) - ; CHECK: [[BUILD_VECTOR20:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD84]](s32), [[LOAD85]](s32), [[LOAD86]](s32), [[LOAD87]](s32) - ; CHECK: [[BUILD_VECTOR21:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD88]](s32), [[LOAD89]](s32), [[LOAD90]](s32), [[LOAD91]](s32) - ; CHECK: [[BUILD_VECTOR22:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD92]](s32), [[LOAD93]](s32), [[LOAD94]](s32), [[LOAD95]](s32) - ; CHECK: [[BUILD_VECTOR23:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD96]](s32), [[LOAD97]](s32), [[LOAD98]](s32), [[LOAD99]](s32) - ; CHECK: [[BUILD_VECTOR24:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD100]](s32), [[LOAD101]](s32), [[LOAD102]](s32), [[LOAD103]](s32) - ; CHECK: [[BUILD_VECTOR25:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD104]](s32), [[LOAD105]](s32), [[LOAD106]](s32), [[LOAD107]](s32) - ; CHECK: [[BUILD_VECTOR26:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD108]](s32), [[LOAD109]](s32), [[LOAD110]](s32), [[LOAD111]](s32) - ; CHECK: [[BUILD_VECTOR27:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD112]](s32), [[LOAD113]](s32), [[LOAD114]](s32), [[LOAD115]](s32) - ; CHECK: [[BUILD_VECTOR28:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD116]](s32), [[LOAD117]](s32), [[LOAD118]](s32), [[LOAD119]](s32) - ; CHECK: [[BUILD_VECTOR29:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD120]](s32), [[LOAD121]](s32), [[LOAD122]](s32), [[LOAD123]](s32) - ; CHECK: [[BUILD_VECTOR30:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD124]](s32), [[LOAD125]](s32), [[LOAD126]](s32), [[LOAD127]](s32) - ; CHECK: 
[[BUILD_VECTOR31:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD128]](s32), [[LOAD129]](s32), [[LOAD130]](s32), [[LOAD131]](s32) - ; CHECK: G_STORE [[BUILD_VECTOR16]](<4 x s32>), [[COPY128]](p1) :: (store 16, align 4, addrspace 1) - ; CHECK: [[PTR_ADD146:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C69]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR17]](<4 x s32>), [[PTR_ADD146]](p1) :: (store 16 + 16, align 4, addrspace 1) - ; CHECK: [[PTR_ADD147:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C70]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR18]](<4 x s32>), [[PTR_ADD147]](p1) :: (store 16 + 32, align 4, addrspace 1) - ; CHECK: [[PTR_ADD148:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C71]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR19]](<4 x s32>), [[PTR_ADD148]](p1) :: (store 16 + 48, align 4, addrspace 1) - ; CHECK: [[PTR_ADD149:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C1]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR20]](<4 x s32>), [[PTR_ADD149]](p1) :: (store 16 + 64, align 4, addrspace 1) - ; CHECK: [[PTR_ADD150:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C72]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR21]](<4 x s32>), [[PTR_ADD150]](p1) :: (store 16 + 80, align 4, addrspace 1) - ; CHECK: [[PTR_ADD151:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C73]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR22]](<4 x s32>), [[PTR_ADD151]](p1) :: (store 16 + 96, align 4, addrspace 1) - ; CHECK: [[PTR_ADD152:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C74]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR23]](<4 x s32>), [[PTR_ADD152]](p1) :: (store 16 + 112, align 4, addrspace 1) - ; CHECK: [[PTR_ADD153:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C2]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR24]](<4 x s32>), [[PTR_ADD153]](p1) :: (store 16 + 128, align 4, addrspace 1) - ; CHECK: [[PTR_ADD154:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C75]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR25]](<4 x s32>), [[PTR_ADD154]](p1) :: (store 16 + 144, align 4, addrspace 1) - ; CHECK: [[PTR_ADD155:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], 
[[C76]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR26]](<4 x s32>), [[PTR_ADD155]](p1) :: (store 16 + 160, align 4, addrspace 1) - ; CHECK: [[PTR_ADD156:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C77]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR27]](<4 x s32>), [[PTR_ADD156]](p1) :: (store 16 + 176, align 4, addrspace 1) - ; CHECK: [[PTR_ADD157:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C3]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR28]](<4 x s32>), [[PTR_ADD157]](p1) :: (store 16 + 192, align 4, addrspace 1) - ; CHECK: [[PTR_ADD158:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C78]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR29]](<4 x s32>), [[PTR_ADD158]](p1) :: (store 16 + 208, align 4, addrspace 1) - ; CHECK: [[PTR_ADD159:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C79]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR30]](<4 x s32>), [[PTR_ADD159]](p1) :: (store 16 + 224, align 4, addrspace 1) - ; CHECK: [[PTR_ADD160:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C80]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR31]](<4 x s32>), [[PTR_ADD160]](p1) :: (store 16 + 240, align 4, addrspace 1) + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK: [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK: G_STORE [[UV]](<4 x s32>), [[COPY1]](p1) :: (store 16, align 4, 
addrspace 1) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, align 4, addrspace 1) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD1]](p1) :: (store 16 + 32, align 4, addrspace 1) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD2]](p1) :: (store 16 + 48, align 4, addrspace 1) + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD3]](p1) :: (store 16 + 64, align 4, addrspace 1) + ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 + ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64) + ; CHECK: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD4]](p1) :: (store 16 + 80, align 4, addrspace 1) + ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 + ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64) + ; CHECK: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD5]](p1) :: (store 16 + 96, align 4, addrspace 1) + ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 + ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64) + ; CHECK: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD6]](p1) :: (store 16 + 112, align 4, addrspace 1) + ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64) + ; CHECK: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD7]](p1) :: (store 16 + 128, align 4, addrspace 1) + ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 + ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64) + ; CHECK: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD8]](p1) :: (store 16 + 144, 
align 4, addrspace 1) + ; CHECK: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 + ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64) + ; CHECK: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD9]](p1) :: (store 16 + 160, align 4, addrspace 1) + ; CHECK: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 + ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64) + ; CHECK: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD10]](p1) :: (store 16 + 176, align 4, addrspace 1) + ; CHECK: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 + ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64) + ; CHECK: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD11]](p1) :: (store 16 + 192, align 4, addrspace 1) + ; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 + ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64) + ; CHECK: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD12]](p1) :: (store 16 + 208, align 4, addrspace 1) + ; CHECK: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 + ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64) + ; CHECK: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD13]](p1) :: (store 16 + 224, align 4, addrspace 1) + ; CHECK: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 + ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64) + ; CHECK: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD14]](p1) :: (store 16 + 240, align 4, addrspace 1) + ; CHECK: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK: [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK: [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), 
[[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK: G_STORE [[UV16]](<4 x s32>), [[COPY2]](p1) :: (store 16, align 4, addrspace 1) + ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C]](s64) + ; CHECK: G_STORE [[UV17]](<4 x s32>), [[PTR_ADD15]](p1) :: (store 16 + 16, align 4, addrspace 1) + ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C1]](s64) + ; CHECK: G_STORE [[UV18]](<4 x s32>), [[PTR_ADD16]](p1) :: (store 16 + 32, align 4, addrspace 1) + ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C2]](s64) + ; CHECK: G_STORE [[UV19]](<4 x s32>), [[PTR_ADD17]](p1) :: (store 16 + 48, align 4, addrspace 1) + ; CHECK: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C3]](s64) + ; CHECK: G_STORE [[UV20]](<4 x s32>), [[PTR_ADD18]](p1) :: (store 16 + 64, align 4, addrspace 1) + ; CHECK: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C4]](s64) + ; CHECK: G_STORE [[UV21]](<4 x s32>), [[PTR_ADD19]](p1) :: (store 16 + 80, align 4, addrspace 1) + ; CHECK: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C5]](s64) + ; CHECK: G_STORE [[UV22]](<4 x s32>), [[PTR_ADD20]](p1) :: (store 16 + 96, align 4, addrspace 1) + ; CHECK: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C6]](s64) + ; CHECK: G_STORE [[UV23]](<4 x s32>), [[PTR_ADD21]](p1) :: (store 16 + 112, align 4, addrspace 1) + ; CHECK: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C7]](s64) + ; CHECK: G_STORE [[UV24]](<4 x s32>), [[PTR_ADD22]](p1) :: (store 16 + 128, align 4, addrspace 1) + ; CHECK: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C8]](s64) + ; CHECK: G_STORE [[UV25]](<4 x s32>), [[PTR_ADD23]](p1) :: (store 16 + 144, align 4, addrspace 1) + ; CHECK: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C9]](s64) + ; CHECK: G_STORE [[UV26]](<4 x s32>), [[PTR_ADD24]](p1) :: (store 16 + 160, align 4, addrspace 1) + ; CHECK: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C10]](s64) + 
; CHECK: G_STORE [[UV27]](<4 x s32>), [[PTR_ADD25]](p1) :: (store 16 + 176, align 4, addrspace 1) + ; CHECK: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C11]](s64) + ; CHECK: G_STORE [[UV28]](<4 x s32>), [[PTR_ADD26]](p1) :: (store 16 + 192, align 4, addrspace 1) + ; CHECK: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C12]](s64) + ; CHECK: G_STORE [[UV29]](<4 x s32>), [[PTR_ADD27]](p1) :: (store 16 + 208, align 4, addrspace 1) + ; CHECK: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C13]](s64) + ; CHECK: G_STORE [[UV30]](<4 x s32>), [[PTR_ADD28]](p1) :: (store 16 + 224, align 4, addrspace 1) + ; CHECK: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C14]](s64) + ; CHECK: G_STORE [[UV31]](<4 x s32>), [[PTR_ADD29]](p1) :: (store 16 + 240, align 4, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 64 %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) @@ -929,390 +311,55 @@ body: | ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 64 + 192, align 4, addrspace 4) ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), 
[[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) - ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>) - ; CHECK: G_STORE [[UV]](s32), [[FRAME_INDEX]](p5) :: (store 4 into %stack.0, align 256, addrspace 5) - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[PTR_ADD3]](p5) - ; CHECK: G_STORE [[UV1]](s32), [[COPY1]](p5) :: (store 4 into %stack.0 + 4, align 256, addrspace 5) - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(p5) = COPY [[PTR_ADD4]](p5) - ; CHECK: G_STORE [[UV2]](s32), [[COPY2]](p5) :: (store 4 into %stack.0 + 8, align 256, addrspace 5) - ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(p5) = COPY [[PTR_ADD5]](p5) - ; CHECK: G_STORE [[UV3]](s32), [[COPY3]](p5) :: 
(store 4 into %stack.0 + 12, align 256, addrspace 5) - ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(p5) = COPY [[PTR_ADD6]](p5) - ; CHECK: G_STORE [[UV4]](s32), [[COPY4]](p5) :: (store 4 into %stack.0 + 16, align 256, addrspace 5) - ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(p5) = COPY [[PTR_ADD7]](p5) - ; CHECK: G_STORE [[UV5]](s32), [[COPY5]](p5) :: (store 4 into %stack.0 + 20, align 256, addrspace 5) - ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(p5) = COPY [[PTR_ADD8]](p5) - ; CHECK: G_STORE [[UV6]](s32), [[COPY6]](p5) :: (store 4 into %stack.0 + 24, align 256, addrspace 5) - ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(p5) = COPY [[PTR_ADD9]](p5) - ; CHECK: G_STORE [[UV7]](s32), [[COPY7]](p5) :: (store 4 into %stack.0 + 28, align 256, addrspace 5) - ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(p5) = COPY [[PTR_ADD10]](p5) - ; CHECK: G_STORE [[UV8]](s32), [[COPY8]](p5) :: (store 4 into %stack.0 + 32, align 256, addrspace 5) - ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 - ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:_(p5) = COPY [[PTR_ADD11]](p5) - ; CHECK: G_STORE [[UV9]](s32), [[COPY9]](p5) :: (store 4 into %stack.0 + 36, align 256, addrspace 5) - ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(p5) = 
COPY [[PTR_ADD12]](p5) - ; CHECK: G_STORE [[UV10]](s32), [[COPY10]](p5) :: (store 4 into %stack.0 + 40, align 256, addrspace 5) - ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:_(p5) = COPY [[PTR_ADD13]](p5) - ; CHECK: G_STORE [[UV11]](s32), [[COPY11]](p5) :: (store 4 into %stack.0 + 44, align 256, addrspace 5) - ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:_(p5) = COPY [[PTR_ADD14]](p5) - ; CHECK: G_STORE [[UV12]](s32), [[COPY12]](p5) :: (store 4 into %stack.0 + 48, align 256, addrspace 5) - ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32) - ; CHECK: [[COPY13:%[0-9]+]]:_(p5) = COPY [[PTR_ADD15]](p5) - ; CHECK: G_STORE [[UV13]](s32), [[COPY13]](p5) :: (store 4 into %stack.0 + 52, align 256, addrspace 5) - ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32) - ; CHECK: [[COPY14:%[0-9]+]]:_(p5) = COPY [[PTR_ADD16]](p5) - ; CHECK: G_STORE [[UV14]](s32), [[COPY14]](p5) :: (store 4 into %stack.0 + 56, align 256, addrspace 5) - ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32) - ; CHECK: [[COPY15:%[0-9]+]]:_(p5) = COPY [[PTR_ADD17]](p5) - ; CHECK: G_STORE [[UV15]](s32), [[COPY15]](p5) :: (store 4 into %stack.0 + 60, align 256, addrspace 5) - ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32) - ; CHECK: [[COPY16:%[0-9]+]]:_(p5) = COPY [[PTR_ADD18]](p5) - ; CHECK: G_STORE [[UV16]](s32), [[COPY16]](p5) :: (store 4 into %stack.0 + 64, align 256, addrspace 5) - ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 68 - ; CHECK: 
[[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32) - ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY [[PTR_ADD19]](p5) - ; CHECK: G_STORE [[UV17]](s32), [[COPY17]](p5) :: (store 4 into %stack.0 + 68, align 256, addrspace 5) - ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 72 - ; CHECK: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32) - ; CHECK: [[COPY18:%[0-9]+]]:_(p5) = COPY [[PTR_ADD20]](p5) - ; CHECK: G_STORE [[UV18]](s32), [[COPY18]](p5) :: (store 4 into %stack.0 + 72, align 256, addrspace 5) - ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 - ; CHECK: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32) - ; CHECK: [[COPY19:%[0-9]+]]:_(p5) = COPY [[PTR_ADD21]](p5) - ; CHECK: G_STORE [[UV19]](s32), [[COPY19]](p5) :: (store 4 into %stack.0 + 76, align 256, addrspace 5) - ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 80 - ; CHECK: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32) - ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY [[PTR_ADD22]](p5) - ; CHECK: G_STORE [[UV20]](s32), [[COPY20]](p5) :: (store 4 into %stack.0 + 80, align 256, addrspace 5) - ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 84 - ; CHECK: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32) - ; CHECK: [[COPY21:%[0-9]+]]:_(p5) = COPY [[PTR_ADD23]](p5) - ; CHECK: G_STORE [[UV21]](s32), [[COPY21]](p5) :: (store 4 into %stack.0 + 84, align 256, addrspace 5) - ; CHECK: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 88 - ; CHECK: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32) - ; CHECK: [[COPY22:%[0-9]+]]:_(p5) = COPY [[PTR_ADD24]](p5) - ; CHECK: G_STORE [[UV22]](s32), [[COPY22]](p5) :: (store 4 into %stack.0 + 88, align 256, addrspace 5) - ; CHECK: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 92 - ; CHECK: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32) - ; CHECK: [[COPY23:%[0-9]+]]:_(p5) = COPY [[PTR_ADD25]](p5) - ; CHECK: G_STORE [[UV23]](s32), [[COPY23]](p5) :: (store 4 
into %stack.0 + 92, align 256, addrspace 5) - ; CHECK: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 96 - ; CHECK: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32) - ; CHECK: [[COPY24:%[0-9]+]]:_(p5) = COPY [[PTR_ADD26]](p5) - ; CHECK: G_STORE [[UV24]](s32), [[COPY24]](p5) :: (store 4 into %stack.0 + 96, align 256, addrspace 5) - ; CHECK: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; CHECK: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32) - ; CHECK: [[COPY25:%[0-9]+]]:_(p5) = COPY [[PTR_ADD27]](p5) - ; CHECK: G_STORE [[UV25]](s32), [[COPY25]](p5) :: (store 4 into %stack.0 + 100, align 256, addrspace 5) - ; CHECK: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 104 - ; CHECK: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32) - ; CHECK: [[COPY26:%[0-9]+]]:_(p5) = COPY [[PTR_ADD28]](p5) - ; CHECK: G_STORE [[UV26]](s32), [[COPY26]](p5) :: (store 4 into %stack.0 + 104, align 256, addrspace 5) - ; CHECK: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 108 - ; CHECK: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32) - ; CHECK: [[COPY27:%[0-9]+]]:_(p5) = COPY [[PTR_ADD29]](p5) - ; CHECK: G_STORE [[UV27]](s32), [[COPY27]](p5) :: (store 4 into %stack.0 + 108, align 256, addrspace 5) - ; CHECK: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 112 - ; CHECK: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32) - ; CHECK: [[COPY28:%[0-9]+]]:_(p5) = COPY [[PTR_ADD30]](p5) - ; CHECK: G_STORE [[UV28]](s32), [[COPY28]](p5) :: (store 4 into %stack.0 + 112, align 256, addrspace 5) - ; CHECK: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 116 - ; CHECK: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32) - ; CHECK: [[COPY29:%[0-9]+]]:_(p5) = COPY [[PTR_ADD31]](p5) - ; CHECK: G_STORE [[UV29]](s32), [[COPY29]](p5) :: (store 4 into %stack.0 + 116, align 256, addrspace 5) - ; CHECK: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 120 - ; CHECK: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32) - ; 
CHECK: [[COPY30:%[0-9]+]]:_(p5) = COPY [[PTR_ADD32]](p5) - ; CHECK: G_STORE [[UV30]](s32), [[COPY30]](p5) :: (store 4 into %stack.0 + 120, align 256, addrspace 5) - ; CHECK: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 124 - ; CHECK: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32) - ; CHECK: [[COPY31:%[0-9]+]]:_(p5) = COPY [[PTR_ADD33]](p5) - ; CHECK: G_STORE [[UV31]](s32), [[COPY31]](p5) :: (store 4 into %stack.0 + 124, align 256, addrspace 5) - ; CHECK: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32) - ; CHECK: [[COPY32:%[0-9]+]]:_(p5) = COPY [[PTR_ADD34]](p5) - ; CHECK: G_STORE [[UV32]](s32), [[COPY32]](p5) :: (store 4 into %stack.0 + 128, align 256, addrspace 5) - ; CHECK: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 132 - ; CHECK: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32) - ; CHECK: [[COPY33:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5) - ; CHECK: G_STORE [[UV33]](s32), [[COPY33]](p5) :: (store 4 into %stack.0 + 132, align 256, addrspace 5) - ; CHECK: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32) - ; CHECK: [[COPY34:%[0-9]+]]:_(p5) = COPY [[PTR_ADD36]](p5) - ; CHECK: G_STORE [[UV34]](s32), [[COPY34]](p5) :: (store 4 into %stack.0 + 136, align 256, addrspace 5) - ; CHECK: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 140 - ; CHECK: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32) - ; CHECK: [[COPY35:%[0-9]+]]:_(p5) = COPY [[PTR_ADD37]](p5) - ; CHECK: G_STORE [[UV35]](s32), [[COPY35]](p5) :: (store 4 into %stack.0 + 140, align 256, addrspace 5) - ; CHECK: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32) - ; CHECK: [[COPY36:%[0-9]+]]:_(p5) = COPY [[PTR_ADD38]](p5) - ; CHECK: G_STORE [[UV36]](s32), [[COPY36]](p5) :: (store 4 into %stack.0 + 144, align 256, addrspace 5) - ; CHECK: 
[[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 148 - ; CHECK: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32) - ; CHECK: [[COPY37:%[0-9]+]]:_(p5) = COPY [[PTR_ADD39]](p5) - ; CHECK: G_STORE [[UV37]](s32), [[COPY37]](p5) :: (store 4 into %stack.0 + 148, align 256, addrspace 5) - ; CHECK: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 152 - ; CHECK: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32) - ; CHECK: [[COPY38:%[0-9]+]]:_(p5) = COPY [[PTR_ADD40]](p5) - ; CHECK: G_STORE [[UV38]](s32), [[COPY38]](p5) :: (store 4 into %stack.0 + 152, align 256, addrspace 5) - ; CHECK: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 156 - ; CHECK: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32) - ; CHECK: [[COPY39:%[0-9]+]]:_(p5) = COPY [[PTR_ADD41]](p5) - ; CHECK: G_STORE [[UV39]](s32), [[COPY39]](p5) :: (store 4 into %stack.0 + 156, align 256, addrspace 5) - ; CHECK: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 160 - ; CHECK: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32) - ; CHECK: [[COPY40:%[0-9]+]]:_(p5) = COPY [[PTR_ADD42]](p5) - ; CHECK: G_STORE [[UV40]](s32), [[COPY40]](p5) :: (store 4 into %stack.0 + 160, align 256, addrspace 5) - ; CHECK: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 164 - ; CHECK: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32) - ; CHECK: [[COPY41:%[0-9]+]]:_(p5) = COPY [[PTR_ADD43]](p5) - ; CHECK: G_STORE [[UV41]](s32), [[COPY41]](p5) :: (store 4 into %stack.0 + 164, align 256, addrspace 5) - ; CHECK: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 168 - ; CHECK: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32) - ; CHECK: [[COPY42:%[0-9]+]]:_(p5) = COPY [[PTR_ADD44]](p5) - ; CHECK: G_STORE [[UV42]](s32), [[COPY42]](p5) :: (store 4 into %stack.0 + 168, align 256, addrspace 5) - ; CHECK: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 172 - ; CHECK: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32) - ; CHECK: [[COPY43:%[0-9]+]]:_(p5) = COPY 
[[PTR_ADD45]](p5) - ; CHECK: G_STORE [[UV43]](s32), [[COPY43]](p5) :: (store 4 into %stack.0 + 172, align 256, addrspace 5) - ; CHECK: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 - ; CHECK: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32) - ; CHECK: [[COPY44:%[0-9]+]]:_(p5) = COPY [[PTR_ADD46]](p5) - ; CHECK: G_STORE [[UV44]](s32), [[COPY44]](p5) :: (store 4 into %stack.0 + 176, align 256, addrspace 5) - ; CHECK: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 180 - ; CHECK: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32) - ; CHECK: [[COPY45:%[0-9]+]]:_(p5) = COPY [[PTR_ADD47]](p5) - ; CHECK: G_STORE [[UV45]](s32), [[COPY45]](p5) :: (store 4 into %stack.0 + 180, align 256, addrspace 5) - ; CHECK: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 184 - ; CHECK: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32) - ; CHECK: [[COPY46:%[0-9]+]]:_(p5) = COPY [[PTR_ADD48]](p5) - ; CHECK: G_STORE [[UV46]](s32), [[COPY46]](p5) :: (store 4 into %stack.0 + 184, align 256, addrspace 5) - ; CHECK: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 188 - ; CHECK: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32) - ; CHECK: [[COPY47:%[0-9]+]]:_(p5) = COPY [[PTR_ADD49]](p5) - ; CHECK: G_STORE [[UV47]](s32), [[COPY47]](p5) :: (store 4 into %stack.0 + 188, align 256, addrspace 5) - ; CHECK: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 192 - ; CHECK: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32) - ; CHECK: [[COPY48:%[0-9]+]]:_(p5) = COPY [[PTR_ADD50]](p5) - ; CHECK: G_STORE [[UV48]](s32), [[COPY48]](p5) :: (store 4 into %stack.0 + 192, align 256, addrspace 5) - ; CHECK: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 196 - ; CHECK: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32) - ; CHECK: [[COPY49:%[0-9]+]]:_(p5) = COPY [[PTR_ADD51]](p5) - ; CHECK: G_STORE [[UV49]](s32), [[COPY49]](p5) :: (store 4 into %stack.0 + 196, align 256, addrspace 5) - ; CHECK: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 - 
; CHECK: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32) - ; CHECK: [[COPY50:%[0-9]+]]:_(p5) = COPY [[PTR_ADD52]](p5) - ; CHECK: G_STORE [[UV50]](s32), [[COPY50]](p5) :: (store 4 into %stack.0 + 200, align 256, addrspace 5) - ; CHECK: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 204 - ; CHECK: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32) - ; CHECK: [[COPY51:%[0-9]+]]:_(p5) = COPY [[PTR_ADD53]](p5) - ; CHECK: G_STORE [[UV51]](s32), [[COPY51]](p5) :: (store 4 into %stack.0 + 204, align 256, addrspace 5) - ; CHECK: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 208 - ; CHECK: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32) - ; CHECK: [[COPY52:%[0-9]+]]:_(p5) = COPY [[PTR_ADD54]](p5) - ; CHECK: G_STORE [[UV52]](s32), [[COPY52]](p5) :: (store 4 into %stack.0 + 208, align 256, addrspace 5) - ; CHECK: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 212 - ; CHECK: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32) - ; CHECK: [[COPY53:%[0-9]+]]:_(p5) = COPY [[PTR_ADD55]](p5) - ; CHECK: G_STORE [[UV53]](s32), [[COPY53]](p5) :: (store 4 into %stack.0 + 212, align 256, addrspace 5) - ; CHECK: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 216 - ; CHECK: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32) - ; CHECK: [[COPY54:%[0-9]+]]:_(p5) = COPY [[PTR_ADD56]](p5) - ; CHECK: G_STORE [[UV54]](s32), [[COPY54]](p5) :: (store 4 into %stack.0 + 216, align 256, addrspace 5) - ; CHECK: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 220 - ; CHECK: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32) - ; CHECK: [[COPY55:%[0-9]+]]:_(p5) = COPY [[PTR_ADD57]](p5) - ; CHECK: G_STORE [[UV55]](s32), [[COPY55]](p5) :: (store 4 into %stack.0 + 220, align 256, addrspace 5) - ; CHECK: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 224 - ; CHECK: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32) - ; CHECK: [[COPY56:%[0-9]+]]:_(p5) = COPY [[PTR_ADD58]](p5) - ; CHECK: G_STORE [[UV56]](s32), 
[[COPY56]](p5) :: (store 4 into %stack.0 + 224, align 256, addrspace 5) - ; CHECK: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 228 - ; CHECK: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32) - ; CHECK: [[COPY57:%[0-9]+]]:_(p5) = COPY [[PTR_ADD59]](p5) - ; CHECK: G_STORE [[UV57]](s32), [[COPY57]](p5) :: (store 4 into %stack.0 + 228, align 256, addrspace 5) - ; CHECK: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 232 - ; CHECK: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32) - ; CHECK: [[COPY58:%[0-9]+]]:_(p5) = COPY [[PTR_ADD60]](p5) - ; CHECK: G_STORE [[UV58]](s32), [[COPY58]](p5) :: (store 4 into %stack.0 + 232, align 256, addrspace 5) - ; CHECK: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 236 - ; CHECK: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32) - ; CHECK: [[COPY59:%[0-9]+]]:_(p5) = COPY [[PTR_ADD61]](p5) - ; CHECK: G_STORE [[UV59]](s32), [[COPY59]](p5) :: (store 4 into %stack.0 + 236, align 256, addrspace 5) - ; CHECK: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 240 - ; CHECK: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32) - ; CHECK: [[COPY60:%[0-9]+]]:_(p5) = COPY [[PTR_ADD62]](p5) - ; CHECK: G_STORE [[UV60]](s32), [[COPY60]](p5) :: (store 4 into %stack.0 + 240, align 256, addrspace 5) - ; CHECK: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 244 - ; CHECK: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32) - ; CHECK: [[COPY61:%[0-9]+]]:_(p5) = COPY [[PTR_ADD63]](p5) - ; CHECK: G_STORE [[UV61]](s32), [[COPY61]](p5) :: (store 4 into %stack.0 + 244, align 256, addrspace 5) - ; CHECK: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 248 - ; CHECK: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32) - ; CHECK: [[COPY62:%[0-9]+]]:_(p5) = COPY [[PTR_ADD64]](p5) - ; CHECK: G_STORE [[UV62]](s32), [[COPY62]](p5) :: (store 4 into %stack.0 + 248, align 256, addrspace 5) - ; CHECK: [[C66:%[0-9]+]]:_(s32) = G_CONSTANT i32 252 - ; CHECK: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[FRAME_INDEX]], [[C66]](s32) - ; CHECK: [[COPY63:%[0-9]+]]:_(p5) = COPY [[PTR_ADD65]](p5) - ; CHECK: G_STORE [[UV63]](s32), [[COPY63]](p5) :: (store 4 into %stack.0 + 252, align 256, addrspace 5) - ; CHECK: G_STORE [[C3]](s32), [[PTR_ADD35]](p5) :: (store 4 into %stack.0 + 132, addrspace 5) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load 4 from %stack.0 + 132, align 256, addrspace 5) - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from %stack.0 + 136, align 256, addrspace 5) - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from %stack.0 + 140, align 256, addrspace 5) - ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 4 from %stack.0 + 144, align 256, addrspace 5) - ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 4 from %stack.0 + 148, align 256, addrspace 5) - ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 4 from %stack.0 + 152, align 256, addrspace 5) - ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 4 from %stack.0 + 156, align 256, addrspace 5) - ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 4 from %stack.0 + 160, align 256, addrspace 5) - ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 4 from %stack.0 + 164, align 256, addrspace 5) - ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 4 from %stack.0 + 168, align 256, addrspace 5) - ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 4 from %stack.0 + 172, align 256, addrspace 5) - ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 4 from %stack.0 + 176, align 256, addrspace 5) - ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 4 from %stack.0 + 180, align 256, addrspace 5) - ; CHECK: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load 4 from %stack.0 + 184, align 256, addrspace 5) - ; CHECK: [[LOAD18:%[0-9]+]]:_(s32) 
= G_LOAD [[PTR_ADD16]](p5) :: (load 4 from %stack.0 + 188, align 256, addrspace 5) - ; CHECK: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load 4 from %stack.0 + 192, align 256, addrspace 5) - ; CHECK: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load 4 from %stack.0 + 196, align 256, addrspace 5) - ; CHECK: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load 4 from %stack.0 + 200, align 256, addrspace 5) - ; CHECK: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load 4 from %stack.0 + 204, align 256, addrspace 5) - ; CHECK: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load 4 from %stack.0 + 208, align 256, addrspace 5) - ; CHECK: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load 4 from %stack.0 + 212, align 256, addrspace 5) - ; CHECK: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p5) :: (load 4 from %stack.0 + 216, align 256, addrspace 5) - ; CHECK: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p5) :: (load 4 from %stack.0 + 220, align 256, addrspace 5) - ; CHECK: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p5) :: (load 4 from %stack.0 + 224, align 256, addrspace 5) - ; CHECK: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p5) :: (load 4 from %stack.0 + 228, align 256, addrspace 5) - ; CHECK: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p5) :: (load 4 from %stack.0 + 232, align 256, addrspace 5) - ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p5) :: (load 4 from %stack.0 + 236, align 256, addrspace 5) - ; CHECK: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p5) :: (load 4 from %stack.0 + 240, align 256, addrspace 5) - ; CHECK: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p5) :: (load 4 from %stack.0 + 244, align 256, addrspace 5) - ; CHECK: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD31]](p5) :: (load 4 from %stack.0 + 248, align 256, addrspace 5) - ; CHECK: [[LOAD34:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD32]](p5) :: (load 4 from %stack.0 + 252, align 256, addrspace 
5) - ; CHECK: [[LOAD35:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD33]](p5) :: (load 4 from %stack.0 + 256, align 256, addrspace 5) - ; CHECK: [[LOAD36:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD34]](p5) :: (load 4 from %stack.0 + 260, align 256, addrspace 5) - ; CHECK: [[COPY64:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5) - ; CHECK: [[LOAD37:%[0-9]+]]:_(s32) = G_LOAD [[COPY64]](p5) :: (load 4 from %stack.0 + 264, align 256, addrspace 5) - ; CHECK: [[LOAD38:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD36]](p5) :: (load 4 from %stack.0 + 268, align 256, addrspace 5) - ; CHECK: [[LOAD39:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD37]](p5) :: (load 4 from %stack.0 + 272, align 256, addrspace 5) - ; CHECK: [[LOAD40:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD38]](p5) :: (load 4 from %stack.0 + 276, align 256, addrspace 5) - ; CHECK: [[LOAD41:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD39]](p5) :: (load 4 from %stack.0 + 280, align 256, addrspace 5) - ; CHECK: [[LOAD42:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD40]](p5) :: (load 4 from %stack.0 + 284, align 256, addrspace 5) - ; CHECK: [[LOAD43:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD41]](p5) :: (load 4 from %stack.0 + 288, align 256, addrspace 5) - ; CHECK: [[LOAD44:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD42]](p5) :: (load 4 from %stack.0 + 292, align 256, addrspace 5) - ; CHECK: [[LOAD45:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD43]](p5) :: (load 4 from %stack.0 + 296, align 256, addrspace 5) - ; CHECK: [[LOAD46:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD44]](p5) :: (load 4 from %stack.0 + 300, align 256, addrspace 5) - ; CHECK: [[LOAD47:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD45]](p5) :: (load 4 from %stack.0 + 304, align 256, addrspace 5) - ; CHECK: [[LOAD48:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD46]](p5) :: (load 4 from %stack.0 + 308, align 256, addrspace 5) - ; CHECK: [[LOAD49:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD47]](p5) :: (load 4 from %stack.0 + 312, align 256, addrspace 5) - ; CHECK: [[LOAD50:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD48]](p5) :: (load 4 from %stack.0 + 316, align 256, addrspace 5) - ; CHECK: 
[[LOAD51:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD49]](p5) :: (load 4 from %stack.0 + 320, align 256, addrspace 5) - ; CHECK: [[LOAD52:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD50]](p5) :: (load 4 from %stack.0 + 324, align 256, addrspace 5) - ; CHECK: [[LOAD53:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD51]](p5) :: (load 4 from %stack.0 + 328, align 256, addrspace 5) - ; CHECK: [[LOAD54:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD52]](p5) :: (load 4 from %stack.0 + 332, align 256, addrspace 5) - ; CHECK: [[LOAD55:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD53]](p5) :: (load 4 from %stack.0 + 336, align 256, addrspace 5) - ; CHECK: [[LOAD56:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD54]](p5) :: (load 4 from %stack.0 + 340, align 256, addrspace 5) - ; CHECK: [[LOAD57:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD55]](p5) :: (load 4 from %stack.0 + 344, align 256, addrspace 5) - ; CHECK: [[LOAD58:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD56]](p5) :: (load 4 from %stack.0 + 348, align 256, addrspace 5) - ; CHECK: [[LOAD59:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD57]](p5) :: (load 4 from %stack.0 + 352, align 256, addrspace 5) - ; CHECK: [[LOAD60:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD58]](p5) :: (load 4 from %stack.0 + 356, align 256, addrspace 5) - ; CHECK: [[LOAD61:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD59]](p5) :: (load 4 from %stack.0 + 360, align 256, addrspace 5) - ; CHECK: [[LOAD62:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD60]](p5) :: (load 4 from %stack.0 + 364, align 256, addrspace 5) - ; CHECK: [[LOAD63:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD61]](p5) :: (load 4 from %stack.0 + 368, align 256, addrspace 5) - ; CHECK: [[LOAD64:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD62]](p5) :: (load 4 from %stack.0 + 372, align 256, addrspace 5) - ; CHECK: [[LOAD65:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD63]](p5) :: (load 4 from %stack.0 + 376, align 256, addrspace 5) - ; CHECK: [[LOAD66:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD64]](p5) :: (load 4 from %stack.0 + 380, align 256, addrspace 5) - ; CHECK: [[LOAD67:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD65]](p5) :: (load 4 from %stack.0 + 
384, align 256, addrspace 5) - ; CHECK: [[COPY65:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD16]](s32), [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32) - ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32) - ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32) - ; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32) - ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD32]](s32), [[LOAD33]](s32), [[LOAD34]](s32), [[LOAD35]](s32) - ; CHECK: [[BUILD_VECTOR8:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD36]](s32), [[LOAD37]](s32), [[LOAD38]](s32), [[LOAD39]](s32) - ; CHECK: [[BUILD_VECTOR9:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD40]](s32), [[LOAD41]](s32), [[LOAD42]](s32), [[LOAD43]](s32) - ; CHECK: [[BUILD_VECTOR10:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD44]](s32), [[LOAD45]](s32), [[LOAD46]](s32), [[LOAD47]](s32) - ; CHECK: [[BUILD_VECTOR11:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD48]](s32), [[LOAD49]](s32), [[LOAD50]](s32), [[LOAD51]](s32) - ; CHECK: [[BUILD_VECTOR12:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD52]](s32), [[LOAD53]](s32), [[LOAD54]](s32), [[LOAD55]](s32) - ; CHECK: [[BUILD_VECTOR13:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD56]](s32), [[LOAD57]](s32), [[LOAD58]](s32), [[LOAD59]](s32) - ; CHECK: 
[[BUILD_VECTOR14:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD60]](s32), [[LOAD61]](s32), [[LOAD62]](s32), [[LOAD63]](s32) - ; CHECK: [[BUILD_VECTOR15:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD64]](s32), [[LOAD65]](s32), [[LOAD66]](s32), [[LOAD67]](s32) - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY65]](p1) :: (store 16, align 4, addrspace 1) - ; CHECK: [[C67:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD66:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C67]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD66]](p1) :: (store 16 + 16, align 4, addrspace 1) - ; CHECK: [[C68:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD67:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C68]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PTR_ADD67]](p1) :: (store 16 + 32, align 4, addrspace 1) - ; CHECK: [[C69:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD68:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C69]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR3]](<4 x s32>), [[PTR_ADD68]](p1) :: (store 16 + 48, align 4, addrspace 1) - ; CHECK: [[PTR_ADD69:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR4]](<4 x s32>), [[PTR_ADD69]](p1) :: (store 16 + 64, align 4, addrspace 1) - ; CHECK: [[C70:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 - ; CHECK: [[PTR_ADD70:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C70]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR5]](<4 x s32>), [[PTR_ADD70]](p1) :: (store 16 + 80, align 4, addrspace 1) - ; CHECK: [[C71:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 - ; CHECK: [[PTR_ADD71:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C71]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR6]](<4 x s32>), [[PTR_ADD71]](p1) :: (store 16 + 96, align 4, addrspace 1) - ; CHECK: [[C72:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 - ; CHECK: [[PTR_ADD72:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C72]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR7]](<4 x s32>), [[PTR_ADD72]](p1) :: (store 16 + 112, align 4, addrspace 1) - ; CHECK: 
[[PTR_ADD73:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C1]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR8]](<4 x s32>), [[PTR_ADD73]](p1) :: (store 16 + 128, align 4, addrspace 1) - ; CHECK: [[C73:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 - ; CHECK: [[PTR_ADD74:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C73]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR9]](<4 x s32>), [[PTR_ADD74]](p1) :: (store 16 + 144, align 4, addrspace 1) - ; CHECK: [[C74:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 - ; CHECK: [[PTR_ADD75:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C74]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR10]](<4 x s32>), [[PTR_ADD75]](p1) :: (store 16 + 160, align 4, addrspace 1) - ; CHECK: [[C75:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 - ; CHECK: [[PTR_ADD76:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C75]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR11]](<4 x s32>), [[PTR_ADD76]](p1) :: (store 16 + 176, align 4, addrspace 1) - ; CHECK: [[PTR_ADD77:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C2]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR12]](<4 x s32>), [[PTR_ADD77]](p1) :: (store 16 + 192, align 4, addrspace 1) - ; CHECK: [[C76:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 - ; CHECK: [[PTR_ADD78:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C76]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR13]](<4 x s32>), [[PTR_ADD78]](p1) :: (store 16 + 208, align 4, addrspace 1) - ; CHECK: [[C77:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 - ; CHECK: [[PTR_ADD79:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C77]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR14]](<4 x s32>), [[PTR_ADD79]](p1) :: (store 16 + 224, align 4, addrspace 1) - ; CHECK: [[C78:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 - ; CHECK: [[PTR_ADD80:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C78]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR15]](<4 x s32>), [[PTR_ADD80]](p1) :: (store 16 + 240, align 4, addrspace 1) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[LOAD2]](<16 x s32>), [[LOAD3]](<16 x s32>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<32 x s32>) = G_INSERT [[CONCAT_VECTORS]], 
[[C3]](s32), 32 + ; CHECK: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) + ; CHECK: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) + ; CHECK: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>), [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[INSERT]](<32 x s32>) + ; CHECK: G_STORE [[UV]](<4 x s32>), [[COPY1]](p1) :: (store 16, align 4, addrspace 1) + ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64) + ; CHECK: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD3]](p1) :: (store 16 + 16, align 4, addrspace 1) + ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64) + ; CHECK: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD4]](p1) :: (store 16 + 32, align 4, addrspace 1) + ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64) + ; CHECK: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD5]](p1) :: (store 16 + 48, align 4, addrspace 1) + ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD6]](p1) :: (store 16 + 64, align 4, addrspace 1) + ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 + ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64) + ; CHECK: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD7]](p1) :: (store 16 + 80, align 4, addrspace 1) + ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 + ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64) + ; CHECK: 
G_STORE [[UV6]](<4 x s32>), [[PTR_ADD8]](p1) :: (store 16 + 96, align 4, addrspace 1) + ; CHECK: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 + ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64) + ; CHECK: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD9]](p1) :: (store 16 + 112, align 4, addrspace 1) + ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD10]](p1) :: (store 16 + 128, align 4, addrspace 1) + ; CHECK: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 + ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64) + ; CHECK: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD11]](p1) :: (store 16 + 144, align 4, addrspace 1) + ; CHECK: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 + ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64) + ; CHECK: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD12]](p1) :: (store 16 + 160, align 4, addrspace 1) + ; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 + ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64) + ; CHECK: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD13]](p1) :: (store 16 + 176, align 4, addrspace 1) + ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD14]](p1) :: (store 16 + 192, align 4, addrspace 1) + ; CHECK: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 + ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64) + ; CHECK: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD15]](p1) :: (store 16 + 208, align 4, addrspace 1) + ; CHECK: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 + ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64) + ; CHECK: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD16]](p1) :: (store 16 + 224, align 4, addrspace 1) + ; CHECK: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 + ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C15]](s64) + ; CHECK: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD17]](p1) :: 
(store 16 + 240, align 4, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll index e5d26476e94248..f3a53fb7d22d50 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll @@ -674,8 +674,7 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrs ; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x180000 -; GFX6-NEXT: s_lshl_b32 s0, s0, 8 -; GFX6-NEXT: s_ashr_i32 s0, s0, 8 +; GFX6-NEXT: s_bfe_i32 s0, s0, 0x180000 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm @@ -830,8 +829,7 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s2, s2, s0 ; GFX6-NEXT: s_bfe_i32 s0, s2, 0x80000 -; GFX6-NEXT: s_lshl_b32 s0, s0, 24 -; GFX6-NEXT: s_ashr_i32 s0, s0, 24 +; GFX6-NEXT: s_sext_i32_i8 s0, s0 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm @@ -854,8 +852,7 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %ou ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s2, s2, s0 ; GFX6-NEXT: s_bfe_i32 s0, s2, 8 -; GFX6-NEXT: s_lshl_b32 s0, s0, 24 -; GFX6-NEXT: s_ashr_i32 s0, s0, 24 +; GFX6-NEXT: s_sext_i32_i8 s0, s0 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm @@ -879,8 +876,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 add ; GFX6-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; 
GFX6-NEXT: v_ashrrev_i32_e32 v0, 24, v0 +; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm %load = load i8, i8 addrspace(1)* %ptr, align 1 @@ -904,8 +900,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 a ; GFX6-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 8, 0 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX6-NEXT: v_ashrrev_i32_e32 v0, 24, v0 +; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm %load = load i8, i8 addrspace(1)* %ptr, align 1 @@ -927,8 +922,7 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_lshl_b32 s0, s0, 31 -; GFX6-NEXT: s_ashr_i32 s0, s0, 31 +; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10000 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -951,8 +945,7 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_lshl_b32 s0, s0, 30 -; GFX6-NEXT: s_ashr_i32 s0, s0, 30 +; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20000 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10001 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -975,8 +968,7 @@ define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_lshl_b32 s0, s0, 30 -; GFX6-NEXT: s_ashr_i32 s0, s0, 30 +; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20000 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20001 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 
0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll index ab3fbc03e81d57..a8098b7dd9d159 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll @@ -423,8 +423,7 @@ define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_lshl_b32 s0, s0, 31 -; GFX6-NEXT: s_ashr_i32 s0, s0, 31 +; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10000 ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -950,22 +949,22 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0, ; GFX6-LABEL: simplify_bfe_u32_multi_use_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX6-NEXT: s_mov_b32 s6, -1 -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_and_b32 s0, s0, 63 -; GFX6-NEXT: s_bfe_u32 s1, s0, 0x20002 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_store_dword v1, off, s[4:7], 0 -; GFX6-NEXT: buffer_store_dword v0, off, s[8:11], 0 -; GFX6-NEXT: s_endpgm +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) 
+; GFX6-NEXT: s_and_b32 s0, s0, 63 +; GFX6-NEXT: s_bfe_u32 s1, s0, 0x20002 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: buffer_store_dword v1, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; GFX6-NEXT: s_endpgm i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 { %src = load i32, i32 addrspace(1)* %in, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll index f6565fe1b6e24a..db9e75dd582c82 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -3415,8 +3415,7 @@ define i64 @v_sdiv_i64_24bit(i64 %num, i64 %den) { ; CGP-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v2| ; CGP-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CGP-NEXT: v_lshlrev_b32_e32 v0, 7, v0 -; CGP-NEXT: v_ashrrev_i32_e32 v0, 7, v0 +; CGP-NEXT: v_bfe_i32 v0, v0, 0, 25 ; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; CGP-NEXT: s_setpc_b64 s[30:31] %num.mask = and i64 %num, 16777215 @@ -3736,10 +3735,8 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_cmp_ge_f32_e64 vcc, |v5|, |v3| ; CGP-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; CGP-NEXT: v_lshlrev_b32_e32 v0, 7, v0 -; CGP-NEXT: v_lshlrev_b32_e32 v2, 7, v2 -; CGP-NEXT: v_ashrrev_i32_e32 v0, 7, v0 -; CGP-NEXT: v_ashrrev_i32_e32 v2, 7, v2 +; CGP-NEXT: v_bfe_i32 v0, v0, 0, 25 +; CGP-NEXT: v_bfe_i32 v2, v2, 0, 25 ; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; CGP-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll index 06d46321a59b61..7f55c735859753 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -3363,8 +3363,7 @@ define i64 @v_srem_i64_24bit(i64 %num, i64 %den) { ; CGP-NEXT: 
v_add_i32_e32 v2, vcc, v4, v2 ; CGP-NEXT: v_mul_lo_u32 v1, v2, v1 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 -; CGP-NEXT: v_lshlrev_b32_e32 v0, 7, v0 -; CGP-NEXT: v_ashrrev_i32_e32 v0, 7, v0 +; CGP-NEXT: v_bfe_i32 v0, v0, 0, 25 ; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; CGP-NEXT: s_setpc_b64 s[30:31] %num.mask = and i64 %num, 16777215 @@ -3677,20 +3676,18 @@ define <2 x i64> @v_srem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { ; CGP-NEXT: v_rcp_f32_e32 v5, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v6, 30, v6 ; CGP-NEXT: v_or_b32_e32 v6, 1, v6 -; CGP-NEXT: v_lshlrev_b32_e32 v0, 7, v0 +; CGP-NEXT: v_bfe_i32 v0, v0, 0, 25 ; CGP-NEXT: v_mul_f32_e32 v5, v1, v5 ; CGP-NEXT: v_trunc_f32_e32 v5, v5 ; CGP-NEXT: v_mad_f32 v1, -v5, v4, v1 ; CGP-NEXT: v_cvt_i32_f32_e32 v5, v5 ; CGP-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v4| ; CGP-NEXT: v_cndmask_b32_e32 v1, 0, v6, vcc -; CGP-NEXT: v_ashrrev_i32_e32 v0, 7, v0 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v5, v1 ; CGP-NEXT: v_mul_lo_u32 v3, v1, v3 ; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 -; CGP-NEXT: v_lshlrev_b32_e32 v2, 7, v2 -; CGP-NEXT: v_ashrrev_i32_e32 v2, 7, v2 +; CGP-NEXT: v_bfe_i32 v2, v2, 0, 25 ; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; CGP-NEXT: s_setpc_b64 s[30:31] %num.mask = and <2 x i64> %num, diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll index 3224d8a3594ad8..db4032efceabb3 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll @@ -2,6 +2,10 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s +; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,NOHSA %s +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa 
-mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s + @lds0 = addrspace(3) global [512 x float] undef, align 4 @lds1 = addrspace(3) global [256 x float] undef, align 4 diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-symbol-rename.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-symbol-rename.ll index 72502f925d29de..f486fc9524c3d3 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-symbol-rename.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-symbol-rename.ll @@ -18,6 +18,9 @@ ; This is f"o" @"f\22o\22" = common global i32 0, align 4 +; This is f=o +@"f\3do" = internal global i32 0, align 4 + define internal i32 @f$o() { entry: %call = call i32 bitcast (i32 (...)* @"f\40o" to i32 ()*)() @@ -27,8 +30,10 @@ entry: ; This is f&o define i32 @"f\26o"() { entry: - %call = call i32 @f$o() - ret i32 %call + %tmp = call i32 @f$o() + %tmp1 = load i32, i32* @"f\3do" + %tmp2 = add i32 %tmp, %tmp1 + ret i32 %tmp2 } ; This is f&_o @@ -84,12 +89,17 @@ declare i32 @"f\40o"(...) ; ASM-NEXT: .vbyte 4, 10 # 0xa ; ASM-NEXT: .comm _Renamed..2222f_o_[RW],4,2 ; ASM-NEXT: .rename _Renamed..2222f_o_[RW],"f""o""" +; ASM-NEXT: .lcomm _Renamed..3df_o,4,_Renamed..3df_o[BS],2 +; ASM-NEXT: .rename _Renamed..3df_o[BS],"f=o" ; ASM-NEXT: .extern ._Renamed..40f_o[PR] ; ASM-NEXT: .rename ._Renamed..40f_o[PR],".f@o" ; ASM-NEXT: .extern _Renamed..40f_o[DS] ; ASM-NEXT: .rename _Renamed..40f_o[DS],"f@o" ; ASM-NEXT: .toc ; ASM-NEXT: L..C0: +; ASM-NEXT: .tc _Renamed..3df_o[TC],_Renamed..3df_o[BS] +; ASM-NEXT: .rename _Renamed..3df_o[TC],"f=o" +; ASM-NEXT: L..C1: ; ASM-NEXT: .tc _Renamed..40f_o[TC],_Renamed..40f_o[DS] ; ASM-NEXT: .rename _Renamed..40f_o[TC],"f@o" @@ -115,47 +125,59 @@ declare i32 @"f\40o"(...) 
; OBJ-NEXT: 34: 90 01 00 08 stw 0, 8(1) ; OBJ-NEXT: 38: 94 21 ff c0 stwu 1, -64(1) ; OBJ-NEXT: 3c: 4b ff ff c5 bl 0x0 -; OBJ-NEXT: 40: 38 21 00 40 addi 1, 1, 64 -; OBJ-NEXT: 44: 80 01 00 08 lwz 0, 8(1) -; OBJ-NEXT: 48: 7c 08 03 a6 mtlr 0 -; OBJ-NEXT: 4c: 4e 80 00 20 blr +; OBJ-NEXT: 40: 80 82 00 00 lwz 4, 0(2) +; OBJ-NEXT: 00000042: R_TOC (idx: 24) f=o[TC] +; OBJ-NEXT: 44: 80 84 00 00 lwz 4, 0(4) +; OBJ-NEXT: 48: 7c 63 22 14 add 3, 3, 4 +; OBJ-NEXT: 4c: 38 21 00 40 addi 1, 1, 64 +; OBJ-NEXT: 50: 80 01 00 08 lwz 0, 8(1) +; OBJ-NEXT: 54: 7c 08 03 a6 mtlr 0 +; OBJ-NEXT: 58: 4e 80 00 20 blr +; OBJ-NEXT: 5c: 60 00 00 00 nop ; OBJ-EMPTY: -; OBJ-NEXT: 00000050 (idx: 10) .f&_o: -; OBJ-NEXT: 50: 80 62 00 00 lwz 3, 0(2) -; OBJ-NEXT: 00000052: R_TOC (idx: 24) f@o[TC] -; OBJ-NEXT: 54: 4e 80 00 20 blr +; OBJ-NEXT: 00000060 (idx: 10) .f&_o: +; OBJ-NEXT: 60: 80 62 00 04 lwz 3, 4(2) +; OBJ-NEXT: 00000062: R_TOC (idx: 26) f@o[TC] +; OBJ-NEXT: 64: 4e 80 00 20 blr ; OBJ-EMPTY: ; OBJ-NEXT: Disassembly of section .data: ; OBJ-EMPTY: -; OBJ-NEXT: 00000058 (idx: 14) f`o: -; OBJ-NEXT: 58: 00 00 00 0a +; OBJ-NEXT: 00000068 (idx: 14) f`o: +; OBJ-NEXT: 68: 00 00 00 0a ; OBJ-EMPTY: -; OBJ-NEXT: 0000005c (idx: 16) f$o[DS]: -; OBJ-NEXT: 5c: 00 00 00 00 -; OBJ-NEXT: 0000005c: R_POS (idx: 6) .f$o -; OBJ-NEXT: 60: 00 00 00 80 -; OBJ-NEXT: 00000060: R_POS (idx: 22) TOC[TC0] -; OBJ-NEXT: 64: 00 00 00 00 +; OBJ-NEXT: 0000006c (idx: 16) f$o[DS]: +; OBJ-NEXT: 6c: 00 00 00 00 +; OBJ-NEXT: 0000006c: R_POS (idx: 6) .f$o +; OBJ-NEXT: 70: 00 00 00 90 +; OBJ-NEXT: 00000070: R_POS (idx: 22) TOC[TC0] +; OBJ-NEXT: 74: 00 00 00 00 ; OBJ-EMPTY: -; OBJ-NEXT: 00000068 (idx: 18) f&o[DS]: -; OBJ-NEXT: 68: 00 00 00 30 -; OBJ-NEXT: 00000068: R_POS (idx: 8) .f&o -; OBJ-NEXT: 6c: 00 00 00 80 -; OBJ-NEXT: 0000006c: R_POS (idx: 22) TOC[TC0] -; OBJ-NEXT: 70: 00 00 00 00 +; OBJ-NEXT: 00000078 (idx: 18) f&o[DS]: +; OBJ-NEXT: 78: 00 00 00 30 +; OBJ-NEXT: 00000078: R_POS (idx: 8) .f&o +; OBJ-NEXT: 7c: 00 00 00 90 +; OBJ-NEXT: 
0000007c: R_POS (idx: 22) TOC[TC0] +; OBJ-NEXT: 80: 00 00 00 00 ; OBJ-EMPTY: -; OBJ-NEXT: 00000074 (idx: 20) f&_o[DS]: -; OBJ-NEXT: 74: 00 00 00 50 -; OBJ-NEXT: 00000074: R_POS (idx: 10) .f&_o -; OBJ-NEXT: 78: 00 00 00 80 -; OBJ-NEXT: 00000078: R_POS (idx: 22) TOC[TC0] -; OBJ-NEXT: 7c: 00 00 00 00 +; OBJ-NEXT: 00000084 (idx: 20) f&_o[DS]: +; OBJ-NEXT: 84: 00 00 00 60 +; OBJ-NEXT: 00000084: R_POS (idx: 10) .f&_o +; OBJ-NEXT: 88: 00 00 00 90 +; OBJ-NEXT: 00000088: R_POS (idx: 22) TOC[TC0] +; OBJ-NEXT: 8c: 00 00 00 00 ; OBJ-EMPTY: -; OBJ-NEXT: 00000080 (idx: 24) f@o[TC]: -; OBJ-NEXT: 80: 00 00 00 00 -; OBJ-NEXT: 00000080: R_POS (idx: 2) f@o[DS] +; OBJ-NEXT: 00000090 (idx: 24) f=o[TC]: +; OBJ-NEXT: 90: 00 00 00 9c +; OBJ-NEXT: 00000090: R_POS (idx: 30) f=o[BS] +; OBJ-EMPTY: +; OBJ-NEXT: 00000094 (idx: 26) f@o[TC]: +; OBJ-NEXT: 94: 00 00 00 00 +; OBJ-NEXT: 00000094: R_POS (idx: 2) f@o[DS] ; OBJ-EMPTY: ; OBJ-NEXT: Disassembly of section .bss: ; OBJ-EMPTY: -; OBJ-NEXT: 00000084 (idx: 26) f"o"[RW]: +; OBJ-NEXT: 00000098 (idx: 28) f"o"[RW]: +; OBJ-NEXT: ... +; OBJ-EMPTY: +; OBJ-NEXT: 0000009c (idx: 30) f=o[BS]: ; OBJ-NEXT: ... diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll b/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll new file mode 100644 index 00000000000000..2b1cf27c20ec9a --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s + +; This test case aims to test the vector mask manipulation operations +; on Power10. 
+ +declare i32 @llvm.ppc.altivec.vextractbm(<16 x i8>) +declare i32 @llvm.ppc.altivec.vextracthm(<8 x i16>) +declare i32 @llvm.ppc.altivec.vextractwm(<4 x i32>) +declare i32 @llvm.ppc.altivec.vextractdm(<2 x i64>) +declare i32 @llvm.ppc.altivec.vextractqm(<1 x i128>) + +define i32 @test_vextractbm(<16 x i8> %a) { +; CHECK-LABEL: test_vextractbm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextractbm r3, v2 +; CHECK-NEXT: blr +entry: + %ext = tail call i32 @llvm.ppc.altivec.vextractbm(<16 x i8> %a) + ret i32 %ext +} + +define i32 @test_vextracthm(<8 x i16> %a) { +; CHECK-LABEL: test_vextracthm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextracthm r3, v2 +; CHECK-NEXT: blr +entry: + %ext = tail call i32 @llvm.ppc.altivec.vextracthm(<8 x i16> %a) + ret i32 %ext +} + +define i32 @test_vextractwm(<4 x i32> %a) { +; CHECK-LABEL: test_vextractwm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextractwm r3, v2 +; CHECK-NEXT: blr +entry: + %ext = tail call i32 @llvm.ppc.altivec.vextractwm(<4 x i32> %a) + ret i32 %ext +} + +define i32 @test_vextractdm(<2 x i64> %a) { +; CHECK-LABEL: test_vextractdm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextractdm r3, v2 +; CHECK-NEXT: blr +entry: + %ext = tail call i32 @llvm.ppc.altivec.vextractdm(<2 x i64> %a) + ret i32 %ext +} + +define i32 @test_vextractqm(<1 x i128> %a) { +; CHECK-LABEL: test_vextractqm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextractqm r3, v2 +; CHECK-NEXT: blr +entry: + %ext = tail call i32 @llvm.ppc.altivec.vextractqm(<1 x i128> %a) + ret i32 %ext +} diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir new file mode 100644 index 00000000000000..9a5856335dfc67 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir @@ -0,0 +1,269 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -run-pass=arm-low-overhead-loops 
-tail-predication=enabled %s -o - | FileCheck %s + +--- | + define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float* nocapture %pResult) #0 { + entry: + %0 = add i32 %blockSize, 3 + %1 = icmp slt i32 %blockSize, 4 + %smin = select i1 %1, i32 %blockSize, i32 4 + %2 = sub i32 %0, %smin + %3 = lshr i32 %2, 2 + %4 = add nuw nsw i32 %3, 1 + %5 = icmp slt i32 %blockSize, 4 + %smin3 = select i1 %5, i32 %blockSize, i32 4 + %6 = sub i32 %0, %smin3 + %7 = lshr i32 %6, 2 + %8 = add nuw nsw i32 %7, 1 + call void @llvm.set.loop.iterations.i32(i32 %8) + br label %do.body.i + + do.body.i: ; preds = %do.body.i, %entry + %blkCnt.0.i = phi i32 [ %13, %do.body.i ], [ %blockSize, %entry ] + %sumVec.0.i = phi <4 x float> [ %12, %do.body.i ], [ zeroinitializer, %entry ] + %pSrc.addr.0.i = phi float* [ %add.ptr.i, %do.body.i ], [ %pSrc, %entry ] + %9 = phi i32 [ %8, %entry ], [ %14, %do.body.i ] + %pSrc.addr.0.i2 = bitcast float* %pSrc.addr.0.i to <4 x float>* + %10 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0.i) + %11 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.0.i2, i32 4, <4 x i1> %10, <4 x float> zeroinitializer) + %12 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %sumVec.0.i, <4 x float> %11, <4 x i1> %10, <4 x float> %sumVec.0.i) + %add.ptr.i = getelementptr inbounds float, float* %pSrc.addr.0.i, i32 4 + %13 = add i32 %blkCnt.0.i, -4 + %14 = call i32 @llvm.loop.decrement.reg.i32(i32 %9, i32 1) + %15 = icmp ne i32 %14, 0 + br i1 %15, label %do.body.i, label %arm_mean_f32_mve.exit + + arm_mean_f32_mve.exit: ; preds = %do.body.i + %16 = extractelement <4 x float> %12, i32 3 + %add2.i.i = fadd fast float %16, %16 + %conv.i = uitofp i32 %blockSize to float + %div.i = fdiv fast float %add2.i.i, %conv.i + %17 = bitcast float %div.i to i32 + %18 = insertelement <4 x i32> undef, i32 %17, i64 0 + %19 = shufflevector <4 x i32> %18, <4 x i32> undef, <4 x i32> zeroinitializer + %20 = 
bitcast <4 x i32> %19 to <4 x float> + call void @llvm.set.loop.iterations.i32(i32 %4) + br label %do.body + + do.body: ; preds = %do.body, %arm_mean_f32_mve.exit + %blkCnt.0 = phi i32 [ %blockSize, %arm_mean_f32_mve.exit ], [ %26, %do.body ] + %sumVec.0 = phi <4 x float> [ zeroinitializer, %arm_mean_f32_mve.exit ], [ %25, %do.body ] + %pSrc.addr.0 = phi float* [ %pSrc, %arm_mean_f32_mve.exit ], [ %add.ptr, %do.body ] + %21 = phi i32 [ %4, %arm_mean_f32_mve.exit ], [ %27, %do.body ] + %pSrc.addr.01 = bitcast float* %pSrc.addr.0 to <4 x float>* + %22 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0) + %23 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.01, i32 4, <4 x i1> %22, <4 x float> zeroinitializer) + %24 = tail call fast <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> %23, <4 x float> %20, <4 x i1> %22, <4 x float> undef) + %25 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %24, <4 x float> %24, <4 x float> %sumVec.0, <4 x i1> %22) + %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4 + %26 = add i32 %blkCnt.0, -4 + %27 = call i32 @llvm.loop.decrement.reg.i32(i32 %21, i32 1) + %28 = icmp ne i32 %27, 0 + br i1 %28, label %do.body, label %do.end + + do.end: ; preds = %do.body + %29 = extractelement <4 x float> %25, i32 3 + %add2.i = fadd fast float %29, %29 + %sub2 = add i32 %blockSize, -1 + %conv = uitofp i32 %sub2 to float + %div = fdiv fast float %add2.i, %conv + store float %div, float* %pResult, align 4 + ret void + } + + ; Function Attrs: nounwind readnone + declare <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #1 + + ; Function Attrs: nounwind readnone + declare <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x float>, <4 x i1>) #1 + + ; Function Attrs: nounwind readnone + declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1 + + ; Function Attrs: argmemonly nounwind 
readonly willreturn + declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2 + + ; Function Attrs: nounwind readnone + declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #1 + + ; Function Attrs: noduplicate nounwind + declare void @llvm.set.loop.iterations.i32(i32) #3 + + ; Function Attrs: noduplicate nounwind + declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #3 + + attributes #0 = { "target-features"="+mve.fp" } + attributes #1 = { nounwind readnone "target-features"="+mve.fp" } + attributes #2 = { argmemonly nounwind readonly willreturn "target-features"="+mve.fp" } + attributes #3 = { noduplicate nounwind } + +... +--- +name: arm_var_f32_mve +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] 
+constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: arm_var_f32_mve + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r4 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8 + ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + ; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg + ; CHECK: $r12 = tMOVr $r0, 14 /* CC::al */, $noreg + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3 + ; CHECK: bb.1.do.body.i: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r12 + ; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.0.i2, align 4) + ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0 + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 + ; CHECK: bb.2.arm_mean_f32_mve.exit: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: liveins: $q0, $r0, $r1, $r2 + ; CHECK: $s4 = VMOVSR $r1, 14 /* CC::al */, $noreg + ; CHECK: $lr = MVE_DLSTP_32 $r1 + ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, killed renamable $s3, 14 /* CC::al */, $noreg, implicit killed $q0 + ; CHECK: renamable $s4 = VUITOS killed renamable $s4, 14 /* CC::al */, $noreg + ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg + ; CHECK: renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg + ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable 
$r3, 0, $noreg, undef renamable $q1 + ; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg + ; CHECK: bb.3.do.body: + ; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000) + ; CHECK: liveins: $lr, $q0, $q1, $r0, $r1, $r2, $r3 + ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg + ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.01, align 4) + ; CHECK: renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VSUBf32 killed renamable $q2, renamable $q1, 0, $noreg, undef renamable $q2 + ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 0, killed $noreg + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.3 + ; CHECK: bb.4.do.end: + ; CHECK: liveins: $q0, $r1, $r2 + ; CHECK: renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, killed renamable $s3, 14 /* CC::al */, $noreg, implicit killed $q0 + ; CHECK: $s2 = VMOVSR killed $r0, 14 /* CC::al */, $noreg + ; CHECK: renamable $s2 = VUITOS killed renamable $s2, 14 /* CC::al */, $noreg + ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg + ; CHECK: VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.pResult) + ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r4, $lr + + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r4, -8 + $r3 = tMOVr $r1, 14 /* CC::al */, $noreg + tCMPi8 renamable 
$r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2IT 10, 8, implicit-def $itstate + renamable $r3 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r3, implicit killed $itstate + renamable $r12 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + renamable $r3, dead $cpsr = tSUBrr renamable $r1, killed renamable $r3, 14 /* CC::al */, $noreg + renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 3, 14 /* CC::al */, $noreg + renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg + $r3 = tMOVr $r1, 14 /* CC::al */, $noreg + $r12 = tMOVr $r0, 14 /* CC::al */, $noreg + t2DoLoopStart renamable $lr + $r4 = tMOVr $lr, 14 /* CC::al */, $noreg + + bb.1.do.body.i: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r12 + + renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg + renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + MVE_VPST 4, implicit $vpr + renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.0.i2, align 4) + renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, renamable $q0 + t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2.arm_mean_f32_mve.exit: + successors: %bb.3(0x80000000) + liveins: $q0, $r0, $r1, $r2, $r4 + + $s4 = VMOVSR $r1, 14 /* CC::al */, $noreg + $lr = tMOVr $r4, 14 /* CC::al */, $noreg + renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0 + t2DoLoopStart killed $r4 + renamable $s4 = VUITOS killed renamable $s4, 14 /* CC::al */, $noreg + renamable $s0 = nnan ninf nsz arcp 
contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg + renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg + renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 + $r3 = tMOVr $r1, 14 /* CC::al */, $noreg + + bb.3.do.body: + successors: %bb.3(0x7c000000), %bb.4(0x04000000) + liveins: $lr, $q0, $q1, $r0, $r1, $r2, $r3 + + renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg + renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + MVE_VPST 2, implicit $vpr + renamable $r0, renamable $q2 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.01, align 4) + renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VSUBf32 killed renamable $q2, renamable $q1, 1, renamable $vpr, undef renamable $q2 + renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q0, killed renamable $q2, renamable $q2, 1, killed renamable $vpr + t2LoopEnd renamable $lr, %bb.3, implicit-def dead $cpsr + tB %bb.4, 14 /* CC::al */, $noreg + + bb.4.do.end: + liveins: $q0, $r1, $r2 + + renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 1, 14 /* CC::al */, $noreg + renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0 + $s2 = VMOVSR killed $r0, 14 /* CC::al */, $noreg + renamable $s2 = VUITOS killed renamable $s2, 14 /* CC::al */, $noreg + renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg + VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.pResult) + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc + +... 
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll new file mode 100644 index 00000000000000..63a2c0233f6e3e --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s +define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float* nocapture %pResult) { +; CHECK-LABEL: .LBB0_1: @ %do.body.i +; CHECK: dlstp.32 lr, r1 +; CHECK-NEXT: vadd.f32 s0, s3, s3 +; CHECK-NEXT: vcvt.f32.u32 s4, s4 +; CHECK-NEXT: vdiv.f32 s0, s0, s4 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov.i32 q0, #0x0 +; CHECK-NEXT: vdup.32 q1, r3 +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: .LBB0_3: @ %do.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: vldrw.u32 q2, [r0], #16 +; CHECK-NEXT: vsub.f32 q2, q2, q1 +; CHECK-NEXT: vfma.f32 q0, q2, q2 +; CHECK-NEXT: letp lr, .LBB0_3 +entry: + br label %do.body.i + +do.body.i: ; preds = %entry, %do.body.i + %blkCnt.0.i = phi i32 [ %sub.i, %do.body.i ], [ %blockSize, %entry ] + %sumVec.0.i = phi <4 x float> [ %3, %do.body.i ], [ zeroinitializer, %entry ] + %pSrc.addr.0.i = phi float* [ %add.ptr.i, %do.body.i ], [ %pSrc, %entry ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0.i) + %1 = bitcast float* %pSrc.addr.0.i to <4 x float>* + %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %3 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %sumVec.0.i, <4 x float> %2, <4 x i1> %0, <4 x float> %sumVec.0.i) + %sub.i = add nsw i32 %blkCnt.0.i, -4 + %add.ptr.i = getelementptr inbounds float, float* %pSrc.addr.0.i, i32 4 + %cmp.i = icmp sgt i32 %blkCnt.0.i, 4 + br 
i1 %cmp.i, label %do.body.i, label %arm_mean_f32_mve.exit + +arm_mean_f32_mve.exit: ; preds = %do.body.i + %4 = extractelement <4 x float> %3, i32 3 + %add2.i.i = fadd fast float %4, %4 + %conv.i = uitofp i32 %blockSize to float + %div.i = fdiv fast float %add2.i.i, %conv.i + %.splatinsert = insertelement <4 x float> undef, float %div.i, i32 0 + %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer + br label %do.body + +do.body: ; preds = %do.body, %arm_mean_f32_mve.exit + %blkCnt.0 = phi i32 [ %blockSize, %arm_mean_f32_mve.exit ], [ %sub, %do.body ] + %sumVec.0 = phi <4 x float> [ zeroinitializer, %arm_mean_f32_mve.exit ], [ %9, %do.body ] + %pSrc.addr.0 = phi float* [ %pSrc, %arm_mean_f32_mve.exit ], [ %add.ptr, %do.body ] + %5 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0) + %6 = bitcast float* %pSrc.addr.0 to <4 x float>* + %7 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %6, i32 4, <4 x i1> %5, <4 x float> zeroinitializer) + %8 = tail call fast <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> %7, <4 x float> %.splat, <4 x i1> %5, <4 x float> undef) + %9 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %8, <4 x float> %8, <4 x float> %sumVec.0, <4 x i1> %5) + %sub = add nsw i32 %blkCnt.0, -4 + %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4 + %cmp1 = icmp sgt i32 %blkCnt.0, 4 + br i1 %cmp1, label %do.body, label %do.end + +do.end: ; preds = %do.body + %10 = extractelement <4 x float> %9, i32 3 + %add2.i = fadd fast float %10, %10 + %sub2 = add i32 %blockSize, -1 + %conv = uitofp i32 %sub2 to float + %div = fdiv fast float %add2.i, %conv + br label %cleanup + +cleanup: ; preds = %entry, %do.end + store float %div, float* %pResult, align 4 + ret void +} + +declare <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) + +declare <4 x float> 
@llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x float>, <4 x i1>) + +declare <4 x i1> @llvm.arm.mve.vctp32(i32) + +declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) + +declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) + diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll index 3f48cc3ad59b2e..440080e4e142dc 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll @@ -6,26 +6,17 @@ define void @arm_cmplx_mag_squared_q15_mve(i16* %pSrc, i16* %pDst, i32 %blockSiz ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: subs.w r12, r2, #8 -; CHECK-NEXT: mov.w r3, #-1 -; CHECK-NEXT: csinv r3, r3, r12, pl -; CHECK-NEXT: add.w r12, r3, r2 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w lr, r3, r12, lsr #3 -; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vld20.16 {q0, q1}, [r0] -; CHECK-NEXT: vctp.16 r2 -; CHECK-NEXT: subs r2, #8 ; CHECK-NEXT: vld21.16 {q0, q1}, [r0]! 
-; CHECK-NEXT: vpstttt -; CHECK-NEXT: vmulht.s16 q2, q1, q1 -; CHECK-NEXT: vmulht.s16 q0, q0, q0 -; CHECK-NEXT: vqaddt.s16 q0, q0, q2 -; CHECK-NEXT: vshrt.s16 q0, q0, #1 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q0, [r1], #16 -; CHECK-NEXT: le lr, .LBB0_1 +; CHECK-NEXT: vmulh.s16 q2, q1, q1 +; CHECK-NEXT: vmulh.s16 q0, q0, q0 +; CHECK-NEXT: vqadd.s16 q0, q0, q2 +; CHECK-NEXT: vshr.s16 q0, q0, #1 +; CHECK-NEXT: vstrh.16 q0, [r1], #16 +; CHECK-NEXT: letp lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %do.end ; CHECK-NEXT: pop {r7, pc} entry: @@ -148,25 +139,14 @@ define i32 @good2(i32* nocapture readonly %x, i32* nocapture readonly %y, i32 %n ; CHECK-LABEL: good2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: mov r3, r2 -; CHECK-NEXT: cmp r2, #4 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r3, #4 -; CHECK-NEXT: subs r3, r2, r3 -; CHECK-NEXT: add.w r12, r3, #3 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB3_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r2 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 -; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vpst -; CHECK-NEXT: vmlavat.s32 r12, q1, q0 -; CHECK-NEXT: le lr, .LBB3_1 +; CHECK-NEXT: vmlava.s32 r12, q1, q0 +; CHECK-NEXT: letp lr, .LBB3_1 ; CHECK-NEXT: @ %bb.2: @ %do.end ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: pop {r7, pc} diff --git a/llvm/test/CodeGen/X86/load-slice.ll b/llvm/test/CodeGen/X86/load-slice.ll index 3cbb70bd70d788..3bf95778f5647b 100644 --- a/llvm/test/CodeGen/X86/load-slice.ll +++ b/llvm/test/CodeGen/X86/load-slice.ll @@ -16,7 +16,7 @@ ; Low slice starts at 0 (base) and is 8-bytes aligned. ; High slice starts at 4 (base + 4-bytes) and is 4-bytes aligned. ; -; STRESS-LABEL: t1: +; STRESS-LABEL: _t1: ; Load out[out_start + 8].real, this is base + 8 * 8 + 0. 
; STRESS: vmovss 64([[BASE:[^(]+]]), [[OUT_Real:%xmm[0-9]+]] ; Load out[out_start + 8].imm, this is base + 8 * 8 + 4. @@ -31,7 +31,7 @@ ; STRESS-NEXT: vmovlps [[RES_Vec]], ([[BASE]]) ; ; Same for REGULAR, we eliminate register bank copy with each slices. -; REGULAR-LABEL: t1: +; REGULAR-LABEL: _t1: ; Load out[out_start + 8].real, this is base + 8 * 8 + 0. ; REGULAR: vmovss 64([[BASE:[^)]+]]), [[OUT_Real:%xmm[0-9]+]] ; Load out[out_start + 8].imm, this is base + 8 * 8 + 4. @@ -90,14 +90,14 @@ declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) ; Low slice starts at 0 (base) and is 8-bytes aligned. ; High slice starts at 6 (base + 6-bytes) and is 2-bytes aligned. ; -; STRESS-LABEL: t2: +; STRESS-LABEL: _t2: ; STRESS: movzwl 6([[BASE:[^)]+]]), %eax ; STRESS-NEXT: addl ([[BASE]]), %eax ; STRESS-NEXT: ret ; ; For the REGULAR heuristic, this is not profitable to slice things that are not ; next to each other in memory. Here we have a hole with bytes #4-5. -; REGULAR-LABEL: t2: +; REGULAR-LABEL: _t2: ; REGULAR: shrq $48 define i32 @t2(%class.Complex* nocapture %out, i64 %out_start) { %arrayidx = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %out_start @@ -117,11 +117,11 @@ define i32 @t2(%class.Complex* nocapture %out, i64 %out_start) { ; Second slice uses bytes numbered 6 and 7. ; Third slice uses bytes numbered 4 to 7. 
; -; STRESS-LABEL: t3: +; STRESS-LABEL: _t3: ; STRESS: shrq $48 ; STRESS: shrq $32 ; -; REGULAR-LABEL: t3: +; REGULAR-LABEL: _t3: ; REGULAR: shrq $48 ; REGULAR: shrq $32 define i32 @t3(%class.Complex* nocapture %out, i64 %out_start) { diff --git a/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll b/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll index 969ac375a70e33..40cd2fcd4fdeb7 100644 --- a/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll +++ b/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll @@ -85,12 +85,10 @@ define void @shuffle_v32i16_to_v16i16_1(<32 x i16>* %L, <16 x i16>* %S) nounwind ; ; AVX512BW-LABEL: shuffle_v32i16_to_v16i16_1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm1 -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u,18,19,22,23,26,27,30,31] -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u,18,19,22,23,26,27,30,31,u,u,u,u,u,u,u,u] -; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] -; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,17,19,21,23,9,11,13,15,25,27,29,31] +; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm1[0,2,1,3] ; AVX512BW-NEXT: vmovdqa %ymm0, (%rsi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -260,20 +258,11 @@ define void @shuffle_v32i16_to_v8i16_1(<32 x i16>* %L, <8 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v32i16_to_v8i16_1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512BW-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,2,3,10,11,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm4, %xmm3, %xmm3 -; AVX512BW-NEXT: vpshufb %xmm4, %xmm2, 
%xmm2 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [2,3,10,11,8,9,10,11,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] -; AVX512BW-NEXT: vmovdqa %xmm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <1,5,9,13,17,21,25,29,u,u,u,u,u,u,u,u> +; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v32i16_to_v8i16_1: @@ -327,20 +316,11 @@ define void @shuffle_v32i16_to_v8i16_2(<32 x i16>* %L, <8 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v32i16_to_v8i16_2: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512BW-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [12,13,14,15,4,5,12,13,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm4, %xmm3, %xmm3 -; AVX512BW-NEXT: vpshufb %xmm4, %xmm2, %xmm2 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [4,5,12,13,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] -; AVX512BW-NEXT: vmovdqa %xmm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <2,6,10,14,18,22,26,30,u,u,u,u,u,u,u,u> +; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: 
shuffle_v32i16_to_v8i16_2: @@ -394,20 +374,11 @@ define void @shuffle_v32i16_to_v8i16_3(<32 x i16>* %L, <8 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v32i16_to_v8i16_3: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512BW-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [12,13,14,15,6,7,14,15,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm4, %xmm3, %xmm3 -; AVX512BW-NEXT: vpshufb %xmm4, %xmm2, %xmm2 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [6,7,14,15,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] -; AVX512BW-NEXT: vmovdqa %xmm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <3,7,11,15,19,23,27,31,u,u,u,u,u,u,u,u> +; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v32i16_to_v8i16_3: diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll index 9256a43f8e3393..3919f326d39a53 100644 --- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll +++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll @@ -42,11 +42,10 @@ define void @shuffle_v32i8_to_v16i8(<32 x i8>* %L, <16 x i8>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v32i8_to_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255] -; AVX512BW-NEXT: vpand 16(%rdi), %xmm0, %xmm1 -; AVX512BW-NEXT: vpand (%rdi), %xmm0, %xmm0 -; AVX512BW-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; 
AVX512BW-NEXT: vmovdqa %xmm0, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v32i8_to_v16i8: @@ -143,11 +142,10 @@ define void @shuffle_v16i16_to_v8i16(<16 x i16>* %L, <8 x i16>* %S) nounwind { ; ; AVX512F-LABEL: shuffle_v16i16_to_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = mem[0],xmm0[1],mem[2],xmm0[3],mem[4],xmm0[5],mem[6],xmm0[7] -; AVX512F-NEXT: vpblendw {{.*#+}} xmm0 = mem[0],xmm0[1],mem[2],xmm0[3],mem[4],xmm0[5],mem[6],xmm0[7] -; AVX512F-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512F-NEXT: vmovdqa %xmm0, (%rsi) +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_to_v8i16: @@ -159,11 +157,10 @@ define void @shuffle_v16i16_to_v8i16(<16 x i16>* %L, <8 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v16i16_to_v8i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512BW-NEXT: vpblendw {{.*#+}} xmm1 = mem[0],xmm0[1],mem[2],xmm0[3],mem[4],xmm0[5],mem[6],xmm0[7] -; AVX512BW-NEXT: vpblendw {{.*#+}} xmm0 = mem[0],xmm0[1],mem[2],xmm0[3],mem[4],xmm0[5],mem[6],xmm0[7] -; AVX512BW-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512BW-NEXT: vmovdqa %xmm0, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v16i16_to_v8i16: @@ -377,54 +374,42 @@ define void @shuffle_v32i8_to_v8i8(<32 x i8>* %L, <8 x i8>* %S) nounwind { ; ; AVX512F-LABEL: shuffle_v32i8_to_v8i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512F-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; AVX512F-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512F-NEXT: vmovdqa (%rdi), 
%ymm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vmovq %xmm0, (%rsi) +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v32i8_to_v8i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512VL-NEXT: vpmovdb %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512VL-NEXT: vpmovdb %ymm0, %xmm0 ; AVX512VL-NEXT: vmovq %xmm0, (%rsi) +; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: shuffle_v32i8_to_v8i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512BW-NEXT: vmovq %xmm0, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v32i8_to_v8i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BWVL-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BWVL-NEXT: vpmovdb %xmm1, %xmm1 -; AVX512BWVL-NEXT: vpmovdb %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512BWVL-NEXT: vpmovdb %ymm0, %xmm0 ; AVX512BWVL-NEXT: vmovq %xmm0, (%rsi) +; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq ; ; AVX512VBMIVL-LABEL: shuffle_v32i8_to_v8i8: ; AVX512VBMIVL: # %bb.0: -; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512VBMIVL-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512VBMIVL-NEXT: vpmovdb %xmm1, %xmm1 -; AVX512VBMIVL-NEXT: vpmovdb %xmm0, %xmm0 -; AVX512VBMIVL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VBMIVL-NEXT: 
vmovdqa (%rdi), %ymm0 +; AVX512VBMIVL-NEXT: vpmovdb %ymm0, %xmm0 ; AVX512VBMIVL-NEXT: vmovq %xmm0, (%rsi) +; AVX512VBMIVL-NEXT: vzeroupper ; AVX512VBMIVL-NEXT: retq %vec = load <32 x i8>, <32 x i8>* %L %strided.vec = shufflevector <32 x i8> %vec, <32 x i8> undef, <8 x i32> @@ -576,9 +561,8 @@ define <16 x i8> @trunc_v8i32_to_v8i8_with_zext_return_v16i8(<8 x i32> %vec) nou ; ; AVX512F-LABEL: trunc_v8i32_to_v8i8_with_zext_return_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vmovdqa %ymm0, %ymm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -590,9 +574,8 @@ define <16 x i8> @trunc_v8i32_to_v8i8_with_zext_return_v16i8(<8 x i32> %vec) nou ; ; AVX512BW-LABEL: trunc_v8i32_to_v8i8_with_zext_return_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vmovdqa %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -636,9 +619,8 @@ define <16 x i8> @trunc_v8i32_to_v8i8_via_v8i16_return_v16i8(<8 x i32> %vec) nou ; ; AVX512F-LABEL: trunc_v8i32_to_v8i8_via_v8i16_return_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vmovdqa %ymm0, %ymm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -650,9 +632,8 @@ define <16 x i8> @trunc_v8i32_to_v8i8_via_v8i16_return_v16i8(<8 x i32> %vec) nou ; ; AVX512BW-LABEL: trunc_v8i32_to_v8i8_via_v8i16_return_v16i8: ; AVX512BW: # 
%bb.0: -; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vmovdqa %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -831,9 +812,8 @@ define <8 x i16> @trunc_v4i64_to_v4i16_with_zext_return_v8i16(<4 x i64> %vec) no ; ; AVX512F-LABEL: trunc_v4i64_to_v4i16_with_zext_return_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vmovdqa %ymm0, %ymm0 +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -845,9 +825,8 @@ define <8 x i16> @trunc_v4i64_to_v4i16_with_zext_return_v8i16(<4 x i64> %vec) no ; ; AVX512BW-LABEL: trunc_v4i64_to_v4i16_with_zext_return_v8i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vmovdqa %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -896,9 +875,8 @@ define <8 x i16> @trunc_v4i64_to_v4i16_via_v4i32_return_v8i16(<4 x i64> %vec) no ; ; AVX512F-LABEL: trunc_v4i64_to_v4i16_via_v4i32_return_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vmovdqa %ymm0, %ymm0 +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -910,9 +888,8 @@ define <8 x i16> @trunc_v4i64_to_v4i16_via_v4i32_return_v8i16(<4 x i64> %vec) no ; ; 
AVX512BW-LABEL: trunc_v4i64_to_v4i16_via_v4i32_return_v8i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vmovdqa %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -1081,49 +1058,42 @@ define void @shuffle_v16i16_to_v4i16(<16 x i16>* %L, <4 x i16>* %S) nounwind { ; ; AVX512F-LABEL: shuffle_v16i16_to_v4i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512F-NEXT: vpblendw {{.*#+}} xmm0 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512F-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 ; AVX512F-NEXT: vmovq %xmm0, (%rsi) +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_to_v4i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] -; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0 ; AVX512VL-NEXT: vmovq %xmm0, (%rsi) +; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: shuffle_v16i16_to_v4i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512BW-NEXT: vpblendw {{.*#+}} xmm1 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512BW-NEXT: vpblendw {{.*#+}} xmm0 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512BW-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: 
vmovdqa (%rdi), %ymm0 +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 ; AVX512BW-NEXT: vmovq %xmm0, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v16i16_to_v4i16: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm1 = <0,4,8,12,u,u,u,u> -; AVX512BWVL-NEXT: vpermi2w 16(%rdi), %xmm0, %xmm1 -; AVX512BWVL-NEXT: vmovq %xmm1, (%rsi) +; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vmovq %xmm0, (%rsi) +; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq ; ; AVX512VBMIVL-LABEL: shuffle_v16i16_to_v4i16: ; AVX512VBMIVL: # %bb.0: -; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm1 = <0,4,8,12,u,u,u,u> -; AVX512VBMIVL-NEXT: vpermi2w 16(%rdi), %xmm0, %xmm1 -; AVX512VBMIVL-NEXT: vmovq %xmm1, (%rsi) +; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512VBMIVL-NEXT: vpmovqw %ymm0, %xmm0 +; AVX512VBMIVL-NEXT: vmovq %xmm0, (%rsi) +; AVX512VBMIVL-NEXT: vzeroupper ; AVX512VBMIVL-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %L %strided.vec = shufflevector <16 x i16> %vec, <16 x i16> undef, <4 x i32> @@ -1199,54 +1169,42 @@ define void @shuffle_v32i8_to_v4i8(<32 x i8>* %L, <4 x i8>* %S) nounwind { ; ; AVX512F-LABEL: shuffle_v32i8_to_v4i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512F-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; AVX512F-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 ; AVX512F-NEXT: vmovd %xmm0, (%rsi) +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v32i8_to_v4i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1 -; 
AVX512VL-NEXT: vpmovqb %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovqb %xmm0, %xmm0 -; AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512VL-NEXT: vpmovqb %ymm0, %xmm0 ; AVX512VL-NEXT: vmovd %xmm0, (%rsi) +; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: shuffle_v32i8_to_v4i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 ; AVX512BW-NEXT: vmovd %xmm0, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v32i8_to_v4i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BWVL-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BWVL-NEXT: vpmovqb %xmm1, %xmm1 -; AVX512BWVL-NEXT: vpmovqb %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512BWVL-NEXT: vpmovqb %ymm0, %xmm0 ; AVX512BWVL-NEXT: vmovd %xmm0, (%rsi) +; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq ; ; AVX512VBMIVL-LABEL: shuffle_v32i8_to_v4i8: ; AVX512VBMIVL: # %bb.0: -; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512VBMIVL-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512VBMIVL-NEXT: vpmovqb %xmm1, %xmm1 -; AVX512VBMIVL-NEXT: vpmovqb %xmm0, %xmm0 -; AVX512VBMIVL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512VBMIVL-NEXT: vpmovqb %ymm0, %xmm0 ; AVX512VBMIVL-NEXT: vmovd %xmm0, (%rsi) +; AVX512VBMIVL-NEXT: vzeroupper ; 
AVX512VBMIVL-NEXT: retq %vec = load <32 x i8>, <32 x i8>* %L %strided.vec = shufflevector <32 x i8> %vec, <32 x i8> undef, <4 x i32> diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll index 811fbe37497cb0..9e3c92aca5da3a 100644 --- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll +++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll @@ -176,98 +176,12 @@ define void @trunc_v8i64_to_v8i32(<16 x i32>* %L, <8 x i32>* %S) nounwind { } define void @shuffle_v64i8_to_v16i8(<64 x i8>* %L, <16 x i8>* %S) nounwind { -; AVX512F-LABEL: shuffle_v64i8_to_v16i8: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512F-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = -; AVX512F-NEXT: vpshufb %xmm4, %xmm3, %xmm3 -; AVX512F-NEXT: vpshufb %xmm4, %xmm2, %xmm2 -; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512F-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512F-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] -; AVX512F-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v64i8_to_v16i8: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa 48(%rdi), %xmm0 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = -; AVX512VL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512VL-NEXT: vpshufb %xmm1, %xmm2, %xmm1 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; AVX512VL-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512VL-NEXT: vpmovdb %ymm1, %xmm1 -; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX512VL-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: 
shuffle_v64i8_to_v16i8: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512BW-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = -; AVX512BW-NEXT: vpshufb %xmm4, %xmm3, %xmm3 -; AVX512BW-NEXT: vpshufb %xmm4, %xmm2, %xmm2 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512BW-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] -; AVX512BW-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512BW-NEXT: retq -; -; AVX512BWVL-LABEL: shuffle_v64i8_to_v16i8: -; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vmovdqa 48(%rdi), %xmm0 -; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm1 = -; AVX512BWVL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512BWVL-NEXT: vpshufb %xmm1, %xmm2, %xmm1 -; AVX512BWVL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512BWVL-NEXT: vpmovdb %ymm1, %xmm1 -; AVX512BWVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX512BWVL-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512BWVL-NEXT: vzeroupper -; AVX512BWVL-NEXT: retq -; -; AVX512VBMI-LABEL: shuffle_v64i8_to_v16i8: -; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512VBMI-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512VBMI-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512VBMI-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm4 = -; AVX512VBMI-NEXT: vpshufb %xmm4, %xmm3, %xmm3 -; AVX512VBMI-NEXT: vpshufb %xmm4, %xmm2, %xmm2 -; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm3 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512VBMI-NEXT: vpshufb %xmm3, 
%xmm1, %xmm1 -; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] -; AVX512VBMI-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512VBMI-NEXT: retq -; -; AVX512VBMIVL-LABEL: shuffle_v64i8_to_v16i8: -; AVX512VBMIVL: # %bb.0: -; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm0 = [0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60] -; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512VBMIVL-NEXT: vpermt2b 32(%rdi), %ymm0, %ymm1 -; AVX512VBMIVL-NEXT: vmovdqa %xmm1, (%rsi) -; AVX512VBMIVL-NEXT: vzeroupper -; AVX512VBMIVL-NEXT: retq +; AVX512-LABEL: shuffle_v64i8_to_v16i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512-NEXT: vpmovdb %zmm0, (%rsi) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %vec = load <64 x i8>, <64 x i8>* %L %strided.vec = shufflevector <64 x i8> %vec, <64 x i8> undef, <16 x i32> store <16 x i8> %strided.vec, <16 x i8>* %S @@ -289,80 +203,12 @@ define void @trunc_v16i32_to_v16i8(<64 x i8>* %L, <16 x i8>* %S) nounwind { } define void @shuffle_v32i16_to_v8i16(<32 x i16>* %L, <8 x i16>* %S) nounwind { -; AVX512F-LABEL: shuffle_v32i16_to_v8i16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512F-NEXT: vpblendw {{.*#+}} xmm2 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512F-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 -; AVX512F-NEXT: vpblendw {{.*#+}} xmm2 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512F-NEXT: vpblendw {{.*#+}} xmm0 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512F-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v32i16_to_v8i16: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512VL-NEXT: vmovdqa 32(%rdi), %xmm2 -; 
AVX512VL-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,0,1,8,9,8,9,10,11,12,13,14,15] -; AVX512VL-NEXT: vpshufb %xmm4, %xmm3, %xmm3 -; AVX512VL-NEXT: vpshufb %xmm4, %xmm2, %xmm2 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] -; AVX512VL-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512VL-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] -; AVX512VL-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: shuffle_v32i16_to_v8i16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512BW-NEXT: vpblendw {{.*#+}} xmm1 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512BW-NEXT: vpblendw {{.*#+}} xmm2 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512BW-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 -; AVX512BW-NEXT: vpblendw {{.*#+}} xmm2 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512BW-NEXT: vpblendw {{.*#+}} xmm0 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512BW-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 -; AVX512BW-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512BW-NEXT: retq -; -; AVX512BWVL-LABEL: shuffle_v32i16_to_v8i16: -; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm0 = [0,4,8,12,16,20,24,28] -; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512BWVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1 -; AVX512BWVL-NEXT: vmovdqa %xmm1, (%rsi) -; AVX512BWVL-NEXT: vzeroupper -; AVX512BWVL-NEXT: retq -; -; AVX512VBMI-LABEL: shuffle_v32i16_to_v8i16: -; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512VBMI-NEXT: vpblendw {{.*#+}} xmm1 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512VBMI-NEXT: vpblendw {{.*#+}} xmm2 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512VBMI-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 -; AVX512VBMI-NEXT: 
vpblendw {{.*#+}} xmm2 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512VBMI-NEXT: vpblendw {{.*#+}} xmm0 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512VBMI-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 -; AVX512VBMI-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; AVX512VBMI-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512VBMI-NEXT: retq -; -; AVX512VBMIVL-LABEL: shuffle_v32i16_to_v8i16: -; AVX512VBMIVL: # %bb.0: -; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm0 = [0,4,8,12,16,20,24,28] -; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512VBMIVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1 -; AVX512VBMIVL-NEXT: vmovdqa %xmm1, (%rsi) -; AVX512VBMIVL-NEXT: vzeroupper -; AVX512VBMIVL-NEXT: retq +; AVX512-LABEL: shuffle_v32i16_to_v8i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512-NEXT: vpmovqw %zmm0, (%rsi) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %L %strided.vec = shufflevector <32 x i16> %vec, <32 x i16> undef, <8 x i32> store <8 x i16> %strided.vec, <8 x i16>* %S @@ -384,90 +230,13 @@ define void @trunc_v8i64_to_v8i16(<32 x i16>* %L, <8 x i16>* %S) nounwind { } define void @shuffle_v64i8_to_v8i8(<64 x i8>* %L, <8 x i8>* %S) nounwind { -; AVX512F-LABEL: shuffle_v64i8_to_v8i8: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512F-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = -; AVX512F-NEXT: vpshufb %xmm4, %xmm3, %xmm3 -; AVX512F-NEXT: vpshufb %xmm4, %xmm2, %xmm2 -; AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3] -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512F-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512F-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3] -; 
AVX512F-NEXT: vmovq %xmm0, (%rsi) -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v64i8_to_v8i8: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512VL-NEXT: vmovdqa 32(%rdi), %ymm1 -; AVX512VL-NEXT: vpmovqb %ymm1, %xmm1 -; AVX512VL-NEXT: vpmovqb %ymm0, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512VL-NEXT: vmovq %xmm0, (%rsi) -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: shuffle_v64i8_to_v8i8: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512BW-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = -; AVX512BW-NEXT: vpshufb %xmm4, %xmm3, %xmm3 -; AVX512BW-NEXT: vpshufb %xmm4, %xmm2, %xmm2 -; AVX512BW-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3] -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512BW-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3] -; AVX512BW-NEXT: vmovq %xmm0, (%rsi) -; AVX512BW-NEXT: retq -; -; AVX512BWVL-LABEL: shuffle_v64i8_to_v8i8: -; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BWVL-NEXT: vmovdqa 32(%rdi), %ymm1 -; AVX512BWVL-NEXT: vpmovqb %ymm1, %xmm1 -; AVX512BWVL-NEXT: vpmovqb %ymm0, %xmm0 -; AVX512BWVL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512BWVL-NEXT: vmovq %xmm0, (%rsi) -; AVX512BWVL-NEXT: vzeroupper -; AVX512BWVL-NEXT: retq -; -; AVX512VBMI-LABEL: shuffle_v64i8_to_v8i8: -; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512VBMI-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512VBMI-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512VBMI-NEXT: vmovdqa 48(%rdi), %xmm3 -; 
AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm4 = -; AVX512VBMI-NEXT: vpshufb %xmm4, %xmm3, %xmm3 -; AVX512VBMI-NEXT: vpshufb %xmm4, %xmm2, %xmm2 -; AVX512VBMI-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3] -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm3 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512VBMI-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3] -; AVX512VBMI-NEXT: vmovq %xmm0, (%rsi) -; AVX512VBMI-NEXT: retq -; -; AVX512VBMIVL-LABEL: shuffle_v64i8_to_v8i8: -; AVX512VBMIVL: # %bb.0: -; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512VBMIVL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4048780183313844224,4048780183313844224,4048780183313844224,4048780183313844224] -; AVX512VBMIVL-NEXT: vpermi2b 32(%rdi), %ymm0, %ymm1 -; AVX512VBMIVL-NEXT: vmovq %xmm1, (%rsi) -; AVX512VBMIVL-NEXT: vzeroupper -; AVX512VBMIVL-NEXT: retq +; AVX512-LABEL: shuffle_v64i8_to_v8i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512-NEXT: vmovq %xmm0, (%rsi) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %vec = load <64 x i8>, <64 x i8>* %L %strided.vec = shufflevector <64 x i8> %vec, <64 x i8> undef, <8 x i32> store <8 x i8> %strided.vec, <8 x i8>* %S @@ -559,8 +328,8 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_ ; ; AVX512VBMI-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_61: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,61] -; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0 +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm1 = <1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,61,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512VBMI-NEXT: vpermt2b %zmm0, %zmm1, %zmm0 ; AVX512VBMI-NEXT: # 
kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI-NEXT: vzeroupper ; AVX512VBMI-NEXT: retq @@ -644,8 +413,8 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_ ; ; AVX512VBMI-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,62] -; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0 +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm1 = <1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,62,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512VBMI-NEXT: vpermt2b %zmm0, %zmm1, %zmm0 ; AVX512VBMI-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI-NEXT: vzeroupper ; AVX512VBMI-NEXT: retq @@ -688,13 +457,10 @@ define <4 x double> @PR34175(<32 x i16>* %p) { ; ; AVX512BW-LABEL: PR34175: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqu (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqu 32(%rdi), %xmm1 -; AVX512BW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] -; AVX512BW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] -; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] -; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] -; AVX512BW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <0,8,16,24,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512BW-NEXT: vmovdqu (%rdi), %ymm1 +; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; AVX512BW-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX512BW-NEXT: retq ; @@ -709,13 +475,10 @@ define <4 x double> @PR34175(<32 x i16>* %p) { ; ; AVX512VBMI-LABEL: PR34175: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqu (%rdi), %xmm0 -; AVX512VBMI-NEXT: vmovdqu 32(%rdi), %xmm1 -; AVX512VBMI-NEXT: vpunpcklwd {{.*#+}} xmm0 = 
xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] -; AVX512VBMI-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] -; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] -; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] -; AVX512VBMI-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = <0,8,16,24,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512VBMI-NEXT: vmovdqu (%rdi), %ymm1 +; AVX512VBMI-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512VBMI-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; AVX512VBMI-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX512VBMI-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index e5285aebda69e3..b2c0acdf9b2287 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -4843,19 +4843,13 @@ define <32 x i8> @shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_ ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq ; -; AVX512VLBW-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: -; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VLBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0 -; AVX512VLBW-NEXT: retq -; -; AVX512VLVBMI-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: -; AVX512VLVBMI: # %bb.0: -; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63] -; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 -; AVX512VLVBMI-NEXT: retq +; AVX512VL-LABEL: 
shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpsrlw $8, %zmm0, %zmm0 +; AVX512VL-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512VL-NEXT: retq ; ; XOPAVX1-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: ; XOPAVX1: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll index f0398b15d04a41..d52e6195f8dfd3 100644 --- a/llvm/test/CodeGen/X86/vector-trunc.ll +++ b/llvm/test/CodeGen/X86/vector-trunc.ll @@ -1581,10 +1581,11 @@ define <8 x i16> @trunc2x4i32_8i16(<4 x i32> %a, <4 x i32> %b) { ; ; AVX512F-LABEL: trunc2x4i32_8i16: ; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] -; AVX512F-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] -; AVX512F-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc2x4i32_8i16: @@ -1597,10 +1598,11 @@ define <8 x i16> @trunc2x4i32_8i16(<4 x i32> %a, <4 x i32> %b) { ; ; AVX512BW-LABEL: trunc2x4i32_8i16: ; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512BW-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] -; AVX512BW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] -; AVX512BW-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed 
$xmm0 def $ymm0 +; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc2x4i32_8i16: @@ -1709,10 +1711,11 @@ define <16 x i8> @trunc2x8i16_16i8(<8 x i16> %a, <8 x i16> %b) { ; ; AVX512BW-LABEL: trunc2x8i16_16i8: ; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] -; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX512BW-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc2x8i16_16i8: diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll index b80775ac7d57c8..a540d04626ae83 100644 --- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll +++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll @@ -383,33 +383,88 @@ ret void } define <8 x i8> @interleaved_load_vf8_i8_stride4(<32 x i8>* %ptr) { -; AVX-LABEL: interleaved_load_vf8_i8_stride4: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX-NEXT: vmovdqa (%rdi), %xmm1 -; AVX-NEXT: vmovdqa 16(%rdi), %xmm2 -; AVX-NEXT: vpshufb %xmm0, %xmm2, %xmm3 -; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX-NEXT: vpshufb %xmm3, %xmm2, %xmm4 -; AVX-NEXT: vpshufb %xmm3, %xmm1, %xmm3 -; AVX-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] -; AVX-NEXT: vpaddb %xmm3, %xmm0, %xmm0 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = 
<2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX-NEXT: vpshufb %xmm3, %xmm2, %xmm4 -; AVX-NEXT: vpshufb %xmm3, %xmm1, %xmm3 -; AVX-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] -; AVX-NEXT: vmovdqa {{.*#+}} xmm4 = <3,7,11,15,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX-NEXT: vpshufb %xmm4, %xmm2, %xmm2 -; AVX-NEXT: vpshufb %xmm4, %xmm1, %xmm1 -; AVX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; AVX-NEXT: vpaddb %xmm3, %xmm1, %xmm1 -; AVX-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] -; AVX-NEXT: retq +; AVX1-LABEL: interleaved_load_vf8_i8_stride4: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX1-NEXT: vmovdqa (%rdi), %xmm1 +; AVX1-NEXT: vmovdqa 16(%rdi), %xmm2 +; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm3 +; AVX1-NEXT: vpshufb %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm4 +; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm3 +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = <2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm4 +; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm3 +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = <3,7,11,15,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vpshufb %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX1-NEXT: 
vpaddb %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX1-NEXT: retq +; +; AVX2-LABEL: interleaved_load_vf8_i8_stride4: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX2-NEXT: vmovdqa (%rdi), %xmm1 +; AVX2-NEXT: vmovdqa 16(%rdi), %xmm2 +; AVX2-NEXT: vpshufb %xmm0, %xmm2, %xmm3 +; AVX2-NEXT: vpshufb %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm4 +; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm3 +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; AVX2-NEXT: vpaddb %xmm3, %xmm0, %xmm0 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm4 +; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm3 +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <3,7,11,15,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2 +; AVX2-NEXT: vpshufb %xmm4, %xmm1, %xmm1 +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX2-NEXT: vpaddb %xmm3, %xmm1, %xmm1 +; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = 
xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX2-NEXT: retq +; +; AVX512-LABEL: interleaved_load_vf8_i8_stride4: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; AVX512-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512-NEXT: vmovdqa (%rdi), %xmm2 +; AVX512-NEXT: vmovdqa 16(%rdi), %xmm3 +; AVX512-NEXT: vpshufb %xmm1, %xmm3, %xmm4 +; AVX512-NEXT: vpshufb %xmm1, %xmm2, %xmm1 +; AVX512-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] +; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = <2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512-NEXT: vpshufb %xmm1, %xmm3, %xmm4 +; AVX512-NEXT: vpshufb %xmm1, %xmm2, %xmm1 +; AVX512-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] +; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = <3,7,11,15,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512-NEXT: vpshufb %xmm4, %xmm3, %xmm3 +; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm2 +; AVX512-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; AVX512-NEXT: vpaddb %xmm1, %xmm2, %xmm1 +; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %wide.vec = load <32 x i8>, <32 x i8>* %ptr, align 16 %v1 = shufflevector <32 x i8> %wide.vec, <32 x i8> undef, <8 x i32> %v2 = shufflevector <32 x i8> %wide.vec, <32 x i8> undef, <8 x i32> @@ -521,48 +576,41 @@ define <16 x i1> @interleaved_load_vf16_i8_stride4(<64 x i8>* %ptr) { ; ; AVX512-LABEL: interleaved_load_vf16_i8_stride4: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512-NEXT: 
vmovdqa 32(%rdi), %xmm2 -; AVX512-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = -; AVX512-NEXT: vpshufb %xmm4, %xmm3, %xmm5 -; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm4 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] -; AVX512-NEXT: vmovdqa {{.*#+}} xmm5 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512-NEXT: vpshufb %xmm5, %xmm1, %xmm6 -; AVX512-NEXT: vpshufb %xmm5, %xmm0, %xmm5 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1] -; AVX512-NEXT: vpblendd {{.*#+}} xmm8 = xmm5[0,1],xmm4[2,3] +; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512-NEXT: vpmovdb %zmm0, %xmm8 +; AVX512-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512-NEXT: vmovdqa 16(%rdi), %xmm2 +; AVX512-NEXT: vmovdqa 32(%rdi), %xmm3 +; AVX512-NEXT: vmovdqa 48(%rdi), %xmm4 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm5 = -; AVX512-NEXT: vpshufb %xmm5, %xmm3, %xmm6 -; AVX512-NEXT: vpshufb %xmm5, %xmm2, %xmm5 +; AVX512-NEXT: vpshufb %xmm5, %xmm4, %xmm6 +; AVX512-NEXT: vpshufb %xmm5, %xmm3, %xmm5 ; AVX512-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1] ; AVX512-NEXT: vmovdqa {{.*#+}} xmm6 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512-NEXT: vpshufb %xmm6, %xmm1, %xmm7 -; AVX512-NEXT: vpshufb %xmm6, %xmm0, %xmm6 +; AVX512-NEXT: vpshufb %xmm6, %xmm2, %xmm7 +; AVX512-NEXT: vpshufb %xmm6, %xmm1, %xmm6 ; AVX512-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1] ; AVX512-NEXT: vpblendd {{.*#+}} xmm5 = xmm6[0,1],xmm5[2,3] ; AVX512-NEXT: vmovdqa {{.*#+}} xmm6 = -; AVX512-NEXT: vpshufb %xmm6, %xmm3, %xmm7 -; AVX512-NEXT: vpshufb %xmm6, %xmm2, %xmm6 +; AVX512-NEXT: vpshufb %xmm6, %xmm4, %xmm7 +; AVX512-NEXT: vpshufb %xmm6, %xmm3, %xmm6 ; AVX512-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1] ; AVX512-NEXT: vmovdqa {{.*#+}} xmm7 = <2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512-NEXT: vpshufb %xmm7, %xmm1, %xmm4 -; AVX512-NEXT: vpshufb %xmm7, %xmm0, %xmm7 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm4 = 
xmm7[0],xmm4[0],xmm7[1],xmm4[1] -; AVX512-NEXT: vpblendd {{.*#+}} xmm4 = xmm4[0,1],xmm6[2,3] +; AVX512-NEXT: vpshufb %xmm7, %xmm2, %xmm0 +; AVX512-NEXT: vpshufb %xmm7, %xmm1, %xmm7 +; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm7[0],xmm0[0],xmm7[1],xmm0[1] +; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm6[2,3] ; AVX512-NEXT: vmovdqa {{.*#+}} xmm6 = +; AVX512-NEXT: vpshufb %xmm6, %xmm4, %xmm4 ; AVX512-NEXT: vpshufb %xmm6, %xmm3, %xmm3 -; AVX512-NEXT: vpshufb %xmm6, %xmm2, %xmm2 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = <3,7,11,15,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] +; AVX512-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = <3,7,11,15,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm2 +; AVX512-NEXT: vpshufb %xmm4, %xmm1, %xmm1 +; AVX512-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX512-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3] ; AVX512-NEXT: vpcmpeqb %zmm5, %zmm8, %k0 -; AVX512-NEXT: vpcmpeqb %zmm0, %zmm4, %k1 +; AVX512-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 ; AVX512-NEXT: kxnorw %k1, %k0, %k0 ; AVX512-NEXT: vpmovm2b %k0, %zmm0 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 @@ -762,85 +810,83 @@ define <32 x i1> @interleaved_load_vf32_i8_stride4(<128 x i8>* %ptr) { ; ; AVX512-LABEL: interleaved_load_vf32_i8_stride4: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa 112(%rdi), %xmm11 +; AVX512-NEXT: vmovdqa 112(%rdi), %xmm14 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm0 = -; AVX512-NEXT: vpshufb %xmm0, %xmm11, %xmm3 -; AVX512-NEXT: vmovdqa 96(%rdi), %xmm13 -; AVX512-NEXT: vpshufb %xmm0, %xmm13, %xmm0 +; AVX512-NEXT: vpshufb %xmm0, %xmm14, %xmm3 +; AVX512-NEXT: vmovdqa 96(%rdi), %xmm2 
+; AVX512-NEXT: vpshufb %xmm0, %xmm2, %xmm0 ; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] ; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa 80(%rdi), %xmm14 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm5 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512-NEXT: vpshufb %xmm5, %xmm14, %xmm6 -; AVX512-NEXT: vmovdqa 64(%rdi), %xmm4 -; AVX512-NEXT: vpshufb %xmm5, %xmm4, %xmm5 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1] -; AVX512-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5 -; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm5[0,1,2,3,4,5],ymm0[6,7] -; AVX512-NEXT: vmovdqa64 (%rdi), %zmm5 -; AVX512-NEXT: vpmovdb %zmm5, %xmm5 -; AVX512-NEXT: vpblendd {{.*#+}} ymm9 = ymm5[0,1,2,3],ymm0[4,5,6,7] -; AVX512-NEXT: vmovdqa {{.*#+}} xmm5 = -; AVX512-NEXT: vpshufb %xmm5, %xmm11, %xmm0 -; AVX512-NEXT: vpshufb %xmm5, %xmm13, %xmm6 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm6[0],xmm0[0],xmm6[1],xmm0[1] +; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm3 +; AVX512-NEXT: vpmovdb %zmm3, %xmm3 +; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3,4,5],ymm0[6,7] +; AVX512-NEXT: vmovdqa64 (%rdi), %zmm3 +; AVX512-NEXT: vpmovdb %zmm3, %xmm3 +; AVX512-NEXT: vpblendd {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm0[4,5,6,7] +; AVX512-NEXT: vmovdqa 64(%rdi), %xmm10 +; AVX512-NEXT: vmovdqa 80(%rdi), %xmm11 +; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = +; AVX512-NEXT: vpshufb %xmm3, %xmm14, %xmm0 +; AVX512-NEXT: vpshufb %xmm3, %xmm2, %xmm5 +; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm5[0],xmm0[0],xmm5[1],xmm0[1] ; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512-NEXT: vpshufb %xmm1, %xmm14, %xmm6 -; AVX512-NEXT: vpshufb %xmm1, %xmm4, %xmm7 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1] -; AVX512-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6 -; AVX512-NEXT: vpblendd {{.*#+}} ymm8 = 
ymm6[0,1,2,3,4,5],ymm0[6,7] -; AVX512-NEXT: vmovdqa (%rdi), %xmm10 -; AVX512-NEXT: vmovdqa 16(%rdi), %xmm12 +; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512-NEXT: vpshufb %xmm4, %xmm11, %xmm5 +; AVX512-NEXT: vpshufb %xmm4, %xmm10, %xmm6 +; AVX512-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm6[0],xmm5[0],xmm6[1],xmm5[1] +; AVX512-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5 +; AVX512-NEXT: vpblendd {{.*#+}} ymm8 = ymm5[0,1,2,3,4,5],ymm0[6,7] +; AVX512-NEXT: vmovdqa (%rdi), %xmm12 +; AVX512-NEXT: vmovdqa 16(%rdi), %xmm13 ; AVX512-NEXT: vmovdqa 32(%rdi), %xmm7 ; AVX512-NEXT: vmovdqa 48(%rdi), %xmm0 -; AVX512-NEXT: vpshufb %xmm5, %xmm0, %xmm6 -; AVX512-NEXT: vpshufb %xmm5, %xmm7, %xmm5 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1] -; AVX512-NEXT: vpshufb %xmm1, %xmm12, %xmm6 -; AVX512-NEXT: vpshufb %xmm1, %xmm10, %xmm1 +; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm5 +; AVX512-NEXT: vpshufb %xmm3, %xmm7, %xmm3 +; AVX512-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1] +; AVX512-NEXT: vpshufb %xmm4, %xmm13, %xmm5 +; AVX512-NEXT: vpshufb %xmm4, %xmm12, %xmm4 +; AVX512-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] +; AVX512-NEXT: vpblendd {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3] +; AVX512-NEXT: vpblendd {{.*#+}} ymm8 = ymm3[0,1,2,3],ymm8[4,5,6,7] +; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = +; AVX512-NEXT: vpshufb %xmm3, %xmm14, %xmm4 +; AVX512-NEXT: vpshufb %xmm3, %xmm2, %xmm5 +; AVX512-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1] +; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 +; AVX512-NEXT: vmovdqa {{.*#+}} xmm5 = <2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512-NEXT: vpshufb %xmm5, %xmm11, %xmm6 +; AVX512-NEXT: vpshufb %xmm5, %xmm10, %xmm1 ; AVX512-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1] -; AVX512-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm5[2,3] -; AVX512-NEXT: vpblendd {{.*#+}} ymm8 = ymm1[0,1,2,3],ymm8[4,5,6,7] -; AVX512-NEXT: vmovdqa {{.*#+}} 
xmm1 = -; AVX512-NEXT: vpshufb %xmm1, %xmm11, %xmm5 -; AVX512-NEXT: vpshufb %xmm1, %xmm13, %xmm6 +; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX512-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm4[6,7] +; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm4 +; AVX512-NEXT: vpshufb %xmm3, %xmm7, %xmm3 +; AVX512-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; AVX512-NEXT: vpshufb %xmm5, %xmm13, %xmm4 +; AVX512-NEXT: vpshufb %xmm5, %xmm12, %xmm5 +; AVX512-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1] +; AVX512-NEXT: vpblendd {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3] +; AVX512-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7] +; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = +; AVX512-NEXT: vpshufb %xmm3, %xmm14, %xmm4 +; AVX512-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX512-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] +; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = <3,7,11,15,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512-NEXT: vpshufb %xmm4, %xmm11, %xmm5 +; AVX512-NEXT: vpshufb %xmm4, %xmm10, %xmm6 ; AVX512-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm6[0],xmm5[0],xmm6[1],xmm5[1] ; AVX512-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm6 = <2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512-NEXT: vpshufb %xmm6, %xmm14, %xmm2 -; AVX512-NEXT: vpshufb %xmm6, %xmm4, %xmm3 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm5[6,7] -; AVX512-NEXT: vpshufb %xmm1, %xmm0, %xmm3 -; AVX512-NEXT: vpshufb %xmm1, %xmm7, %xmm1 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; AVX512-NEXT: vpshufb %xmm6, %xmm12, %xmm3 -; AVX512-NEXT: vpshufb %xmm6, %xmm10, %xmm5 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] -; AVX512-NEXT: vpblendd {{.*#+}} xmm1 = xmm3[0,1],xmm1[2,3] -; AVX512-NEXT: vpblendd 
{{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] -; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = -; AVX512-NEXT: vpshufb %xmm2, %xmm11, %xmm3 -; AVX512-NEXT: vpshufb %xmm2, %xmm13, %xmm5 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] -; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm5 = <3,7,11,15,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512-NEXT: vpshufb %xmm5, %xmm14, %xmm6 -; AVX512-NEXT: vpshufb %xmm5, %xmm4, %xmm4 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1] -; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 -; AVX512-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3,4,5],ymm3[6,7] -; AVX512-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX512-NEXT: vpshufb %xmm2, %xmm7, %xmm2 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; AVX512-NEXT: vpshufb %xmm5, %xmm12, %xmm2 -; AVX512-NEXT: vpshufb %xmm5, %xmm10, %xmm4 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] -; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3] -; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm3[4,5,6,7] +; AVX512-NEXT: vpblendd {{.*#+}} ymm2 = ymm5[0,1,2,3,4,5],ymm2[6,7] +; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0 +; AVX512-NEXT: vpshufb %xmm3, %xmm7, %xmm3 +; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] +; AVX512-NEXT: vpshufb %xmm4, %xmm13, %xmm3 +; AVX512-NEXT: vpshufb %xmm4, %xmm12, %xmm4 +; AVX512-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm3[0,1],xmm0[2,3] +; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] ; AVX512-NEXT: vpcmpeqb %zmm8, %zmm9, %k0 ; AVX512-NEXT: vpcmpeqb %zmm0, %zmm1, %k1 ; AVX512-NEXT: kxnord %k1, %k0, %k0 diff --git a/llvm/test/DebugInfo/Generic/template-recursive-void.ll b/llvm/test/DebugInfo/Generic/template-recursive-void.ll index 0b70f218b3567e..4718b7a1591125 100644 --- a/llvm/test/DebugInfo/Generic/template-recursive-void.ll 
+++ b/llvm/test/DebugInfo/Generic/template-recursive-void.ll @@ -14,7 +14,7 @@ ; CHECK: DW_TAG_template_type_parameter [{{.*}}] ; CHECK-NEXT: DW_AT_name{{.*}}"T" ; CHECK-NOT: DW_AT_type -; CHECK: NULL +; CHECK: {{DW_TAG|NULL}} source_filename = "test/DebugInfo/Generic/template-recursive-void.ll" diff --git a/llvm/test/DebugInfo/X86/addr-tu-to-non-tu.ll b/llvm/test/DebugInfo/X86/addr-tu-to-non-tu.ll new file mode 100644 index 00000000000000..e81cb38c2131b0 --- /dev/null +++ b/llvm/test/DebugInfo/X86/addr-tu-to-non-tu.ll @@ -0,0 +1,89 @@ +; RUN: llc -filetype=obj -O0 -generate-type-units -mtriple=x86_64-unknown-linux-gnu -split-dwarf-file=x.dwo < %s \ +; RUN: | llvm-dwarfdump -debug-info -debug-types - \ +; RUN: | FileCheck --implicit-check-not=Unit --implicit-check-not=contents --implicit-check-not=declaration %s + +; Test that an address-using-with-Split-DWARF type unit that references a +; non-type unit is handled correctly. A NonTypeUnitContext is used to insulate +; the type construction from being discarded when the prior/outer type has to be +; discarded due to finding it used an address & so can't be type united under +; Split DWARF. + +; The intermediate types tu and t2 are here just to test a bit more +; thoroughly/broadly. They also demonstrate one slight limitation/sub-optimality +; since 't2' isn't put in a type unit. + + +; extern int foo; +; namespace { +; struct t1 { +; }; +; } +; template struct t2 { +; t1 v1; +; }; +; struct t3 { +; t2<&foo> v1; +; }; +; t3 v1; + +; CHECK: .debug_info contents: +; CHECK: Compile Unit: + +; CHECK: .debug_info.dwo contents: +; CHECK: Compile Unit: + +; FIXME: In theory "t3" could be in a type unit - but at the moment, because it +; references t2, which needs an address, t3 gets non-type-united. +; But the same doesn't happen if t3 referenced an anonymous namespace type. 
+ +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name ("t3") +; CHECK: DW_TAG_member +; CHECK: DW_AT_type {{.*}} "t2<&foo>" +; CHECK: DW_TAG_namespace +; CHECK: [[T1:0x[0-9a-f]*]]: DW_TAG_structure_type +; CHECK: DW_AT_name ("t1") +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name ("t2<&foo>") +; CHECK: DW_TAG_member +; CHECK: DW_AT_name ("v1") +; CHECK: DW_AT_type ([[T1]] "t1") + +; CHECK: .debug_types contents: + +; CHECK-NOT: .debug_types.dwo contents: + + +%struct.t3 = type { %struct.t2 } +%struct.t2 = type { %"struct.(anonymous namespace)::t1" } +%"struct.(anonymous namespace)::t1" = type { i8 } + +@v1 = dso_local global %struct.t3 zeroinitializer, align 1, !dbg !0 +@foo = external dso_local global i32, align 4 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!18, !19, !20} +!llvm.ident = !{!21} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "v1", scope: !2, file: !3, line: 16, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 12.0.0 (git@github.com:llvm/llvm-project.git be646ae2865371c7a4966797e88f355de5653e04)", isOptimized: false, runtimeVersion: 0, splitDebugFilename: "test.dwo", emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: GNU) +!3 = !DIFile(filename: "test.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch") +!4 = !{} +!5 = !{!0} +!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t3", file: !3, line: 12, size: 8, flags: DIFlagTypePassByValue, elements: !7, identifier: "_ZTS2t3") +!7 = !{!8} +!8 = !DIDerivedType(tag: DW_TAG_member, name: "v1", scope: !6, file: !3, line: 13, baseType: !9, size: 8) +!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t2<&foo>", file: !3, line: 8, size: 8, flags: DIFlagTypePassByValue, elements: !10, templateParams: !14, identifier: "_ZTS2t2IXadL_Z3fooEEE") +!10 = !{!11} +!11 = 
!DIDerivedType(tag: DW_TAG_member, name: "v1", scope: !9, file: !3, line: 9, baseType: !12, size: 8) +!12 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t1", scope: !13, file: !3, line: 4, size: 8, flags: DIFlagTypePassByValue, elements: !4) +!13 = !DINamespace(scope: null) +!14 = !{!15} +!15 = !DITemplateValueParameter(type: !16, value: i32* @foo) +!16 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !17, size: 64) +!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!18 = !{i32 7, !"Dwarf Version", i32 4} +!19 = !{i32 2, !"Debug Info Version", i32 3} +!20 = !{i32 1, !"wchar_size", i32 4} +!21 = !{!"clang version 12.0.0 (git@github.com:llvm/llvm-project.git be646ae2865371c7a4966797e88f355de5653e04)"} diff --git a/llvm/test/Other/cspgo-O2-pipeline.ll b/llvm/test/Other/cspgo-O2-pipeline.ll index 974213c83c8128..26f2e338cbc8b7 100644 --- a/llvm/test/Other/cspgo-O2-pipeline.ll +++ b/llvm/test/Other/cspgo-O2-pipeline.ll @@ -1,13 +1,13 @@ ; Test CSGen pass in CSPGO. ; RUN: llvm-profdata merge %S/Inputs/cspgo-noncs.proftext -o %t-noncs.profdata ; RUN: llvm-profdata merge %S/Inputs/cspgo-cs.proftext -o %t-cs.profdata -; RUN: opt -O2 -debug-pass=Structure -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-noncs.profdata' -cspgo-kind=cspgo-instr-gen-pipeline -cs-profilegen-file=alloc %s 2>&1 |FileCheck %s --check-prefixes=CSGENDEFAULT +; RUN: opt -enable-new-pm=0 -O2 -debug-pass=Structure -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-noncs.profdata' -cspgo-kind=cspgo-instr-gen-pipeline -cs-profilegen-file=alloc %s 2>&1 |FileCheck %s --check-prefixes=CSGENDEFAULT ; CSGENDEFAULT: PGOInstrumentationUse ; CSGENDEFAULT: PGOInstrumentationGenCreateVar ; CSGENDEFAULT: PGOInstrumentationGen ; Test CSUse pass in CSPGO. 
-; RUN: opt -O2 -debug-pass=Structure -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-cs.profdata' -cspgo-kind=cspgo-instr-use-pipeline %s 2>&1 |FileCheck %s --check-prefixes=CSUSEDEFAULT +; RUN: opt -enable-new-pm=0 -O2 -debug-pass=Structure -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-cs.profdata' -cspgo-kind=cspgo-instr-use-pipeline %s 2>&1 |FileCheck %s --check-prefixes=CSUSEDEFAULT ; CSUSEDEFAULT: PGOInstrumentationUse ; CSUSEDEFAULT-NOT: PGOInstrumentationGenCreateVar ; CSUSEDEFAULT: PGOInstrumentationUse diff --git a/llvm/test/Other/loop-pass-printer.ll b/llvm/test/Other/loop-pass-printer.ll index aab4dc91573a5f..c74d202f262158 100644 --- a/llvm/test/Other/loop-pass-printer.ll +++ b/llvm/test/Other/loop-pass-printer.ll @@ -1,19 +1,19 @@ ; This test checks -print-after/before on loop passes ; Besides of the loop itself it should be dumping loop pre-header and exits. ; -; RUN: opt < %s 2>&1 -disable-output \ +; RUN: opt -enable-new-pm=0 < %s 2>&1 -disable-output \ ; RUN: -loop-deletion -print-before=loop-deletion \ ; RUN: | FileCheck %s -check-prefix=DEL ; RUN: opt < %s 2>&1 -disable-output \ ; RUN: -passes='loop(loop-deletion)' -print-before-all \ ; RUN: | FileCheck %s -check-prefix=DEL -; RUN: opt < %s 2>&1 -disable-output \ +; RUN: opt -enable-new-pm=0 < %s 2>&1 -disable-output \ ; RUN: -loop-unroll -print-after=loop-unroll -filter-print-funcs=bar \ ; RUN: | FileCheck %s -check-prefix=BAR -check-prefix=BAR-OLD ; RUN: opt < %s 2>&1 -disable-output \ ; RUN: -passes='require,loop(loop-unroll-full)' -print-after-all -filter-print-funcs=bar \ ; RUN: | FileCheck %s -check-prefix=BAR -; RUN: opt < %s 2>&1 -disable-output \ +; RUN: opt -enable-new-pm=0 < %s 2>&1 -disable-output \ ; RUN: -loop-unroll -print-after=loop-unroll -filter-print-funcs=foo -print-module-scope \ ; RUN: | FileCheck %s -check-prefix=FOO-MODULE -check-prefix=FOO-MODULE-OLD ; RUN: opt < %s 2>&1 -disable-output \ diff --git a/llvm/test/Other/opt-O0-pipeline-enable-matrix.ll 
b/llvm/test/Other/opt-O0-pipeline-enable-matrix.ll index 401cbb976a4164..f754f6d1a513eb 100644 --- a/llvm/test/Other/opt-O0-pipeline-enable-matrix.ll +++ b/llvm/test/Other/opt-O0-pipeline-enable-matrix.ll @@ -1,4 +1,4 @@ -; RUN: opt -O0 -enable-matrix -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s +; RUN: opt -enable-new-pm=0 -O0 -enable-matrix -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s ; REQUIRES: asserts diff --git a/llvm/test/Other/opt-O0-pipeline.ll b/llvm/test/Other/opt-O0-pipeline.ll index ce431a502f93cb..6900b88cbb4ebf 100644 --- a/llvm/test/Other/opt-O0-pipeline.ll +++ b/llvm/test/Other/opt-O0-pipeline.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=x86_64-- -O0 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=CHECK,%llvmcheckext +; RUN: opt -enable-new-pm=0 -mtriple=x86_64-- -O0 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=CHECK,%llvmcheckext ; REQUIRES: asserts diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll index 56f85d0fb9a8c1..e606e7cfac1716 100644 --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=x86_64-- -O2 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK,%llvmcheckext %s +; RUN: opt -enable-new-pm=0 -mtriple=x86_64-- -O2 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK,%llvmcheckext %s ; REQUIRES: asserts diff --git a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll index a0b7a8f5e1e3d7..aaee6f786bac91 100644 --- a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll +++ b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll @@ -1,4 +1,4 @@ -; RUN: opt -O3 -enable-matrix -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s +; RUN: opt -enable-new-pm=0 -O3 -enable-matrix -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s ; REQUIRES: asserts diff 
--git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll index 942f7d9dfead59..b2d2f85ae21be2 100644 --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=x86_64-- -O3 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK,%llvmcheckext %s +; RUN: opt -enable-new-pm=0 -mtriple=x86_64-- -O3 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK,%llvmcheckext %s ; REQUIRES: asserts diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll index d975cc48b629c3..cc91707c4b009d 100644 --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=x86_64-- -Os -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK,%llvmcheckext %s +; RUN: opt -enable-new-pm=0 -mtriple=x86_64-- -Os -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK,%llvmcheckext %s ; REQUIRES: asserts diff --git a/llvm/test/Other/opt-pipeline-vector-passes.ll b/llvm/test/Other/opt-pipeline-vector-passes.ll index c9966d43e49126..5a76bfed168542 100644 --- a/llvm/test/Other/opt-pipeline-vector-passes.ll +++ b/llvm/test/Other/opt-pipeline-vector-passes.ll @@ -1,8 +1,8 @@ -; RUN: opt -O1 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O1 -; RUN: opt -O2 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O2 -; RUN: opt -O2 -extra-vectorizer-passes -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O2_EXTRA -; RUN: opt -O1 -vectorize-loops=0 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O1_FORCE_OFF -; RUN: opt -O2 -vectorize-loops=0 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O2_FORCE_OFF +; RUN: opt -enable-new-pm=0 -O1 -debug-pass=Structure < %s -o /dev/null 2>&1 | 
FileCheck %s --check-prefixes=OLDPM_O1 +; RUN: opt -enable-new-pm=0 -O2 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O2 +; RUN: opt -enable-new-pm=0 -O2 -extra-vectorizer-passes -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O2_EXTRA +; RUN: opt -enable-new-pm=0 -O1 -vectorize-loops=0 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O1_FORCE_OFF +; RUN: opt -enable-new-pm=0 -O2 -vectorize-loops=0 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O2_FORCE_OFF ; RUN: opt -disable-verify -debug-pass-manager -passes='default' -S %s 2>&1 | FileCheck %s --check-prefixes=NEWPM_O1 ; RUN: opt -disable-verify -debug-pass-manager -passes='default' -S %s 2>&1 | FileCheck %s --check-prefixes=NEWPM_O2 diff --git a/llvm/test/Other/optimize-options.ll b/llvm/test/Other/optimize-options.ll index 22dd842cab0696..ab2fc8f75b73bf 100644 --- a/llvm/test/Other/optimize-options.ll +++ b/llvm/test/Other/optimize-options.ll @@ -1,8 +1,8 @@ -;RUN: opt -S -O1 -debug-pass=Arguments %s 2>&1 | FileCheck %s -;RUN: opt -S -O2 -debug-pass=Arguments %s 2>&1 | FileCheck %s -;RUN: opt -S -Os -debug-pass=Arguments %s 2>&1 | FileCheck %s -;RUN: opt -S -Oz -debug-pass=Arguments %s 2>&1 | FileCheck %s -;RUN: opt -S -O3 -debug-pass=Arguments %s 2>&1 | FileCheck %s +;RUN: opt -enable-new-pm=0 -S -O1 -debug-pass=Arguments %s 2>&1 | FileCheck %s +;RUN: opt -enable-new-pm=0 -S -O2 -debug-pass=Arguments %s 2>&1 | FileCheck %s +;RUN: opt -enable-new-pm=0 -S -Os -debug-pass=Arguments %s 2>&1 | FileCheck %s +;RUN: opt -enable-new-pm=0 -S -Oz -debug-pass=Arguments %s 2>&1 | FileCheck %s +;RUN: opt -enable-new-pm=0 -S -O3 -debug-pass=Arguments %s 2>&1 | FileCheck %s ; Just check that we get a non-empty set of passes for each -O option. 
;CHECK: Pass Arguments: {{.*}} -print-module diff --git a/llvm/test/Other/pass-pipelines.ll b/llvm/test/Other/pass-pipelines.ll index 620325ec1d5ee6..ccd364d5d74044 100644 --- a/llvm/test/Other/pass-pipelines.ll +++ b/llvm/test/Other/pass-pipelines.ll @@ -3,15 +3,15 @@ ; legacy pass manager doesn't introduce unexpected structural changes in the ; pass pipeline. ; -; RUN: opt -disable-output -disable-verify -debug-pass=Structure \ +; RUN: opt -enable-new-pm=0 -disable-output -disable-verify -debug-pass=Structure \ ; RUN: -O2 %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O2 ; RUN: llvm-profdata merge %S/Inputs/pass-pipelines.proftext -o %t.profdata -; RUN: opt -disable-output -disable-verify -debug-pass=Structure \ +; RUN: opt -enable-new-pm=0 -disable-output -disable-verify -debug-pass=Structure \ ; RUN: -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' \ ; RUN: -O2 %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O2 --check-prefix=PGOUSE -; RUN: opt -disable-output -disable-verify -debug-pass=Structure \ +; RUN: opt -enable-new-pm=0 -disable-output -disable-verify -debug-pass=Structure \ ; RUN: -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' \ ; RUN: -hot-cold-split \ ; RUN: -O2 %s 2>&1 \ diff --git a/llvm/test/Other/print-cfg-sccs.ll b/llvm/test/Other/print-cfg-sccs.ll index 43e885476bca81..6162b2d38fed5f 100644 --- a/llvm/test/Other/print-cfg-sccs.ll +++ b/llvm/test/Other/print-cfg-sccs.ll @@ -1,4 +1,4 @@ -; RUN: opt -print-cfg-sccs -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -enable-new-pm=0 -print-cfg-sccs -disable-output < %s 2>&1 | FileCheck %s ; CHECK: SCCs for Function test in PostOrder: ; CHECK-NEXT: SCC #1 : %exit, diff --git a/llvm/test/Other/print-module-scope.ll b/llvm/test/Other/print-module-scope.ll index 54e087ff29d25f..08d6bbb3a28b0a 100644 --- a/llvm/test/Other/print-module-scope.ll +++ b/llvm/test/Other/print-module-scope.ll @@ -3,13 +3,13 @@ ; - all the function attributes are shown, including those of 
declarations ; - works on top of -print-after and -filter-print-funcs ; -; RUN: opt < %s 2>&1 -disable-output \ +; RUN: opt -enable-new-pm=0 < %s 2>&1 -disable-output \ ; RUN: -simplifycfg -print-after=simplifycfg -print-module-scope \ ; RUN: | FileCheck %s -check-prefix=CFG ; RUN: opt < %s 2>&1 -disable-output \ ; RUN: -passes=simplify-cfg -print-after-all -print-module-scope \ ; RUN: | FileCheck %s -check-prefix=CFG -; RUN: opt < %s 2>&1 -disable-output \ +; RUN: opt -enable-new-pm=0 < %s 2>&1 -disable-output \ ; RUN: -simplifycfg -print-after=simplifycfg -filter-print-funcs=foo -print-module-scope \ ; RUN: | FileCheck %s -check-prefix=FOO ; RUN: opt < %s 2>&1 -disable-output \ diff --git a/llvm/test/Other/printer.ll b/llvm/test/Other/printer.ll index 86337656285500..f5fdbfc1d7099c 100644 --- a/llvm/test/Other/printer.ll +++ b/llvm/test/Other/printer.ll @@ -1,4 +1,4 @@ -; RUN: opt -mem2reg -instcombine -print-after-all -disable-output < %s 2>&1 | \ +; RUN: opt -enable-new-pm=0 -mem2reg -instcombine -print-after-all -disable-output < %s 2>&1 | \ ; RUN: FileCheck --check-prefixes=CHECK,OLDPM %s --implicit-check-not='IR Dump' ; RUN: opt -passes='mem2reg,instcombine' -print-after-all -disable-output < %s 2>&1 | \ ; RUN: FileCheck --check-prefixes=CHECK,NEWPM %s --implicit-check-not='IR Dump' diff --git a/llvm/test/Other/time-passes.ll b/llvm/test/Other/time-passes.ll index 743b4ebc0d6d1a..e3b5a003703079 100644 --- a/llvm/test/Other/time-passes.ll +++ b/llvm/test/Other/time-passes.ll @@ -1,11 +1,11 @@ -; RUN: opt < %s -disable-output -instcombine -instcombine -licm -time-passes 2>&1 | FileCheck %s --check-prefix=TIME --check-prefix=TIME-LEGACY -; RUN: opt < %s -disable-output -instcombine -instcombine -licm -licm -time-passes 2>&1 | FileCheck %s --check-prefix=TIME --check-prefix=TIME-LEGACY --check-prefix=TIME-DOUBLE-LICM-LEGACY +; RUN: opt -enable-new-pm=0 < %s -disable-output -instcombine -instcombine -licm -time-passes 2>&1 | FileCheck %s --check-prefix=TIME 
--check-prefix=TIME-LEGACY +; RUN: opt -enable-new-pm=0 < %s -disable-output -instcombine -instcombine -licm -licm -time-passes 2>&1 | FileCheck %s --check-prefix=TIME --check-prefix=TIME-LEGACY --check-prefix=TIME-DOUBLE-LICM-LEGACY ; RUN: opt < %s -disable-output -passes='instcombine,instcombine,loop(licm)' -time-passes 2>&1 | FileCheck %s --check-prefix=TIME --check-prefix=TIME-NEW ; RUN: opt < %s -disable-output -passes='instcombine,loop(licm),instcombine,loop(licm)' -time-passes 2>&1 | FileCheck %s --check-prefix=TIME --check-prefix=TIME-NEW -check-prefix=TIME-DOUBLE-LICM-NEW ; RUN: opt < %s -disable-output -passes='default' -time-passes 2>&1 | FileCheck %s --check-prefix=TIME ; ; The following 4 test runs verify -info-output-file interaction (default goes to stderr, '-' goes to stdout). -; RUN: opt < %s -disable-output -O2 -time-passes -info-output-file='-' 2>/dev/null | FileCheck %s --check-prefix=TIME +; RUN: opt -enable-new-pm=0 < %s -disable-output -O2 -time-passes -info-output-file='-' 2>/dev/null | FileCheck %s --check-prefix=TIME ; RUN: opt < %s -disable-output -passes='default' -time-passes -info-output-file='-' 2>/dev/null | FileCheck %s --check-prefix=TIME ; ; RUN: rm -f %t; opt < %s -disable-output -O2 -time-passes -info-output-file=%t diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll index 639772d553f6ab..1a95bdb9ce351a 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll @@ -7,7 +7,7 @@ define internal i32 @deref(i32* %x) nounwind { ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@deref -; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) +; IS__TUNIT_OPM-SAME: (i32* noalias nocapture 
nofree noundef nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) ; IS__TUNIT_OPM-NEXT: entry: ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]], align 4 ; IS__TUNIT_OPM-NEXT: ret i32 [[TMP2]] @@ -23,7 +23,7 @@ define internal i32 @deref(i32* %x) nounwind { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@deref -; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]], align 4 ; IS__CGSCC____-NEXT: ret i32 [[TMP2]] @@ -40,7 +40,7 @@ define i32 @f(i32 %x) { ; IS__TUNIT_OPM-NEXT: entry: ; IS__TUNIT_OPM-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 ; IS__TUNIT_OPM-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 -; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = call i32 @deref(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X_ADDR]]) +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = call i32 @deref(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X_ADDR]]) ; IS__TUNIT_OPM-NEXT: ret i32 [[TMP1]] ; ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn @@ -59,7 +59,7 @@ define i32 @f(i32 %x) { ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 -; IS__CGSCC____-NEXT: [[TMP1:%.*]] = call i32 @deref(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X_ADDR]]) +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = call i32 @deref(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X_ADDR]]) ; IS__CGSCC____-NEXT: ret i32 [[TMP1]] ; entry: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll 
b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll index db349295a54d4a..c5affd398d0cdf 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll @@ -10,14 +10,14 @@ target triple = "x86_64-unknown-linux-gnu" define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@no_promote_avx2 -; NOT_TUNIT_NPM-SAME: (<4 x i64>* nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* nocapture nofree nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) +; NOT_TUNIT_NPM-SAME: (<4 x i64>* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) ; NOT_TUNIT_NPM-NEXT: bb: ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]], align 32 ; NOT_TUNIT_NPM-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32 ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@no_promote_avx2 -; IS__TUNIT_NPM-SAME: (<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) +; IS__TUNIT_NPM-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) ; IS__TUNIT_NPM-NEXT: bb: ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]], align 32 ; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32 @@ -36,8 +36,8 @@ define void @no_promote(<4 x i64>* %arg) #1 { ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; 
IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_OPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__TUNIT_OPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__TUNIT_OPM-NEXT: ret void @@ -48,8 +48,8 @@ define void @no_promote(<4 x i64>* %arg) #1 { ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_NPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) 
[[TMP2]], <4 x i64>* noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -60,8 +60,8 @@ define void @no_promote(<4 x i64>* %arg) #1 { ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__CGSCC_OPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -72,8 +72,8 @@ define void @no_promote(<4 x i64>* %arg) #1 { ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @no_promote_avx2(<4 x 
i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -91,14 +91,14 @@ bb: define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@promote_avx2 -; NOT_TUNIT_NPM-SAME: (<4 x i64>* nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* nocapture nofree nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) +; NOT_TUNIT_NPM-SAME: (<4 x i64>* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) ; NOT_TUNIT_NPM-NEXT: bb: ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]], align 32 ; NOT_TUNIT_NPM-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32 ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@promote_avx2 -; IS__TUNIT_NPM-SAME: (<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) +; IS__TUNIT_NPM-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) ; IS__TUNIT_NPM-NEXT: bb: ; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] 
= alloca <4 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG1_PRIV]], align 32 @@ -119,8 +119,8 @@ define void @promote(<4 x i64>* %arg) #0 { ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_OPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__TUNIT_OPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__TUNIT_OPM-NEXT: ret void @@ -131,9 +131,9 @@ define void @promote(<4 x i64>* %arg) #0 { ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 32 -; 
IS__TUNIT_NPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) +; IS__TUNIT_NPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -144,8 +144,8 @@ define void @promote(<4 x i64>* %arg) #0 { ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__CGSCC_OPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -156,8 +156,8 @@ define void @promote(<4 x i64>* %arg) #0 { ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* 
[[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll index 4274e3c89111c1..6fa2d588382e18 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -12,14 +12,14 @@ target triple = "x86_64-unknown-linux-gnu" define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { ; ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 -; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) +; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 
dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) ; NOT_TUNIT_NPM-NEXT: bb: ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64 ; NOT_TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 -; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; IS__TUNIT_NPM-NEXT: bb: ; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 @@ -41,8 +41,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], 
<8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_OPM-NEXT: ret void @@ -53,9 +53,9 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -66,8 +66,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 
dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -78,8 +78,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree 
noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -99,14 +99,14 @@ bb: define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { ; ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 -; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) +; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) ; NOT_TUNIT_NPM-NEXT: bb: ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64 ; NOT_TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 -; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; IS__TUNIT_NPM-NEXT: bb: ; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 @@ -128,8 +128,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 
32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_OPM-NEXT: ret void @@ -140,9 +140,9 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x 
i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -153,8 +153,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -165,8 +165,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; 
IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -186,14 +186,14 @@ bb: define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { ; ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 -; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) +; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) ; NOT_TUNIT_NPM-NEXT: 
bb: ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64 ; NOT_TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 -; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; IS__TUNIT_NPM-NEXT: bb: ; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 @@ -215,8 +215,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x 
i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_OPM-NEXT: ret void @@ -227,9 +227,9 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -240,8 +240,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void 
@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -252,8 +252,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias 
nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -273,14 +273,14 @@ bb: define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { ; ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 -; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) +; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) ; NOT_TUNIT_NPM-NEXT: bb: ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64 ; NOT_TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 -; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; IS__TUNIT_NPM-NEXT: bb: ; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 @@ -302,8 +302,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; 
IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_OPM-NEXT: ret void @@ -314,9 +314,9 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 
dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -327,8 +327,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -339,8 +339,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; 
IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -360,14 +360,14 @@ bb: define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { ; ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 -; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) +; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) ; NOT_TUNIT_NPM-NEXT: bb: ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x 
i64>* [[ARG1]], align 64 ; NOT_TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 -; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) +; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) ; IS__TUNIT_NPM-NEXT: bb: ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 @@ -387,8 +387,8 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull 
readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_OPM-NEXT: ret void @@ -399,8 +399,8 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -411,8 +411,8 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture 
nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -423,8 +423,8 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x 
i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -444,14 +444,14 @@ bb: define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 { ; ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 -; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) +; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) ; NOT_TUNIT_NPM-NEXT: bb: ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64 ; NOT_TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 -; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) +; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) ; IS__TUNIT_NPM-NEXT: bb: ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], 
<8 x i64>* [[ARG]], align 64 @@ -471,8 +471,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_OPM-NEXT: ret void @@ -483,8 +483,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly 
align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -495,8 +495,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x 
i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -507,8 +507,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -528,14 +528,14 @@ bb: define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 { ; ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 -; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) 
+; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) ; NOT_TUNIT_NPM-NEXT: bb: ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64 ; NOT_TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 -; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; IS__TUNIT_NPM-NEXT: bb: ; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 @@ -557,8 +557,8 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* 
nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_OPM-NEXT: ret void @@ -569,9 +569,9 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -582,8 +582,8 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: 
call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -594,8 +594,8 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void 
@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -615,14 +615,14 @@ bb: define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 { ; ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 -; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) +; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) ; NOT_TUNIT_NPM-NEXT: bb: ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64 ; NOT_TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 -; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; IS__TUNIT_NPM-NEXT: bb: ; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 @@ -644,8 +644,8 @@ define void 
@avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_OPM-NEXT: ret void @@ -656,9 +656,9 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], 
align 64 -; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -669,8 +669,8 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -681,8 
+681,8 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll index fa289c15cacd5e..f96cc52e9837af 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll @@ -16,23 +16,23 @@ target triple = "i386-pc-windows-msvc19.11.0" define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a }>* inalloca) { ; IS__TUNIT____-LABEL: define {{[^@]+}}@internalfun -; IS__TUNIT____-SAME: (%struct.a* noalias nocapture nofree readnone 
[[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca nonnull align 4 dereferenceable(1) [[TMP0:%.*]]) +; IS__TUNIT____-SAME: (%struct.a* noalias nocapture nofree readnone [[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca noundef nonnull align 4 dereferenceable(1) [[TMP0:%.*]]) ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0 ; IS__TUNIT____-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4 ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0 -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* nonnull align 4 dereferenceable(1) [[TMP1]], %struct.a* nonnull align 4 dereferenceable(1) [[A]]) -; IS__TUNIT____-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca nonnull align 4 dereferenceable(1) [[ARGMEM]]) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* noundef nonnull align 4 dereferenceable(1) [[TMP1]], %struct.a* noundef nonnull align 4 dereferenceable(1) [[A]]) +; IS__TUNIT____-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca noundef nonnull align 4 dereferenceable(1) [[ARGMEM]]) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@internalfun -; IS__CGSCC____-SAME: (%struct.a* nocapture nofree readnone [[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca nonnull align 4 dereferenceable(1) [[TMP0:%.*]]) +; IS__CGSCC____-SAME: (%struct.a* nocapture nofree readnone [[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca noundef nonnull align 4 dereferenceable(1) [[TMP0:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0 ; IS__CGSCC____-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4 ; IS__CGSCC____-NEXT: [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* 
[[ARGMEM]], i32 0, i32 0 -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* nonnull align 4 dereferenceable(1) [[TMP1]], %struct.a* nonnull align 4 dereferenceable(1) [[A]]) -; IS__CGSCC____-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca nonnull align 4 dereferenceable(1) [[ARGMEM]]) +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* noundef nonnull align 4 dereferenceable(1) [[TMP1]], %struct.a* noundef nonnull align 4 dereferenceable(1) [[A]]) +; IS__CGSCC____-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca noundef nonnull align 4 dereferenceable(1) [[ARGMEM]]) ; IS__CGSCC____-NEXT: ret void ; entry: @@ -48,18 +48,18 @@ entry: define void @exportedfun(%struct.a* %a) { ; IS__TUNIT____-LABEL: define {{[^@]+}}@exportedfun ; IS__TUNIT____-SAME: (%struct.a* nocapture nofree readnone [[A:%.*]]) -; IS__TUNIT____-NEXT: [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave() +; IS__TUNIT____-NEXT: [[INALLOCA_SAVE:%.*]] = tail call noundef i8* @llvm.stacksave() ; IS__TUNIT____-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4 -; IS__TUNIT____-NEXT: call x86_thiscallcc void @internalfun(%struct.a* noalias nocapture nofree readnone undef, <{ [[STRUCT_A]] }>* inalloca nonnull align 4 dereferenceable(1) [[ARGMEM]]) -; IS__TUNIT____-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) +; IS__TUNIT____-NEXT: call x86_thiscallcc void @internalfun(%struct.a* noalias nocapture nofree readnone undef, <{ [[STRUCT_A]] }>* inalloca noundef nonnull align 4 dereferenceable(1) [[ARGMEM]]) +; IS__TUNIT____-NEXT: call void @llvm.stackrestore(i8* noundef [[INALLOCA_SAVE]]) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@exportedfun ; IS__CGSCC____-SAME: (%struct.a* nocapture nofree readnone [[A:%.*]]) -; IS__CGSCC____-NEXT: [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave() +; IS__CGSCC____-NEXT: [[INALLOCA_SAVE:%.*]] = tail call noundef i8* 
@llvm.stacksave() ; IS__CGSCC____-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4 -; IS__CGSCC____-NEXT: call x86_thiscallcc void @internalfun(%struct.a* noalias nocapture nofree readnone [[A]], <{ [[STRUCT_A]] }>* inalloca nonnull align 4 dereferenceable(1) [[ARGMEM]]) -; IS__CGSCC____-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) +; IS__CGSCC____-NEXT: call x86_thiscallcc void @internalfun(%struct.a* noalias nocapture nofree readnone [[A]], <{ [[STRUCT_A]] }>* inalloca noundef nonnull align 4 dereferenceable(1) [[ARGMEM]]) +; IS__CGSCC____-NEXT: call void @llvm.stackrestore(i8* noundef [[INALLOCA_SAVE]]) ; IS__CGSCC____-NEXT: ret void ; %inalloca.save = tail call i8* @llvm.stacksave() diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll index 33cc4975d59608..59c590abe9e932 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll @@ -8,7 +8,7 @@ define void @f() { ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@f() ; NOT_TUNIT_NPM-NEXT: entry: ; NOT_TUNIT_NPM-NEXT: [[A:%.*]] = alloca i32, align 1 -; NOT_TUNIT_NPM-NEXT: call void @g(i32* noalias nocapture nonnull readonly dereferenceable(4) [[A]]) +; NOT_TUNIT_NPM-NEXT: call void @g(i32* noalias nocapture noundef nonnull readonly dereferenceable(4) [[A]]) ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@f() @@ -26,7 +26,7 @@ entry: define internal void @g(i32* %a) { ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@g -; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nonnull readonly dereferenceable(4) [[A:%.*]]) +; IS__TUNIT_OPM-SAME: (i32* noalias nocapture noundef nonnull readonly dereferenceable(4) [[A:%.*]]) ; IS__TUNIT_OPM-NEXT: [[AA:%.*]] = load i32, i32* [[A]], align 1 ; IS__TUNIT_OPM-NEXT: call void @z(i32 [[AA]]) ; IS__TUNIT_OPM-NEXT: ret void @@ -40,7 +40,7 @@ define internal void @g(i32* %a) { 
; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@g -; IS__CGSCC____-SAME: (i32* nocapture nonnull readonly dereferenceable(4) [[A:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture noundef nonnull readonly dereferenceable(4) [[A:%.*]]) ; IS__CGSCC____-NEXT: [[AA:%.*]] = load i32, i32* [[A]], align 1 ; IS__CGSCC____-NEXT: call void @z(i32 [[AA]]) ; IS__CGSCC____-NEXT: ret void @@ -57,7 +57,7 @@ declare void @z(i32) define internal i32 @test(i32* %X, i64* %Y) { ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test -; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i64* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[Y:%.*]]) +; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i64* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[Y:%.*]]) ; IS__TUNIT_OPM-NEXT: [[A:%.*]] = load i32, i32* [[X]], align 4 ; IS__TUNIT_OPM-NEXT: [[B:%.*]] = load i64, i64* [[Y]], align 8 ; IS__TUNIT_OPM-NEXT: [[C:%.*]] = add i32 [[A]], 1 @@ -89,7 +89,7 @@ define internal i32 @test(i32* %X, i64* %Y) { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@test -; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i64* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[Y:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i64* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[Y:%.*]]) ; IS__CGSCC____-NEXT: [[A:%.*]] = load i32, i32* [[X]], align 4 ; IS__CGSCC____-NEXT: [[B:%.*]] = load i64, i64* [[Y]], align 8 ; IS__CGSCC____-NEXT: [[C:%.*]] = add i32 [[A]], 1 @@ -116,10 +116,10 @@ Return2: define internal i32 
@caller(i32* %A) { ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@caller -; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) +; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) ; IS__TUNIT_OPM-NEXT: [[B:%.*]] = alloca i64, align 8 ; IS__TUNIT_OPM-NEXT: store i64 1, i64* [[B]], align 8 -; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i64* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[B]]) +; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i64* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[B]]) ; IS__TUNIT_OPM-NEXT: ret i32 [[C]] ; ; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn @@ -136,10 +136,10 @@ define internal i32 @caller(i32* %A) { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@caller -; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) ; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i64, align 8 ; IS__CGSCC____-NEXT: store i64 1, i64* [[B]], align 8 -; IS__CGSCC____-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i64* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[B]]) +; IS__CGSCC____-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i64* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[B]]) ; IS__CGSCC____-NEXT: 
ret i32 [[C]] ; %B = alloca i64 @@ -153,7 +153,7 @@ define i32 @callercaller() { ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@callercaller() ; IS__TUNIT_OPM-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__TUNIT_OPM-NEXT: store i32 2, i32* [[B]], align 4 -; IS__TUNIT_OPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__TUNIT_OPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__TUNIT_OPM-NEXT: ret i32 [[X]] ; ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone @@ -168,7 +168,7 @@ define i32 @callercaller() { ; IS__CGSCC____-LABEL: define {{[^@]+}}@callercaller() ; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 2, i32* [[B]], align 4 -; IS__CGSCC____-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC____-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC____-NEXT: ret i32 [[X]] ; %B = alloca i32 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll index e7e67c71cf509d..e4a33ef7fc232e 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll @@ -11,7 +11,7 @@ define internal i32 @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind { ; ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@f -; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[X:%.*]], i32 [[I:%.*]]) +; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull byval align 8 
dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[X:%.*]], i32 [[I:%.*]]) ; IS__TUNIT_OPM-NEXT: entry: ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 ; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 @@ -44,7 +44,7 @@ define internal i32 @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind { ; ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f -; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[X:%.*]]) +; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[X:%.*]]) ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 ; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 @@ -100,7 +100,7 @@ define i32 @test(i32* %X) { ; IS__TUNIT_OPM-NEXT: store i32 1, i32* [[TMP1]], align 8 ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; IS__TUNIT_OPM-NEXT: store i64 2, i64* [[TMP4]], align 4 -; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree nonnull readonly byval align 8 dereferenceable(12) [[S]], i32* nocapture nofree readonly byval align 4 [[X]], i32 zeroext 0) +; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree noundef nonnull readonly byval align 8 dereferenceable(12) [[S]], i32* nocapture nofree readonly byval align 4 [[X]], i32 zeroext 0) ; IS__TUNIT_OPM-NEXT: ret i32 [[C]] ; ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn @@ -129,7 +129,7 @@ define i32 @test(i32* 
%X) { ; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[TMP1]], align 8 ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; IS__CGSCC_OPM-NEXT: store i64 2, i64* [[TMP4]], align 4 -; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree nonnull readnone byval align 8 dereferenceable(12) [[S]], i32* noalias nocapture nofree nonnull readnone byval align 4 dereferenceable(4) [[X]]) +; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree noundef nonnull readnone byval align 8 dereferenceable(12) [[S]], i32* noalias nocapture nofree nonnull readnone byval align 4 dereferenceable(4) [[X]]) ; IS__CGSCC_OPM-NEXT: ret i32 [[C]] ; ; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll index 1522dfe907f005..ea60eb5a1d4900 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll @@ -8,7 +8,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1 define internal i32 @test(i32* %X, i32* %Y) { ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test -; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[Y:%.*]]) +; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[Y:%.*]]) ; IS__TUNIT_OPM-NEXT: [[A:%.*]] = load i32, i32* [[X]], align 4 ; IS__TUNIT_OPM-NEXT: [[B:%.*]] = load i32, i32* [[Y]], align 4 ; IS__TUNIT_OPM-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] @@ 
-28,7 +28,7 @@ define internal i32 @test(i32* %X, i32* %Y) { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@test -; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[Y:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[Y:%.*]]) ; IS__CGSCC____-NEXT: [[A:%.*]] = load i32, i32* [[X]], align 4 ; IS__CGSCC____-NEXT: [[B:%.*]] = load i32, i32* [[Y]], align 4 ; IS__CGSCC____-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] @@ -43,10 +43,10 @@ define internal i32 @test(i32* %X, i32* %Y) { define internal i32 @caller(i32* %B) { ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@caller -; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) +; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__TUNIT_OPM-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__TUNIT_OPM-NEXT: store i32 1, i32* [[A]], align 4 -; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__TUNIT_OPM-NEXT: ret i32 [[C]] ; ; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn @@ -63,10 +63,10 @@ define internal i32 @caller(i32* %B) { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse 
nosync nounwind willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@caller -; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__CGSCC____-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 1, i32* [[A]], align 4 -; IS__CGSCC____-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC____-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC____-NEXT: ret i32 [[C]] ; %A = alloca i32 @@ -80,7 +80,7 @@ define i32 @callercaller() { ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@callercaller() ; IS__TUNIT_OPM-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__TUNIT_OPM-NEXT: store i32 2, i32* [[B]], align 4 -; IS__TUNIT_OPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__TUNIT_OPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__TUNIT_OPM-NEXT: ret i32 [[X]] ; ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone @@ -95,7 +95,7 @@ define i32 @callercaller() { ; IS__CGSCC____-LABEL: define {{[^@]+}}@callercaller() ; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 2, i32* [[B]], align 4 -; IS__CGSCC____-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC____-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC____-NEXT: ret i32 [[X]] ; %B = alloca i32 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll index 3d410cf51bcc17..484d5bcaed3a48 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll @@ -9,7 +9,7 @@ define internal void @f(%struct.ss* byval %b, i32* byval %X) nounwind { ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f -; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull writeonly byval align 4 dereferenceable(4) [[X:%.*]]) +; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull writeonly byval align 4 dereferenceable(4) [[X:%.*]]) ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 ; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll index f87bd4b802eb81..e04f0b02204b8c 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll @@ -11,7 +11,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1 define internal i32 @f(%struct.ss* byval %b) nounwind { ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@f -; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]]) +; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull byval align 8 dereferenceable(12) [[B:%.*]]) ; IS__TUNIT_OPM-NEXT: entry: ; 
IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 ; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 @@ -36,7 +36,7 @@ define internal i32 @f(%struct.ss* byval %b) nounwind { ; ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f -; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]]) +; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull byval align 8 dereferenceable(12) [[B:%.*]]) ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 ; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 @@ -71,7 +71,7 @@ entry: define internal i32 @g(%struct.ss* byval align 32 %b) nounwind { ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@g -; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 32 dereferenceable(12) [[B:%.*]]) +; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull byval align 32 dereferenceable(12) [[B:%.*]]) ; IS__TUNIT_OPM-NEXT: entry: ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 ; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32 @@ -96,7 +96,7 @@ define internal i32 @g(%struct.ss* byval align 32 %b) nounwind { ; ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@g -; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 32 dereferenceable(12) [[B:%.*]]) +; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull byval align 32 dereferenceable(12) [[B:%.*]]) ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = getelementptr 
[[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 ; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32 @@ -137,8 +137,8 @@ define i32 @main() nounwind { ; IS__TUNIT_OPM-NEXT: store i32 1, i32* [[TMP1]], align 8 ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; IS__TUNIT_OPM-NEXT: store i64 2, i64* [[TMP4]], align 4 -; IS__TUNIT_OPM-NEXT: [[C0:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree nonnull readonly byval align 8 dereferenceable(12) [[S]]) -; IS__TUNIT_OPM-NEXT: [[C1:%.*]] = call i32 @g(%struct.ss* noalias nocapture nofree nonnull readonly byval align 32 dereferenceable(12) [[S]]) +; IS__TUNIT_OPM-NEXT: [[C0:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree noundef nonnull readonly byval align 8 dereferenceable(12) [[S]]) +; IS__TUNIT_OPM-NEXT: [[C1:%.*]] = call i32 @g(%struct.ss* noalias nocapture nofree noundef nonnull readonly byval align 32 dereferenceable(12) [[S]]) ; IS__TUNIT_OPM-NEXT: [[A:%.*]] = add i32 [[C0]], [[C1]] ; IS__TUNIT_OPM-NEXT: ret i32 [[A]] ; @@ -171,8 +171,8 @@ define i32 @main() nounwind { ; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[TMP1]], align 32 ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; IS__CGSCC_OPM-NEXT: store i64 2, i64* [[TMP4]], align 4 -; IS__CGSCC_OPM-NEXT: [[C0:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree nonnull readnone byval align 32 dereferenceable(12) [[S]]) -; IS__CGSCC_OPM-NEXT: [[C1:%.*]] = call i32 @g(%struct.ss* noalias nocapture nofree nonnull readnone byval align 32 dereferenceable(12) [[S]]) +; IS__CGSCC_OPM-NEXT: [[C0:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree noundef nonnull readnone byval align 32 dereferenceable(12) [[S]]) +; IS__CGSCC_OPM-NEXT: [[C1:%.*]] = call i32 @g(%struct.ss* noalias nocapture nofree noundef nonnull readnone byval align 32 dereferenceable(12) [[S]]) ; IS__CGSCC_OPM-NEXT: [[A:%.*]] = add i32 [[C0]], [[C1]] ; 
IS__CGSCC_OPM-NEXT: ret i32 [[A]] ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll index ce997ba494931a..bc22fd6c862fa2 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll @@ -9,7 +9,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1 define internal i32 @callee(i1 %C, i32* %P) { ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@callee -; IS__TUNIT_OPM-SAME: (i1 [[C:%.*]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) +; IS__TUNIT_OPM-SAME: (i1 [[C:%.*]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) ; IS__TUNIT_OPM-NEXT: br label [[F:%.*]] ; IS__TUNIT_OPM: T: ; IS__TUNIT_OPM-NEXT: unreachable @@ -31,7 +31,7 @@ define internal i32 @callee(i1 %C, i32* %P) { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@callee -; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) ; IS__CGSCC____-NEXT: br label [[F:%.*]] ; IS__CGSCC____: T: ; IS__CGSCC____-NEXT: unreachable @@ -54,7 +54,7 @@ define i32 @foo() { ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@foo() ; IS__TUNIT_OPM-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__TUNIT_OPM-NEXT: store i32 17, i32* [[A]], align 4 -; IS__TUNIT_OPM-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]]) +; IS__TUNIT_OPM-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) 
[[A]]) ; IS__TUNIT_OPM-NEXT: ret i32 [[X]] ; ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn @@ -69,7 +69,7 @@ define i32 @foo() { ; IS__CGSCC____-LABEL: define {{[^@]+}}@foo() ; IS__CGSCC____-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 17, i32* [[A]], align 4 -; IS__CGSCC____-NEXT: [[X:%.*]] = call i32 @callee(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]]) +; IS__CGSCC____-NEXT: [[X:%.*]] = call i32 @callee(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]]) ; IS__CGSCC____-NEXT: ret i32 [[X]] ; %A = alloca i32 ; [#uses=2] diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll index b7ff607c270382..5da4437f3ae24f 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll @@ -12,7 +12,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1 define internal i32 @f(%struct.ss* inalloca %s) { ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@f -; IS__TUNIT____-SAME: (%struct.ss* inalloca noalias nocapture nofree nonnull align 4 dereferenceable(8) [[S:%.*]]) +; IS__TUNIT____-SAME: (%struct.ss* inalloca noalias nocapture nofree noundef nonnull align 4 dereferenceable(8) [[S:%.*]]) ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[F0:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[S]], i32 0, i32 0 ; IS__TUNIT____-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 @@ -23,7 +23,7 @@ define internal i32 @f(%struct.ss* inalloca %s) { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@f -; IS__CGSCC____-SAME: (%struct.ss* inalloca nocapture nofree nonnull 
align 4 dereferenceable(8) [[S:%.*]]) +; IS__CGSCC____-SAME: (%struct.ss* inalloca nocapture nofree noundef nonnull align 4 dereferenceable(8) [[S:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[F0:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[S]], i32 0, i32 0 ; IS__CGSCC____-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 @@ -50,7 +50,7 @@ define i32 @main() { ; IS__TUNIT____-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; IS__TUNIT____-NEXT: store i32 1, i32* [[F0]], align 4 ; IS__TUNIT____-NEXT: store i32 2, i32* [[F1]], align 4 -; IS__TUNIT____-NEXT: [[R:%.*]] = call i32 @f(%struct.ss* inalloca noalias nocapture nofree nonnull align 4 dereferenceable(8) [[S]]) +; IS__TUNIT____-NEXT: [[R:%.*]] = call i32 @f(%struct.ss* inalloca noalias nocapture nofree noundef nonnull align 4 dereferenceable(8) [[S]]) ; IS__TUNIT____-NEXT: ret i32 [[R]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn @@ -61,7 +61,7 @@ define i32 @main() { ; IS__CGSCC____-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; IS__CGSCC____-NEXT: store i32 1, i32* [[F0]], align 4 ; IS__CGSCC____-NEXT: store i32 2, i32* [[F1]], align 4 -; IS__CGSCC____-NEXT: [[R:%.*]] = call i32 @f(%struct.ss* inalloca noalias nocapture nofree nonnull align 4 dereferenceable(8) [[S]]) +; IS__CGSCC____-NEXT: [[R:%.*]] = call i32 @f(%struct.ss* inalloca noalias nocapture nofree noundef nonnull align 4 dereferenceable(8) [[S]]) ; IS__CGSCC____-NEXT: ret i32 [[R]] ; entry: @@ -78,7 +78,7 @@ entry: define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind { ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@g -; IS__CGSCC____-SAME: (%struct.ss* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[A:%.*]], %struct.ss* inalloca nocapture nofree nonnull writeonly align 4 
dereferenceable(8) [[B:%.*]]) +; IS__CGSCC____-SAME: (%struct.ss* nocapture nofree noundef nonnull readnone align 4 dereferenceable(8) [[A:%.*]], %struct.ss* inalloca nocapture nofree noundef nonnull writeonly align 4 dereferenceable(8) [[B:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: ret i1 undef ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll index bb14e16820c769..4c2886f83aa992 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll @@ -13,7 +13,7 @@ define internal void @dead() { define internal i32 @test(i32* %X, i32* %Y) { ; IS__CGSCC_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test -; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree writeonly align 4 [[X:%.*]]) +; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree noundef writeonly align 4 [[X:%.*]]) ; IS__CGSCC_OPM-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] ; IS__CGSCC_OPM: live: ; IS__CGSCC_OPM-NEXT: store i32 0, i32* [[X]], align 4 @@ -23,7 +23,7 @@ define internal i32 @test(i32* %X, i32* %Y) { ; ; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test -; IS__CGSCC_NPM-SAME: (i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[X:%.*]]) +; IS__CGSCC_NPM-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[X:%.*]]) ; IS__CGSCC_NPM-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] ; IS__CGSCC_NPM: live: ; IS__CGSCC_NPM-NEXT: store i32 0, i32* [[X]], align 4 @@ -46,14 +46,14 @@ define internal i32 @caller(i32* %B) { ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@caller() ; IS__CGSCC_OPM-NEXT: [[A:%.*]] = alloca i32, align 4 ; 
IS__CGSCC_OPM-NEXT: store i32 1, i32* [[A]], align 4 -; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A]]) +; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]]) ; IS__CGSCC_OPM-NEXT: ret i32 0 ; ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@caller() ; IS__CGSCC_NPM-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__CGSCC_NPM-NEXT: store i32 1, i32* [[A]], align 4 -; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A]]) +; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]]) ; IS__CGSCC_NPM-NEXT: ret i32 undef ; %A = alloca i32 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll index fc1e6589499032..d3bc0c4d317796 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll @@ -13,7 +13,7 @@ define internal void @dead() { define internal i32 @test(i32* %X, i32* %Y) { ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly ; IS__TUNIT____-LABEL: define {{[^@]+}}@test -; IS__TUNIT____-SAME: (i32* noalias nocapture nofree writeonly align 4 [[X:%.*]]) +; IS__TUNIT____-SAME: (i32* noalias nocapture nofree noundef writeonly align 4 [[X:%.*]]) ; IS__TUNIT____-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] ; IS__TUNIT____: live: ; IS__TUNIT____-NEXT: store i32 0, i32* [[X]], align 4 @@ -23,7 +23,7 @@ define internal i32 @test(i32* %X, i32* %Y) { ; ; IS__CGSCC_OPM: Function Attrs: argmemonly nofree nosync nounwind 
willreturn writeonly ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test -; IS__CGSCC_OPM-SAME: (i32* nocapture nofree writeonly align 4 [[X:%.*]]) +; IS__CGSCC_OPM-SAME: (i32* nocapture nofree noundef writeonly align 4 [[X:%.*]]) ; IS__CGSCC_OPM-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] ; IS__CGSCC_OPM: live: ; IS__CGSCC_OPM-NEXT: store i32 0, i32* [[X]], align 4 @@ -33,7 +33,7 @@ define internal i32 @test(i32* %X, i32* %Y) { ; ; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test -; IS__CGSCC_NPM-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[X:%.*]]) +; IS__CGSCC_NPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[X:%.*]]) ; IS__CGSCC_NPM-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] ; IS__CGSCC_NPM: live: ; IS__CGSCC_NPM-NEXT: store i32 0, i32* [[X]], align 4 @@ -54,26 +54,26 @@ dead: define internal i32 @caller(i32* %B) { ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly ; IS__TUNIT____-LABEL: define {{[^@]+}}@caller -; IS__TUNIT____-SAME: (i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) +; IS__TUNIT____-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__TUNIT____-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__TUNIT____-NEXT: store i32 1, i32* [[A]], align 4 -; IS__TUNIT____-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B]]) +; IS__TUNIT____-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) ; IS__TUNIT____-NEXT: ret i32 0 ; ; IS__CGSCC_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@caller -; IS__CGSCC_OPM-SAME: (i32* nocapture nofree nonnull writeonly align 4 
dereferenceable(4) [[B:%.*]]) +; IS__CGSCC_OPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__CGSCC_OPM-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[A]], align 4 -; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC_OPM-NEXT: ret i32 0 ; ; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@caller -; IS__CGSCC_NPM-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) +; IS__CGSCC_NPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__CGSCC_NPM-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__CGSCC_NPM-NEXT: store i32 1, i32* [[A]], align 4 -; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC_NPM-NEXT: ret i32 undef ; %A = alloca i32 @@ -87,14 +87,14 @@ define i32 @callercaller() { ; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@callercaller() ; NOT_CGSCC_NPM-NEXT: [[B:%.*]] = alloca i32, align 4 ; NOT_CGSCC_NPM-NEXT: store i32 2, i32* [[B]], align 4 -; NOT_CGSCC_NPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B]]) +; NOT_CGSCC_NPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) ; NOT_CGSCC_NPM-NEXT: ret i32 0 ; ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callercaller() ; 
IS__CGSCC_NPM-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__CGSCC_NPM-NEXT: store i32 2, i32* [[B]], align 4 -; IS__CGSCC_NPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC_NPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC_NPM-NEXT: ret i32 0 ; %B = alloca i32 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/naked_functions.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/naked_functions.ll index 0d3464c062fa20..dcd4feba716a00 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/naked_functions.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/naked_functions.ll @@ -11,7 +11,7 @@ define i32 @bar() { ; CHECK-LABEL: define {{[^@]+}}@bar() ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo(i32* nonnull align 4 dereferenceable(4) @g) +; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo(i32* noundef nonnull align 4 dereferenceable(4) @g) ; CHECK-NEXT: ret i32 [[CALL]] ; entry: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll index 1c847b88b52860..94be92dc73695f 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll @@ -11,7 +11,7 @@ define void @caller() #0 { ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@caller() ; NOT_TUNIT_NPM-NEXT: [[X:%.*]] = alloca i32, align 4 ; NOT_TUNIT_NPM-NEXT: store i32 42, i32* [[X]], align 4 -; NOT_TUNIT_NPM-NEXT: call void @promote_i32_ptr(i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[X]]), !prof !0 +; NOT_TUNIT_NPM-NEXT: call void @promote_i32_ptr(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[X]]), !prof !0 ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@caller() @@ -29,7 +29,7 @@ 
define void @caller() #0 { define internal void @promote_i32_ptr(i32* %xp) { ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@promote_i32_ptr -; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[XP:%.*]]) +; IS__TUNIT_OPM-SAME: (i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[XP:%.*]]) ; IS__TUNIT_OPM-NEXT: [[X:%.*]] = load i32, i32* [[XP]], align 4 ; IS__TUNIT_OPM-NEXT: call void @use_i32(i32 [[X]]) ; IS__TUNIT_OPM-NEXT: ret void @@ -43,7 +43,7 @@ define internal void @promote_i32_ptr(i32* %xp) { ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@promote_i32_ptr -; IS__CGSCC____-SAME: (i32* nocapture nonnull readonly align 4 dereferenceable(4) [[XP:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[XP:%.*]]) ; IS__CGSCC____-NEXT: [[X:%.*]] = load i32, i32* [[XP]], align 4 ; IS__CGSCC____-NEXT: call void @use_i32(i32 [[X]]) ; IS__CGSCC____-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll index 38a159608827b5..2f7e41f080cd6f 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll @@ -19,7 +19,7 @@ define internal fastcc void @fn(i32* nocapture readonly %p1, i64* nocapture readonly %p2) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@fn -; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P1:%.*]]) +; IS__TUNIT____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P1:%.*]]) ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* @g, align 4, [[TBAA0:!tbaa !.*]] ; IS__TUNIT____-NEXT: [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8 @@ -51,7 +51,7 @@ define i32 @main() { ; 
IS__TUNIT____-NEXT: store i32* @g, i32** [[TMP0]], align 8, [[TBAA5]] ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32*, i32** @a, align 8, [[TBAA5]] ; IS__TUNIT____-NEXT: store i32 1, i32* [[TMP1]], align 4, [[TBAA0]] -; IS__TUNIT____-NEXT: call fastcc void @fn(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) @g) +; IS__TUNIT____-NEXT: call fastcc void @fn(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) @g) ; IS__TUNIT____-NEXT: ret i32 0 ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll index a16d6fc49ee3dc..834df9a1c85447 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll @@ -11,7 +11,7 @@ define internal void @add({i32, i32}* %this, i32* sret %r) { ; ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@add -; IS__TUNIT_OPM-SAME: ({ i32, i32 }* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R:%.*]]) +; IS__TUNIT_OPM-SAME: ({ i32, i32 }* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* nocapture nofree noundef nonnull sret writeonly align 4 dereferenceable(4) [[R:%.*]]) ; IS__TUNIT_OPM-NEXT: [[AP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 0 ; IS__TUNIT_OPM-NEXT: [[BP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 1 ; IS__TUNIT_OPM-NEXT: [[A:%.*]] = load i32, i32* [[AP]], align 8 @@ -22,7 +22,7 @@ define internal void @add({i32, i32}* %this, i32* sret %r) { ; ; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@add -; IS__TUNIT_NPM-SAME: ({ i32, i32 }* noalias 
nocapture nofree nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* noalias nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R:%.*]]) +; IS__TUNIT_NPM-SAME: ({ i32, i32 }* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* noalias nocapture nofree noundef nonnull sret writeonly align 4 dereferenceable(4) [[R:%.*]]) ; IS__TUNIT_NPM-NEXT: [[AP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 0 ; IS__TUNIT_NPM-NEXT: [[BP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 1 ; IS__TUNIT_NPM-NEXT: [[A:%.*]] = load i32, i32* [[AP]], align 8 @@ -33,7 +33,7 @@ define internal void @add({i32, i32}* %this, i32* sret %r) { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@add -; IS__CGSCC____-SAME: ({ i32, i32 }* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R:%.*]]) +; IS__CGSCC____-SAME: ({ i32, i32 }* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* nocapture nofree noundef nonnull sret writeonly align 4 dereferenceable(4) [[R:%.*]]) ; IS__CGSCC____-NEXT: [[AP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 0 ; IS__CGSCC____-NEXT: [[BP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 1 ; IS__CGSCC____-NEXT: [[A:%.*]] = load i32, i32* [[AP]], align 8 @@ -56,28 +56,28 @@ define void @f() { ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@f() ; IS__TUNIT_OPM-NEXT: [[R:%.*]] = alloca i32, align 4 ; IS__TUNIT_OPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 }, align 8 -; IS__TUNIT_OPM-NEXT: call void @add({ i32, i32 }* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R]]) +; IS__TUNIT_OPM-NEXT: call void @add({ i32, i32 
}* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* nocapture nofree noundef nonnull sret writeonly align 4 dereferenceable(4) [[R]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@f() ; IS__TUNIT_NPM-NEXT: [[R:%.*]] = alloca i32, align 4 ; IS__TUNIT_NPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 }, align 8 -; IS__TUNIT_NPM-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R]]) +; IS__TUNIT_NPM-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nofree noundef nonnull sret writeonly align 4 dereferenceable(4) [[R]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f() ; IS__CGSCC_OPM-NEXT: [[R:%.*]] = alloca i32, align 4 ; IS__CGSCC_OPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 }, align 8 -; IS__CGSCC_OPM-NEXT: call void @add({ i32, i32 }* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R]]) +; IS__CGSCC_OPM-NEXT: call void @add({ i32, i32 }* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* nocapture nofree noundef nonnull sret writeonly align 4 dereferenceable(4) [[R]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@f() ; IS__CGSCC_NPM-NEXT: [[R:%.*]] = alloca i32, align 4 ; IS__CGSCC_NPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 }, align 8 -; IS__CGSCC_NPM-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree nonnull readonly align 8 
dereferenceable(8) [[PAIR]], i32* noalias nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R]]) +; IS__CGSCC_NPM-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nofree noundef nonnull sret writeonly align 4 dereferenceable(4) [[R]]) ; IS__CGSCC_NPM-NEXT: ret void ; %r = alloca i32 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll index ce4f78065d1161..685e21df3d27c0 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll @@ -34,7 +34,7 @@ define internal void @bar(%pair* byval %Data) { ; IS__CGSCC_NPM-NEXT: store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]], align 4 ; IS__CGSCC_NPM-NEXT: [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1 ; IS__CGSCC_NPM-NEXT: store i32 [[TMP1]], i32* [[DATA_PRIV_0_1]], align 4 -; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = call i8* @foo(%pair* nonnull align 8 dereferenceable(8) [[DATA_PRIV]]) +; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = call i8* @foo(%pair* noundef nonnull align 8 dereferenceable(8) [[DATA_PRIV]]) ; IS__CGSCC_NPM-NEXT: ret void ; tail call i8* @foo(%pair* %Data) diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/variadic.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/variadic.ll index 6120b725cc74fc..b072069f8945ef 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/variadic.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/variadic.ll @@ -21,7 +21,7 @@ define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 { ; CHECK-LABEL: define {{[^@]+}}@main ; CHECK-SAME: (i32 [[ARGC:%.*]], i8** nocapture nofree readnone [[ARGV:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: tail call void (i8*, i8*, i8*, i8*, i8*, ...) 
@callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* nonnull byval align 8 dereferenceable(16) @t45) +; CHECK-NEXT: tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* noundef nonnull byval align 8 dereferenceable(16) @t45) ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll index 98051fc678ad3c..a6e27f7254dd47 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll @@ -11,7 +11,7 @@ define internal void @vfu1(%struct.MYstr* byval align 4 %u) nounwind { ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@vfu1 -; IS__CGSCC_OPM-SAME: (%struct.MYstr* noalias nocapture nofree nonnull writeonly byval align 8 dereferenceable(8) [[U:%.*]]) +; IS__CGSCC_OPM-SAME: (%struct.MYstr* noalias nocapture nofree noundef nonnull writeonly byval align 8 dereferenceable(8) [[U:%.*]]) ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* [[U]], i32 0, i32 1 ; IS__CGSCC_OPM-NEXT: store i32 99, i32* [[TMP0]], align 4 @@ -52,7 +52,7 @@ return: ; preds = %entry define internal i32 @vfu2(%struct.MYstr* byval align 4 %u) nounwind readonly { ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readonly willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@vfu2 -; IS__TUNIT_OPM-SAME: (%struct.MYstr* noalias nocapture nofree nonnull readonly byval align 8 dereferenceable(8) [[U:%.*]]) +; IS__TUNIT_OPM-SAME: (%struct.MYstr* noalias nocapture nofree noundef nonnull readonly byval align 8 dereferenceable(8) [[U:%.*]]) ; IS__TUNIT_OPM-NEXT: entry: ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], 
%struct.MYstr* @mystr, i32 0, i32 1 ; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 @@ -104,7 +104,7 @@ define i32 @unions() nounwind { ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@unions() ; IS__TUNIT_OPM-NEXT: entry: -; IS__TUNIT_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2(%struct.MYstr* nocapture nofree nonnull readonly byval align 8 dereferenceable(8) @mystr) +; IS__TUNIT_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2(%struct.MYstr* nocapture nofree noundef nonnull readonly byval align 8 dereferenceable(8) @mystr) ; IS__TUNIT_OPM-NEXT: ret i32 [[RESULT]] ; ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn @@ -132,7 +132,7 @@ entry: define internal i32 @vfu2_v2(%struct.MYstr* byval align 4 %u) nounwind readonly { ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@vfu2_v2 -; IS__TUNIT_OPM-SAME: (%struct.MYstr* noalias nocapture nofree nonnull byval align 8 dereferenceable(8) [[U:%.*]]) +; IS__TUNIT_OPM-SAME: (%struct.MYstr* noalias nocapture nofree noundef nonnull byval align 8 dereferenceable(8) [[U:%.*]]) ; IS__TUNIT_OPM-NEXT: entry: ; IS__TUNIT_OPM-NEXT: [[Z:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* [[U]], i32 0, i32 1 ; IS__TUNIT_OPM-NEXT: store i32 99, i32* [[Z]], align 4 @@ -165,7 +165,7 @@ define internal i32 @vfu2_v2(%struct.MYstr* byval align 4 %u) nounwind readonly ; ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@vfu2_v2 -; IS__CGSCC_OPM-SAME: (%struct.MYstr* noalias nocapture nofree nonnull byval align 8 dereferenceable(8) [[U:%.*]]) +; IS__CGSCC_OPM-SAME: (%struct.MYstr* noalias nocapture nofree noundef nonnull byval align 8 dereferenceable(8) [[U:%.*]]) ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[Z:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* [[U]], i32 0, i32 1 ; 
IS__CGSCC_OPM-NEXT: store i32 99, i32* [[Z]], align 4 @@ -212,7 +212,7 @@ define i32 @unions_v2() nounwind { ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@unions_v2() ; IS__TUNIT_OPM-NEXT: entry: -; IS__TUNIT_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* nocapture nofree nonnull readonly byval align 8 dereferenceable(8) @mystr) +; IS__TUNIT_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* nocapture nofree noundef nonnull readonly byval align 8 dereferenceable(8) @mystr) ; IS__TUNIT_OPM-NEXT: ret i32 [[RESULT]] ; ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn @@ -228,7 +228,7 @@ define i32 @unions_v2() nounwind { ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@unions_v2() ; IS__CGSCC_OPM-NEXT: entry: -; IS__CGSCC_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* noalias nocapture nofree nonnull readnone byval align 8 dereferenceable(8) @mystr) +; IS__CGSCC_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* noalias nocapture nofree noundef nonnull readnone byval align 8 dereferenceable(8) @mystr) ; IS__CGSCC_OPM-NEXT: ret i32 [[RESULT]] ; ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readonly willreturn diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll index e6b8e8317a42ae..da44880a7c6fea 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll @@ -128,10 +128,10 @@ entry: define void @foo() { ; CHECK-LABEL: define {{[^@]+}}@foo() ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @broker(i32 (i32)* nonnull @cb0, i32 (i32)* nonnull @cb1, i32 (i32)* nonnull @cb0, i32 0, i32 1) -; CHECK-NEXT: call void @broker(i32 (i32)* nonnull @cb1, i32 
(i32)* nonnull @cb2, i32 (i32)* nonnull @cb2, i32 0, i32 1) -; CHECK-NEXT: call void @broker(i32 (i32)* nonnull @cb3, i32 (i32)* nonnull @cb2, i32 (i32)* nonnull @cb3, i32 0, i32 1) -; CHECK-NEXT: call void @broker(i32 (i32)* nonnull @cb4, i32 (i32)* nonnull @cb4, i32 (i32)* nonnull @cb4, i32 0, i32 1) +; CHECK-NEXT: call void @broker(i32 (i32)* noundef nonnull @cb0, i32 (i32)* noundef nonnull @cb1, i32 (i32)* noundef nonnull @cb0, i32 0, i32 1) +; CHECK-NEXT: call void @broker(i32 (i32)* noundef nonnull @cb1, i32 (i32)* noundef nonnull @cb2, i32 (i32)* noundef nonnull @cb2, i32 0, i32 1) +; CHECK-NEXT: call void @broker(i32 (i32)* noundef nonnull @cb3, i32 (i32)* noundef nonnull @cb2, i32 (i32)* noundef nonnull @cb3, i32 0, i32 1) +; CHECK-NEXT: call void @broker(i32 (i32)* noundef nonnull @cb4, i32 (i32)* noundef nonnull @cb4, i32 (i32)* noundef nonnull @cb4, i32 0, i32 1) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll index 83990224bddae0..e33db1ca6f4a06 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll @@ -36,7 +36,7 @@ define dso_local void @foo(i32 %N) { ; IS__TUNIT_OPM-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 ; IS__TUNIT_OPM-NEXT: store float 3.000000e+00, float* [[P]], align 4 ; IS__TUNIT_OPM-NEXT: store i32 7, i32* [[N_ADDR]], align 4 -; IS__TUNIT_OPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. 
to void (i32*, i32*, ...)*), i32* nocapture nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef) +; IS__TUNIT_OPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@foo @@ -47,7 +47,7 @@ define dso_local void @foo(i32 %N) { ; IS__TUNIT_NPM-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 ; IS__TUNIT_NPM-NEXT: store float 3.000000e+00, float* [[P]], align 4 ; IS__TUNIT_NPM-NEXT: store i32 7, i32* [[N_ADDR]], align 4 -; IS__TUNIT_NPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef) +; IS__TUNIT_NPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. 
to void (i32*, i32*, ...)*), i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@foo @@ -58,7 +58,7 @@ define dso_local void @foo(i32 %N) { ; IS__CGSCC_OPM-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 ; IS__CGSCC_OPM-NEXT: store float 3.000000e+00, float* [[P]], align 4 ; IS__CGSCC_OPM-NEXT: store i32 7, i32* [[N_ADDR]], align 4 -; IS__CGSCC_OPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture nonnull readonly align 4 dereferenceable(4) [[P]], i64 4617315517961601024) +; IS__CGSCC_OPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 4617315517961601024) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@foo @@ -69,7 +69,7 @@ define dso_local void @foo(i32 %N) { ; IS__CGSCC_NPM-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 ; IS__CGSCC_NPM-NEXT: store float 3.000000e+00, float* [[P]], align 4 ; IS__CGSCC_NPM-NEXT: store i32 7, i32* [[N_ADDR]], align 4 -; IS__CGSCC_NPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[P]], i64 4617315517961601024) +; IS__CGSCC_NPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 4617315517961601024) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -84,7 +84,7 @@ entry: define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %N, float* dereferenceable(4) %p, i64 %q) { ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@.omp_outlined. 
-; NOT_TUNIT_NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* nocapture nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) +; NOT_TUNIT_NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) ; NOT_TUNIT_NPM-NEXT: entry: ; NOT_TUNIT_NPM-NEXT: [[Q_ADDR:%.*]] = alloca i64, align 8 ; NOT_TUNIT_NPM-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -103,7 +103,7 @@ define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %. ; NOT_TUNIT_NPM-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 ; NOT_TUNIT_NPM-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 ; NOT_TUNIT_NPM-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; NOT_TUNIT_NPM-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB0:@.*]], i32 [[TMP5]], i32 34, i32* nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 1, i32 1) +; NOT_TUNIT_NPM-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0:@.*]], i32 [[TMP5]], i32 34, i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 1, i32 1) ; NOT_TUNIT_NPM-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 ; 
NOT_TUNIT_NPM-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], [[SUB3]] ; NOT_TUNIT_NPM-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -139,13 +139,13 @@ define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %. ; NOT_TUNIT_NPM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] ; NOT_TUNIT_NPM: omp.loop.exit: ; NOT_TUNIT_NPM-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; NOT_TUNIT_NPM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB0]], i32 [[TMP12]]) +; NOT_TUNIT_NPM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 [[TMP12]]) ; NOT_TUNIT_NPM-NEXT: br label [[OMP_PRECOND_END]] ; NOT_TUNIT_NPM: omp.precond.end: ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@.omp_outlined. -; IS__TUNIT_NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) +; IS__TUNIT_NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) ; IS__TUNIT_NPM-NEXT: entry: ; IS__TUNIT_NPM-NEXT: [[Q_ADDR:%.*]] = alloca i64, align 8 ; IS__TUNIT_NPM-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -164,7 +164,7 @@ define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %. 
; IS__TUNIT_NPM-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 ; IS__TUNIT_NPM-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 ; IS__TUNIT_NPM-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; IS__TUNIT_NPM-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB0:@.*]], i32 [[TMP5]], i32 34, i32* nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 1, i32 1) +; IS__TUNIT_NPM-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0:@.*]], i32 [[TMP5]], i32 34, i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 1, i32 1) ; IS__TUNIT_NPM-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 ; IS__TUNIT_NPM-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], [[SUB3]] ; IS__TUNIT_NPM-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -200,7 +200,7 @@ define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %. 
; IS__TUNIT_NPM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] ; IS__TUNIT_NPM: omp.loop.exit: ; IS__TUNIT_NPM-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; IS__TUNIT_NPM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB0]], i32 [[TMP12]]) +; IS__TUNIT_NPM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 [[TMP12]]) ; IS__TUNIT_NPM-NEXT: br label [[OMP_PRECOND_END]] ; IS__TUNIT_NPM: omp.precond.end: ; IS__TUNIT_NPM-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll index 7ac5b42d741723..bf3ee0ff8eec5b 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll @@ -36,10 +36,10 @@ define dso_local i32 @main() { ; IS__TUNIT____-NEXT: [[ALLOC1:%.*]] = alloca i8, align 8 ; IS__TUNIT____-NEXT: [[ALLOC2:%.*]] = alloca i8, align 8 ; IS__TUNIT____-NEXT: [[THREAD:%.*]] = alloca i64, align 8 -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @foo, i8* noalias nocapture nofree readnone align 536870912 undef) -; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @bar, i8* noalias nofree nonnull readnone align 8 dereferenceable(8) "no-capture-maybe-returned" undef) -; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @baz, i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(1) [[ALLOC1]]) -; IS__TUNIT____-NEXT: [[CALL3:%.*]] = call i32 
@pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @buz, i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[ALLOC2]]) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @foo, i8* noalias nocapture nofree noundef readnone align 536870912 undef) +; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @bar, i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(8) "no-capture-maybe-returned" undef) +; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @baz, i8* noalias nocapture nofree noundef nonnull readnone align 8 dereferenceable(1) [[ALLOC1]]) +; IS__TUNIT____-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @buz, i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[ALLOC2]]) ; IS__TUNIT____-NEXT: ret i32 0 ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@main() @@ -47,10 +47,10 @@ define dso_local i32 @main() { ; IS__CGSCC____-NEXT: [[ALLOC1:%.*]] = alloca i8, align 8 ; IS__CGSCC____-NEXT: [[ALLOC2:%.*]] = alloca i8, align 8 ; IS__CGSCC____-NEXT: [[THREAD:%.*]] = alloca i64, align 8 -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* 
noalias nocapture align 536870912 null, i8* (i8*)* nonnull @foo, i8* noalias nocapture nofree readnone align 536870912 null) -; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @bar, i8* noalias nofree nonnull readnone align 8 dereferenceable(8) bitcast (i8** @GlobalVPtr to i8*)) -; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @baz, i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(1) [[ALLOC1]]) -; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @buz, i8* noalias nofree nonnull readnone align 8 dereferenceable(1) [[ALLOC2]]) +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @foo, i8* noalias nocapture nofree noundef readnone align 536870912 null) +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @bar, i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(8) bitcast (i8** @GlobalVPtr to i8*)) +; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @baz, i8* noalias nocapture nofree noundef nonnull readnone align 8 dereferenceable(1) [[ALLOC1]]) +; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32 
@pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @buz, i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) [[ALLOC2]]) ; IS__CGSCC____-NEXT: ret i32 0 ; entry: @@ -69,13 +69,13 @@ declare !callback !0 dso_local i32 @pthread_create(i64*, %union.pthread_attr_t*, define internal i8* @foo(i8* %arg) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@foo -; IS__TUNIT____-SAME: (i8* noalias nofree readnone returned align 536870912 "no-capture-maybe-returned" [[ARG:%.*]]) +; IS__TUNIT____-SAME: (i8* noalias nofree noundef readnone returned align 536870912 "no-capture-maybe-returned" [[ARG:%.*]]) ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: ret i8* null ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@foo -; IS__CGSCC____-SAME: (i8* noalias nofree readnone returned align 536870912 "no-capture-maybe-returned" [[ARG:%.*]]) +; IS__CGSCC____-SAME: (i8* noalias nofree noundef readnone returned align 536870912 "no-capture-maybe-returned" [[ARG:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: ret i8* null ; @@ -86,13 +86,13 @@ entry: define internal i8* @bar(i8* %arg) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@bar -; IS__TUNIT____-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(8) "no-capture-maybe-returned" [[ARG:%.*]]) +; IS__TUNIT____-SAME: (i8* noalias nofree noundef nonnull readnone returned align 8 dereferenceable(8) "no-capture-maybe-returned" [[ARG:%.*]]) ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: ret i8* bitcast (i8** @GlobalVPtr to i8*) ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define 
{{[^@]+}}@bar -; IS__CGSCC____-SAME: (i8* nofree readnone returned "no-capture-maybe-returned" [[ARG:%.*]]) +; IS__CGSCC____-SAME: (i8* nofree noundef readnone returned "no-capture-maybe-returned" [[ARG:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: ret i8* bitcast (i8** @GlobalVPtr to i8*) ; @@ -103,13 +103,13 @@ entry: define internal i8* @baz(i8* %arg) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@baz -; IS__TUNIT____-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) +; IS__TUNIT____-SAME: (i8* noalias nofree noundef nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: ret i8* [[ARG]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@baz -; IS__CGSCC____-SAME: (i8* nofree nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) +; IS__CGSCC____-SAME: (i8* nofree noundef nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: ret i8* [[ARG]] ; @@ -120,13 +120,13 @@ entry: define internal i8* @buz(i8* %arg) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@buz -; IS__TUNIT____-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) +; IS__TUNIT____-SAME: (i8* noalias nofree noundef nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: ret i8* [[ARG]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@buz -; IS__CGSCC____-SAME: 
(i8* nofree nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) +; IS__CGSCC____-SAME: (i8* nofree noundef nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: ret i8* [[ARG]] ; diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll b/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll index c6af31713bfac9..0d472837ddb845 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll @@ -8,7 +8,7 @@ define internal i32* @incdec(i1 %C, i32* %V) { ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@incdec -; IS__TUNIT____-SAME: (i1 [[C:%.*]], i32* noalias nofree nonnull returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) +; IS__TUNIT____-SAME: (i1 [[C:%.*]], i32* noalias nofree noundef nonnull returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) ; IS__TUNIT____-NEXT: [[X:%.*]] = load i32, i32* [[V]], align 4 ; IS__TUNIT____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__TUNIT____: T: @@ -22,7 +22,7 @@ define internal i32* @incdec(i1 %C, i32* %V) { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@incdec -; IS__CGSCC____-SAME: (i1 [[C:%.*]], i32* nofree nonnull returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) +; IS__CGSCC____-SAME: (i1 [[C:%.*]], i32* nofree noundef nonnull returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) ; IS__CGSCC____-NEXT: [[X:%.*]] = load i32, i32* [[V]], align 4 ; IS__CGSCC____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__CGSCC____: T: @@ -78,7 +78,7 @@ define void @caller(i1 %C) personality i32 (...)* @__gxx_personality_v0 { ; 
IS__TUNIT____-LABEL: define {{[^@]+}}@caller ; IS__TUNIT____-SAME: (i1 [[C:%.*]]) [[ATTR2:#.*]] personality i32 (...)* @__gxx_personality_v0 ; IS__TUNIT____-NEXT: [[Q:%.*]] = alloca i32, align 4 -; IS__TUNIT____-NEXT: [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q]]) +; IS__TUNIT____-NEXT: [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q]]) ; IS__TUNIT____-NEXT: [[S1:%.*]] = call { i32, i32 } @foo(i32 1, i32 2) ; IS__TUNIT____-NEXT: [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0 ; IS__TUNIT____-NEXT: [[S2:%.*]] = call { i32, i32 } @foo(i32 3, i32 4) @@ -97,7 +97,7 @@ define void @caller(i1 %C) personality i32 (...)* @__gxx_personality_v0 { ; IS__CGSCC____-LABEL: define {{[^@]+}}@caller ; IS__CGSCC____-SAME: (i1 [[C:%.*]]) [[ATTR1:#.*]] personality i32 (...)* @__gxx_personality_v0 ; IS__CGSCC____-NEXT: [[Q:%.*]] = alloca i32, align 4 -; IS__CGSCC____-NEXT: [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree nonnull align 4 dereferenceable(4) [[Q]]) +; IS__CGSCC____-NEXT: [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[Q]]) ; IS__CGSCC____-NEXT: [[S1:%.*]] = call { i32, i32 } @foo(i32 1, i32 2) ; IS__CGSCC____-NEXT: [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0 ; IS__CGSCC____-NEXT: [[S2:%.*]] = call { i32, i32 } @foo(i32 3, i32 4) diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll b/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll index 904860e4921c7b..4405b7bc1b0955 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll @@ -26,7 +26,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define internal i32 @callee(i32* %thread_local_ptr, i32* 
%shared_ptr) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readonly willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@callee -; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[THREAD_LOCAL_PTR:%.*]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[SHARED_PTR:%.*]]) +; IS__TUNIT____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[THREAD_LOCAL_PTR:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[SHARED_PTR:%.*]]) ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[TMP:%.*]] = load i32, i32* [[THREAD_LOCAL_PTR]], align 4 ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* @gsh, align 4 @@ -35,7 +35,7 @@ define internal i32 @callee(i32* %thread_local_ptr, i32* %shared_ptr) { ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@callee -; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[THREAD_LOCAL_PTR:%.*]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[SHARED_PTR:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[THREAD_LOCAL_PTR:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[SHARED_PTR:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[TMP:%.*]] = load i32, i32* [[THREAD_LOCAL_PTR]], align 4 ; IS__CGSCC____-NEXT: [[TMP1:%.*]] = load i32, i32* @gsh, align 4 @@ -52,12 +52,12 @@ entry: define dso_local void @caller() { ; IS__TUNIT____-LABEL: define {{[^@]+}}@caller() ; IS__TUNIT____-NEXT: entry: -; IS__TUNIT____-NEXT: call void @broker(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) @gtl, i32 (i32*, i32*)* nonnull @callee, i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) undef) +; IS__TUNIT____-NEXT: call void @broker(i32* nocapture nofree noundef nonnull 
readonly align 4 dereferenceable(4) @gtl, i32 (i32*, i32*)* noundef nonnull @callee, i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) undef) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@caller() ; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: call void @broker(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) @gtl, i32 (i32*, i32*)* nonnull @callee, i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) @gsh) +; IS__CGSCC____-NEXT: call void @broker(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) @gtl, i32 (i32*, i32*)* noundef nonnull @callee, i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) @gsh) ; IS__CGSCC____-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/Attributor/align.ll b/llvm/test/Transforms/Attributor/align.ll index b31327b6a85231..7df160e817b5f9 100644 --- a/llvm/test/Transforms/Attributor/align.ll +++ b/llvm/test/Transforms/Attributor/align.ll @@ -149,7 +149,7 @@ define i32* @test6_2() #0 { define internal i8* @f1(i8* readnone %0) local_unnamed_addr #0 { ; IS__TUNIT____: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; IS__TUNIT____-LABEL: define {{[^@]+}}@f1 -; IS__TUNIT____-SAME: (i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr +; IS__TUNIT____-SAME: (i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr ; IS__TUNIT____-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP0]], null ; IS__TUNIT____-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP5:%.*]] ; IS__TUNIT____: 3: @@ -161,7 +161,7 @@ define internal i8* @f1(i8* readnone %0) local_unnamed_addr #0 { ; ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; IS__CGSCC____-LABEL: define {{[^@]+}}@f1 -; IS__CGSCC____-SAME: (i8* nofree nonnull readnone align 8 
dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr +; IS__CGSCC____-SAME: (i8* nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr ; IS__CGSCC____-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP0]], null ; IS__CGSCC____-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP5:%.*]] ; IS__CGSCC____: 3: @@ -191,7 +191,7 @@ define internal i8* @f2(i8* readnone %0) local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8* @a1, null ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP4:%.*]], label [[TMP2:%.*]] ; CHECK: 2: -; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) ; CHECK-NEXT: br label [[TMP6:%.*]] ; CHECK: 4: ; CHECK-NEXT: [[TMP5:%.*]] = tail call i8* @f3() @@ -224,7 +224,7 @@ define internal i8* @f3(i8* readnone %0) local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8* @a2, null ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP4:%.*]] ; CHECK: 2: -; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1(i8* noalias nofree nonnull readnone align 16 dereferenceable(1) "no-capture-maybe-returned" @a2) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1(i8* noalias nofree noundef nonnull readnone align 16 dereferenceable(1) "no-capture-maybe-returned" @a2) ; CHECK-NEXT: br label [[TMP4]] ; CHECK: 4: ; CHECK-NEXT: [[TMP5:%.*]] = phi i8* [ [[TMP3]], [[TMP2]] ], [ @a1, [[TMP0:%.*]] ] @@ -247,12 +247,12 @@ define internal i8* @f3(i8* readnone %0) local_unnamed_addr #0 { define align 4 i8* @test7() #0 { ; IS__TUNIT____: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; IS__TUNIT____-LABEL: define {{[^@]+}}@test7() -; IS__TUNIT____-NEXT: [[C:%.*]] = tail call i8* @f1(i8* noalias nofree nonnull readnone align 8 
dereferenceable(1) "no-capture-maybe-returned" @a1) +; IS__TUNIT____-NEXT: [[C:%.*]] = tail call i8* @f1(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) ; IS__TUNIT____-NEXT: ret i8* [[C]] ; ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; IS__CGSCC____-LABEL: define {{[^@]+}}@test7() -; IS__CGSCC____-NEXT: [[C:%.*]] = tail call nonnull align 8 dereferenceable(1) i8* @f1(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) @a1) +; IS__CGSCC____-NEXT: [[C:%.*]] = tail call nonnull align 8 dereferenceable(1) i8* @f1(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) @a1) ; IS__CGSCC____-NEXT: ret i8* [[C]] ; %c = tail call i8* @f1(i8* align 8 dereferenceable(1) @a1) @@ -264,7 +264,7 @@ define align 4 i8* @test7() #0 { define internal i8* @f1b(i8* readnone %0) local_unnamed_addr #0 { ; IS__TUNIT____: Function Attrs: nofree noinline nosync nounwind uwtable ; IS__TUNIT____-LABEL: define {{[^@]+}}@f1b -; IS__TUNIT____-SAME: (i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr +; IS__TUNIT____-SAME: (i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr ; IS__TUNIT____-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP0]], null ; IS__TUNIT____-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP5:%.*]] ; IS__TUNIT____: 3: @@ -278,7 +278,7 @@ define internal i8* @f1b(i8* readnone %0) local_unnamed_addr #0 { ; ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind uwtable ; IS__CGSCC____-LABEL: define {{[^@]+}}@f1b -; IS__CGSCC____-SAME: (i8* nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr +; IS__CGSCC____-SAME: (i8* nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr ; 
IS__CGSCC____-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP0]], null ; IS__CGSCC____-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP5:%.*]] ; IS__CGSCC____: 3: @@ -312,7 +312,7 @@ define internal i8* @f2b(i8* readnone %0) local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8* @a1, null ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP4:%.*]], label [[TMP2:%.*]] ; CHECK: 2: -; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1b(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1b(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) ; CHECK-NEXT: br label [[TMP6:%.*]] ; CHECK: 4: ; CHECK-NEXT: [[TMP5:%.*]] = tail call i8* @f3b() @@ -346,7 +346,7 @@ define internal i8* @f3b(i8* readnone %0) local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8* @a2, null ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP4:%.*]] ; CHECK: 2: -; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1b(i8* noalias nofree nonnull readnone align 16 dereferenceable(1) "no-capture-maybe-returned" @a2) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1b(i8* noalias nofree noundef nonnull readnone align 16 dereferenceable(1) "no-capture-maybe-returned" @a2) ; CHECK-NEXT: br label [[TMP4]] ; CHECK: 4: ; CHECK-NEXT: [[TMP5:%.*]] = phi i8* [ [[TMP3]], [[TMP2]] ], [ @a1, [[TMP0:%.*]] ] @@ -368,13 +368,13 @@ define align 4 i32* @test7b(i32* align 32 %p) #0 { ; IS__TUNIT____: Function Attrs: nofree noinline nosync nounwind uwtable ; IS__TUNIT____-LABEL: define {{[^@]+}}@test7b ; IS__TUNIT____-SAME: (i32* nofree readnone returned align 32 "no-capture-maybe-returned" [[P:%.*]]) -; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call i8* @f1b(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call i8* @f1b(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) 
"no-capture-maybe-returned" @a1) ; IS__TUNIT____-NEXT: ret i32* [[P]] ; ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind uwtable ; IS__CGSCC____-LABEL: define {{[^@]+}}@test7b ; IS__CGSCC____-SAME: (i32* nofree readnone returned align 32 "no-capture-maybe-returned" [[P:%.*]]) -; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call i8* @f1b(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) @a1) +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call i8* @f1b(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) @a1) ; IS__CGSCC____-NEXT: ret i32* [[P]] ; tail call i8* @f1b(i8* align 8 dereferenceable(1) @a1) @@ -384,12 +384,12 @@ define align 4 i32* @test7b(i32* align 32 %p) #0 { ; TEST 8 define void @test8_helper() { ; CHECK-LABEL: define {{[^@]+}}@test8_helper() -; CHECK-NEXT: [[PTR0:%.*]] = tail call i32* @unknown() -; CHECK-NEXT: [[PTR1:%.*]] = tail call align 4 i32* @unknown() -; CHECK-NEXT: [[PTR2:%.*]] = tail call align 8 i32* @unknown() -; CHECK-NEXT: tail call void @test8(i32* noalias nocapture readnone align 4 [[PTR1]], i32* noalias nocapture readnone align 4 [[PTR1]], i32* noalias nocapture readnone [[PTR0]]) -; CHECK-NEXT: tail call void @test8(i32* noalias nocapture readnone align 8 [[PTR2]], i32* noalias nocapture readnone align 4 [[PTR1]], i32* noalias nocapture readnone align 4 [[PTR1]]) -; CHECK-NEXT: tail call void @test8(i32* noalias nocapture readnone align 8 [[PTR2]], i32* noalias nocapture readnone align 4 [[PTR1]], i32* noalias nocapture readnone align 4 [[PTR1]]) +; CHECK-NEXT: [[PTR0:%.*]] = tail call noundef i32* @unknown() +; CHECK-NEXT: [[PTR1:%.*]] = tail call noundef align 4 i32* @unknown() +; CHECK-NEXT: [[PTR2:%.*]] = tail call noundef align 8 i32* @unknown() +; CHECK-NEXT: tail call void @test8(i32* noalias nocapture noundef readnone align 4 [[PTR1]], i32* noalias nocapture noundef readnone align 4 [[PTR1]], i32* noalias nocapture noundef readnone [[PTR0]]) +; CHECK-NEXT: tail call void @test8(i32* 
noalias nocapture noundef readnone align 8 [[PTR2]], i32* noalias nocapture noundef readnone align 4 [[PTR1]], i32* noalias nocapture noundef readnone align 4 [[PTR1]]) +; CHECK-NEXT: tail call void @test8(i32* noalias nocapture noundef readnone align 8 [[PTR2]], i32* noalias nocapture noundef readnone align 4 [[PTR1]], i32* noalias nocapture noundef readnone align 4 [[PTR1]]) ; CHECK-NEXT: ret void ; %ptr0 = tail call i32* @unknown() @@ -406,10 +406,10 @@ declare void @user_i32_ptr(i32* nocapture readnone) nounwind define internal void @test8(i32* %a, i32* %b, i32* %c) { ; IS__TUNIT____: Function Attrs: nounwind ; IS__TUNIT____-LABEL: define {{[^@]+}}@test8 -; IS__TUNIT____-SAME: (i32* noalias nocapture readnone align 4 [[A:%.*]], i32* noalias nocapture readnone align 4 [[B:%.*]], i32* noalias nocapture readnone [[C:%.*]]) -; IS__TUNIT____-NEXT: call void @user_i32_ptr(i32* noalias nocapture readnone align 4 [[A]]) -; IS__TUNIT____-NEXT: call void @user_i32_ptr(i32* noalias nocapture readnone align 4 [[B]]) -; IS__TUNIT____-NEXT: call void @user_i32_ptr(i32* noalias nocapture readnone [[C]]) +; IS__TUNIT____-SAME: (i32* noalias nocapture noundef readnone align 4 [[A:%.*]], i32* noalias nocapture noundef readnone align 4 [[B:%.*]], i32* noalias nocapture noundef readnone [[C:%.*]]) +; IS__TUNIT____-NEXT: call void @user_i32_ptr(i32* noalias nocapture noundef readnone align 4 [[A]]) +; IS__TUNIT____-NEXT: call void @user_i32_ptr(i32* noalias nocapture noundef readnone align 4 [[B]]) +; IS__TUNIT____-NEXT: call void @user_i32_ptr(i32* noalias nocapture noundef readnone [[C]]) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nounwind diff --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll index 951f9830ae59e5..7abb5fd241ce73 100644 --- a/llvm/test/Transforms/Attributor/callbacks.ll +++ b/llvm/test/Transforms/Attributor/callbacks.ll @@ -25,7 +25,7 @@ define void @t0_caller(i32* %a) { ; 
IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t0_caller @@ -37,7 +37,7 @@ define void @t0_caller(i32* %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t0_caller @@ -49,7 +49,7 @@ define void @t0_caller(i32* %a) { ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t0_caller @@ -61,7 +61,7 @@ define void @t0_caller(i32* %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -80,7 +80,7 @@ entry: define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { ; ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@t0_callback_callee -; NOT_TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; NOT_TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; NOT_TUNIT_NPM-NEXT: entry: ; NOT_TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; NOT_TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -89,7 +89,7 @@ define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t0_callback_callee -; IS__TUNIT_NPM-SAME: (i32* 
nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; IS__TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; IS__TUNIT_NPM-NEXT: entry: ; IS__TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS__TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -124,7 +124,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t1_caller @@ -136,7 +136,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t1_caller @@ -148,7 +148,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 99, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t1_caller @@ -160,7 +160,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -180,7 +180,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; ; NOT_TUNIT_NPM: Function Attrs: nosync ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@t1_callback_callee -; NOT_TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; NOT_TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; NOT_TUNIT_NPM-NEXT: entry: ; NOT_TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; NOT_TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -190,7 +190,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; ; IS__TUNIT_NPM: Function Attrs: nosync ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t1_callback_callee -; IS__TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* noalias nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; IS__TUNIT_NPM-SAME: (i32* nocapture 
nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* noalias nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; IS__TUNIT_NPM-NEXT: entry: ; IS__TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS__TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -224,7 +224,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t2_caller @@ -236,7 +236,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t2_caller @@ -248,7 +248,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 99, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t2_caller @@ -260,7 +260,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -281,7 +281,7 @@ entry: define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { ; ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@t2_callback_callee -; NOT_TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; NOT_TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; NOT_TUNIT_NPM-NEXT: entry: ; NOT_TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; NOT_TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -290,7 +290,7 @@ define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t2_callback_callee -; IS__TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; IS__TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 
dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; IS__TUNIT_NPM-NEXT: entry: ; IS__TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS__TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -324,8 +324,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t3_caller @@ -337,8 +337,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t3_caller @@ -350,8 +350,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 99, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 99, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t3_caller @@ -363,8 +363,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -386,7 +386,7 @@ entry: define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { ; ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@t3_callback_callee -; NOT_TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; NOT_TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; NOT_TUNIT_NPM-NEXT: entry: ; NOT_TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; NOT_TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -395,7 +395,7 @@ define internal void @t3_callback_callee(i32* 
%is_not_null, i32* %ptr, i32* %a, ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t3_callback_callee -; IS__TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; IS__TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; IS__TUNIT_NPM-NEXT: entry: ; IS__TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS__TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 diff --git a/llvm/test/Transforms/Attributor/depgraph.ll b/llvm/test/Transforms/Attributor/depgraph.ll index 059587789035e6..f7de3287b88359 100644 --- a/llvm/test/Transforms/Attributor/depgraph.ll +++ b/llvm/test/Transforms/Attributor/depgraph.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes=attributor-cgscc -disable-output -attributor-print-dep < %s 2>&1 | FileCheck %s --check-prefixes=GRAPH ; RUN: opt -passes=attributor-cgscc -disable-output -attributor-dump-dep-graph -attributor-depgraph-dot-filename-prefix=%t < %s 2>/dev/null ; RUN: FileCheck %s -input-file=%t_0.dot --check-prefix=DOT diff --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll index 9a995396e516ef..816d3df44560d9 100644 --- a/llvm/test/Transforms/Attributor/dereferenceable-1.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll @@ -275,20 +275,37 @@ if.false: } define void @f7_2(i1 %c) { -; CHECK: Function Attrs: nounwind willreturn -; CHECK-LABEL: define 
{{[^@]+}}@f7_2 -; CHECK-SAME: (i1 [[C:%.*]]) -; CHECK-NEXT: [[PTR:%.*]] = tail call nonnull align 4 dereferenceable(4) i32* @unkown_ptr() -; CHECK-NEXT: [[A:%.*]] = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(4) [[PTR]]) -; CHECK-NEXT: [[B:%.*]] = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(4) [[PTR]]) -; CHECK-NEXT: br i1 [[C]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] -; CHECK: if.true: -; CHECK-NEXT: [[C:%.*]] = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(8) [[PTR]]) -; CHECK-NEXT: [[D:%.*]] = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(8) [[PTR]]) -; CHECK-NEXT: [[E:%.*]] = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(8) [[PTR]]) -; CHECK-NEXT: ret void -; CHECK: if.false: -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM: Function Attrs: nounwind willreturn +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@f7_2 +; NOT_CGSCC_NPM-SAME: (i1 [[C:%.*]]) +; NOT_CGSCC_NPM-NEXT: [[PTR:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) i32* @unkown_ptr() +; NOT_CGSCC_NPM-NEXT: [[A:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(4) [[PTR]]) +; NOT_CGSCC_NPM-NEXT: [[ARG_A_0:%.*]] = load i32, i32* [[PTR]], align 4 +; NOT_CGSCC_NPM-NEXT: [[B:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(4) [[PTR]]) +; NOT_CGSCC_NPM-NEXT: br i1 [[C]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; NOT_CGSCC_NPM: if.true: +; NOT_CGSCC_NPM-NEXT: [[C:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(8) [[PTR]]) +; NOT_CGSCC_NPM-NEXT: [[D:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(8) [[PTR]]) +; NOT_CGSCC_NPM-NEXT: [[E:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(8) [[PTR]]) +; NOT_CGSCC_NPM-NEXT: ret void +; NOT_CGSCC_NPM: if.false: +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC_NPM: Function Attrs: nounwind willreturn +; IS__CGSCC_NPM-LABEL: define 
{{[^@]+}}@f7_2 +; IS__CGSCC_NPM-SAME: (i1 [[C:%.*]]) +; IS__CGSCC_NPM-NEXT: [[PTR:%.*]] = tail call nonnull align 4 dereferenceable(4) i32* @unkown_ptr() +; IS__CGSCC_NPM-NEXT: [[A:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC_NPM-NEXT: [[ARG_A_0:%.*]] = load i32, i32* [[PTR]], align 4 +; IS__CGSCC_NPM-NEXT: [[B:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC_NPM-NEXT: br i1 [[C]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; IS__CGSCC_NPM: if.true: +; IS__CGSCC_NPM-NEXT: [[C:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(8) [[PTR]]) +; IS__CGSCC_NPM-NEXT: [[D:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(8) [[PTR]]) +; IS__CGSCC_NPM-NEXT: [[E:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(8) [[PTR]]) +; IS__CGSCC_NPM-NEXT: ret void +; IS__CGSCC_NPM: if.false: +; IS__CGSCC_NPM-NEXT: ret void ; %ptr = tail call i32* @unkown_ptr() %A = tail call i32 @unkown_f(i32* %ptr) @@ -1016,23 +1033,41 @@ define void @nonnull_assume_call(i8* %arg1, i8* %arg2, i8* %arg3, i8* %arg4) { ; ATTRIBUTOR-NEXT: call void @unknown() ; ATTRIBUTOR-NEXT: ret void ; -; CHECK-LABEL: define {{[^@]+}}@nonnull_assume_call -; CHECK-SAME: (i8* [[ARG1:%.*]], i8* [[ARG2:%.*]], i8* [[ARG3:%.*]], i8* [[ARG4:%.*]]) -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: [[P:%.*]] = call nonnull dereferenceable(101) i32* @unkown_ptr() -; CHECK-NEXT: call void @unknown_use32(i32* nonnull dereferenceable(101) [[P]]) -; CHECK-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(42) [[ARG4]]) -; CHECK-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) -; CHECK-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(31) [[ARG2]]) -; CHECK-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* 
[[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i32* [[P]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] -; CHECK-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) -; CHECK-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(31) [[ARG2]]) -; CHECK-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) -; CHECK-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(42) [[ARG4]]) -; CHECK-NEXT: call void @unknown_use32(i32* nonnull dereferenceable(101) [[P]]) -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret void +; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@nonnull_assume_call +; NOT_CGSCC_OPM-SAME: (i8* [[ARG1:%.*]], i8* [[ARG2:%.*]], i8* [[ARG3:%.*]], i8* [[ARG4:%.*]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown() +; NOT_CGSCC_OPM-NEXT: [[P:%.*]] = call noundef nonnull dereferenceable(101) i32* @unkown_ptr() +; NOT_CGSCC_OPM-NEXT: call void @unknown_use32(i32* noundef nonnull dereferenceable(101) [[P]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(42) [[ARG4]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(31) [[ARG2]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) +; NOT_CGSCC_OPM-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i32* [[P]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] +; NOT_CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(31) [[ARG2]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(42) 
[[ARG4]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown_use32(i32* noundef nonnull dereferenceable(101) [[P]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown() +; NOT_CGSCC_OPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@nonnull_assume_call +; IS__CGSCC_OPM-SAME: (i8* [[ARG1:%.*]], i8* [[ARG2:%.*]], i8* [[ARG3:%.*]], i8* [[ARG4:%.*]]) +; IS__CGSCC_OPM-NEXT: call void @unknown() +; IS__CGSCC_OPM-NEXT: [[P:%.*]] = call nonnull dereferenceable(101) i32* @unkown_ptr() +; IS__CGSCC_OPM-NEXT: call void @unknown_use32(i32* noundef nonnull dereferenceable(101) [[P]]) +; IS__CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(42) [[ARG4]]) +; IS__CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) +; IS__CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(31) [[ARG2]]) +; IS__CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i32* [[P]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] +; IS__CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) +; IS__CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(31) [[ARG2]]) +; IS__CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) +; IS__CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(42) [[ARG4]]) +; IS__CGSCC_OPM-NEXT: call void @unknown_use32(i32* noundef nonnull dereferenceable(101) [[P]]) +; IS__CGSCC_OPM-NEXT: call void @unknown() +; IS__CGSCC_OPM-NEXT: ret void ; call void @unknown() %p = call i32* @unkown_ptr() diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll index 54e293e73179f2..3451fa8a59f44f 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack.ll +++ 
b/llvm/test/Transforms/Attributor/heap_to_stack.ll @@ -43,12 +43,19 @@ define void @nofree_arg_only(i8* %p1, i8* %p2) { ; TEST 1 - negative, pointer freed in another function. define void @test1() { -; CHECK-LABEL: define {{[^@]+}}@test1() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: tail call void @nocapture_func_frees_pointer(i8* noalias nocapture [[TMP1]]) -; CHECK-NEXT: tail call void (...) @func_throws() -; CHECK-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test1() +; NOT_CGSCC_NPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; NOT_CGSCC_NPM-NEXT: tail call void @nocapture_func_frees_pointer(i8* noalias nocapture noundef [[TMP1]]) +; NOT_CGSCC_NPM-NEXT: tail call void (...) @func_throws() +; NOT_CGSCC_NPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test1() +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_NPM-NEXT: tail call void @nocapture_func_frees_pointer(i8* noalias nocapture noundef [[TMP1]]) +; IS__CGSCC_NPM-NEXT: tail call void (...) @func_throws() +; IS__CGSCC_NPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__CGSCC_NPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @nocapture_func_frees_pointer(i8* %1) @@ -60,11 +67,17 @@ define void @test1() { ; TEST 2 - negative, call to a sync function. 
define void @test2() { -; CHECK-LABEL: define {{[^@]+}}@test2() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: tail call void @sync_func(i8* [[TMP1]]) -; CHECK-NEXT: tail call void @free(i8* nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; IS__TUNIT____-LABEL: define {{[^@]+}}@test2() +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT____-NEXT: tail call void @sync_func(i8* noundef [[TMP1]]) +; IS__TUNIT____-NEXT: tail call void @free(i8* nocapture noundef [[TMP1]]) +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test2() +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: tail call void @sync_func(i8* noundef [[TMP1]]) +; IS__CGSCC____-NEXT: tail call void @free(i8* nocapture noundef [[TMP1]]) +; IS__CGSCC____-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @sync_func(i8* %1) @@ -75,16 +88,22 @@ define void @test2() { ; TEST 3 - 1 malloc, 1 free define void @test3() { -; IS________OPM-LABEL: define {{[^@]+}}@test3() -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) -; IS________OPM-NEXT: ret void +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test3() +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__TUNIT_OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test3() ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 -; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) +; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias 
nocapture nofree noundef [[TMP1]]) ; IS________NPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test3() +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @no_sync_func(i8* %1) @@ -93,18 +112,25 @@ define void @test3() { } define void @test3a(i8* %p) { -; IS________OPM-LABEL: define {{[^@]+}}@test3a -; IS________OPM-SAME: (i8* nocapture [[P:%.*]]) -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree [[TMP1]], i8* nocapture [[P]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) -; IS________OPM-NEXT: ret void +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test3a +; IS__TUNIT_OPM-SAME: (i8* nocapture [[P:%.*]]) +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree noundef [[TMP1]], i8* nocapture [[P]]) +; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__TUNIT_OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test3a ; IS________NPM-SAME: (i8* nocapture [[P:%.*]]) ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 -; IS________NPM-NEXT: tail call void @nofree_arg_only(i8* noalias nocapture nofree [[TMP1]], i8* nocapture [[P]]) +; IS________NPM-NEXT: tail call void @nofree_arg_only(i8* noalias nocapture nofree noundef [[TMP1]], i8* nocapture [[P]]) ; IS________NPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test3a +; IS__CGSCC_OPM-SAME: (i8* nocapture [[P:%.*]]) +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: tail call 
void @nofree_arg_only(i8* nocapture nofree noundef [[TMP1]], i8* nocapture [[P]]) +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @nofree_arg_only(i8* %1, i8* %p) @@ -117,15 +143,15 @@ declare noalias i8* @aligned_alloc(i64, i64) define void @test3b(i8* %p) { ; IS________OPM-LABEL: define {{[^@]+}}@test3b ; IS________OPM-SAME: (i8* nocapture [[P:%.*]]) -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @aligned_alloc(i64 32, i64 128) -; IS________OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree [[TMP1]], i8* nocapture [[P]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) +; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @aligned_alloc(i64 32, i64 128) +; IS________OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree noundef [[TMP1]], i8* nocapture [[P]]) +; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test3b ; IS________NPM-SAME: (i8* nocapture [[P:%.*]]) ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 128, align 32 -; IS________NPM-NEXT: tail call void @nofree_arg_only(i8* noalias nocapture nofree [[TMP1]], i8* nocapture [[P]]) +; IS________NPM-NEXT: tail call void @nofree_arg_only(i8* noalias nocapture nofree noundef [[TMP1]], i8* nocapture [[P]]) ; IS________NPM-NEXT: ret void ; %1 = tail call noalias i8* @aligned_alloc(i64 32, i64 128) @@ -136,11 +162,17 @@ define void @test3b(i8* %p) { ; leave alone non-constant alignments. 
define void @test3c(i64 %alignment) { -; CHECK-LABEL: define {{[^@]+}}@test3c -; CHECK-SAME: (i64 [[ALIGNMENT:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @aligned_alloc(i64 [[ALIGNMENT]], i64 128) -; CHECK-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@test3c +; NOT_CGSCC_OPM-SAME: (i64 [[ALIGNMENT:%.*]]) +; NOT_CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @aligned_alloc(i64 [[ALIGNMENT]], i64 128) +; NOT_CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; NOT_CGSCC_OPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test3c +; IS__CGSCC_OPM-SAME: (i64 [[ALIGNMENT:%.*]]) +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @aligned_alloc(i64 [[ALIGNMENT]], i64 128) +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret void ; %1 = tail call noalias i8* @aligned_alloc(i64 %alignment, i64 128) tail call void @free(i8* %1) @@ -151,16 +183,16 @@ declare noalias i8* @calloc(i64, i64) define void @test0() { ; IS________OPM-LABEL: define {{[^@]+}}@test0() -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @calloc(i64 2, i64 4) -; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) +; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @calloc(i64 2, i64 4) +; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test0() ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 8, align 1 ; IS________NPM-NEXT: [[CALLOC_BC:%.*]] = bitcast i8* [[TMP1]] to i8* ; IS________NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALLOC_BC]], i8 0, i64 8, i1 
false) -; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) +; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) ; IS________NPM-NEXT: ret void ; %1 = tail call noalias i8* @calloc(i64 2, i64 4) @@ -171,15 +203,20 @@ define void @test0() { ; TEST 4 define void @test4() { -; IS________OPM-LABEL: define {{[^@]+}}@test4() -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree [[TMP1]]) -; IS________OPM-NEXT: ret void +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test4() +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT_OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test4() ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 -; IS________NPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree [[TMP1]]) +; IS________NPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP1]]) ; IS________NPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test4() +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @nofree_func(i8* %1) @@ -190,20 +227,20 @@ define void @test4() { ; are in nofree functions and are not captured define void @test5(i32, i8* %p) { -; IS________OPM-LABEL: define {{[^@]+}}@test5 -; IS________OPM-SAME: (i32 [[TMP0:%.*]], i8* nocapture [[P:%.*]]) -; IS________OPM-NEXT: [[TMP2:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 -; IS________OPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] -; 
IS________OPM: 4: -; IS________OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree [[TMP2]]) -; IS________OPM-NEXT: br label [[TMP6:%.*]] -; IS________OPM: 5: -; IS________OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree [[TMP2]], i8* nocapture [[P]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP2]]) -; IS________OPM-NEXT: br label [[TMP6]] -; IS________OPM: 6: -; IS________OPM-NEXT: ret void +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test5 +; IS__TUNIT_OPM-SAME: (i32 [[TMP0:%.*]], i8* nocapture [[P:%.*]]) +; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; IS__TUNIT_OPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] +; IS__TUNIT_OPM: 4: +; IS__TUNIT_OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP2]]) +; IS__TUNIT_OPM-NEXT: br label [[TMP6:%.*]] +; IS__TUNIT_OPM: 5: +; IS__TUNIT_OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree noundef [[TMP2]], i8* nocapture [[P]]) +; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP2]]) +; IS__TUNIT_OPM-NEXT: br label [[TMP6]] +; IS__TUNIT_OPM: 6: +; IS__TUNIT_OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test5 ; IS________NPM-SAME: (i32 [[TMP0:%.*]], i8* nocapture [[P:%.*]]) @@ -211,13 +248,28 @@ define void @test5(i32, i8* %p) { ; IS________NPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 ; IS________NPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] ; IS________NPM: 4: -; IS________NPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree [[TMP2]]) +; IS________NPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP2]]) ; IS________NPM-NEXT: br label [[TMP6:%.*]] ; IS________NPM: 5: -; IS________NPM-NEXT: tail call void @nofree_arg_only(i8* noalias nocapture nofree [[TMP2]], i8* nocapture [[P]]) +; IS________NPM-NEXT: tail call void 
@nofree_arg_only(i8* noalias nocapture nofree noundef [[TMP2]], i8* nocapture [[P]]) ; IS________NPM-NEXT: br label [[TMP6]] ; IS________NPM: 6: ; IS________NPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test5 +; IS__CGSCC_OPM-SAME: (i32 [[TMP0:%.*]], i8* nocapture [[P:%.*]]) +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; IS__CGSCC_OPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] +; IS__CGSCC_OPM: 4: +; IS__CGSCC_OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP2]]) +; IS__CGSCC_OPM-NEXT: br label [[TMP6:%.*]] +; IS__CGSCC_OPM: 5: +; IS__CGSCC_OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree noundef [[TMP2]], i8* nocapture [[P]]) +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP2]]) +; IS__CGSCC_OPM-NEXT: br label [[TMP6]] +; IS__CGSCC_OPM: 6: +; IS__CGSCC_OPM-NEXT: ret void ; %2 = tail call noalias i8* @malloc(i64 4) %3 = icmp eq i32 %0, 0 @@ -239,20 +291,20 @@ define void @test5(i32, i8* %p) { ; TEST 6 - all exit paths have a call to free define void @test6(i32) { -; IS________OPM-LABEL: define {{[^@]+}}@test6 -; IS________OPM-SAME: (i32 [[TMP0:%.*]]) -; IS________OPM-NEXT: [[TMP2:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 -; IS________OPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] -; IS________OPM: 4: -; IS________OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree [[TMP2]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP2]]) -; IS________OPM-NEXT: br label [[TMP6:%.*]] -; IS________OPM: 5: -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP2]]) -; IS________OPM-NEXT: br label [[TMP6]] -; IS________OPM: 6: -; IS________OPM-NEXT: ret void +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test6 +; IS__TUNIT_OPM-SAME: (i32 [[TMP0:%.*]]) +; 
IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; IS__TUNIT_OPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] +; IS__TUNIT_OPM: 4: +; IS__TUNIT_OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP2]]) +; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP2]]) +; IS__TUNIT_OPM-NEXT: br label [[TMP6:%.*]] +; IS__TUNIT_OPM: 5: +; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP2]]) +; IS__TUNIT_OPM-NEXT: br label [[TMP6]] +; IS__TUNIT_OPM: 6: +; IS__TUNIT_OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test6 ; IS________NPM-SAME: (i32 [[TMP0:%.*]]) @@ -260,12 +312,27 @@ define void @test6(i32) { ; IS________NPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 ; IS________NPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] ; IS________NPM: 4: -; IS________NPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree [[TMP2]]) +; IS________NPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP2]]) ; IS________NPM-NEXT: br label [[TMP6:%.*]] ; IS________NPM: 5: ; IS________NPM-NEXT: br label [[TMP6]] ; IS________NPM: 6: ; IS________NPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test6 +; IS__CGSCC_OPM-SAME: (i32 [[TMP0:%.*]]) +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; IS__CGSCC_OPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] +; IS__CGSCC_OPM: 4: +; IS__CGSCC_OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP2]]) +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP2]]) +; IS__CGSCC_OPM-NEXT: br label [[TMP6:%.*]] +; IS__CGSCC_OPM: 5: +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP2]]) +; IS__CGSCC_OPM-NEXT: br label 
[[TMP6]] +; IS__CGSCC_OPM: 6: +; IS__CGSCC_OPM-NEXT: ret void ; %2 = tail call noalias i8* @malloc(i64 4) %3 = icmp eq i32 %0, 0 @@ -308,14 +375,23 @@ define void @test7() { ; TEST 8 - Negative: bitcast pointer used in capture function define void @test8() { -; CHECK-LABEL: define {{[^@]+}}@test8() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 -; CHECK-NEXT: tail call void @foo(i32* align 4 [[TMP2]]) -; CHECK-NEXT: tail call void @free(i8* nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) -; CHECK-NEXT: ret void +; IS__TUNIT____-LABEL: define {{[^@]+}}@test8() +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__TUNIT____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__TUNIT____-NEXT: tail call void @foo(i32* noundef align 4 [[TMP2]]) +; IS__TUNIT____-NEXT: tail call void @free(i8* nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test8() +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__CGSCC____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__CGSCC____-NEXT: tail call void @foo(i32* noundef align 4 [[TMP2]]) +; IS__CGSCC____-NEXT: tail call void @free(i8* nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__CGSCC____-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @no_sync_func(i8* %1) @@ -329,14 +405,23 @@ define void 
@test8() { ; TEST 9 - FIXME: malloc should be converted. define void @test9() { -; CHECK-LABEL: define {{[^@]+}}@test9() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 -; CHECK-NEXT: tail call void @foo_nounw(i32* nofree align 4 [[TMP2]]) -; CHECK-NEXT: tail call void @free(i8* nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) -; CHECK-NEXT: ret void +; IS__TUNIT____-LABEL: define {{[^@]+}}@test9() +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__TUNIT____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__TUNIT____-NEXT: tail call void @foo_nounw(i32* nofree noundef align 4 [[TMP2]]) +; IS__TUNIT____-NEXT: tail call void @free(i8* nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test9() +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__CGSCC____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__CGSCC____-NEXT: tail call void @foo_nounw(i32* nofree noundef align 4 [[TMP2]]) +; IS__CGSCC____-NEXT: tail call void @free(i8* nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__CGSCC____-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @no_sync_func(i8* %1) @@ -351,22 +436,31 @@ define void @test9() { ; TEST 10 - 1 malloc, 1 free define i32 @test10() { -; IS________OPM-LABEL: define {{[^@]+}}@test10() -; IS________OPM-NEXT: 
[[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; IS________OPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -; IS________OPM-NEXT: store i32 10, i32* [[TMP2]], align 4 -; IS________OPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) -; IS________OPM-NEXT: ret i32 [[TMP3]] +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test10() +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__TUNIT_OPM-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__TUNIT_OPM-NEXT: ret i32 [[TMP3]] ; ; IS________NPM-LABEL: define {{[^@]+}}@test10() ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 -; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) +; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) ; IS________NPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* ; IS________NPM-NEXT: store i32 10, i32* [[TMP2]], align 4 ; IS________NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 ; IS________NPM-NEXT: ret i32 [[TMP3]] +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test10() +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__CGSCC_OPM-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__CGSCC_OPM-NEXT: 
[[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret i32 [[TMP3]] ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @no_sync_func(i8* %1) @@ -378,24 +472,34 @@ define i32 @test10() { } define i32 @test_lifetime() { -; IS________OPM-LABEL: define {{[^@]+}}@test_lifetime() -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; IS________OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) -; IS________OPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -; IS________OPM-NEXT: store i32 10, i32* [[TMP2]], align 4 -; IS________OPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) -; IS________OPM-NEXT: ret i32 [[TMP3]] +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test_lifetime() +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__TUNIT_OPM-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__TUNIT_OPM-NEXT: ret i32 [[TMP3]] ; ; IS________NPM-LABEL: define {{[^@]+}}@test_lifetime() ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 -; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias 
nocapture nofree [[TMP1]]) -; IS________NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS________NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) ; IS________NPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* ; IS________NPM-NEXT: store i32 10, i32* [[TMP2]], align 4 ; IS________NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 ; IS________NPM-NEXT: ret i32 [[TMP3]] +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test_lifetime() +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__CGSCC_OPM-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret i32 [[TMP3]] ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @no_sync_func(i8* %1) @@ -410,11 +514,17 @@ define i32 @test_lifetime() { ; TEST 11 define void @test11() { -; CHECK-LABEL: define {{[^@]+}}@test11() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: tail call void @sync_will_return(i8* [[TMP1]]) -; CHECK-NEXT: tail call void @free(i8* nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; IS__TUNIT____-LABEL: define {{[^@]+}}@test11() +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT____-NEXT: tail call void @sync_will_return(i8* noundef [[TMP1]]) 
+; IS__TUNIT____-NEXT: tail call void @free(i8* nocapture noundef [[TMP1]]) +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test11() +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: tail call void @sync_will_return(i8* noundef [[TMP1]]) +; IS__CGSCC____-NEXT: tail call void @free(i8* nocapture noundef [[TMP1]]) +; IS__CGSCC____-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @sync_will_return(i8* %1) @@ -424,36 +534,37 @@ define void @test11() { ; TEST 12 define i32 @irreducible_cfg(i32 %0) { -; IS________OPM-LABEL: define {{[^@]+}}@irreducible_cfg -; IS________OPM-SAME: (i32 [[TMP0:%.*]]) -; IS________OPM-NEXT: [[TMP2:%.*]] = call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32* -; IS________OPM-NEXT: store i32 10, i32* [[TMP3]], align 4 -; IS________OPM-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP0]], 1 -; IS________OPM-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP7:%.*]] -; IS________OPM: 5: -; IS________OPM-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP0]], 5 -; IS________OPM-NEXT: br label [[TMP13:%.*]] -; IS________OPM: 7: -; IS________OPM-NEXT: br label [[TMP8:%.*]] -; IS________OPM: 8: -; IS________OPM-NEXT: [[DOT0:%.*]] = phi i32 [ [[TMP14:%.*]], [[TMP13]] ], [ 1, [[TMP7]] ] -; IS________OPM-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4 -; IS________OPM-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1 -; IS________OPM-NEXT: store i32 [[TMP10]], i32* [[TMP3]], align 4 -; IS________OPM-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], 0 -; IS________OPM-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP15:%.*]] -; IS________OPM: 12: -; IS________OPM-NEXT: br label [[TMP13]] -; IS________OPM: 13: -; IS________OPM-NEXT: [[DOT1:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[DOT0]], [[TMP12]] ] -; IS________OPM-NEXT: [[TMP14]] = add nsw i32 [[DOT1]], 1 -; IS________OPM-NEXT: br label [[TMP8]] -; IS________OPM: 15: -; 
IS________OPM-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP3]] to i8* -; IS________OPM-NEXT: call void @free(i8* nocapture [[TMP16]]) -; IS________OPM-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP3]], align 4 -; IS________OPM-NEXT: ret i32 [[TMP17]] +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@irreducible_cfg +; IS__TUNIT_OPM-SAME: (i32 [[TMP0:%.*]]) +; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32* +; IS__TUNIT_OPM-NEXT: store i32 10, i32* [[TMP3]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP0]], 1 +; IS__TUNIT_OPM-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP7:%.*]] +; IS__TUNIT_OPM: 5: +; IS__TUNIT_OPM-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP0]], 5 +; IS__TUNIT_OPM-NEXT: br label [[TMP13:%.*]] +; IS__TUNIT_OPM: 7: +; IS__TUNIT_OPM-NEXT: br label [[TMP8:%.*]] +; IS__TUNIT_OPM: 8: +; IS__TUNIT_OPM-NEXT: [[DOT0:%.*]] = phi i32 [ [[TMP14:%.*]], [[TMP13]] ], [ 1, [[TMP7]] ] +; IS__TUNIT_OPM-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1 +; IS__TUNIT_OPM-NEXT: store i32 [[TMP10]], i32* [[TMP3]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], 0 +; IS__TUNIT_OPM-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP15:%.*]] +; IS__TUNIT_OPM: 12: +; IS__TUNIT_OPM-NEXT: br label [[TMP13]] +; IS__TUNIT_OPM: 13: +; IS__TUNIT_OPM-NEXT: [[DOT1:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[DOT0]], [[TMP12]] ] +; IS__TUNIT_OPM-NEXT: [[TMP14]] = add nsw i32 [[DOT1]], 1 +; IS__TUNIT_OPM-NEXT: br label [[TMP8]] +; IS__TUNIT_OPM: 15: +; IS__TUNIT_OPM-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP3]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP3]] to i8* +; IS__TUNIT_OPM-NEXT: call void @free(i8* nocapture noundef [[TMP17]]) +; IS__TUNIT_OPM-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP3]], align 4 +; IS__TUNIT_OPM-NEXT: ret i32 [[TMP18]] ; ; IS________NPM-LABEL: define 
{{[^@]+}}@irreducible_cfg ; IS________NPM-SAME: (i32 [[TMP0:%.*]]) @@ -484,6 +595,38 @@ define i32 @irreducible_cfg(i32 %0) { ; IS________NPM-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP3]] to i8* ; IS________NPM-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP3]], align 4 ; IS________NPM-NEXT: ret i32 [[TMP17]] +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@irreducible_cfg +; IS__CGSCC_OPM-SAME: (i32 [[TMP0:%.*]]) +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32* +; IS__CGSCC_OPM-NEXT: store i32 10, i32* [[TMP3]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP0]], 1 +; IS__CGSCC_OPM-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP7:%.*]] +; IS__CGSCC_OPM: 5: +; IS__CGSCC_OPM-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP0]], 5 +; IS__CGSCC_OPM-NEXT: br label [[TMP13:%.*]] +; IS__CGSCC_OPM: 7: +; IS__CGSCC_OPM-NEXT: br label [[TMP8:%.*]] +; IS__CGSCC_OPM: 8: +; IS__CGSCC_OPM-NEXT: [[DOT0:%.*]] = phi i32 [ [[TMP14:%.*]], [[TMP13]] ], [ 1, [[TMP7]] ] +; IS__CGSCC_OPM-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1 +; IS__CGSCC_OPM-NEXT: store i32 [[TMP10]], i32* [[TMP3]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], 0 +; IS__CGSCC_OPM-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP15:%.*]] +; IS__CGSCC_OPM: 12: +; IS__CGSCC_OPM-NEXT: br label [[TMP13]] +; IS__CGSCC_OPM: 13: +; IS__CGSCC_OPM-NEXT: [[DOT1:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[DOT0]], [[TMP12]] ] +; IS__CGSCC_OPM-NEXT: [[TMP14]] = add nsw i32 [[DOT1]], 1 +; IS__CGSCC_OPM-NEXT: br label [[TMP8]] +; IS__CGSCC_OPM: 15: +; IS__CGSCC_OPM-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP3]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP3]] to i8* +; IS__CGSCC_OPM-NEXT: call void @free(i8* nocapture noundef [[TMP17]]) +; IS__CGSCC_OPM-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP3]], align 4 +; 
IS__CGSCC_OPM-NEXT: ret i32 [[TMP18]] ; %2 = call noalias i8* @malloc(i64 4) %3 = bitcast i8* %2 to i32* @@ -588,14 +731,23 @@ define i32 @malloc_in_loop(i32 %0) { ; Malloc/Calloc too large define i32 @test13() { -; CHECK-LABEL: define {{[^@]+}}@test13() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 256) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -; CHECK-NEXT: tail call void @free(i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) -; CHECK-NEXT: ret i32 [[TMP3]] +; IS__TUNIT____-LABEL: define {{[^@]+}}@test13() +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 256) +; IS__TUNIT____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__TUNIT____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__TUNIT____-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__TUNIT____-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__TUNIT____-NEXT: ret i32 [[TMP3]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test13() +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 256) +; IS__CGSCC____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__CGSCC____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__CGSCC____-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__CGSCC____-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__CGSCC____-NEXT: ret i32 [[TMP3]] ; %1 = tail call noalias i8* @malloc(i64 256) tail call void @no_sync_func(i8* %1) @@ -607,14 +759,23 @@ 
define i32 @test13() { } define i32 @test_sle() { -; CHECK-LABEL: define {{[^@]+}}@test_sle() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 -1) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -; CHECK-NEXT: tail call void @free(i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) -; CHECK-NEXT: ret i32 [[TMP3]] +; IS__TUNIT____-LABEL: define {{[^@]+}}@test_sle() +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 -1) +; IS__TUNIT____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__TUNIT____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__TUNIT____-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__TUNIT____-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__TUNIT____-NEXT: ret i32 [[TMP3]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test_sle() +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 -1) +; IS__CGSCC____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__CGSCC____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__CGSCC____-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__CGSCC____-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__CGSCC____-NEXT: ret i32 [[TMP3]] ; %1 = tail call noalias i8* @malloc(i64 -1) tail call void @no_sync_func(i8* %1) @@ -626,14 +787,23 @@ define i32 @test_sle() { } define i32 @test_overflow() { -; CHECK-LABEL: define {{[^@]+}}@test_overflow() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* 
@calloc(i64 65537, i64 65537) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -; CHECK-NEXT: tail call void @free(i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) -; CHECK-NEXT: ret i32 [[TMP3]] +; IS__TUNIT____-LABEL: define {{[^@]+}}@test_overflow() +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @calloc(i64 65537, i64 65537) +; IS__TUNIT____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__TUNIT____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__TUNIT____-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__TUNIT____-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__TUNIT____-NEXT: ret i32 [[TMP3]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test_overflow() +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @calloc(i64 65537, i64 65537) +; IS__CGSCC____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__CGSCC____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__CGSCC____-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__CGSCC____-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__CGSCC____-NEXT: ret i32 [[TMP3]] ; %1 = tail call noalias i8* @calloc(i64 65537, i64 65537) tail call void @no_sync_func(i8* %1) @@ -645,11 +815,17 @@ define i32 @test_overflow() { } define void @test14() { -; CHECK-LABEL: define {{[^@]+}}@test14() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @calloc(i64 64, i64 4) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture 
nofree [[TMP1]]) -; CHECK-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@test14() +; NOT_CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @calloc(i64 64, i64 4) +; NOT_CGSCC_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; NOT_CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; NOT_CGSCC_OPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test14() +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @calloc(i64 64, i64 4) +; IS__CGSCC_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret void ; %1 = tail call noalias i8* @calloc(i64 64, i64 4) tail call void @no_sync_func(i8* %1) @@ -658,12 +834,19 @@ define void @test14() { } define void @test15(i64 %S) { -; CHECK-LABEL: define {{[^@]+}}@test15 -; CHECK-SAME: (i64 [[S:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 [[S]]) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; CHECK-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; IS__TUNIT____-LABEL: define {{[^@]+}}@test15 +; IS__TUNIT____-SAME: (i64 [[S:%.*]]) +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 [[S]]) +; IS__TUNIT____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT____-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test15 +; IS__CGSCC____-SAME: (i64 [[S:%.*]]) +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 [[S]]) +; IS__CGSCC____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC____-NEXT: tail call void 
@free(i8* noalias nocapture noundef [[TMP1]]) +; IS__CGSCC____-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 %S) tail call void @no_sync_func(i8* %1) @@ -672,20 +855,28 @@ define void @test15(i64 %S) { } define void @test16a(i8 %v, i8** %P) { -; IS________OPM-LABEL: define {{[^@]+}}@test16a -; IS________OPM-SAME: (i8 [[V:%.*]], i8** nocapture nofree readnone [[P:%.*]]) -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: store i8 [[V]], i8* [[TMP1]], align 1 -; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree nonnull dereferenceable(1) [[TMP1]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture nonnull dereferenceable(1) [[TMP1]]) -; IS________OPM-NEXT: ret void +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test16a +; IS__TUNIT_OPM-SAME: (i8 [[V:%.*]], i8** nocapture nofree readnone [[P:%.*]]) +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: store i8 [[V]], i8* [[TMP1]], align 1 +; IS__TUNIT_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[TMP1]]) +; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull dereferenceable(1) [[TMP1]]) +; IS__TUNIT_OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test16a ; IS________NPM-SAME: (i8 [[V:%.*]], i8** nocapture nofree readnone [[P:%.*]]) ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 ; IS________NPM-NEXT: store i8 [[V]], i8* [[TMP1]], align 1 -; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree nonnull dereferenceable(1) [[TMP1]]) +; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[TMP1]]) ; IS________NPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test16a +; IS__CGSCC_OPM-SAME: (i8 [[V:%.*]], i8** nocapture nofree readnone [[P:%.*]]) +; IS__CGSCC_OPM-NEXT: 
[[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: store i8 [[V]], i8* [[TMP1]], align 1 +; IS__CGSCC_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[TMP1]]) +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull dereferenceable(1) [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) store i8 %v, i8* %1 @@ -695,13 +886,21 @@ define void @test16a(i8 %v, i8** %P) { } define void @test16b(i8 %v, i8** %P) { -; CHECK-LABEL: define {{[^@]+}}@test16b -; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 -; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) -; CHECK-NEXT: tail call void @free(i8* nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; IS__TUNIT____-LABEL: define {{[^@]+}}@test16b +; IS__TUNIT____-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT____-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 +; IS__TUNIT____-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef [[TMP1]]) +; IS__TUNIT____-NEXT: tail call void @free(i8* nocapture noundef [[TMP1]]) +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test16b +; IS__CGSCC____-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 +; IS__CGSCC____-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef [[TMP1]]) +; IS__CGSCC____-NEXT: tail call void @free(i8* nocapture noundef [[TMP1]]) +; IS__CGSCC____-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) store i8* %1, i8** %P @@ -711,13 +910,21 @@ define void @test16b(i8 %v, i8** %P) { } 
define void @test16c(i8 %v, i8** %P) { -; CHECK-LABEL: define {{[^@]+}}@test16c -; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 -; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) -; CHECK-NEXT: tail call void @free(i8* nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@test16c +; NOT_CGSCC_OPM-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) +; NOT_CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; NOT_CGSCC_OPM-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 +; NOT_CGSCC_OPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef [[TMP1]]) +; NOT_CGSCC_OPM-NEXT: tail call void @free(i8* nocapture noundef [[TMP1]]) +; NOT_CGSCC_OPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test16c +; IS__CGSCC_OPM-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 +; IS__CGSCC_OPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* nocapture noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) store i8* %1, i8** %P diff --git a/llvm/test/Transforms/Attributor/internal-noalias.ll b/llvm/test/Transforms/Attributor/internal-noalias.ll index d4e27d24bc5535..b71c07bd402093 100644 --- a/llvm/test/Transforms/Attributor/internal-noalias.ll +++ b/llvm/test/Transforms/Attributor/internal-noalias.ll @@ -96,8 +96,8 @@ define dso_local i32 @visible_local(i32* %A) #0 { ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__TUNIT____-NEXT: store i32 5, i32* [[B]], align 4 -; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree 
readonly align 4 [[A]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) -; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__TUNIT____-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] ; IS__TUNIT____-NEXT: ret i32 [[ADD]] ; @@ -107,8 +107,8 @@ define dso_local i32 @visible_local(i32* %A) #0 { ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 5, i32* [[B]], align 4 -; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) -; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC____-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] ; 
IS__CGSCC____-NEXT: ret i32 [[ADD]] ; @@ -124,7 +124,7 @@ entry: define internal i32 @noalias_args_argmem_ro(i32* %A, i32* %B) #1 { ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree noinline nosync nounwind readonly uwtable willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@noalias_args_argmem_ro -; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) +; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__TUNIT_OPM-NEXT: [[T0:%.*]] = load i32, i32* [[A]], align 4 ; IS__TUNIT_OPM-NEXT: [[T1:%.*]] = load i32, i32* [[B]], align 4 ; IS__TUNIT_OPM-NEXT: [[ADD:%.*]] = add nsw i32 [[T0]], [[T1]] @@ -144,7 +144,7 @@ define internal i32 @noalias_args_argmem_ro(i32* %A, i32* %B) #1 { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly uwtable willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@noalias_args_argmem_ro -; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__CGSCC____-NEXT: [[T0:%.*]] = load i32, i32* [[A]], align 4 ; IS__CGSCC____-NEXT: [[T1:%.*]] = load i32, i32* [[B]], align 4 ; IS__CGSCC____-NEXT: [[ADD:%.*]] = add nsw i32 [[T0]], [[T1]] @@ -161,7 +161,7 @@ define i32 @visible_local_2() { ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@visible_local_2() ; IS__TUNIT_OPM-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__TUNIT_OPM-NEXT: store i32 5, i32* [[B]], align 4 -; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call i32 
@noalias_args_argmem_ro(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_ro(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__TUNIT_OPM-NEXT: ret i32 [[CALL]] ; ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn @@ -177,7 +177,7 @@ define i32 @visible_local_2() { ; IS__CGSCC____-LABEL: define {{[^@]+}}@visible_local_2() ; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 5, i32* [[B]], align 4 -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_ro(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_ro(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC____-NEXT: ret i32 [[CALL]] ; %B = alloca i32, align 4 @@ -189,14 +189,14 @@ define i32 @visible_local_2() { define internal i32 @noalias_args_argmem_rn(i32* %A, i32* %B) #1 { ; IS__TUNIT____: Function Attrs: argmemonly nofree noinline nosync nounwind uwtable willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@noalias_args_argmem_rn -; IS__TUNIT____-SAME: (i32* noalias nocapture nofree nonnull align 4 dereferenceable(4) [[B:%.*]]) +; IS__TUNIT____-SAME: (i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) ; IS__TUNIT____-NEXT: [[T0:%.*]] = load i32, i32* [[B]], align 4 ; IS__TUNIT____-NEXT: store i32 0, i32* [[B]], align 4 ; IS__TUNIT____-NEXT: ret i32 [[T0]] ; ; IS__CGSCC____: Function Attrs: 
argmemonly nofree noinline norecurse nosync nounwind uwtable willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@noalias_args_argmem_rn -; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull align 4 dereferenceable(4) [[B:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) ; IS__CGSCC____-NEXT: [[T0:%.*]] = load i32, i32* [[B]], align 4 ; IS__CGSCC____-NEXT: store i32 0, i32* [[B]], align 4 ; IS__CGSCC____-NEXT: ret i32 [[T0]] @@ -211,14 +211,14 @@ define i32 @visible_local_3() { ; IS__TUNIT____-LABEL: define {{[^@]+}}@visible_local_3() ; IS__TUNIT____-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__TUNIT____-NEXT: store i32 5, i32* [[B]], align 4 -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree nonnull align 4 dereferenceable(4) [[B]]) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B]]) ; IS__TUNIT____-NEXT: ret i32 [[CALL]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@visible_local_3() ; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 5, i32* [[B]], align 4 -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree nonnull align 4 dereferenceable(4) [[B]]) +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B]]) ; IS__CGSCC____-NEXT: ret i32 [[CALL]] ; %B = alloca i32, align 4 diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll index 2a24a168263c6e..62d195a1f66bbf 100644 --- a/llvm/test/Transforms/Attributor/liveness.ll +++ b/llvm/test/Transforms/Attributor/liveness.ll @@ -1758,12 +1758,12 @@ define void @call_via_pointer_with_dead_args(i32* %a, i32* %b, void (i32*, 
i32*, ; FIXME: We have to prevent the propagation of %fp in the new pm CGSCC pass until the CallGraphUpdater can handle the new call edge. define internal void @call_via_pointer_with_dead_args_internal_a(i32* %a, i32* %b, void (i32*, i32*, i32*, i64, i32**)* %fp) { ; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_internal_a -; NOT_CGSCC_NPM-SAME: (i32* [[A:%.*]], i32* nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull [[FP:%.*]]) +; NOT_CGSCC_NPM-SAME: (i32* [[A:%.*]], i32* noundef nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull [[FP:%.*]]) ; NOT_CGSCC_NPM-NEXT: call void @called_via_pointer(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[B]], i32* [[A]], i64 -1, i32** null) ; NOT_CGSCC_NPM-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_internal_a -; IS__CGSCC____-SAME: (i32* [[A:%.*]], i32* nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull [[FP:%.*]]) +; IS__CGSCC____-SAME: (i32* [[A:%.*]], i32* noundef nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull [[FP:%.*]]) ; IS__CGSCC____-NEXT: call void [[FP]](i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[B]], i32* [[A]], i64 -1, i32** null) ; IS__CGSCC____-NEXT: ret void ; @@ -1772,12 +1772,12 @@ define internal void @call_via_pointer_with_dead_args_internal_a(i32* %a, i32* % } define internal void @call_via_pointer_with_dead_args_internal_b(i32* %a, i32* %b, void (i32*, i32*, i32*, i64, i32**)* %fp) { ; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_internal_b -; NOT_CGSCC_NPM-SAME: (i32* [[A:%.*]], i32* nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull [[FP:%.*]]) +; NOT_CGSCC_NPM-SAME: (i32* [[A:%.*]], 
i32* noundef nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull [[FP:%.*]]) ; NOT_CGSCC_NPM-NEXT: call void @called_via_pointer_internal_2(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[B]], i32* [[A]], i64 -1, i32** null) ; NOT_CGSCC_NPM-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_internal_b -; IS__CGSCC____-SAME: (i32* [[A:%.*]], i32* nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull [[FP:%.*]]) +; IS__CGSCC____-SAME: (i32* [[A:%.*]], i32* noundef nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull [[FP:%.*]]) ; IS__CGSCC____-NEXT: call void [[FP]](i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[B]], i32* [[A]], i64 -1, i32** null) ; IS__CGSCC____-NEXT: ret void ; @@ -1785,17 +1785,29 @@ define internal void @call_via_pointer_with_dead_args_internal_b(i32* %a, i32* % ret void } define void @call_via_pointer_with_dead_args_caller(i32* %a, i32* %b) { -; CHECK-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_caller -; CHECK-SAME: (i32* [[A:%.*]], i32* [[B:%.*]]) -; CHECK-NEXT: [[PTR1:%.*]] = alloca i32, align 128 -; CHECK-NEXT: [[PTR2:%.*]] = alloca i32, align 128 -; CHECK-NEXT: [[PTR3:%.*]] = alloca i32, align 128 -; CHECK-NEXT: [[PTR4:%.*]] = alloca i32, align 128 -; CHECK-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[PTR1]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer) -; CHECK-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[PTR2]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer_internal_1) -; CHECK-NEXT: call void @call_via_pointer_with_dead_args_internal_a(i32* [[B]], i32* nonnull align 128 dereferenceable(4) [[PTR3]], 
void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer) -; CHECK-NEXT: call void @call_via_pointer_with_dead_args_internal_b(i32* [[B]], i32* nonnull align 128 dereferenceable(4) [[PTR4]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer_internal_2) -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_caller +; NOT_CGSCC_NPM-SAME: (i32* [[A:%.*]], i32* [[B:%.*]]) +; NOT_CGSCC_NPM-NEXT: [[PTR1:%.*]] = alloca i32, align 128 +; NOT_CGSCC_NPM-NEXT: [[PTR2:%.*]] = alloca i32, align 128 +; NOT_CGSCC_NPM-NEXT: [[PTR3:%.*]] = alloca i32, align 128 +; NOT_CGSCC_NPM-NEXT: [[PTR4:%.*]] = alloca i32, align 128 +; NOT_CGSCC_NPM-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR1]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef @called_via_pointer) +; NOT_CGSCC_NPM-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR2]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef @called_via_pointer_internal_1) +; NOT_CGSCC_NPM-NEXT: call void @call_via_pointer_with_dead_args_internal_a(i32* [[B]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR3]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef @called_via_pointer) +; NOT_CGSCC_NPM-NEXT: call void @call_via_pointer_with_dead_args_internal_b(i32* [[B]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR4]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef @called_via_pointer_internal_2) +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_caller +; IS__CGSCC____-SAME: (i32* [[A:%.*]], i32* [[B:%.*]]) +; IS__CGSCC____-NEXT: [[PTR1:%.*]] = alloca i32, align 128 +; IS__CGSCC____-NEXT: [[PTR2:%.*]] = alloca i32, align 128 +; IS__CGSCC____-NEXT: [[PTR3:%.*]] = alloca i32, align 128 +; 
IS__CGSCC____-NEXT: [[PTR4:%.*]] = alloca i32, align 128 +; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR1]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull @called_via_pointer) +; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR2]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull @called_via_pointer_internal_1) +; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args_internal_a(i32* [[B]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR3]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull @called_via_pointer) +; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args_internal_b(i32* [[B]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR4]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull @called_via_pointer_internal_2) +; IS__CGSCC____-NEXT: ret void ; %ptr1 = alloca i32, align 128 %ptr2 = alloca i32, align 128 @@ -1908,7 +1920,7 @@ define i32 @main() { ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[G_0]], 1 ; CHECK-NEXT: br label [[FOR_COND_0]] ; CHECK: for.end.0: -; CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @malloc(i64 8) +; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 8) ; CHECK-NEXT: store i8* [[CALL]], i8** bitcast (%struct.a** @e to i8**), align 8 ; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[CALL]] to %struct.a** ; CHECK-NEXT: store %struct.a* null, %struct.a** [[B]], align 8 @@ -1985,7 +1997,7 @@ define void @bad_gep() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[N:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[M:%.*]] = alloca i8, align 1 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* noalias nocapture nonnull dereferenceable(1) [[N]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* noalias nocapture noundef nonnull dereferenceable(1) [[N]]) ; CHECK-NEXT: br 
label [[EXIT:%.*]] ; CHECK: while.body: ; CHECK-NEXT: unreachable @@ -1994,7 +2006,7 @@ define void @bad_gep() { ; CHECK: if.end: ; CHECK-NEXT: unreachable ; CHECK: exit: -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* noalias nocapture nonnull dereferenceable(1) [[N]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* noalias nocapture noundef nonnull dereferenceable(1) [[N]]) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/Attributor/memory_locations.ll b/llvm/test/Transforms/Attributor/memory_locations.ll index 23bff129101805..aeb66e9dbe4a20 100644 --- a/llvm/test/Transforms/Attributor/memory_locations.ll +++ b/llvm/test/Transforms/Attributor/memory_locations.ll @@ -327,7 +327,7 @@ define void @callerB1() { ; CHECK: Function Attrs: readnone ; CHECK-LABEL: define {{[^@]+}}@callerB1() ; CHECK-NEXT: [[STACK:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[TMP1:%.*]] = call i8* @argmem_only(i8* nonnull dereferenceable(1) [[STACK]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @argmem_only(i8* noundef nonnull dereferenceable(1) [[STACK]]) ; CHECK-NEXT: ret void ; %stack = alloca i8 @@ -338,7 +338,7 @@ define void @callerB2() { ; CHECK: Function Attrs: inaccessiblememonly ; CHECK-LABEL: define {{[^@]+}}@callerB2() ; CHECK-NEXT: [[STACK:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[TMP1:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* nonnull dereferenceable(1) [[STACK]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* noundef nonnull dereferenceable(1) [[STACK]]) ; CHECK-NEXT: ret void ; %stack = alloca i8 @@ -346,20 +346,30 @@ define void @callerB2() { ret void } define void @callerC1() { -; CHECK-LABEL: define {{[^@]+}}@callerC1() -; CHECK-NEXT: [[UNKNOWN:%.*]] = call i8* @unknown_ptr() -; CHECK-NEXT: [[TMP1:%.*]] = call i8* @argmem_only(i8* [[UNKNOWN]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@callerC1() +; NOT_CGSCC_NPM-NEXT: [[UNKNOWN:%.*]] = call noundef i8* @unknown_ptr() +; 
NOT_CGSCC_NPM-NEXT: [[TMP1:%.*]] = call i8* @argmem_only(i8* noundef [[UNKNOWN]]) +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callerC1() +; IS__CGSCC_NPM-NEXT: [[UNKNOWN:%.*]] = call i8* @unknown_ptr() +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = call i8* @argmem_only(i8* noundef [[UNKNOWN]]) +; IS__CGSCC_NPM-NEXT: ret void ; %unknown = call i8* @unknown_ptr() call i8* @argmem_only(i8* %unknown) ret void } define void @callerC2() { -; CHECK-LABEL: define {{[^@]+}}@callerC2() -; CHECK-NEXT: [[UNKNOWN:%.*]] = call i8* @unknown_ptr() -; CHECK-NEXT: [[TMP1:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* [[UNKNOWN]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@callerC2() +; NOT_CGSCC_OPM-NEXT: [[UNKNOWN:%.*]] = call noundef i8* @unknown_ptr() +; NOT_CGSCC_OPM-NEXT: [[TMP1:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* noundef [[UNKNOWN]]) +; NOT_CGSCC_OPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@callerC2() +; IS__CGSCC_OPM-NEXT: [[UNKNOWN:%.*]] = call i8* @unknown_ptr() +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* noundef [[UNKNOWN]]) +; IS__CGSCC_OPM-NEXT: ret void ; %unknown = call i8* @unknown_ptr() call i8* @inaccesible_argmem_only_decl(i8* %unknown) @@ -367,7 +377,7 @@ define void @callerC2() { } define void @callerD1() { ; CHECK-LABEL: define {{[^@]+}}@callerD1() -; CHECK-NEXT: [[UNKNOWN:%.*]] = call i8* @argmem_only(i8* noalias nocapture align 536870912 null) +; CHECK-NEXT: [[UNKNOWN:%.*]] = call i8* @argmem_only(i8* noalias nocapture noundef align 536870912 null) ; CHECK-NEXT: store i8 0, i8* [[UNKNOWN]], align 1 ; CHECK-NEXT: ret void ; @@ -377,7 +387,7 @@ define void @callerD1() { } define void @callerD2() { ; CHECK-LABEL: define {{[^@]+}}@callerD2() -; CHECK-NEXT: [[UNKNOWN:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* noalias nocapture align 536870912 null) +; CHECK-NEXT: [[UNKNOWN:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* noalias 
nocapture noundef align 536870912 null) ; CHECK-NEXT: store i8 0, i8* [[UNKNOWN]], align 1 ; CHECK-NEXT: ret void ; @@ -464,12 +474,12 @@ define void @writeonly_global() { define void @writeonly_global_via_arg() { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly ; IS__TUNIT____-LABEL: define {{[^@]+}}@writeonly_global_via_arg() -; IS__TUNIT____-NEXT: call void @write_global_via_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) @G) +; IS__TUNIT____-NEXT: call void @write_global_via_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) @G) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@writeonly_global_via_arg() -; IS__CGSCC____-NEXT: call void @write_global_via_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) @G) +; IS__CGSCC____-NEXT: call void @write_global_via_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) @G) ; IS__CGSCC____-NEXT: ret void ; call void @write_global_via_arg(i32* @G) @@ -499,7 +509,7 @@ define i8 @recursive_not_readnone(i8* %ptr, i1 %c) { ; CHECK-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; CHECK: t: -; CHECK-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone(i8* noalias nocapture nofree nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) ; CHECK-NEXT: [[R:%.*]] = load i8, i8* [[ALLOC]], align 1 ; CHECK-NEXT: ret i8 [[R]] ; CHECK: f: @@ -520,11 +530,11 @@ f: define internal i8 @recursive_not_readnone_internal(i8* %ptr, i1 %c) { ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind ; IS__TUNIT____-LABEL: define {{[^@]+}}@recursive_not_readnone_internal -; IS__TUNIT____-SAME: (i8* 
noalias nocapture nofree nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) +; IS__TUNIT____-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) ; IS__TUNIT____-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 ; IS__TUNIT____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__TUNIT____: t: -; IS__TUNIT____-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) ; IS__TUNIT____-NEXT: [[R:%.*]] = load i8, i8* [[ALLOC]], align 1 ; IS__TUNIT____-NEXT: ret i8 [[R]] ; IS__TUNIT____: f: @@ -533,11 +543,11 @@ define internal i8 @recursive_not_readnone_internal(i8* %ptr, i1 %c) { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree nosync nounwind ; IS__CGSCC____-LABEL: define {{[^@]+}}@recursive_not_readnone_internal -; IS__CGSCC____-SAME: (i8* nocapture nofree nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) +; IS__CGSCC____-SAME: (i8* nocapture nofree noundef nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) ; IS__CGSCC____-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 ; IS__CGSCC____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__CGSCC____: t: -; IS__CGSCC____-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) ; IS__CGSCC____-NEXT: [[R:%.*]] = load i8, i8* [[ALLOC]], align 1 ; IS__CGSCC____-NEXT: ret i8 [[R]] ; IS__CGSCC____: f: @@ -560,7 +570,7 @@ define i8 @readnone_caller(i1 %c) { ; CHECK-LABEL: define {{[^@]+}}@readnone_caller ; 
CHECK-SAME: (i1 [[C:%.*]]) ; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[R:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree nonnull writeonly dereferenceable(1) [[A]], i1 [[C]]) +; CHECK-NEXT: [[R:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[A]], i1 [[C]]) ; CHECK-NEXT: ret i8 [[R]] ; %a = alloca i8 @@ -575,7 +585,7 @@ define internal i8 @recursive_not_readnone_internal2(i8* %ptr, i1 %c) { ; IS__TUNIT____-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 ; IS__TUNIT____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__TUNIT____: t: -; IS__TUNIT____-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal2(i8* noalias nocapture nofree nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal2(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) ; IS__TUNIT____-NEXT: [[R:%.*]] = load i8, i8* [[ALLOC]], align 1 ; IS__TUNIT____-NEXT: ret i8 [[R]] ; IS__TUNIT____: f: @@ -588,7 +598,7 @@ define internal i8 @recursive_not_readnone_internal2(i8* %ptr, i1 %c) { ; IS__CGSCC____-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 ; IS__CGSCC____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__CGSCC____: t: -; IS__CGSCC____-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal2(i8* noalias nocapture nofree nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal2(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) ; IS__CGSCC____-NEXT: [[R:%.*]] = load i8, i8* [[ALLOC]], align 1 ; IS__CGSCC____-NEXT: ret i8 [[R]] ; IS__CGSCC____: f: diff --git a/llvm/test/Transforms/Attributor/misc.ll b/llvm/test/Transforms/Attributor/misc.ll index 46a0449e5be6e8..3ab1f8543aeb26 100644 --- 
a/llvm/test/Transforms/Attributor/misc.ll +++ b/llvm/test/Transforms/Attributor/misc.ll @@ -9,31 +9,18 @@ define internal void @internal(void (i8*)* %fp) { ; ; -; IS__TUNIT____-LABEL: define {{[^@]+}}@internal -; IS__TUNIT____-SAME: (void (i8*)* nonnull [[FP:%.*]]) -; IS__TUNIT____-NEXT: entry: -; IS__TUNIT____-NEXT: [[A:%.*]] = alloca i32, align 4 -; IS__TUNIT____-NEXT: call void @foo(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A]]) -; IS__TUNIT____-NEXT: call void [[FP]](i8* bitcast (void (i32*)* @foo to i8*)) -; IS__TUNIT____-NEXT: call void @callback1(void (i32*)* nonnull @foo) -; IS__TUNIT____-NEXT: call void @callback2(void (i8*)* nonnull bitcast (void (i32*)* @foo to void (i8*)*)) -; IS__TUNIT____-NEXT: call void @callback2(void (i8*)* nonnull [[FP]]) -; IS__TUNIT____-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* -; IS__TUNIT____-NEXT: call void [[FP]](i8* [[TMP1]]) -; IS__TUNIT____-NEXT: ret void -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@internal -; IS__CGSCC____-SAME: (void (i8*)* nonnull [[FP:%.*]]) -; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: [[A:%.*]] = alloca i32, align 4 -; IS__CGSCC____-NEXT: call void @foo(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A]]) -; IS__CGSCC____-NEXT: call void [[FP]](i8* bitcast (void (i32*)* @foo to i8*)) -; IS__CGSCC____-NEXT: call void @callback1(void (i32*)* nonnull @foo) -; IS__CGSCC____-NEXT: call void @callback2(void (i8*)* bitcast (void (i32*)* @foo to void (i8*)*)) -; IS__CGSCC____-NEXT: call void @callback2(void (i8*)* nonnull [[FP]]) -; IS__CGSCC____-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* -; IS__CGSCC____-NEXT: call void [[FP]](i8* [[TMP1]]) -; IS__CGSCC____-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@internal +; CHECK-SAME: (void (i8*)* nonnull [[FP:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @foo(i32* noalias nocapture nofree noundef nonnull writeonly align 4 
dereferenceable(4) [[A]]) +; CHECK-NEXT: call void [[FP]](i8* bitcast (void (i32*)* @foo to i8*)) +; CHECK-NEXT: call void @callback1(void (i32*)* noundef nonnull @foo) +; CHECK-NEXT: call void @callback2(void (i8*)* noundef bitcast (void (i32*)* @foo to void (i8*)*)) +; CHECK-NEXT: call void @callback2(void (i8*)* nonnull [[FP]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void [[FP]](i8* [[TMP1]]) +; CHECK-NEXT: ret void ; entry: %a = alloca i32, align 4 @@ -51,33 +38,19 @@ entry: define void @external(void (i8*)* %fp) { ; ; -; IS__TUNIT____-LABEL: define {{[^@]+}}@external -; IS__TUNIT____-SAME: (void (i8*)* [[FP:%.*]]) -; IS__TUNIT____-NEXT: entry: -; IS__TUNIT____-NEXT: [[A:%.*]] = alloca i32, align 4 -; IS__TUNIT____-NEXT: call void @foo(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A]]) -; IS__TUNIT____-NEXT: call void @callback1(void (i32*)* nonnull @foo) -; IS__TUNIT____-NEXT: call void @callback2(void (i8*)* nonnull bitcast (void (i32*)* @foo to void (i8*)*)) -; IS__TUNIT____-NEXT: call void @callback2(void (i8*)* [[FP]]) -; IS__TUNIT____-NEXT: call void [[FP]](i8* bitcast (void (i32*)* @foo to i8*)) -; IS__TUNIT____-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* -; IS__TUNIT____-NEXT: call void [[FP]](i8* [[TMP1]]) -; IS__TUNIT____-NEXT: call void @internal(void (i8*)* nonnull [[FP]]) -; IS__TUNIT____-NEXT: ret void -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@external -; IS__CGSCC____-SAME: (void (i8*)* [[FP:%.*]]) -; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: [[A:%.*]] = alloca i32, align 4 -; IS__CGSCC____-NEXT: call void @foo(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A]]) -; IS__CGSCC____-NEXT: call void @callback1(void (i32*)* nonnull @foo) -; IS__CGSCC____-NEXT: call void @callback2(void (i8*)* bitcast (void (i32*)* @foo to void (i8*)*)) -; IS__CGSCC____-NEXT: call void @callback2(void (i8*)* [[FP]]) -; IS__CGSCC____-NEXT: call void [[FP]](i8* 
bitcast (void (i32*)* @foo to i8*)) -; IS__CGSCC____-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* -; IS__CGSCC____-NEXT: call void [[FP]](i8* [[TMP1]]) -; IS__CGSCC____-NEXT: call void @internal(void (i8*)* nonnull [[FP]]) -; IS__CGSCC____-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@external +; CHECK-SAME: (void (i8*)* [[FP:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @foo(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @callback1(void (i32*)* noundef nonnull @foo) +; CHECK-NEXT: call void @callback2(void (i8*)* noundef bitcast (void (i32*)* @foo to void (i8*)*)) +; CHECK-NEXT: call void @callback2(void (i8*)* [[FP]]) +; CHECK-NEXT: call void [[FP]](i8* bitcast (void (i32*)* @foo to i8*)) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void [[FP]](i8* [[TMP1]]) +; CHECK-NEXT: call void @internal(void (i8*)* nonnull [[FP]]) +; CHECK-NEXT: ret void ; entry: %a = alloca i32, align 4 diff --git a/llvm/test/Transforms/Attributor/misc_crash.ll b/llvm/test/Transforms/Attributor/misc_crash.ll index b49cbd94322d16..e420f58af13683 100644 --- a/llvm/test/Transforms/Attributor/misc_crash.ll +++ b/llvm/test/Transforms/Attributor/misc_crash.ll @@ -29,7 +29,7 @@ define i32* @func1() { } ; UTC_ARGS: --disable -; CHECK-LABEL: define internal nonnull align 4 dereferenceable(4) i32* @func1a() +; CHECK-LABEL: define internal noundef nonnull align 4 dereferenceable(4) i32* @func1a() ; CHECK-NEXT: ret i32* getelementptr inbounds ([1 x i32], [1 x i32]* @var1, i32 0, i32 0) define internal i32* @func1a([1 x i32]* %arg) { %ptr = getelementptr inbounds [1 x i32], [1 x i32]* %arg, i64 0, i64 0 @@ -40,7 +40,7 @@ define internal i32* @func1a([1 x i32]* %arg) { define internal void @func2a(i32* %0) { ; CHECK: Function Attrs: nofree nosync nounwind willreturn writeonly ; CHECK-LABEL: define {{[^@]+}}@func2a -; CHECK-SAME: (i32* nocapture 
nofree nonnull writeonly align 4 dereferenceable(4) [[TMP0:%.*]]) +; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP0:%.*]]) ; CHECK-NEXT: store i32 0, i32* @var2, align 4 ; CHECK-NEXT: ret void ; @@ -50,7 +50,7 @@ define internal void @func2a(i32* %0) { define i32 @func2() { ; CHECK-LABEL: define {{[^@]+}}@func2() -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 (i32*, ...) bitcast (void (i32*)* @func2a to i32 (i32*, ...)*)(i32* nonnull align 4 dereferenceable(4) @var2) +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 (i32*, ...) bitcast (void (i32*)* @func2a to i32 (i32*, ...)*)(i32* noundef nonnull align 4 dereferenceable(4) @var2) ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @var2, align 4 ; CHECK-NEXT: ret i32 [[TMP2]] ; @@ -62,7 +62,7 @@ define i32 @func2() { define i32 @func3(i1 %false) { ; CHECK-LABEL: define {{[^@]+}}@func3 ; CHECK-SAME: (i1 [[FALSE:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 (i32*, ...) bitcast (void (i32*)* @func2a to i32 (i32*, ...)*)(i32* nonnull align 4 dereferenceable(4) @var2) +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 (i32*, ...) 
bitcast (void (i32*)* @func2a to i32 (i32*, ...)*)(i32* noundef nonnull align 4 dereferenceable(4) @var2) ; CHECK-NEXT: br i1 [[FALSE]], label [[USE_BB:%.*]], label [[RET_BB:%.*]] ; CHECK: use_bb: ; CHECK-NEXT: ret i32 [[TMP1]] diff --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll index 5314ea53f5a940..ff780b6714259b 100644 --- a/llvm/test/Transforms/Attributor/noalias.ll +++ b/llvm/test/Transforms/Attributor/noalias.ll @@ -56,9 +56,13 @@ define void @nocapture(i8* %a){ } define i8* @return_noalias_looks_like_capture(){ -; CHECK-LABEL: define {{[^@]+}}@return_noalias_looks_like_capture() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: ret i8* [[TMP1]] +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@return_noalias_looks_like_capture() +; NOT_CGSCC_NPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; NOT_CGSCC_NPM-NEXT: ret i8* [[TMP1]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@return_noalias_looks_like_capture() +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: ret i8* [[TMP1]] ; %1 = tail call noalias i8* @malloc(i64 4) call void @nocapture(i8* %1) @@ -180,7 +184,7 @@ define i8* @test6() nounwind uwtable ssp { ; CHECK-NEXT: store i8 97, i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[X]], i64 0, i64 1 ; CHECK-NEXT: store i8 0, i8* [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @strdup(i8* nocapture nonnull dereferenceable(2) [[ARRAYIDX]]) +; CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @strdup(i8* nocapture noundef nonnull dereferenceable(2) [[ARRAYIDX]]) ; CHECK-NEXT: ret i8* [[CALL]] ; %x = alloca [2 x i8], align 1 @@ -254,7 +258,7 @@ define i8* @test8(i32* %0) nounwind uwtable { declare void @use_i8(i8* nocapture) define internal void @test9a(i8* %a, i8* %b) { ; CHECK-LABEL: define {{[^@]+}}@test9a() -; CHECK-NEXT: call void @use_i8(i8* 
noalias nocapture align 536870912 null) +; CHECK-NEXT: call void @use_i8(i8* noalias nocapture noundef align 536870912 null) ; CHECK-NEXT: ret void ; call void @use_i8(i8* null) @@ -353,14 +357,23 @@ define void @test11(i8* noalias %a) { declare void @use_nocapture(i8* nocapture) declare void @use(i8*) define void @test12_1() { -; CHECK-LABEL: define {{[^@]+}}@test12_1() -; CHECK-NEXT: [[A:%.*]] = alloca i8, align 4 -; CHECK-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture nonnull align 4 dereferenceable(1) [[A]]) -; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture nonnull align 4 dereferenceable(1) [[A]]) -; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[B]]) -; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[B]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test12_1() +; NOT_CGSCC_NPM-NEXT: [[A:%.*]] = alloca i8, align 4 +; NOT_CGSCC_NPM-NEXT: [[B:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; NOT_CGSCC_NPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef nonnull align 4 dereferenceable(1) [[A]]) +; NOT_CGSCC_NPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef nonnull align 4 dereferenceable(1) [[A]]) +; NOT_CGSCC_NPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef [[B]]) +; NOT_CGSCC_NPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef [[B]]) +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test12_1() +; IS__CGSCC____-NEXT: [[A:%.*]] = alloca i8, align 4 +; IS__CGSCC____-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef nonnull align 4 dereferenceable(1) [[A]]) +; IS__CGSCC____-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef nonnull align 4 dereferenceable(1) [[A]]) +; IS__CGSCC____-NEXT: tail call void 
@use_nocapture(i8* noalias nocapture noundef [[B]]) +; IS__CGSCC____-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef [[B]]) +; IS__CGSCC____-NEXT: ret void ; %A = alloca i8, align 4 %B = tail call noalias i8* @malloc(i64 4) @@ -372,13 +385,21 @@ define void @test12_1() { } define void @test12_2(){ -; CHECK-LABEL: define {{[^@]+}}@test12_2() -; CHECK-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[A]]) -; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[A]]) -; CHECK-NEXT: tail call void @use(i8* [[A]]) -; CHECK-NEXT: tail call void @use_nocapture(i8* nocapture [[A]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test12_2() +; NOT_CGSCC_NPM-NEXT: [[A:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; NOT_CGSCC_NPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef [[A]]) +; NOT_CGSCC_NPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef [[A]]) +; NOT_CGSCC_NPM-NEXT: tail call void @use(i8* noundef [[A]]) +; NOT_CGSCC_NPM-NEXT: tail call void @use_nocapture(i8* nocapture noundef [[A]]) +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test12_2() +; IS__CGSCC____-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef [[A]]) +; IS__CGSCC____-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef [[A]]) +; IS__CGSCC____-NEXT: tail call void @use(i8* noundef [[A]]) +; IS__CGSCC____-NEXT: tail call void @use_nocapture(i8* nocapture noundef [[A]]) +; IS__CGSCC____-NEXT: ret void ; ; FIXME: This should be @use_nocapture(i8* noalias [[A]]) ; FIXME: This should be @use_nocapture(i8* noalias nocapture [[A]]) @@ -392,10 +413,15 @@ define void @test12_2(){ declare void @two_args(i8* nocapture , i8* nocapture) define void @test12_3(){ -; CHECK-LABEL: define {{[^@]+}}@test12_3() -; CHECK-NEXT: 
[[A:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test12_3() +; NOT_CGSCC_NPM-NEXT: [[A:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; NOT_CGSCC_NPM-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture noundef [[A]]) +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test12_3() +; IS__CGSCC____-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture noundef [[A]]) +; IS__CGSCC____-NEXT: ret void ; %A = tail call noalias i8* @malloc(i64 4) tail call void @two_args(i8* %A, i8* %A) @@ -404,28 +430,40 @@ define void @test12_3(){ define void @test12_4(){ ; IS________OPM-LABEL: define {{[^@]+}}@test12_4() -; IS________OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS________OPM-NEXT: [[A:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS________OPM-NEXT: [[B:%.*]] = tail call noalias noundef i8* @malloc(i64 4) ; IS________OPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 ; IS________OPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 ; IS________OPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 -; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[B]]) -; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_0]]) -; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_1]]) -; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) +; IS________OPM-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture noundef [[B]]) +; IS________OPM-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture noundef [[A_0]]) +; 
IS________OPM-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture [[A_1]]) +; IS________OPM-NEXT: tail call void @two_args(i8* nocapture noundef [[A_0]], i8* nocapture noundef [[B_0]]) ; IS________OPM-NEXT: ret void ; -; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test12_4() -; NOT_TUNIT_OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) -; NOT_TUNIT_OPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 4) -; NOT_TUNIT_OPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 -; NOT_TUNIT_OPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 -; NOT_TUNIT_OPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* noalias nocapture [[A]], i8* noalias nocapture [[B]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_0]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_1]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) -; NOT_TUNIT_OPM-NEXT: ret void +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test12_4() +; IS__TUNIT_NPM-NEXT: [[A:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_NPM-NEXT: [[B:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_NPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 +; IS__TUNIT_NPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 +; IS__TUNIT_NPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 +; IS__TUNIT_NPM-NEXT: tail call void @two_args(i8* noalias nocapture noundef [[A]], i8* noalias nocapture noundef [[B]]) +; IS__TUNIT_NPM-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture noundef [[A_0]]) +; IS__TUNIT_NPM-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture [[A_1]]) +; IS__TUNIT_NPM-NEXT: tail call void @two_args(i8* nocapture noundef [[A_0]], i8* nocapture noundef [[B_0]]) +; IS__TUNIT_NPM-NEXT: ret void +; +; IS__CGSCC____-LABEL: 
define {{[^@]+}}@test12_4() +; IS__CGSCC____-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 +; IS__CGSCC____-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 +; IS__CGSCC____-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 +; IS__CGSCC____-NEXT: tail call void @two_args(i8* noalias nocapture noundef [[A]], i8* noalias nocapture noundef [[B]]) +; IS__CGSCC____-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture noundef [[A_0]]) +; IS__CGSCC____-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture noundef [[A_1]]) +; IS__CGSCC____-NEXT: tail call void @two_args(i8* nocapture noundef [[A_0]], i8* nocapture noundef [[B_0]]) +; IS__CGSCC____-NEXT: ret void ; %A = tail call noalias i8* @malloc(i64 4) %B = tail call noalias i8* @malloc(i64 4) @@ -456,12 +494,19 @@ define void @use_i8_internal(i8* %a) { } define void @test13_use_noalias(){ -; CHECK-LABEL: define {{[^@]+}}@test13_use_noalias() -; CHECK-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* -; CHECK-NEXT: [[C2:%.*]] = bitcast i16* [[C1]] to i8* -; CHECK-NEXT: call void @use_i8_internal(i8* noalias nocapture [[C2]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test13_use_noalias() +; NOT_CGSCC_NPM-NEXT: [[M1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; NOT_CGSCC_NPM-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* +; NOT_CGSCC_NPM-NEXT: [[C2:%.*]] = bitcast i16* [[C1]] to i8* +; NOT_CGSCC_NPM-NEXT: call void @use_i8_internal(i8* noalias nocapture noundef [[C2]]) +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test13_use_noalias() +; IS__CGSCC____-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* +; IS__CGSCC____-NEXT: 
[[C2:%.*]] = bitcast i16* [[C1]] to i8* +; IS__CGSCC____-NEXT: call void @use_i8_internal(i8* noalias nocapture noundef [[C2]]) +; IS__CGSCC____-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test13_use_noalias() ; IS__CGSCC_OPM-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 4) @@ -478,12 +523,12 @@ define void @test13_use_noalias(){ define void @test13_use_alias(){ ; CHECK-LABEL: define {{[^@]+}}@test13_use_alias() -; CHECK-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 4) +; CHECK-NEXT: [[M1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) ; CHECK-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* ; CHECK-NEXT: [[C2A:%.*]] = bitcast i16* [[C1]] to i8* ; CHECK-NEXT: [[C2B:%.*]] = bitcast i16* [[C1]] to i8* -; CHECK-NEXT: call void @use_i8_internal(i8* nocapture [[C2A]]) -; CHECK-NEXT: call void @use_i8_internal(i8* nocapture [[C2B]]) +; CHECK-NEXT: call void @use_i8_internal(i8* nocapture noundef [[C2A]]) +; CHECK-NEXT: call void @use_i8_internal(i8* nocapture noundef [[C2B]]) ; CHECK-NEXT: ret void ; %m1 = tail call noalias i8* @malloc(i64 4) @@ -570,11 +615,11 @@ define internal fastcc double @strtox(i8* %s, i8** %p, i32 %prec) unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 144, i8* nocapture nonnull align 8 dereferenceable(240) [[TMP0]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) ; CHECK-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) -; CHECK-NEXT: call void @__shlim(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i64 0) -; CHECK-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i32 1, 
i32 1) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 144, i8* nocapture nonnull align 8 dereferenceable(240) [[TMP0]]) +; CHECK-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 0) +; CHECK-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 1, i32 1) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) ; CHECK-NEXT: ret double [[CALL1]] ; entry: diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll index 58d8be6d60c9b5..4ea6a327db6e84 100644 --- a/llvm/test/Transforms/Attributor/nocapture-1.ll +++ b/llvm/test/Transforms/Attributor/nocapture-1.ll @@ -459,7 +459,7 @@ define i8* @test4_2(i8* %x4_2, i8* %y4_2, i8* %z4_2, i1 %c) { ; CHECK-SAME: (i8* nocapture nofree readnone [[X4_2:%.*]], i8* nofree readnone returned "no-capture-maybe-returned" [[Y4_2:%.*]], i8* nocapture nofree readnone [[Z4_2:%.*]], i1 [[C:%.*]]) ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; CHECK: t: -; CHECK-NEXT: call void @test4_1(i8* noalias nocapture nofree readnone align 536870912 null, i1 [[C]]) +; CHECK-NEXT: call void @test4_1(i8* noalias nocapture nofree noundef readnone align 536870912 null, i1 [[C]]) ; CHECK-NEXT: store i32* null, i32** @g, align 8 ; CHECK-NEXT: br label [[F]] ; CHECK: f: @@ -759,7 +759,7 @@ declare void @unknown(i8*) define void @test_callsite() { ; CHECK-LABEL: define {{[^@]+}}@test_callsite() ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @unknown(i8* noalias nocapture align 536870912 null) +; CHECK-NEXT: call void @unknown(i8* noalias nocapture noundef align 536870912 null) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/Attributor/nocapture-2.ll b/llvm/test/Transforms/Attributor/nocapture-2.ll index 5ed73b74c8693b..c4bc297ee2ad76 100644 --- 
a/llvm/test/Transforms/Attributor/nocapture-2.ll +++ b/llvm/test/Transforms/Attributor/nocapture-2.ll @@ -217,11 +217,11 @@ define float* @scc_A(i32* dereferenceable_or_null(4) %a) { ; CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] ; CHECK: cond.true: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i16* -; CHECK-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[TMP0]]) +; CHECK-NEXT: [[CALL:%.*]] = call noundef dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[TMP0]]) ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[CALL]] to double* -; CHECK-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(8) i64* @scc_B(double* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP1]]) +; CHECK-NEXT: [[CALL1:%.*]] = call noundef dereferenceable_or_null(8) i64* @scc_B(double* noalias nofree noundef nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[CALL1]] to i32* -; CHECK-NEXT: [[CALL2:%.*]] = call float* @scc_A(i32* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP2]]) +; CHECK-NEXT: [[CALL2:%.*]] = call float* @scc_A(i32* noalias nofree noundef nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP2]]) ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[CALL2]] to i32* ; CHECK-NEXT: br label [[COND_END:%.*]] ; CHECK: cond.false: @@ -263,11 +263,11 @@ define i64* @scc_B(double* dereferenceable_or_null(8) %a) { ; CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] ; CHECK: cond.true: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to i32* -; CHECK-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0]]) +; 
CHECK-NEXT: [[CALL:%.*]] = call noundef dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0]]) ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[CALL]] to double* -; CHECK-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(8) i64* @scc_B(double* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP1]]) +; CHECK-NEXT: [[CALL1:%.*]] = call noundef dereferenceable_or_null(8) i64* @scc_B(double* noalias nofree noundef nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[CALL1]] to i16* -; CHECK-NEXT: [[CALL2:%.*]] = call i8* @scc_C(i16* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP2]]) +; CHECK-NEXT: [[CALL2:%.*]] = call i8* @scc_C(i16* noalias nofree noundef nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP2]]) ; CHECK-NEXT: br label [[COND_END:%.*]] ; CHECK: cond.false: ; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[A]] to i8* @@ -312,16 +312,16 @@ define i8* @scc_C(i16* dereferenceable_or_null(2) %a) { ; CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] ; CHECK: cond.true: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to double* -; CHECK-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(8) i64* @scc_B(double* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0]]) +; CHECK-NEXT: [[CALL1:%.*]] = call noundef dereferenceable_or_null(8) i64* @scc_B(double* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0]]) ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[CALL1]] to i8* ; CHECK-NEXT: br label [[COND_END:%.*]] ; CHECK: cond.false: -; CHECK-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[A]]) +; CHECK-NEXT: [[CALL2:%.*]] = call noundef 
dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[A]]) ; CHECK-NEXT: br label [[COND_END]] ; CHECK: cond.end: ; CHECK-NEXT: [[COND:%.*]] = phi i8* [ [[TMP1]], [[COND_TRUE]] ], [ [[CALL2]], [[COND_FALSE]] ] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[COND]] to i32* -; CHECK-NEXT: [[CALL3:%.*]] = call float* @scc_A(i32* noalias nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[TMP2]]) +; CHECK-NEXT: [[CALL3:%.*]] = call float* @scc_A(i32* noalias nofree noundef nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[TMP2]]) ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[CALL3]] to i8* ; CHECK-NEXT: ret i8* [[TMP3]] ; diff --git a/llvm/test/Transforms/Attributor/nonnull.ll b/llvm/test/Transforms/Attributor/nonnull.ll index 9290d32453bada..6e06b3b195204f 100644 --- a/llvm/test/Transforms/Attributor/nonnull.ll +++ b/llvm/test/Transforms/Attributor/nonnull.ll @@ -364,10 +364,10 @@ define void @test12(i8* nonnull %a) { declare i8* @unknown() define void @test13_helper() { ; CHECK-LABEL: define {{[^@]+}}@test13_helper() -; CHECK-NEXT: [[NONNULLPTR:%.*]] = tail call nonnull i8* @ret_nonnull() -; CHECK-NEXT: [[MAYBENULLPTR:%.*]] = tail call i8* @unknown() -; CHECK-NEXT: tail call void @test13(i8* noalias nocapture nofree nonnull readnone [[NONNULLPTR]], i8* noalias nocapture nofree nonnull readnone [[NONNULLPTR]], i8* noalias nocapture nofree readnone [[MAYBENULLPTR]]) -; CHECK-NEXT: tail call void @test13(i8* noalias nocapture nofree nonnull readnone [[NONNULLPTR]], i8* noalias nocapture nofree readnone [[MAYBENULLPTR]], i8* noalias nocapture nofree nonnull readnone [[NONNULLPTR]]) +; CHECK-NEXT: [[NONNULLPTR:%.*]] = tail call noundef nonnull i8* @ret_nonnull() +; CHECK-NEXT: [[MAYBENULLPTR:%.*]] = tail call noundef i8* @unknown() +; CHECK-NEXT: tail call void @test13(i8* noalias nocapture nofree noundef nonnull readnone [[NONNULLPTR]], i8* noalias nocapture nofree 
noundef nonnull readnone [[NONNULLPTR]], i8* noalias nocapture nofree noundef readnone [[MAYBENULLPTR]]) +; CHECK-NEXT: tail call void @test13(i8* noalias nocapture nofree noundef nonnull readnone [[NONNULLPTR]], i8* noalias nocapture nofree noundef readnone [[MAYBENULLPTR]], i8* noalias nocapture nofree noundef nonnull readnone [[NONNULLPTR]]) ; CHECK-NEXT: ret void ; %nonnullptr = tail call i8* @ret_nonnull() @@ -379,10 +379,10 @@ define void @test13_helper() { define internal void @test13(i8* %a, i8* %b, i8* %c) { ; IS__TUNIT____: Function Attrs: nounwind ; IS__TUNIT____-LABEL: define {{[^@]+}}@test13 -; IS__TUNIT____-SAME: (i8* noalias nocapture nofree nonnull readnone [[A:%.*]], i8* noalias nocapture nofree readnone [[B:%.*]], i8* noalias nocapture nofree readnone [[C:%.*]]) -; IS__TUNIT____-NEXT: call void @use_i8_ptr(i8* noalias nocapture nofree nonnull readnone [[A]]) -; IS__TUNIT____-NEXT: call void @use_i8_ptr(i8* noalias nocapture nofree readnone [[B]]) -; IS__TUNIT____-NEXT: call void @use_i8_ptr(i8* noalias nocapture nofree readnone [[C]]) +; IS__TUNIT____-SAME: (i8* noalias nocapture nofree noundef nonnull readnone [[A:%.*]], i8* noalias nocapture nofree noundef readnone [[B:%.*]], i8* noalias nocapture nofree noundef readnone [[C:%.*]]) +; IS__TUNIT____-NEXT: call void @use_i8_ptr(i8* noalias nocapture nofree noundef nonnull readnone [[A]]) +; IS__TUNIT____-NEXT: call void @use_i8_ptr(i8* noalias nocapture nofree noundef readnone [[B]]) +; IS__TUNIT____-NEXT: call void @use_i8_ptr(i8* noalias nocapture nofree noundef readnone [[C]]) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nounwind @@ -1364,5 +1364,36 @@ define void @nonnull_assume_neg(i8* %arg) { declare void @use_i8_ptr(i8* nofree nocapture readnone) nounwind declare void @use_i8_ptr_ret(i8* nofree nocapture readnone) nounwind willreturn +define i8* @nonnull_function_ptr_1() { +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; 
IS__TUNIT____-LABEL: define {{[^@]+}}@nonnull_function_ptr_1() +; IS__TUNIT____-NEXT: [[BC:%.*]] = bitcast i8* ()* @nonnull_function_ptr_1 to i8* +; IS__TUNIT____-NEXT: ret i8* [[BC]] +; +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@nonnull_function_ptr_1() +; IS__CGSCC____-NEXT: [[BC:%.*]] = bitcast i8* ()* @nonnull_function_ptr_1 to i8* +; IS__CGSCC____-NEXT: ret i8* [[BC]] +; + %bc = bitcast i8*()* @nonnull_function_ptr_1 to i8* + ret i8* %bc +} + +declare i8* @function_decl() +define i8* @nonnull_function_ptr_2() { +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@nonnull_function_ptr_2() +; IS__TUNIT____-NEXT: [[BC:%.*]] = bitcast i8* ()* @function_decl to i8* +; IS__TUNIT____-NEXT: ret i8* [[BC]] +; +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@nonnull_function_ptr_2() +; IS__CGSCC____-NEXT: [[BC:%.*]] = bitcast i8* ()* @function_decl to i8* +; IS__CGSCC____-NEXT: ret i8* [[BC]] +; + %bc = bitcast i8*()* @function_decl to i8* + ret i8* %bc +} + attributes #0 = { null_pointer_is_valid } attributes #1 = { nounwind willreturn} diff --git a/llvm/test/Transforms/Attributor/noreturn_async.ll b/llvm/test/Transforms/Attributor/noreturn_async.ll index 4c0fc203eb095e..6c3526baba2bb7 100644 --- a/llvm/test/Transforms/Attributor/noreturn_async.ll +++ b/llvm/test/Transforms/Attributor/noreturn_async.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes -; RUN: opt -attributor -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s +; RUN: opt -attributor -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s ; ; This file 
is the same as noreturn_sync.ll but with a personality which ; indicates that the exception handler *can* catch asynchronous exceptions. As @@ -86,7 +86,7 @@ entry: ; CHECK-NOT: nounwind ; CHECK-NEXT: define ; CHECK-NEXT: entry: -; CHECK-NEXT: %call3 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(18) getelementptr inbounds ([18 x i8], [18 x i8]* @"??_C@_0BC@NKPAGFFJ@Exception?5caught?6?$AA@", i64 0, i64 0)) +; CHECK-NEXT: %call3 = call i32 (i8*, ...) @printf(i8* noundef nonnull dereferenceable(18) getelementptr inbounds ([18 x i8], [18 x i8]* @"??_C@_0BC@NKPAGFFJ@Exception?5caught?6?$AA@", i64 0, i64 0)) ; CHECK-NEXT: call void @"?overflow@@YAXXZ_may_throw"() ; CHECK-NEXT: unreachable %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @"??_C@_0BC@NKPAGFFJ@Exception?5caught?6?$AA@", i64 0, i64 0)) diff --git a/llvm/test/Transforms/Attributor/noreturn_sync.ll b/llvm/test/Transforms/Attributor/noreturn_sync.ll index 22b675427cf01f..0321b0ceafd76e 100644 --- a/llvm/test/Transforms/Attributor/noreturn_sync.ll +++ b/llvm/test/Transforms/Attributor/noreturn_sync.ll @@ -82,7 +82,7 @@ entry: ; CHECK-NOT: nounwind ; CHECK-NEXT: define ; CHECK-NEXT: entry: -; CHECK-NEXT: %call3 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(18) getelementptr inbounds ([18 x i8], [18 x i8]* @"??_C@_0BC@NKPAGFFJ@Exception?5caught?6?$AA@", i64 0, i64 0)) +; CHECK-NEXT: %call3 = call i32 (i8*, ...) @printf(i8* noundef nonnull dereferenceable(18) getelementptr inbounds ([18 x i8], [18 x i8]* @"??_C@_0BC@NKPAGFFJ@Exception?5caught?6?$AA@", i64 0, i64 0)) ; CHECK-NEXT: call void @"?overflow@@YAXXZ_may_throw"() ; CHECK-NEXT: unreachable %call3 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @"??_C@_0BC@NKPAGFFJ@Exception?5caught?6?$AA@", i64 0, i64 0)) diff --git a/llvm/test/Transforms/Attributor/nosync.ll b/llvm/test/Transforms/Attributor/nosync.ll index 1404cc4b34c23a..102b2e86ac2132 100644 --- a/llvm/test/Transforms/Attributor/nosync.ll +++ b/llvm/test/Transforms/Attributor/nosync.ll @@ -459,7 +459,7 @@ declare void @llvm.x86.sse2.clflush(i8*) define void @i_totally_sync() { ; CHECK: Function Attrs: nounwind ; CHECK-LABEL: define {{[^@]+}}@i_totally_sync() -; CHECK-NEXT: tail call void @llvm.x86.sse2.clflush(i8* nonnull align 4 dereferenceable(4) bitcast (i32* @a to i8*)) +; CHECK-NEXT: tail call void @llvm.x86.sse2.clflush(i8* noundef nonnull align 4 dereferenceable(4) bitcast (i32* @a to i8*)) ; CHECK-NEXT: ret void ; tail call void @llvm.x86.sse2.clflush(i8* bitcast (i32* @a to i8*)) diff --git a/llvm/test/Transforms/Attributor/noundef.ll b/llvm/test/Transforms/Attributor/noundef.ll new file mode 100644 index 00000000000000..b7c1d45205a607 --- /dev/null +++ b/llvm/test/Transforms/Attributor/noundef.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s 
--check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM + +declare void @unknown() + +declare void @bar(i32*) + +define void @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: [[X:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: call void @bar(i32* noundef nonnull align 4 dereferenceable(4) [[X]]) +; CHECK-NEXT: ret void +; + %x = alloca i32 + call void @unknown() + call void @bar(i32* %x) + ret void +} diff --git a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll index 7c114500328003..701b70926aaa2d 100644 --- a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll +++ b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll @@ -78,11 +78,11 @@ define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) { ; IS__TUNIT____-NEXT: store i32 3, i32* [[R0]], align 4 ; IS__TUNIT____-NEXT: store i32 5, i32* [[R1]], align 4 ; IS__TUNIT____-NEXT: store i32 1, i32* [[W0]], align 4 -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; IS__TUNIT____-NEXT: [[CALL3:%.*]] = call 
i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) -; IS__TUNIT____-NEXT: [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__TUNIT____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) +; IS__TUNIT____-NEXT: [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) ; IS__TUNIT____-NEXT: [[CALL5:%.*]] = call i32* @internal_ret0_nw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) ; IS__TUNIT____-NEXT: br label [[RETURN]] ; IS__TUNIT____: return: @@ -103,11 +103,11 @@ define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) { ; IS__CGSCC____-NEXT: store i32 3, 
i32* [[R0]], align 4 ; IS__CGSCC____-NEXT: store i32 5, i32* [[R1]], align 4 ; IS__CGSCC____-NEXT: store i32 1, i32* [[W0]], align 4 -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) -; IS__CGSCC____-NEXT: [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree 
noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) ; IS__CGSCC____-NEXT: [[CALL5:%.*]] = call i32* @internal_ret0_nw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) ; IS__CGSCC____-NEXT: br label [[RETURN]] ; IS__CGSCC____: return: diff --git a/llvm/test/Transforms/Attributor/readattrs.ll b/llvm/test/Transforms/Attributor/readattrs.ll index 2f2c18d293ba0f..37381026ab354a 100644 --- a/llvm/test/Transforms/Attributor/readattrs.ll +++ b/llvm/test/Transforms/Attributor/readattrs.ll @@ -241,7 +241,7 @@ define void @unsound_readnone(i8* %ignored, i8* %escaped_then_written) { ; CHECK-LABEL: define {{[^@]+}}@unsound_readnone ; CHECK-SAME: (i8* nocapture nofree readnone [[IGNORED:%.*]], i8* [[ESCAPED_THEN_WRITTEN:%.*]]) ; CHECK-NEXT: [[ADDR:%.*]] = alloca i8*, align 8 -; CHECK-NEXT: call void @escape_readnone_ptr(i8** nonnull align 8 dereferenceable(8) [[ADDR]], i8* noalias readnone [[ESCAPED_THEN_WRITTEN]]) +; CHECK-NEXT: call void @escape_readnone_ptr(i8** noundef nonnull align 8 dereferenceable(8) [[ADDR]], i8* noalias readnone [[ESCAPED_THEN_WRITTEN]]) ; CHECK-NEXT: [[ADDR_LD:%.*]] = load i8*, i8** [[ADDR]], align 8 ; CHECK-NEXT: store i8 0, i8* [[ADDR_LD]], align 1 ; CHECK-NEXT: ret void @@ -257,7 +257,7 @@ define void @unsound_readonly(i8* %ignored, i8* %escaped_then_written) { ; CHECK-LABEL: define {{[^@]+}}@unsound_readonly ; CHECK-SAME: (i8* nocapture nofree readnone [[IGNORED:%.*]], i8* [[ESCAPED_THEN_WRITTEN:%.*]]) ; CHECK-NEXT: [[ADDR:%.*]] = alloca i8*, align 8 -; CHECK-NEXT: call void @escape_readonly_ptr(i8** nonnull align 8 dereferenceable(8) [[ADDR]], i8* readonly [[ESCAPED_THEN_WRITTEN]]) +; CHECK-NEXT: call void 
@escape_readonly_ptr(i8** noundef nonnull align 8 dereferenceable(8) [[ADDR]], i8* readonly [[ESCAPED_THEN_WRITTEN]]) ; CHECK-NEXT: [[ADDR_LD:%.*]] = load i8*, i8** [[ADDR]], align 8 ; CHECK-NEXT: store i8 0, i8* [[ADDR_LD]], align 1 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/returned.ll b/llvm/test/Transforms/Attributor/returned.ll index b0007746592de5..2df7eebc6c0c59 100644 --- a/llvm/test/Transforms/Attributor/returned.ll +++ b/llvm/test/Transforms/Attributor/returned.ll @@ -314,8 +314,8 @@ define double* @ptr_scc_r1(double* %a, double* %r, double* %b) #0 { ; IS__TUNIT____-LABEL: define {{[^@]+}}@ptr_scc_r1 ; IS__TUNIT____-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone returned [[R:%.*]], double* nocapture nofree readnone [[B:%.*]]) ; IS__TUNIT____-NEXT: entry: -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) -; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL]]) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call noundef double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) +; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[A]], double* noalias nofree noundef readnone [[CALL]]) ; IS__TUNIT____-NEXT: ret double* [[CALL1]] ; ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable @@ -323,7 +323,7 @@ define double* @ptr_scc_r1(double* %a, double* %r, double* %b) #0 { ; IS__CGSCC____-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone returned [[R:%.*]], double* nocapture nofree readnone [[B:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[CALL:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone [[R]]) -; IS__CGSCC____-NEXT: [[CALL1:%.*]] = 
call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL]]) +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[A]], double* noalias nofree noundef readnone [[CALL]]) ; IS__CGSCC____-NEXT: ret double* [[CALL1]] ; entry: @@ -340,20 +340,20 @@ define double* @ptr_scc_r2(double* %a, double* %b, double* %r) #0 { ; IS__TUNIT____-NEXT: [[CMP:%.*]] = icmp ugt double* [[A]], [[B]] ; IS__TUNIT____-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; IS__TUNIT____: if.then: -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) -; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[B]], double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL]]) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call noundef double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) +; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[B]], double* noalias nofree readnone [[A]], double* noalias nofree noundef readnone [[CALL]]) ; IS__TUNIT____-NEXT: br label [[RETURN:%.*]] ; IS__TUNIT____: if.end: ; IS__TUNIT____-NEXT: [[CMP2:%.*]] = icmp ult double* [[A]], [[B]] ; IS__TUNIT____-NEXT: br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END12:%.*]] ; IS__TUNIT____: if.then3: -; IS__TUNIT____-NEXT: [[CALL4:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[B]]) -; IS__TUNIT____-NEXT: [[CALL5:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nocapture nofree readnone undef) -; IS__TUNIT____-NEXT: [[CALL6:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree 
readnone [[R]], double* noalias nofree readnone [[R]]) -; IS__TUNIT____-NEXT: [[CALL7:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL6]], double* noalias nocapture nofree readnone undef) -; IS__TUNIT____-NEXT: [[CALL8:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nofree readnone [[R]]) -; IS__TUNIT____-NEXT: [[CALL9:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[CALL5]], double* noalias nofree readnone [[CALL7]], double* noalias nofree readnone [[CALL8]]) -; IS__TUNIT____-NEXT: [[CALL11:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[CALL4]], double* noalias nofree readnone [[CALL9]], double* noalias nocapture nofree readnone undef) +; IS__TUNIT____-NEXT: [[CALL4:%.*]] = call noundef double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[B]]) +; IS__TUNIT____-NEXT: [[CALL5:%.*]] = call noundef double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nocapture nofree readnone undef) +; IS__TUNIT____-NEXT: [[CALL6:%.*]] = call noundef double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]]) +; IS__TUNIT____-NEXT: [[CALL7:%.*]] = call noundef double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree noundef readnone [[CALL6]], double* noalias nocapture nofree readnone undef) +; IS__TUNIT____-NEXT: [[CALL8:%.*]] = call noundef double* @ptr_scc_r2(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nofree readnone [[R]]) +; IS__TUNIT____-NEXT: [[CALL9:%.*]] = call noundef double* @ptr_scc_r2(double* noalias nofree noundef readnone [[CALL5]], double* noalias nofree noundef readnone [[CALL7]], double* noalias nofree noundef readnone [[CALL8]]) +; IS__TUNIT____-NEXT: 
[[CALL11:%.*]] = call double* @ptr_scc_r1(double* noalias nofree noundef readnone [[CALL4]], double* noalias nofree noundef readnone [[CALL9]], double* noalias nocapture nofree noundef readnone undef) ; IS__TUNIT____-NEXT: br label [[RETURN]] ; IS__TUNIT____: if.end12: ; IS__TUNIT____-NEXT: [[CMP13:%.*]] = icmp eq double* [[A]], [[B]] @@ -378,19 +378,19 @@ define double* @ptr_scc_r2(double* %a, double* %b, double* %r) #0 { ; IS__CGSCC____-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; IS__CGSCC____: if.then: ; IS__CGSCC____-NEXT: [[CALL:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone [[R]]) -; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[B]], double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL]]) +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[B]], double* noalias nofree readnone [[A]], double* noalias nofree noundef readnone [[CALL]]) ; IS__CGSCC____-NEXT: br label [[RETURN:%.*]] ; IS__CGSCC____: if.end: ; IS__CGSCC____-NEXT: [[CMP2:%.*]] = icmp ult double* [[A]], [[B]] ; IS__CGSCC____-NEXT: br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END12:%.*]] ; IS__CGSCC____: if.then3: ; IS__CGSCC____-NEXT: [[CALL4:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone [[B]]) -; IS__CGSCC____-NEXT: [[CALL5:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nocapture nofree readnone undef) -; IS__CGSCC____-NEXT: [[CALL6:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]]) -; IS__CGSCC____-NEXT: [[CALL7:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL6]], double* noalias nocapture nofree readnone undef) -; IS__CGSCC____-NEXT: [[CALL8:%.*]] = call double* 
@ptr_scc_r2(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nofree readnone [[R]]) -; IS__CGSCC____-NEXT: [[CALL9:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[CALL5]], double* noalias nofree readnone [[CALL7]], double* noalias nofree readnone [[CALL8]]) -; IS__CGSCC____-NEXT: [[CALL11:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[CALL4]], double* noalias nofree readnone [[CALL9]], double* noalias nocapture nofree readnone undef) +; IS__CGSCC____-NEXT: [[CALL5:%.*]] = call noundef double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nocapture nofree readnone undef) +; IS__CGSCC____-NEXT: [[CALL6:%.*]] = call noundef double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]]) +; IS__CGSCC____-NEXT: [[CALL7:%.*]] = call noundef double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree noundef readnone [[CALL6]], double* noalias nocapture nofree readnone undef) +; IS__CGSCC____-NEXT: [[CALL8:%.*]] = call noundef double* @ptr_scc_r2(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nofree readnone [[R]]) +; IS__CGSCC____-NEXT: [[CALL9:%.*]] = call noundef double* @ptr_scc_r2(double* noalias nofree noundef readnone [[CALL5]], double* noalias nofree noundef readnone [[CALL7]], double* noalias nofree noundef readnone [[CALL8]]) +; IS__CGSCC____-NEXT: [[CALL11:%.*]] = call double* @ptr_scc_r1(double* noalias nofree noundef readnone [[CALL4]], double* noalias nofree noundef readnone [[CALL9]], double* noalias nocapture nofree noundef readnone undef) ; IS__CGSCC____-NEXT: br label [[RETURN]] ; IS__CGSCC____: if.end12: ; IS__CGSCC____-NEXT: [[CMP13:%.*]] = icmp eq double* [[A]], [[B]] @@ -605,7 +605,7 @@ define i32* @calls_unknown_fn(i32* %r) #0 { ; CHECK: Function Attrs: noinline 
nounwind uwtable ; CHECK-LABEL: define {{[^@]+}}@calls_unknown_fn ; CHECK-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[R:%.*]]) -; CHECK-NEXT: tail call void @unknown_fn(i32* (i32*)* nonnull @calls_unknown_fn) +; CHECK-NEXT: tail call void @unknown_fn(i32* (i32*)* noundef nonnull @calls_unknown_fn) ; CHECK-NEXT: ret i32* [[R]] ; tail call void @unknown_fn(i32* (i32*)* nonnull @calls_unknown_fn) diff --git a/llvm/test/Transforms/Attributor/undefined_behavior.ll b/llvm/test/Transforms/Attributor/undefined_behavior.ll index 22c2979e23defe..b4a02671b7cdc3 100644 --- a/llvm/test/Transforms/Attributor/undefined_behavior.ll +++ b/llvm/test/Transforms/Attributor/undefined_behavior.ll @@ -704,12 +704,12 @@ ret: define void @arg_nonnull_violation1_1() { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@arg_nonnull_violation1_1() -; IS__TUNIT____-NEXT: call void @arg_nonnull_1(i32* noalias nocapture nofree nonnull writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_1(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@arg_nonnull_violation1_1() -; IS__CGSCC____-NEXT: call void @arg_nonnull_1(i32* noalias nocapture nofree nonnull writeonly align 536870912 dereferenceable(4) null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_1(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 dereferenceable(4) null) ; IS__CGSCC____-NEXT: ret void ; call void @arg_nonnull_1(i32* null) @@ -734,13 +734,13 @@ define void @arg_nonnull_violation2_1(i1 %c) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@arg_nonnull_violation2_1 ; IS__TUNIT____-SAME: (i1 [[C:%.*]]) -; IS__TUNIT____-NEXT: call void @arg_nonnull_1(i32* 
nocapture nofree nonnull writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_1(i32* nocapture nofree noundef nonnull writeonly align 536870912 null) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@arg_nonnull_violation2_1 ; IS__CGSCC____-SAME: (i1 [[C:%.*]]) -; IS__CGSCC____-NEXT: call void @arg_nonnull_1(i32* nocapture nofree nonnull writeonly align 536870912 dereferenceable(4) null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_1(i32* nocapture nofree noundef nonnull writeonly align 536870912 dereferenceable(4) null) ; IS__CGSCC____-NEXT: ret void ; %null = getelementptr i32, i32* null, i32 0 @@ -774,16 +774,16 @@ define void @arg_nonnull_violation3_1(i1 %c) { ; IS__TUNIT____-NEXT: [[PTR:%.*]] = alloca i32, align 4 ; IS__TUNIT____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__TUNIT____: t: -; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) -; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree writeonly align 536870912 null) 
+; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__TUNIT____-NEXT: br label [[RET:%.*]] ; IS__TUNIT____: f: -; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) -; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; 
IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__TUNIT____-NEXT: br label [[RET]] ; IS__TUNIT____: ret: ; IS__TUNIT____-NEXT: ret void @@ -794,16 +794,16 @@ define void @arg_nonnull_violation3_1(i1 %c) { ; IS__CGSCC____-NEXT: [[PTR:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__CGSCC____: t: -; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* 
nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) -; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef writeonly align 536870912 null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__CGSCC____-NEXT: br label [[RET:%.*]] ; IS__CGSCC____: f: -; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias 
nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) -; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef writeonly align 536870912 null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* noalias nocapture nofree noundef nonnull writeonly align 
536870912 null, i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__CGSCC____-NEXT: br label [[RET]] ; IS__CGSCC____: ret: ; IS__CGSCC____-NEXT: ret void @@ -833,12 +833,12 @@ define void @arg_nonnull_violation3_2(i1 %c) { ; IS__TUNIT____-NEXT: [[PTR:%.*]] = alloca i32, align 4 ; IS__TUNIT____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__TUNIT____: t: -; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__TUNIT____-NEXT: unreachable ; IS__TUNIT____: f: -; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* 
nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__TUNIT____-NEXT: unreachable ; IS__TUNIT____: ret: ; IS__TUNIT____-NEXT: ret void @@ -849,12 +849,12 @@ define void @arg_nonnull_violation3_2(i1 %c) { ; IS__CGSCC____-NEXT: [[PTR:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__CGSCC____: t: -; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree noundef nonnull writeonly align 4 
dereferenceable(4) [[PTR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__CGSCC____-NEXT: unreachable ; IS__CGSCC____: f: -; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__CGSCC____-NEXT: unreachable ; IS__CGSCC____: ret: ; IS__CGSCC____-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll index 3a487a9ed22995..7ae8cd37801171 100644 --- a/llvm/test/Transforms/Attributor/value-simplify.ll +++ b/llvm/test/Transforms/Attributor/value-simplify.ll @@ -321,12 +321,12 @@ define i32 @ipccp3() { define internal i32* @test_inalloca(i32* inalloca %a) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@test_inalloca -; IS__TUNIT____-SAME: (i32* inalloca noalias 
nofree returned writeonly align 536870912 "no-capture-maybe-returned" [[A:%.*]]) +; IS__TUNIT____-SAME: (i32* inalloca noalias nofree noundef returned writeonly align 536870912 "no-capture-maybe-returned" [[A:%.*]]) ; IS__TUNIT____-NEXT: ret i32* [[A]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_inalloca -; IS__CGSCC____-SAME: (i32* inalloca noalias nofree returned writeonly align 536870912 "no-capture-maybe-returned" [[A:%.*]]) +; IS__CGSCC____-SAME: (i32* inalloca noalias nofree noundef returned writeonly align 536870912 "no-capture-maybe-returned" [[A:%.*]]) ; IS__CGSCC____-NEXT: ret i32* [[A]] ; ret i32* %a @@ -334,12 +334,12 @@ define internal i32* @test_inalloca(i32* inalloca %a) { define i32* @complicated_args_inalloca() { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@complicated_args_inalloca() -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__TUNIT____-NEXT: ret i32* [[CALL]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@complicated_args_inalloca() -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__CGSCC____-NEXT: ret i32* [[CALL]] ; %call = call i32* @test_inalloca(i32* null) @@ -349,12 +349,12 @@ define i32* @complicated_args_inalloca() { define internal i32* @test_preallocated(i32* preallocated(i32) %a) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; 
IS__TUNIT____-LABEL: define {{[^@]+}}@test_preallocated -; IS__TUNIT____-SAME: (i32* noalias nofree returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]]) +; IS__TUNIT____-SAME: (i32* noalias nofree noundef returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]]) ; IS__TUNIT____-NEXT: ret i32* [[A]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_preallocated -; IS__CGSCC____-SAME: (i32* noalias nofree returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]]) +; IS__CGSCC____-SAME: (i32* noalias nofree noundef returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]]) ; IS__CGSCC____-NEXT: ret i32* [[A]] ; ret i32* %a @@ -363,25 +363,25 @@ define i32* @complicated_args_preallocated() { ; IS__TUNIT_OPM: Function Attrs: nounwind ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@complicated_args_preallocated() ; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 1) -; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree writeonly preallocated(i32) align 536870912 null) [[ATTR5:#.*]] [ "preallocated"(token [[C]]) ] +; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) [[ATTR5:#.*]] [ "preallocated"(token [[C]]) ] ; IS__TUNIT_OPM-NEXT: ret i32* [[CALL]] ; ; IS__TUNIT_NPM: Function Attrs: nounwind ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@complicated_args_preallocated() ; IS__TUNIT_NPM-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 1) -; IS__TUNIT_NPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree writeonly preallocated(i32) align 536870912 null) [[ATTR4:#.*]] [ "preallocated"(token [[C]]) ] +; IS__TUNIT_NPM-NEXT: [[CALL:%.*]] = call i32* 
@test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) [[ATTR4:#.*]] [ "preallocated"(token [[C]]) ] ; IS__TUNIT_NPM-NEXT: ret i32* [[CALL]] ; ; IS__CGSCC_OPM: Function Attrs: nounwind ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@complicated_args_preallocated() ; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 1) -; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree writeonly preallocated(i32) align 536870912 null) [[ATTR6:#.*]] [ "preallocated"(token [[C]]) ] +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) [[ATTR6:#.*]] [ "preallocated"(token [[C]]) ] ; IS__CGSCC_OPM-NEXT: ret i32* [[CALL]] ; ; IS__CGSCC_NPM: Function Attrs: nounwind ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@complicated_args_preallocated() ; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 1) -; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree writeonly preallocated(i32) align 536870912 null) [[ATTR5:#.*]] [ "preallocated"(token [[C]]) ] +; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) [[ATTR5:#.*]] [ "preallocated"(token [[C]]) ] ; IS__CGSCC_NPM-NEXT: ret i32* [[CALL]] ; %c = call token @llvm.call.preallocated.setup(i32 1) @@ -393,13 +393,13 @@ define internal void @test_sret(%struct.X* sret %a, %struct.X** %b) { ; ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly ; IS__TUNIT____-LABEL: define {{[^@]+}}@test_sret -; IS__TUNIT____-SAME: (%struct.X* noalias nofree nonnull sret writeonly align 536870912 dereferenceable(8) [[A:%.*]], %struct.X** nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) +; IS__TUNIT____-SAME: (%struct.X* noalias nofree 
noundef nonnull sret writeonly align 536870912 dereferenceable(8) [[A:%.*]], %struct.X** nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) ; IS__TUNIT____-NEXT: store %struct.X* [[A]], %struct.X** [[B]], align 8 ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_sret -; IS__CGSCC____-SAME: (%struct.X* noalias nofree nonnull sret writeonly align 536870912 dereferenceable(8) [[A:%.*]], %struct.X** nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) +; IS__CGSCC____-SAME: (%struct.X* noalias nofree noundef nonnull sret writeonly align 536870912 dereferenceable(8) [[A:%.*]], %struct.X** nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) ; IS__CGSCC____-NEXT: store %struct.X* [[A]], %struct.X** [[B]], align 8 ; IS__CGSCC____-NEXT: ret void ; @@ -412,14 +412,13 @@ define void @complicated_args_sret(%struct.X** %b) { ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly ; IS__TUNIT____-LABEL: define {{[^@]+}}@complicated_args_sret ; IS__TUNIT____-SAME: (%struct.X** nocapture nofree writeonly [[B:%.*]]) -; IS__TUNIT____-NEXT: call void @test_sret(%struct.X* noalias nocapture nofree writeonly align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) +; IS__TUNIT____-NEXT: call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@complicated_args_sret ; IS__CGSCC____-SAME: (%struct.X** nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) -; IS__CGSCC____-NEXT: call void @test_sret(%struct.X* noalias nocapture nofree nonnull writeonly align 536870912 
dereferenceable(8) null, %struct.X** nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[B]]) -; IS__CGSCC____-NEXT: ret void +; IS__CGSCC____-NEXT: unreachable ; call void @test_sret(%struct.X* null, %struct.X** %b) ret void @@ -428,12 +427,12 @@ define void @complicated_args_sret(%struct.X** %b) { define internal %struct.X* @test_nest(%struct.X* nest %a) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@test_nest -; IS__TUNIT____-SAME: (%struct.X* nest noalias nofree readnone returned align 536870912 "no-capture-maybe-returned" [[A:%.*]]) +; IS__TUNIT____-SAME: (%struct.X* nest noalias nofree noundef readnone returned align 536870912 "no-capture-maybe-returned" [[A:%.*]]) ; IS__TUNIT____-NEXT: ret %struct.X* [[A]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_nest -; IS__CGSCC____-SAME: (%struct.X* nest noalias nofree readnone returned align 536870912 "no-capture-maybe-returned" [[A:%.*]]) +; IS__CGSCC____-SAME: (%struct.X* nest noalias nofree noundef readnone returned align 536870912 "no-capture-maybe-returned" [[A:%.*]]) ; IS__CGSCC____-NEXT: ret %struct.X* [[A]] ; ret %struct.X* %a @@ -441,12 +440,12 @@ define internal %struct.X* @test_nest(%struct.X* nest %a) { define %struct.X* @complicated_args_nest() { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@complicated_args_nest() -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nocapture nofree readnone align 536870912 null) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nocapture nofree noundef readnone align 536870912 null) ; IS__TUNIT____-NEXT: ret %struct.X* [[CALL]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define 
{{[^@]+}}@complicated_args_nest() -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nocapture nofree readnone align 536870912 null) +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nocapture nofree noundef readnone align 536870912 null) ; IS__CGSCC____-NEXT: ret %struct.X* [[CALL]] ; %call = call %struct.X* @test_nest(%struct.X* null) @@ -457,7 +456,7 @@ define %struct.X* @complicated_args_nest() { define internal void @test_byval(%struct.X* byval %a) { ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test_byval -; IS__CGSCC_OPM-SAME: (%struct.X* noalias nocapture nofree nonnull writeonly byval align 8 dereferenceable(8) [[A:%.*]]) +; IS__CGSCC_OPM-SAME: (%struct.X* noalias nocapture nofree noundef nonnull writeonly byval align 8 dereferenceable(8) [[A:%.*]]) ; IS__CGSCC_OPM-NEXT: [[G0:%.*]] = getelementptr [[STRUCT_X:%.*]], %struct.X* [[A]], i32 0, i32 0 ; IS__CGSCC_OPM-NEXT: store i8* null, i8** [[G0]], align 8 ; IS__CGSCC_OPM-NEXT: ret void diff --git a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll new file mode 100644 index 00000000000000..4c5f9ef05bad9f --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll @@ -0,0 +1,856 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=thumbv8m.main -indvars -S < %s | FileCheck %s --check-prefix=CHECK-V8M +; RUN: opt -mtriple=thumbv8a -indvars -S < %s | FileCheck %s --check-prefix=CHECK-V8A + +define i32 @remove_loop(i32 %size) #0 { +; CHECK-V8M-LABEL: @remove_loop( +; CHECK-V8M-NEXT: entry: +; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31 +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SIZE]], 31 +; CHECK-V8M-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SIZE]], i32 31 +; CHECK-V8M-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[UMIN]] +; 
CHECK-V8M-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 5 +; CHECK-V8M-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], 5 +; CHECK-V8M-NEXT: br label [[WHILE_COND:%.*]] +; CHECK-V8M: while.cond: +; CHECK-V8M-NEXT: br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]] +; CHECK-V8M: while.end: +; CHECK-V8M-NEXT: [[TMP5:%.*]] = sub i32 [[SIZE]], [[TMP4]] +; CHECK-V8M-NEXT: ret i32 [[TMP5]] +; +; CHECK-V8A-LABEL: @remove_loop( +; CHECK-V8A-NEXT: entry: +; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31 +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SIZE]], 31 +; CHECK-V8A-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SIZE]], i32 31 +; CHECK-V8A-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[UMIN]] +; CHECK-V8A-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 5 +; CHECK-V8A-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], 5 +; CHECK-V8A-NEXT: br label [[WHILE_COND:%.*]] +; CHECK-V8A: while.cond: +; CHECK-V8A-NEXT: br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]] +; CHECK-V8A: while.end: +; CHECK-V8A-NEXT: [[TMP5:%.*]] = sub i32 [[SIZE]], [[TMP4]] +; CHECK-V8A-NEXT: ret i32 [[TMP5]] +; +entry: + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %size.addr.0 = phi i32 [ %size, %entry ], [ %sub, %while.cond ] + %cmp = icmp ugt i32 %size.addr.0, 31 + %sub = add i32 %size.addr.0, -32 + br i1 %cmp, label %while.cond, label %while.end + +while.end: ; preds = %while.cond + %size.lcssa = phi i32 [ %size.addr.0, %while.cond ] + ret i32 %size.lcssa +} + +define void @expandOuterRecurrence(i32 %arg) nounwind #0 { +; CHECK-V8M-LABEL: @expandOuterRecurrence( +; CHECK-V8M-NEXT: entry: +; CHECK-V8M-NEXT: [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1 +; CHECK-V8M-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] +; CHECK-V8M-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK-V8M: outer.preheader: +; CHECK-V8M-NEXT: br label [[OUTER:%.*]] +; CHECK-V8M: outer: +; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ] 
+; CHECK-V8M-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] +; CHECK-V8M-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 +; CHECK-V8M-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]] +; CHECK-V8M-NEXT: br i1 [[CMP2]], label [[INNER_PH:%.*]], label [[OUTER_INC]] +; CHECK-V8M: inner.ph: +; CHECK-V8M-NEXT: br label [[INNER:%.*]] +; CHECK-V8M: inner: +; CHECK-V8M-NEXT: br i1 false, label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] +; CHECK-V8M: outer.inc.loopexit: +; CHECK-V8M-NEXT: br label [[OUTER_INC]] +; CHECK-V8M: outer.inc: +; CHECK-V8M-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 +; CHECK-V8M-NEXT: br i1 false, label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-V8M: exit.loopexit: +; CHECK-V8M-NEXT: br label [[EXIT]] +; CHECK-V8M: exit: +; CHECK-V8M-NEXT: ret void +; +; CHECK-V8A-LABEL: @expandOuterRecurrence( +; CHECK-V8A-NEXT: entry: +; CHECK-V8A-NEXT: [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1 +; CHECK-V8A-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] +; CHECK-V8A-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK-V8A: outer.preheader: +; CHECK-V8A-NEXT: br label [[OUTER:%.*]] +; CHECK-V8A: outer: +; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ] +; CHECK-V8A-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] +; CHECK-V8A-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 +; CHECK-V8A-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]] +; CHECK-V8A-NEXT: br i1 [[CMP2]], label [[INNER_PH:%.*]], label [[OUTER_INC]] +; CHECK-V8A: inner.ph: +; CHECK-V8A-NEXT: br label [[INNER:%.*]] +; CHECK-V8A: inner: +; CHECK-V8A-NEXT: br i1 false, label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] +; CHECK-V8A: outer.inc.loopexit: +; CHECK-V8A-NEXT: br label [[OUTER_INC]] +; CHECK-V8A: outer.inc: +; CHECK-V8A-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 +; CHECK-V8A-NEXT: br i1 false, label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-V8A: exit.loopexit: +; CHECK-V8A-NEXT: br label [[EXIT]] +; CHECK-V8A: exit: +; 
CHECK-V8A-NEXT: ret void +; +entry: + %sub1 = sub nsw i32 %arg, 1 + %cmp1 = icmp slt i32 0, %sub1 + br i1 %cmp1, label %outer, label %exit + +outer: + %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ] + %sub2 = sub nsw i32 %arg, %i + %sub3 = sub nsw i32 %sub2, 1 + %cmp2 = icmp slt i32 0, %sub3 + br i1 %cmp2, label %inner.ph, label %outer.inc + +inner.ph: + br label %inner + +inner: + %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ] + %j.inc = add nsw i32 %j, 1 + %cmp3 = icmp slt i32 %j.inc, %sub3 + br i1 %cmp3, label %inner, label %outer.inc + +outer.inc: + %i.inc = add nsw i32 %i, 1 + %cmp4 = icmp slt i32 %i.inc, %sub1 + br i1 %cmp4, label %outer, label %exit + +exit: + ret void +} + +define i32 @test1(i32* %array, i32 %length, i32 %n) #0 { +; CHECK-V8M-LABEL: @test1( +; CHECK-V8M-NEXT: loop.preheader: +; CHECK-V8M-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-V8M-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 +; CHECK-V8M-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] +; CHECK-V8M-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] +; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8M-NEXT: br label [[LOOP:%.*]] +; CHECK-V8M: loop: +; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-V8M-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M: deopt: +; CHECK-V8M-NEXT: call void @prevent_merging() +; CHECK-V8M-NEXT: ret i32 -1 +; CHECK-V8M: guarded: +; CHECK-V8M-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-V8M-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-V8M-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-V8M-NEXT: [[LOOP_ACC_NEXT]] = add i32 
[[LOOP_ACC]], [[ARRAY_I]] +; CHECK-V8M-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-V8M-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-V8M-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-V8M: exit: +; CHECK-V8M-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-V8M-NEXT: ret i32 [[RESULT]] +; +; CHECK-V8A-LABEL: @test1( +; CHECK-V8A-NEXT: loop.preheader: +; CHECK-V8A-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-V8A-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 +; CHECK-V8A-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] +; CHECK-V8A-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] +; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8A-NEXT: br label [[LOOP:%.*]] +; CHECK-V8A: loop: +; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-V8A-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A: deopt: +; CHECK-V8A-NEXT: call void @prevent_merging() +; CHECK-V8A-NEXT: ret i32 -1 +; CHECK-V8A: guarded: +; CHECK-V8A-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-V8A-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-V8A-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-V8A-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-V8A-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-V8A-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-V8A-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-V8A: exit: +; CHECK-V8A-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-V8A-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + 
br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded, %entry + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +declare void @maythrow() + +define i32 @test2(i32* %array, i32 %length, i32 %n) #0 { +; CHECK-V8M-LABEL: @test2( +; CHECK-V8M-NEXT: loop.preheader: +; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP0]] +; CHECK-V8M-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[LENGTH]], i32 [[TMP0]] +; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8M-NEXT: br label [[LOOP:%.*]] +; CHECK-V8M: loop: +; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-V8M-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M: deopt: +; CHECK-V8M-NEXT: call void @prevent_merging() +; CHECK-V8M-NEXT: ret i32 -1 +; CHECK-V8M: guarded: +; CHECK-V8M-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-V8M-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-V8M-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-V8M-NEXT: 
[[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-V8M-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-V8M-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[N]] +; CHECK-V8M-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-V8M: exit: +; CHECK-V8M-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-V8M-NEXT: ret i32 [[RESULT]] +; +; CHECK-V8A-LABEL: @test2( +; CHECK-V8A-NEXT: loop.preheader: +; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP0]] +; CHECK-V8A-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[LENGTH]], i32 [[TMP0]] +; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8A-NEXT: br label [[LOOP:%.*]] +; CHECK-V8A: loop: +; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-V8A-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A: deopt: +; CHECK-V8A-NEXT: call void @prevent_merging() +; CHECK-V8A-NEXT: ret i32 -1 +; CHECK-V8A: guarded: +; CHECK-V8A-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-V8A-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-V8A-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-V8A-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-V8A-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-V8A-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[N]] +; CHECK-V8A-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-V8A: exit: +; CHECK-V8A-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-V8A-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, 
%guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds = icmp ne i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ne i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded, %entry + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +define i32 @two_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32 %n) #0 { +; CHECK-V8M-LABEL: @two_range_checks( +; CHECK-V8M-NEXT: loop.preheader: +; CHECK-V8M-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_2:%.*]], [[LENGTH_1:%.*]] +; CHECK-V8M-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ult i32 [[LENGTH_2]], [[LENGTH_1]] +; CHECK-V8M-NEXT: [[UMIN1:%.*]] = select i1 [[TMP1]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] +; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-V8M-NEXT: [[UMAX:%.*]] = select i1 [[TMP2]], i32 [[N]], i32 1 +; CHECK-V8M-NEXT: [[TMP3:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8M-NEXT: [[TMP4:%.*]] = icmp ult i32 [[UMIN1]], [[TMP3]] +; CHECK-V8M-NEXT: [[UMIN2:%.*]] = select i1 [[TMP4]], i32 [[UMIN1]], i32 [[TMP3]] +; CHECK-V8M-NEXT: [[TMP5:%.*]] = icmp ne i32 [[UMIN]], [[UMIN2]] +; CHECK-V8M-NEXT: br label [[LOOP:%.*]] +; CHECK-V8M: loop: +; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-V8M-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; 
CHECK-V8M: deopt: +; CHECK-V8M-NEXT: call void @prevent_merging() +; CHECK-V8M-NEXT: ret i32 -1 +; CHECK-V8M: guarded: +; CHECK-V8M-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-V8M-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] +; CHECK-V8M-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 +; CHECK-V8M-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] +; CHECK-V8M-NEXT: [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_2:%.*]], i64 [[I_I64]] +; CHECK-V8M-NEXT: [[ARRAY_2_I:%.*]] = load i32, i32* [[ARRAY_2_I_PTR]], align 4 +; CHECK-V8M-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_2_I]] +; CHECK-V8M-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-V8M-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-V8M-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-V8M: exit: +; CHECK-V8M-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-V8M-NEXT: ret i32 [[RESULT]] +; +; CHECK-V8A-LABEL: @two_range_checks( +; CHECK-V8A-NEXT: loop.preheader: +; CHECK-V8A-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_2:%.*]], [[LENGTH_1:%.*]] +; CHECK-V8A-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ult i32 [[LENGTH_2]], [[LENGTH_1]] +; CHECK-V8A-NEXT: [[UMIN1:%.*]] = select i1 [[TMP1]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] +; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-V8A-NEXT: [[UMAX:%.*]] = select i1 [[TMP2]], i32 [[N]], i32 1 +; CHECK-V8A-NEXT: [[TMP3:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8A-NEXT: [[TMP4:%.*]] = icmp ult i32 [[UMIN1]], [[TMP3]] +; CHECK-V8A-NEXT: [[UMIN2:%.*]] = select i1 [[TMP4]], i32 [[UMIN1]], i32 [[TMP3]] +; CHECK-V8A-NEXT: [[TMP5:%.*]] = icmp ne i32 [[UMIN]], [[UMIN2]] +; CHECK-V8A-NEXT: br label [[LOOP:%.*]] +; CHECK-V8A: loop: +; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] 
], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-V8A-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A: deopt: +; CHECK-V8A-NEXT: call void @prevent_merging() +; CHECK-V8A-NEXT: ret i32 -1 +; CHECK-V8A: guarded: +; CHECK-V8A-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-V8A-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] +; CHECK-V8A-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 +; CHECK-V8A-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] +; CHECK-V8A-NEXT: [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_2:%.*]], i64 [[I_I64]] +; CHECK-V8A-NEXT: [[ARRAY_2_I:%.*]] = load i32, i32* [[ARRAY_2_I_PTR]], align 4 +; CHECK-V8A-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_2_I]] +; CHECK-V8A-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-V8A-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-V8A-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-V8A: exit: +; CHECK-V8A-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-V8A-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + %within.bounds.2 = icmp ult i32 %i, %length.2 + %within.bounds = and i1 %within.bounds.1, %within.bounds.2 + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 
%loop.acc, %array.1.i + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.1, %array.2.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded, %entry + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +define i32 @three_range_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32* %array.3, i32 %length.3, i32 %n) #0 { +; CHECK-V8M-LABEL: @three_range_checks( +; CHECK-V8M-NEXT: loop.preheader: +; CHECK-V8M-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_3:%.*]], [[LENGTH_2:%.*]] +; CHECK-V8M-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_3]], i32 [[LENGTH_2]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ult i32 [[UMIN]], [[LENGTH_1:%.*]] +; CHECK-V8M-NEXT: [[UMIN1:%.*]] = select i1 [[TMP1]], i32 [[UMIN]], i32 [[LENGTH_1]] +; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH_3]], [[LENGTH_2]] +; CHECK-V8M-NEXT: [[UMIN2:%.*]] = select i1 [[TMP2]], i32 [[LENGTH_3]], i32 [[LENGTH_2]] +; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ult i32 [[UMIN2]], [[LENGTH_1]] +; CHECK-V8M-NEXT: [[UMIN3:%.*]] = select i1 [[TMP3]], i32 [[UMIN2]], i32 [[LENGTH_1]] +; CHECK-V8M-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-V8M-NEXT: [[UMAX:%.*]] = select i1 [[TMP4]], i32 [[N]], i32 1 +; CHECK-V8M-NEXT: [[TMP5:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8M-NEXT: [[TMP6:%.*]] = icmp ult i32 [[UMIN3]], [[TMP5]] +; CHECK-V8M-NEXT: [[UMIN4:%.*]] = select i1 [[TMP6]], i32 [[UMIN3]], i32 [[TMP5]] +; CHECK-V8M-NEXT: [[TMP7:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN4]] +; CHECK-V8M-NEXT: br label [[LOOP:%.*]] +; CHECK-V8M: loop: +; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-V8M-NEXT: br i1 
[[TMP7]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M: deopt: +; CHECK-V8M-NEXT: call void @prevent_merging() +; CHECK-V8M-NEXT: ret i32 -1 +; CHECK-V8M: guarded: +; CHECK-V8M-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-V8M-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] +; CHECK-V8M-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 +; CHECK-V8M-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] +; CHECK-V8M-NEXT: [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_2:%.*]], i64 [[I_I64]] +; CHECK-V8M-NEXT: [[ARRAY_2_I:%.*]] = load i32, i32* [[ARRAY_2_I_PTR]], align 4 +; CHECK-V8M-NEXT: [[LOOP_ACC_2:%.*]] = add i32 [[LOOP_ACC_1]], [[ARRAY_2_I]] +; CHECK-V8M-NEXT: [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_3:%.*]], i64 [[I_I64]] +; CHECK-V8M-NEXT: [[ARRAY_3_I:%.*]] = load i32, i32* [[ARRAY_3_I_PTR]], align 4 +; CHECK-V8M-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_2]], [[ARRAY_3_I]] +; CHECK-V8M-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-V8M-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-V8M-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-V8M: exit: +; CHECK-V8M-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-V8M-NEXT: ret i32 [[RESULT]] +; +; CHECK-V8A-LABEL: @three_range_checks( +; CHECK-V8A-NEXT: loop.preheader: +; CHECK-V8A-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_3:%.*]], [[LENGTH_2:%.*]] +; CHECK-V8A-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_3]], i32 [[LENGTH_2]] +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ult i32 [[UMIN]], [[LENGTH_1:%.*]] +; CHECK-V8A-NEXT: [[UMIN1:%.*]] = select i1 [[TMP1]], i32 [[UMIN]], i32 [[LENGTH_1]] +; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH_3]], [[LENGTH_2]] +; CHECK-V8A-NEXT: [[UMIN2:%.*]] = select i1 [[TMP2]], i32 [[LENGTH_3]], i32 [[LENGTH_2]] +; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ult i32 [[UMIN2]], 
[[LENGTH_1]] +; CHECK-V8A-NEXT: [[UMIN3:%.*]] = select i1 [[TMP3]], i32 [[UMIN2]], i32 [[LENGTH_1]] +; CHECK-V8A-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-V8A-NEXT: [[UMAX:%.*]] = select i1 [[TMP4]], i32 [[N]], i32 1 +; CHECK-V8A-NEXT: [[TMP5:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8A-NEXT: [[TMP6:%.*]] = icmp ult i32 [[UMIN3]], [[TMP5]] +; CHECK-V8A-NEXT: [[UMIN4:%.*]] = select i1 [[TMP6]], i32 [[UMIN3]], i32 [[TMP5]] +; CHECK-V8A-NEXT: [[TMP7:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN4]] +; CHECK-V8A-NEXT: br label [[LOOP:%.*]] +; CHECK-V8A: loop: +; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-V8A-NEXT: br i1 [[TMP7]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A: deopt: +; CHECK-V8A-NEXT: call void @prevent_merging() +; CHECK-V8A-NEXT: ret i32 -1 +; CHECK-V8A: guarded: +; CHECK-V8A-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-V8A-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] +; CHECK-V8A-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 +; CHECK-V8A-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] +; CHECK-V8A-NEXT: [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_2:%.*]], i64 [[I_I64]] +; CHECK-V8A-NEXT: [[ARRAY_2_I:%.*]] = load i32, i32* [[ARRAY_2_I_PTR]], align 4 +; CHECK-V8A-NEXT: [[LOOP_ACC_2:%.*]] = add i32 [[LOOP_ACC_1]], [[ARRAY_2_I]] +; CHECK-V8A-NEXT: [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_3:%.*]], i64 [[I_I64]] +; CHECK-V8A-NEXT: [[ARRAY_3_I:%.*]] = load i32, i32* [[ARRAY_3_I_PTR]], align 4 +; CHECK-V8A-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_2]], [[ARRAY_3_I]] +; CHECK-V8A-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-V8A-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-V8A-NEXT: br i1 
[[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-V8A: exit: +; CHECK-V8A-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-V8A-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + %within.bounds.2 = icmp ult i32 %i, %length.2 + %within.bounds.3 = icmp ult i32 %i, %length.3 + %within.bounds.1.and.2 = and i1 %within.bounds.1, %within.bounds.2 + %within.bounds = and i1 %within.bounds.1.and.2, %within.bounds.3 + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.2 = add i32 %loop.acc.1, %array.2.i + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.2, %array.3.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: ; preds = %guarded, %entry + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +; Analogous to the above, but with two distinct branches (on different conditions) +define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32* %array.3, i32 %length.3, i32 %n) #0 { +; CHECK-V8M-LABEL: @distinct_checks( +; CHECK-V8M-NEXT: loop.preheader: +; CHECK-V8M-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_2:%.*]], 
[[LENGTH_1:%.*]] +; CHECK-V8M-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-V8M-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 1 +; CHECK-V8M-NEXT: [[TMP2:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ult i32 [[UMIN]], [[TMP2]] +; CHECK-V8M-NEXT: [[UMIN1:%.*]] = select i1 [[TMP3]], i32 [[UMIN]], i32 [[TMP2]] +; CHECK-V8M-NEXT: [[TMP4:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] +; CHECK-V8M-NEXT: [[TMP5:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] +; CHECK-V8M-NEXT: br label [[LOOP:%.*]] +; CHECK-V8M: loop: +; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-V8M-NEXT: br i1 [[TMP4]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M: deopt: +; CHECK-V8M-NEXT: call void @prevent_merging() +; CHECK-V8M-NEXT: ret i32 -1 +; CHECK-V8M: guarded: +; CHECK-V8M-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-V8M-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] +; CHECK-V8M-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 +; CHECK-V8M-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] +; CHECK-V8M-NEXT: br i1 [[TMP5]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8M: deopt2: +; CHECK-V8M-NEXT: call void @prevent_merging() +; CHECK-V8M-NEXT: ret i32 -1 +; CHECK-V8M: guarded1: +; CHECK-V8M-NEXT: [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_3:%.*]], i64 [[I_I64]] +; CHECK-V8M-NEXT: [[ARRAY_3_I:%.*]] = load i32, i32* [[ARRAY_3_I_PTR]], align 4 +; CHECK-V8M-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_3_I]] +; CHECK-V8M-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-V8M-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; 
CHECK-V8M-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-V8M: exit: +; CHECK-V8M-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED1]] ] +; CHECK-V8M-NEXT: ret i32 [[RESULT]] +; +; CHECK-V8A-LABEL: @distinct_checks( +; CHECK-V8A-NEXT: loop.preheader: +; CHECK-V8A-NEXT: [[TMP0:%.*]] = icmp ult i32 [[LENGTH_2:%.*]], [[LENGTH_1:%.*]] +; CHECK-V8A-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], i32 [[LENGTH_2]], i32 [[LENGTH_1]] +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-V8A-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 1 +; CHECK-V8A-NEXT: [[TMP2:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ult i32 [[UMIN]], [[TMP2]] +; CHECK-V8A-NEXT: [[UMIN1:%.*]] = select i1 [[TMP3]], i32 [[UMIN]], i32 [[TMP2]] +; CHECK-V8A-NEXT: [[TMP4:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] +; CHECK-V8A-NEXT: [[TMP5:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] +; CHECK-V8A-NEXT: br label [[LOOP:%.*]] +; CHECK-V8A: loop: +; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-V8A-NEXT: br i1 [[TMP4]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A: deopt: +; CHECK-V8A-NEXT: call void @prevent_merging() +; CHECK-V8A-NEXT: ret i32 -1 +; CHECK-V8A: guarded: +; CHECK-V8A-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-V8A-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] +; CHECK-V8A-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 +; CHECK-V8A-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] +; CHECK-V8A-NEXT: br i1 [[TMP5]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8A: deopt2: +; CHECK-V8A-NEXT: call void @prevent_merging() +; CHECK-V8A-NEXT: ret i32 -1 +; CHECK-V8A: guarded1: +; CHECK-V8A-NEXT: [[ARRAY_3_I_PTR:%.*]] = 
getelementptr inbounds i32, i32* [[ARRAY_3:%.*]], i64 [[I_I64]] +; CHECK-V8A-NEXT: [[ARRAY_3_I:%.*]] = load i32, i32* [[ARRAY_3_I_PTR]], align 4 +; CHECK-V8A-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_3_I]] +; CHECK-V8A-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-V8A-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-V8A-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-V8A: exit: +; CHECK-V8A-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED1]] ] +; CHECK-V8A-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded4, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded1 ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded1 ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + br i1 %within.bounds.1, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + %within.bounds.2 = icmp ult i32 %i, %length.2 + br i1 %within.bounds.2, label %guarded1, label %deopt2, !prof !0 + +deopt2: ; preds = %guarded + call void @prevent_merging() + ret i32 -1 + +guarded1: ; preds = %guarded1 + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.1, %array.3.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded1 ] + ret i32 %result +} + +define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %length, i32 %n) #0 { +; CHECK-V8M-LABEL: @duplicate_checks( +; CHECK-V8M-NEXT: loop.preheader: +; CHECK-V8M-NEXT: 
[[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-V8M-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 +; CHECK-V8M-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] +; CHECK-V8M-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] +; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8M-NEXT: br label [[LOOP:%.*]] +; CHECK-V8M: loop: +; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-V8M-NEXT: br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M: deopt: +; CHECK-V8M-NEXT: call void @prevent_merging() +; CHECK-V8M-NEXT: ret i32 -1 +; CHECK-V8M: guarded: +; CHECK-V8M-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-V8M-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] +; CHECK-V8M-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 +; CHECK-V8M-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] +; CHECK-V8M-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8M: deopt2: +; CHECK-V8M-NEXT: call void @prevent_merging() +; CHECK-V8M-NEXT: ret i32 -1 +; CHECK-V8M: guarded1: +; CHECK-V8M-NEXT: [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_3:%.*]], i64 [[I_I64]] +; CHECK-V8M-NEXT: [[ARRAY_3_I:%.*]] = load i32, i32* [[ARRAY_3_I_PTR]], align 4 +; CHECK-V8M-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_3_I]] +; CHECK-V8M-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-V8M-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-V8M-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-V8M: exit: +; CHECK-V8M-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED1]] ] +; CHECK-V8M-NEXT: ret i32 
[[RESULT]] +; +; CHECK-V8A-LABEL: @duplicate_checks( +; CHECK-V8A-NEXT: loop.preheader: +; CHECK-V8A-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-V8A-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 +; CHECK-V8A-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] +; CHECK-V8A-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] +; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8A-NEXT: br label [[LOOP:%.*]] +; CHECK-V8A: loop: +; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-V8A-NEXT: br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A: deopt: +; CHECK-V8A-NEXT: call void @prevent_merging() +; CHECK-V8A-NEXT: ret i32 -1 +; CHECK-V8A: guarded: +; CHECK-V8A-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-V8A-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] +; CHECK-V8A-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 +; CHECK-V8A-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] +; CHECK-V8A-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8A: deopt2: +; CHECK-V8A-NEXT: call void @prevent_merging() +; CHECK-V8A-NEXT: ret i32 -1 +; CHECK-V8A: guarded1: +; CHECK-V8A-NEXT: [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_3:%.*]], i64 [[I_I64]] +; CHECK-V8A-NEXT: [[ARRAY_3_I:%.*]] = load i32, i32* [[ARRAY_3_I_PTR]], align 4 +; CHECK-V8A-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_3_I]] +; CHECK-V8A-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-V8A-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-V8A-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-V8A: exit: +; 
CHECK-V8A-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED1]] ] +; CHECK-V8A-NEXT: ret i32 [[RESULT]] +; +loop.preheader: ; preds = %entry + br label %loop + +loop: ; preds = %guarded4, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded1 ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded1 ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length + br i1 %within.bounds.1, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void @prevent_merging() + ret i32 -1 + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + %within.bounds.2 = icmp ult i32 %i, %length + br i1 %within.bounds.2, label %guarded1, label %deopt2, !prof !0 + +deopt2: ; preds = %guarded + call void @prevent_merging() + ret i32 -1 + +guarded1: ; preds = %guarded1 + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.1, %array.3.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded1 ] + ret i32 %result +} + +; Demonstrate that this approach works with IVs of different steps, and types +; This version uses a manually lftred exit condition to work around an issue described +; in detail on next test. 
+define i32 @different_ivs(i32* %array, i32 %length, i32 %n) #0 { +; CHECK-V8M-LABEL: @different_ivs( +; CHECK-V8M-NEXT: loop.preheader: +; CHECK-V8M-NEXT: [[N64:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-V8M-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[N64]], 1 +; CHECK-V8M-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i64 [[N64]], i64 1 +; CHECK-V8M-NEXT: [[TMP1:%.*]] = add nsw i64 [[UMAX]], -1 +; CHECK-V8M-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH:%.*]] to i64 +; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] +; CHECK-V8M-NEXT: [[UMIN:%.*]] = select i1 [[TMP3]], i64 [[TMP1]], i64 [[TMP2]] +; CHECK-V8M-NEXT: [[TMP4:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-V8M-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], [[UMIN]] +; CHECK-V8M-NEXT: br label [[LOOP:%.*]] +; CHECK-V8M: loop: +; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-V8M-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-V8M-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M: deopt: +; CHECK-V8M-NEXT: call void @prevent_merging() +; CHECK-V8M-NEXT: ret i32 -1 +; CHECK-V8M: guarded: +; CHECK-V8M-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I]] +; CHECK-V8M-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-V8M-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-V8M-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 +; CHECK-V8M-NEXT: [[CONTINUE:%.*]] = icmp ult i64 [[I_NEXT]], [[N64]] +; CHECK-V8M-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-V8M: exit: +; CHECK-V8M-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-V8M-NEXT: ret i32 [[RESULT]] +; +; CHECK-V8A-LABEL: @different_ivs( +; CHECK-V8A-NEXT: loop.preheader: +; CHECK-V8A-NEXT: [[N64:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-V8A-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[N64]], 1 +; 
CHECK-V8A-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i64 [[N64]], i64 1 +; CHECK-V8A-NEXT: [[TMP1:%.*]] = add nsw i64 [[UMAX]], -1 +; CHECK-V8A-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH:%.*]] to i64 +; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] +; CHECK-V8A-NEXT: [[UMIN:%.*]] = select i1 [[TMP3]], i64 [[TMP1]], i64 [[TMP2]] +; CHECK-V8A-NEXT: [[TMP4:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-V8A-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], [[UMIN]] +; CHECK-V8A-NEXT: br label [[LOOP:%.*]] +; CHECK-V8A: loop: +; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] +; CHECK-V8A-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-V8A-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A: deopt: +; CHECK-V8A-NEXT: call void @prevent_merging() +; CHECK-V8A-NEXT: ret i32 -1 +; CHECK-V8A: guarded: +; CHECK-V8A-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I]] +; CHECK-V8A-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-V8A-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-V8A-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 +; CHECK-V8A-NEXT: [[CONTINUE:%.*]] = icmp ult i64 [[I_NEXT]], [[N64]] +; CHECK-V8A-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-V8A: exit: +; CHECK-V8A-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-V8A-NEXT: ret i32 [[RESULT]] +; +loop.preheader: + %j.start = sub nuw nsw i32 %length, 1 + %n64 = zext i32 %n to i64 + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i64 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %j = phi i32 [ %j.next, %guarded ], [ %j.start, %loop.preheader ] + %within.bounds = icmp ne i32 %j, -1 + br i1 %within.bounds, label %guarded, label %deopt, !prof !0 + +deopt: + call void @prevent_merging() 
+ ret i32 -1 + +guarded: + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i64 %i, 1 + %j.next = sub nuw i32 %j, 1 + %continue = icmp ult i64 %i.next, %n64 + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +declare void @prevent_merging() +declare void @call() + +!0 = !{!"branch_weights", i32 1048576, i32 1} +!1 = !{i32 1, i32 -2147483648} +!2 = !{i32 0, i32 50} + +attributes #0 = { minsize optsize } diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll index 0b474045d59675..f8b70afea3803c 100644 --- a/llvm/test/Transforms/InstCombine/fabs.ll +++ b/llvm/test/Transforms/InstCombine/fabs.ll @@ -4,6 +4,7 @@ ; Make sure libcalls are replaced with intrinsic calls. declare float @llvm.fabs.f32(float) +declare <2 x float> @llvm.fabs.v2f32(<2 x float>) declare double @llvm.fabs.f64(double) declare fp128 @llvm.fabs.f128(fp128) @@ -13,6 +14,8 @@ declare fp128 @fabsl(fp128) declare float @llvm.fma.f32(float, float, float) declare float @llvm.fmuladd.f32(float, float, float) +declare void @use(float) + define float @replace_fabs_call_f32(float %x) { ; CHECK-LABEL: @replace_fabs_call_f32( ; CHECK-NEXT: [[FABSF:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]]) @@ -116,8 +119,8 @@ define float @square_fabs_shrink_call1(float %x) { define float @square_fabs_shrink_call2(float %x) { ; CHECK-LABEL: @square_fabs_shrink_call2( ; CHECK-NEXT: [[SQ:%.*]] = fmul float [[X:%.*]], [[X]] -; CHECK-NEXT: [[TRUNC:%.*]] = call float @llvm.fabs.f32(float [[SQ]]) -; CHECK-NEXT: ret float [[TRUNC]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[SQ]]) +; CHECK-NEXT: ret float [[TMP1]] ; %sq = fmul float %x, %x %ext = fpext float %sq to double @@ -745,3 +748,54 @@ define half @select_fcmp_nnan_nsz_uge_negzero_unary_fneg(half %x) { %fabs 
= select i1 %gezero, half %x, half %negx ret half %fabs } + +define float @select_fneg(i1 %c, float %x) { +; CHECK-LABEL: @select_fneg( +; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]]) +; CHECK-NEXT: ret float [[FABS]] +; + %n = fneg float %x + %s = select i1 %c, float %n, float %x + %fabs = call float @llvm.fabs.f32(float %s) + ret float %fabs +} + +define float @select_fneg_use1(i1 %c, float %x) { +; CHECK-LABEL: @select_fneg_use1( +; CHECK-NEXT: [[N:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: call void @use(float [[N]]) +; CHECK-NEXT: [[FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: ret float [[FABS]] +; + %n = fneg float %x + call void @use(float %n) + %s = select i1 %c, float %x, float %n + %fabs = call fast float @llvm.fabs.f32(float %s) + ret float %fabs +} + +define float @select_fneg_use2(i1 %c, float %x) { +; CHECK-LABEL: @select_fneg_use2( +; CHECK-NEXT: [[N:%.*]] = fneg arcp float [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[C:%.*]], float [[N]], float [[X]] +; CHECK-NEXT: call void @use(float [[S]]) +; CHECK-NEXT: [[FABS:%.*]] = call nnan nsz float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: ret float [[FABS]] +; + %n = fneg arcp float %x + %s = select i1 %c, float %n, float %x + call void @use(float %s) + %fabs = call nnan nsz float @llvm.fabs.f32(float %s) + ret float %fabs +} + +define <2 x float> @select_fneg_vec(<2 x i1> %c, <2 x float> %x) { +; CHECK-LABEL: @select_fneg_vec( +; CHECK-NEXT: [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]]) +; CHECK-NEXT: ret <2 x float> [[FABS]] +; + %n = fneg <2 x float> %x + %s = select fast <2 x i1> %c, <2 x float> %x, <2 x float> %n + %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %s) + ret <2 x float> %fabs +} diff --git a/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll b/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll index 3d0fdcdf962f63..d1793343da6cd6 100644 --- 
a/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll +++ b/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll @@ -420,3 +420,60 @@ end: %i8 = insertvalue { i32, i32 } %i7, i32 %i3, 1 ret { i32, i32 } %i8 } + +; Most basic test - diamond structure, but with a switch, which results in multiple duplicate predecessors +define { i32, i32 } @test8({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 %c, i32 %val_left, i32 %val_right) { +; CHECK-LABEL: @test8( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[LEFT:%.*]], label [[RIGHT:%.*]] +; CHECK: left: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: switch i32 [[VAL_LEFT:%.*]], label [[IMPOSSIBLE:%.*]] [ +; CHECK-NEXT: i32 -42, label [[END:%.*]] +; CHECK-NEXT: i32 42, label [[END]] +; CHECK-NEXT: ] +; CHECK: right: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: switch i32 [[VAL_RIGHT:%.*]], label [[IMPOSSIBLE]] [ +; CHECK-NEXT: i32 42, label [[END]] +; CHECK-NEXT: i32 -42, label [[END]] +; CHECK-NEXT: ] +; CHECK: impossible: +; CHECK-NEXT: unreachable +; CHECK: end: +; CHECK-NEXT: [[I8_MERGED:%.*]] = phi { i32, i32 } [ [[AGG_RIGHT:%.*]], [[RIGHT]] ], [ [[AGG_RIGHT]], [[RIGHT]] ], [ [[AGG_LEFT:%.*]], [[LEFT]] ], [ [[AGG_LEFT]], [[LEFT]] ] +; CHECK-NEXT: call void @baz() +; CHECK-NEXT: ret { i32, i32 } [[I8_MERGED]] +; +entry: + br i1 %c, label %left, label %right + +left: + %i0 = extractvalue { i32, i32 } %agg_left, 0 + %i2 = extractvalue { i32, i32 } %agg_left, 1 + call void @foo() + switch i32 %val_left, label %impossible [ + i32 -42, label %end + i32 42, label %end + ] + +right: + %i3 = extractvalue { i32, i32 } %agg_right, 0 + %i4 = extractvalue { i32, i32 } %agg_right, 1 + call void @bar() + switch i32 %val_right, label %impossible [ + i32 42, label %end + i32 -42, label %end + ] + +impossible: + unreachable + +end: + %i5 = phi i32 [ %i0, %left ], [ %i0, %left ], [ %i3, %right ], [ %i3, %right ] + %i6 = phi i32 [ %i2, %left ], [ %i2, %left ], [ %i4, %right ], [ 
%i4, %right ] + call void @baz() + %i7 = insertvalue { i32, i32 } undef, i32 %i5, 0 + %i8 = insertvalue { i32, i32 } %i7, i32 %i6, 1 + ret { i32, i32 } %i8 +} diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll new file mode 100644 index 00000000000000..da5b5a60a400ca --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll @@ -0,0 +1,644 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -loop-vectorize -tail-predication=enabled -dce -instcombine -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-none-none-eabi" + +define i32 @reduction_sum_single(i32* noalias nocapture %A) { +; CHECK-LABEL: @reduction_sum_single( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 256) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[TMP2]] = add <4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP3]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !2 +; CHECK: ._crit_edge: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l7 = add i32 %sum.02, %l3 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph + %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ] + ret i32 %sum.0.lcssa +} + +define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) { +; CHECK-LABEL: @reduction_sum( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]] +; CHECK-NEXT: 
[[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 256) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP6]] = add <4 x i32> [[TMP5]], [[WIDE_MASKED_LOAD1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP6]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) +; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !5 +; CHECK: ._crit_edge: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ] + %l2 = getelementptr 
inbounds i32, i32* %A, i32 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l4 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv + %l5 = load i32, i32* %l4, align 4 + %l7 = add i32 %sum.02, %indvars.iv + %l8 = add i32 %l7, %l3 + %l9 = add i32 %l8, %l5 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph + %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ] + ret i32 %sum.0.lcssa +} + +define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) { +; CHECK-LABEL: @reduction_prod( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = mul <4 x i32> [[TMP4]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: br i1 false, label 
[[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[PROD_02:%.*]] = phi i32 [ [[L9:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L3:%.*]] = load i32, i32* [[L2]], align 4 +; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L5:%.*]] = load i32, i32* [[L4]], align 4 +; CHECK-NEXT: [[L8:%.*]] = mul i32 [[PROD_02]], [[L3]] +; CHECK-NEXT: [[L9]] = mul i32 [[L8]], [[L5]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !7 +; CHECK: ._crit_edge: +; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = phi i32 [ [[L9]], [[DOTLR_PH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PROD_0_LCSSA]] +; +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l4 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv + %l5 = load i32, i32* %l4, align 4 + %l8 = mul i32 %prod.02, %l3 + %l9 = mul i32 %l8, %l5 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph + %prod.0.lcssa = phi i32 [ %l9, %.lr.ph ] + ret 
i32 %prod.0.lcssa +} + +define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) { +; CHECK-LABEL: @reduction_and( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = and <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = and <4 x i32> [[TMP4]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ -1, [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[AND:%.*]], [[FOR_BODY]] ], [ 
[[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = and i32 [[RESULT_08]], [[L0]] +; CHECK-NEXT: [[AND]] = and i32 [[ADD]], [[L1]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !9 +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv + %l1 = load i32, i32* %arrayidx2, align 4 + %add = and i32 %result.08, %l0 + %and = and i32 %add, %l1 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ %and, %for.body ] + ret i32 %result.0.lcssa +} + +define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) { +; CHECK-LABEL: @reduction_or( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: 
[[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = or <4 x i32> [[TMP4]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !10 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[OR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[L1]], [[L0]] +; 
CHECK-NEXT: [[OR]] = or i32 [[ADD]], [[RESULT_08]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !11 +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[OR]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv + %l1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %l1, %l0 + %or = or i32 %add, %result.08 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ %or, %for.body ] + ret i32 %result.0.lcssa +} + +define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) { +; CHECK-LABEL: @reduction_xor( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 
[[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = xor <4 x i32> [[TMP4]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !12 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[XOR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[L1]], [[L0]] +; CHECK-NEXT: [[XOR]] = xor i32 [[ADD]], [[RESULT_08]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !13 +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; 
CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv + %l1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %l1, %l0 + %xor = xor i32 %add, %result.08 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ %xor, %for.body ] + ret i32 %result.0.lcssa +} + +define float @reduction_fadd(float* nocapture %A, float* nocapture %B) { +; CHECK-LABEL: @reduction_fadd( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = fadd fast <4 x float> [[TMP4]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !14 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP5]]) +; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[FADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L0:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[B]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L1:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[RESULT_08]], [[L0]] +; CHECK-NEXT: [[FADD]] = fadd fast float [[ADD]], [[L1]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !15 +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ [[FADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ] + %arrayidx = 
getelementptr inbounds float, float* %A, i32 %indvars.iv + %l0 = load float, float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, float* %B, i32 %indvars.iv + %l1 = load float, float* %arrayidx2, align 4 + %add = fadd fast float %result.08, %l0 + %fadd = fadd fast float %add, %l1 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi float [ %fadd, %for.body ] + ret float %result.0.lcssa +} + +define float @reduction_fmul(float* nocapture %A, float* nocapture %B) { +; CHECK-LABEL: @reduction_fmul( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = fmul fast <4 x float> [[TMP4]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !16 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call fast float 
@llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> [[TMP5]]) +; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[FMUL:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L0:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[B]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L1:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = fmul fast float [[RESULT_08]], [[L0]] +; CHECK-NEXT: [[FMUL]] = fmul fast float [[ADD]], [[L1]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !17 +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ [[FMUL]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ] + %arrayidx = getelementptr inbounds float, float* %A, i32 %indvars.iv + %l0 = load float, float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, float* %B, i32 %indvars.iv + %l1 = load float, float* %arrayidx2, align 4 + %add = fmul 
fast float %result.08, %l0 + %fmul = fmul fast float %add, %l1 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi float [ %fmul, %for.body ] + ret float %result.0.lcssa +} + +define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) { +; CHECK-LABEL: @reduction_min( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !18 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP3]]) +; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 1000, [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], 
[ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[RESULT_08]], [[L0]] +; CHECK-NEXT: [[V0]] = select i1 [[C0]], i32 [[RESULT_08]], i32 [[L0]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !19 +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[V0]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l0 = load i32, i32* %arrayidx, align 4 + %c0 = icmp slt i32 %result.08, %l0 + %v0 = select i1 %c0, i32 %result.08, i32 %l0 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ %v0, %for.body ] + ret i32 %result.0.lcssa +} + +define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) { +; CHECK-LABEL: @reduction_max( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: 
[[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !20 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> [[TMP3]]) +; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 1000, [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[C0:%.*]] = icmp ugt i32 [[RESULT_08]], [[L0]] +; CHECK-NEXT: [[V0]] = select i1 [[C0]], i32 [[RESULT_08]], i32 [[L0]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !21 +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[V0]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; 
+entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l0 = load i32, i32* %arrayidx, align 4 + %c0 = icmp ugt i32 %result.08, %l0 + %v0 = select i1 %c0, i32 %result.08, i32 %l0 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ %v0, %for.body ] + ret i32 %result.0.lcssa +} + +define float @reduction_fmax(float* nocapture %A, float* nocapture %B) { +; CHECK-LABEL: @reduction_fmax( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[V0:%.*]], [[FOR_BODY]] ], [ 1.000000e+03, [[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L0:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[C0:%.*]] = fcmp ogt float [[RESULT_08]], [[L0]] +; CHECK-NEXT: [[V0]] = select i1 [[C0]], float [[RESULT_08]], float [[L0]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret float [[V0]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi float [ %v0, %for.body ], [ 1000.0, %entry ] + %arrayidx = getelementptr inbounds float, float* %A, i32 %indvars.iv + %l0 = load float, float* %arrayidx, align 4 + %c0 = fcmp ogt float %result.08, %l0 + %v0 
= select i1 %c0, float %result.08, float %l0 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi float [ %v0, %for.body ] + ret float %result.0.lcssa +} diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll new file mode 100644 index 00000000000000..aaae03b9fb3a32 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll @@ -0,0 +1,305 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -prefer-predicate-over-epilog -force-reduction-intrinsics -dce -instcombine -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + +define i32 @reduction_sum_single(i32* noalias nocapture %A) { +; CHECK-LABEL: @reduction_sum_single( +; CHECK: vector.body: +; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP24:%.*]], %pred.load.continue6 ] +; CHECK: [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23:%.*]] +; CHECK: middle.block: +; CHECK: [[TMP26:%.*]] = select <4 x i1> [[TMP0:%.*]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] +; CHECK: [[TMP27:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP26]]) +; +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l7 = add i32 %sum.02, %l3 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph + %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ] + ret i32 
%sum.0.lcssa +} + +define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) { +; CHECK-LABEL: @reduction_sum( +; CHECK: vector.body: +; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ] +; CHECK: [[TMP44:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND:%.*]] +; CHECK: [[TMP45:%.*]] = add <4 x i32> [[TMP44]], [[TMP23:%.*]] +; CHECK: [[TMP46]] = add <4 x i32> [[TMP45]], [[TMP43:%.*]] +; CHECK: middle.block: +; CHECK: [[TMP48:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]] +; CHECK: [[TMP49:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP48]]) +; +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l4 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv + %l5 = load i32, i32* %l4, align 4 + %l7 = add i32 %sum.02, %indvars.iv + %l8 = add i32 %l7, %l3 + %l9 = add i32 %l8, %l5 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph + %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ] + ret i32 %sum.0.lcssa +} + +define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) { +; CHECK-LABEL: @reduction_prod( +; CHECK: vector.body: +; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[TMP45:%.*]], %pred.load.continue14 ] +; CHECK: [[TMP44:%.*]] = mul <4 x i32> [[VEC_PHI]], [[TMP23:%.*]] +; CHECK: [[TMP45]] = mul <4 x i32> [[TMP44]], [[TMP43:%.*]] +; CHECK: middle.block: +; CHECK: [[TMP47:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]] +; CHECK: [[TMP48:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[TMP47]]) 
+; +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l4 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv + %l5 = load i32, i32* %l4, align 4 + %l8 = mul i32 %prod.02, %l3 + %l9 = mul i32 %l8, %l5 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph + %prod.0.lcssa = phi i32 [ %l9, %.lr.ph ] + ret i32 %prod.0.lcssa +} + +define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) { +; CHECK-LABEL: @reduction_and( +; CHECK: vector.body: +; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[TMP45:%.*]], %pred.load.continue14 ] +; CHECK: [[TMP44:%.*]] = and <4 x i32> [[VEC_PHI]], [[TMP42:%.*]] +; CHECK: [[TMP45]] = and <4 x i32> [[TMP44]], [[TMP43]] +; CHECK: middle.block: +; CHECK: [[TMP47:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]] +; CHECK: [[TMP48:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> [[TMP47]]) +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv + %l1 = load i32, i32* %arrayidx2, align 4 + %add = and i32 %result.08, %l0 + %and = and i32 %add, %l1 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ %and, %for.body ] + ret i32 %result.0.lcssa 
+} + +define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) { +; CHECK-LABEL: @reduction_or( +; CHECK: vector.body: +; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP45:%.*]], %pred.load.continue14 ] +; CHECK: [[TMP45]] = or <4 x i32> [[TMP44:%.*]], [[VEC_PHI]] +; CHECK: middle.block: +; CHECK: [[TMP47:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]] +; CHECK: [[TMP48:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> [[TMP47]]) +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv + %l1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %l1, %l0 + %or = or i32 %add, %result.08 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ %or, %for.body ] + ret i32 %result.0.lcssa +} + +define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) { +; CHECK-LABEL: @reduction_xor( +; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP45:%.*]], %pred.load.continue14 ] +; CHECK: [[TMP45]] = xor <4 x i32> [[TMP44:%.*]], [[VEC_PHI]] +; CHECK: middle.block: +; CHECK: [[TMP47:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]] +; CHECK: [[TMP48:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> [[TMP47]]) +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr 
inbounds i32, i32* %A, i32 %indvars.iv + %l0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv + %l1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %l1, %l0 + %xor = xor i32 %add, %result.08 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ %xor, %for.body ] + ret i32 %result.0.lcssa +} + +define float @reduction_fadd(float* nocapture %A, float* nocapture %B) { +; CHECK-LABEL: @reduction_fadd( +; CHECK: vector.body: +; CHECK: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %vector.ph ], [ [[TMP45:%.*]], %pred.load.continue14 ] +; CHECK: [[TMP44:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP23:%.*]] +; CHECK: [[TMP45]] = fadd fast <4 x float> [[TMP44]], [[TMP43]] +; CHECK: middle.block: +; CHECK: [[TMP47:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x float> [[TMP45]], <4 x float> [[VEC_PHI]] +; CHECK: [[TMP48:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP47]]) +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ] + %arrayidx = getelementptr inbounds float, float* %A, i32 %indvars.iv + %l0 = load float, float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, float* %B, i32 %indvars.iv + %l1 = load float, float* %arrayidx2, align 4 + %add = fadd fast float %result.08, %l0 + %fadd = fadd fast float %add, %l1 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi float [ %fadd, %for.body ] + ret float %result.0.lcssa +} + +define float @reduction_fmul(float* 
nocapture %A, float* nocapture %B) { +; CHECK-LABEL: @reduction_fmul( +; CHECK: vector.body: +; CHECK: [[VEC_PHI:%.*]] = phi <4 x float> [ , %vector.ph ], [ [[TMP45:%.*]], %pred.load.continue14 ] +; CHECK: [[TMP44:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[TMP23:%.*]] +; CHECK: [[TMP45]] = fmul fast <4 x float> [[TMP44]], [[TMP43]] +; CHECK: middle.block: +; CHECK: [[TMP47:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x float> [[TMP45]], <4 x float> [[VEC_PHI]] +; CHECK: [[TMP48:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> [[TMP47]]) +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ] + %arrayidx = getelementptr inbounds float, float* %A, i32 %indvars.iv + %l0 = load float, float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, float* %B, i32 %indvars.iv + %l1 = load float, float* %arrayidx2, align 4 + %add = fmul fast float %result.08, %l0 + %fmul = fmul fast float %add, %l1 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi float [ %fmul, %for.body ] + ret float %result.0.lcssa +} + +define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) { +; CHECK-LABEL: @reduction_min( +; CHECK: vector.body: +; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[TMP25:%.*]], %pred.load.continue6 ] +; CHECK: [[TMP24:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP23:%.*]] +; CHECK: [[TMP25]] = select <4 x i1> [[TMP24]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]] +; CHECK: middle.block: +; CHECK: [[TMP27:%.*]] = select <4 x i1> [[TMP0:%.*]], <4 x i32> [[TMP25]], <4 x i32> [[VEC_PHI]] +; CHECK: [[TMP28:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP27]]) +; 
+entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l0 = load i32, i32* %arrayidx, align 4 + %c0 = icmp slt i32 %result.08, %l0 + %v0 = select i1 %c0, i32 %result.08, i32 %l0 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ %v0, %for.body ] + ret i32 %result.0.lcssa +} + +define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) { +; CHECK-LABEL: @reduction_max( +; CHECK: vector.body: +; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[TMP25:%.*]], %pred.load.continue6 ] +; CHECK: [[TMP24:%.*]] = icmp ugt <4 x i32> [[VEC_PHI]], [[TMP23:%.*]] +; CHECK: [[TMP25]] = select <4 x i1> [[TMP24]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]] +; CHECK: middle.block: +; CHECK: [[TMP27:%.*]] = select <4 x i1> [[TMP0:%.*]], <4 x i32> [[TMP25]], <4 x i32> [[VEC_PHI]] +; CHECK: [[TMP28:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> [[TMP27]]) +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l0 = load i32, i32* %arrayidx, align 4 + %c0 = icmp ugt i32 %result.08, %l0 + %v0 = select i1 %c0, i32 %result.08, i32 %l0 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ %v0, %for.body ] + ret i32 %result.0.lcssa +} diff --git a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll 
b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll index daebe4b52ace5a..7f55ad12af2d71 100644 --- a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll +++ b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll @@ -1,9 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature --scrub-attributes -; RUN: opt -S -passes=openmpopt -aa-pipeline=basic-aa < %s | FileCheck %s +; RUN: opt -S -passes=openmpopt -aa-pipeline=basic-aa -openmp-hide-memory-transfer-latency < %s | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -; FIXME: This struct should be generated after splitting at least one of the runtime calls. -; %struct.__tgt_async_info = type { i8* } +; CHECK: %struct.__tgt_async_info = type { i8* } %struct.ident_t = type { i32, i32, i32, i32, i8* } %struct.__tgt_offload_entry = type { i8*, i8*, i64, i32, i32 } @@ -58,7 +57,10 @@ define dso_local double @heavyComputation1() { ; CHECK-NEXT: %3 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs, i64 0, i64 0 ; CHECK-NEXT: %4 = bitcast [1 x i8*]* %.offload_ptrs to double** ; CHECK-NEXT: store double* %a, double** %4, align 8 -; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null) + +; CHECK-NEXT: %handle = call %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64 -1, i32 1, i8** %1, i8** %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null) +; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle) + ; CHECK-NEXT: %5 = bitcast double* %a to i64* ; CHECK-NEXT: %6 = 
load i64, i64* %5, align 8 ; CHECK-NEXT: %7 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_baseptrs4, i64 0, i64 0 @@ -102,11 +104,6 @@ entry: %3 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs, i64 0, i64 0 %4 = bitcast [1 x i8*]* %.offload_ptrs to double** store double* %a, double** %4, align 8 - ; FIXME: This setup for the runtime call __tgt_target_data_begin_mapper should be - ; split into its "issue" and "wait" counterpars and moved upwards - ; and downwards, respectively. - ; %handle = call i8* @__tgt_target_data_begin_mapper_issue(...) - ; call void @__tgt_target_data_begin_wait(i64 -1, %struct.__tgt_async_info %handle) call void @__tgt_target_data_begin_mapper(i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null) %5 = bitcast double* %a to i64* @@ -186,7 +183,10 @@ define dso_local i32 @heavyComputation2(double* %a, i32 %size) { ; CHECK-NEXT: store i32* %size.addr, i32** %9, align 8 ; CHECK-NEXT: %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1 ; CHECK-NEXT: store i64 4, i64* %10, align 8 -; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null) + +; CHECK-NEXT: %handle = call %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64 -1, i32 2, i8** %1, i8** %3, i64* %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null) +; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle) + ; CHECK-NEXT: %11 = load i32, i32* %size.addr, align 4 ; CHECK-NEXT: %size.casted = zext i32 %11 to i64 ; CHECK-NEXT: %12 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs2, 
i64 0, i64 0 @@ -241,12 +241,6 @@ entry: store i32* %size.addr, i32** %9, align 8 %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1 store i64 4, i64* %10, align 8 - ; FIXME: This setup for the runtime call __tgt_target_data_begin_mapper should be - ; split into its "issue" and "wait" counterpars and moved upwards - ; and downwards, respectively. Here though, the "issue" cannot be moved upwards - ; because it's not guaranteed that rand() won't modify *a. - ; %handle = call i8* @__tgt_target_data_begin_mapper_issue(...) - ; call void @__tgt_target_data_begin_wait(i64 -1, %struct.__tgt_async_info %handle) call void @__tgt_target_data_begin_mapper(i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null) %11 = load i32, i32* %size.addr, align 4 @@ -330,7 +324,10 @@ define dso_local i32 @heavyComputation3(double* noalias %a, i32 %size) { ; CHECK-NEXT: store i32* %size.addr, i32** %9, align 8 ; CHECK-NEXT: %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1 ; CHECK-NEXT: store i64 4, i64* %10, align 8 -; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null) + +; CHECK-NEXT: %handle = call %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64 -1, i32 2, i8** %1, i8** %3, i64* %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null) +; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle) + ; CHECK-NEXT: %11 = load i32, i32* %size.addr, align 4 ; CHECK-NEXT: %size.casted = zext i32 %11 to i64 ; CHECK-NEXT: %12 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs2, i64 0, i64 0 @@ -386,11 +383,6 @@ entry: store i32* %size.addr, 
i32** %9, align 8 %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1 store i64 4, i64* %10, align 8 - ; FIXME: This setup for the runtime call __tgt_target_data_begin_mapper should be - ; split into its "issue" and "wait" counterpars and moved upwards - ; and downwards, respectively. - ; %handle = call i8* @__tgt_target_data_begin_mapper_issue(...) - ; call void @__tgt_target_data_begin_wait(i64 -1, %struct.__tgt_async_info %handle) call void @__tgt_target_data_begin_mapper(i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null) %11 = load i32, i32* %size.addr, align 4 @@ -459,7 +451,10 @@ define dso_local i32 @dataTransferOnly1(double* noalias %a, i32 %size) { ; CHECK-NEXT: store double* %a, double** %4, align 8 ; CHECK-NEXT: %5 = getelementptr inbounds [1 x i64], [1 x i64]* %.offload_sizes, i64 0, i64 0 ; CHECK-NEXT: store i64 %0, i64* %5, align 8 -; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null) + +; CHECK-NEXT: %handle = call %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64 -1, i32 1, i8** %1, i8** %3, i64* %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null) +; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle) + ; CHECK-NEXT: %rem = urem i32 %call, %size ; CHECK-NEXT: call void @__tgt_target_data_end_mapper(i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null) ; CHECK-NEXT: ret i32 %rem @@ -482,13 +477,6 @@ entry: store double* %a, double** %4, align 8 %5 = getelementptr inbounds [1 x i64], [1 x i64]* %.offload_sizes, i64 0, i64 
0 store i64 %0, i64* %5, align 8 - ; FIXME: This setup for the runtime call __tgt_target_data_begin_mapper should be - ; split into its "issue" and "wait" counterpars and moved upwards - ; and downwards, respectively. Here though, the "wait" cannot be moved downwards - ; because it is not worthit. That is, there is no store nor call to be hoisted - ; over. - ; %handle = call i8* @__tgt_target_data_begin_mapper_issue(...) - ; call void @__tgt_target_data_begin_wait(i64 -1, %struct.__tgt_async_info %handle) call void @__tgt_target_data_begin_mapper(i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null) %rem = urem i32 %call, %size @@ -503,7 +491,5 @@ declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8* declare dso_local i32 @rand(...) -; FIXME: These two function declarations must be generated after splitting the runtime function -; __tgt_target_data_begin_mapper. -; declare %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64, i32, i8**, i8**, i64*, i64*, i8**) -; declare void @__tgt_target_data_begin_mapper_wait(i64, %struct.__tgt_async_info) +; CHECK: declare %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: declare void @__tgt_target_data_begin_mapper_wait(i64, %struct.__tgt_async_info) diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll index 07976660546f8f..b9e739a62b5b97 100644 --- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll @@ -27,7 +27,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 define void @delete_parallel_0() { ; CHECK-LABEL: define {{[^@]+}}@delete_parallel_0() ; CHECK-NEXT: entry: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 0, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @0, i32 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*)) ; CHECK-NEXT: ret void ; entry: @@ -99,9 +99,9 @@ entry: define void @delete_parallel_1() { ; CHECK-LABEL: define {{[^@]+}}@delete_parallel_1() ; CHECK-NEXT: entry: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 0, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 0, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*)) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 0, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @0, i32 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @0, i32 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @0, i32 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) ; CHECK-NEXT: ret void ; entry: @@ -190,10 +190,10 @@ define void @delete_parallel_2() { ; CHECK-NEXT: [[TMP:%.*]] = bitcast i32* [[A]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull align 4 dereferenceable(4) [[TMP]]) #0 ; CHECK-NEXT: store i32 0, i32* [[A]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 1, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree nonnull align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 1, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nonnull align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 1, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nonnull align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 1, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @0, i32 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @0, i32 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @0, i32 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @0, i32 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TMP1]]) ; CHECK-NEXT: ret void @@ -214,7 +214,7 @@ entry: define internal void @.omp_outlined..3(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree nonnull align 4 dereferenceable(4) [[A:%.*]]) #6 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #6 ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #4 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 @@ -244,7 +244,7 @@ if.end: ; preds = %if.then, %entry define internal void @.omp_outlined..4(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4 -; CHECK-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nonnull align 4 dereferenceable(4) [[A:%.*]]) +; CHECK-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-NEXT: 
[[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* nonnull @0, i32 [[TMP]]) @@ -286,7 +286,7 @@ declare void @__kmpc_end_master(%struct.ident_t*, i32) define internal void @.omp_outlined..5(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 -; CHECK-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nonnull align 4 dereferenceable(4) [[A:%.*]]) +; CHECK-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 @@ -324,7 +324,7 @@ omp_if.end: ; preds = %entry, %omp_if.then define internal void @.omp_outlined..6(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6 -; CHECK-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nonnull align 4 dereferenceable(4) [[A:%.*]]) +; CHECK-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A1:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/limit.ll b/llvm/test/Transforms/SLPVectorizer/X86/limit.ll index 41db490a754f6b..e6d78c0c0e378e 
100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/limit.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/limit.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s --instcombine -slp-vectorizer -S | FileCheck %s +; RUN: opt < %s -slp-vectorizer -S | FileCheck %s + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -31,40 +32,40 @@ define void @addsub() { ; CHECK-NEXT: ret void ; entry: - %0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 0), align 4 - %1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 0), align 4 - %add = add nsw i32 %0, %1 br label %bb1 -bb1: ; preds = %entry - %2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 0), align 4 - %3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 0), align 4 + +bb1: + %0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i64 0), align 16 + %1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i64 0, i64 0), align 16 + %add = add nsw i32 %0, %1 + %2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i64 0, i64 0), align 16 + %3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i64 0, i64 0), align 16 %add1 = add nsw i32 %2, %3 %add2 = add nsw i32 %add, %add1 - store i32 %add2, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 0), align 4 - %4 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 1), align 4 - %5 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 1), align 4 + store i32 %add2, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 0), align 16 + %4 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i64 1), align 4 + %5 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i64 0, i64 1), align 
4 %add3 = add nsw i32 %4, %5 - %6 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 1), align 4 - %7 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 1), align 4 + %6 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i64 0, i64 1), align 4 + %7 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i64 0, i64 1), align 4 %add4 = add nsw i32 %6, %7 %sub = sub nsw i32 %add3, %add4 - store i32 %sub, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 1), align 4 - %8 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 2), align 4 - %9 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 2), align 4 + store i32 %sub, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 1), align 4 + %8 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i64 2), align 8 + %9 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i64 0, i64 2), align 8 %add5 = add nsw i32 %8, %9 - %10 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 2), align 4 - %11 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 2), align 4 + %10 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i64 0, i64 2), align 8 + %11 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i64 0, i64 2), align 8 %add6 = add nsw i32 %10, %11 %add7 = add nsw i32 %add5, %add6 - store i32 %add7, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 2), align 4 - %12 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 3), align 4 - %13 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 3), align 4 + store i32 %add7, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 2), align 8 + %12 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i64 3), align 4 + 
%13 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i64 0, i64 3), align 4 %add8 = add nsw i32 %12, %13 - %14 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 3), align 4 - %15 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 3), align 4 + %14 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i64 0, i64 3), align 4 + %15 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i64 0, i64 3), align 4 %add9 = add nsw i32 %14, %15 %sub10 = sub nsw i32 %add8, %add9 - store i32 %sub10, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 3), align 4 + store i32 %sub10, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 3), align 4 ret void } - diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index 524f48332b7cb8..104c8c2d025faf 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -373,3 +373,29 @@ define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceabl %r = insertelement <8 x i32> undef, i32 %s, i32 0 ret <8 x i32> %r } + +; TODO: Should load v4f32. + +define <8 x float> @load_f32_insert_v8f32(float* align 16 dereferenceable(16) %p) { +; CHECK-LABEL: @load_f32_insert_v8f32( +; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4 +; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> undef, float [[S]], i32 0 +; CHECK-NEXT: ret <8 x float> [[R]] +; + %s = load float, float* %p, align 4 + %r = insertelement <8 x float> undef, float %s, i32 0 + ret <8 x float> %r +} + +; TODO: Should load v4f32. 
+ +define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p) { +; CHECK-LABEL: @load_f32_insert_v2f32( +; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> undef, float [[S]], i32 0 +; CHECK-NEXT: ret <2 x float> [[R]] +; + %s = load float, float* %p, align 4 + %r = insertelement <2 x float> undef, float %s, i32 0 + ret <2 x float> %r +} diff --git a/llvm/test/tools/llvm-dwarfdump/X86/statistics.ll b/llvm/test/tools/llvm-dwarfdump/X86/statistics.ll index 589375ac6f55b2..bd717dfc85b388 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/statistics.ll +++ b/llvm/test/tools/llvm-dwarfdump/X86/statistics.ll @@ -23,6 +23,11 @@ ; ; int boo(int, int) {} +; struct T { +; void empty(); +; }; +; void T::empty() {} + ; Following variables/arguments/members should be counted: ; - GlobalConst, ; - Global, @@ -30,16 +35,17 @@ ; - square::i, ; - cube::i, cube::squared ; - boo::1, boo::2 +; - this in T::empty() ; Skipped entities: ; - declaration of test::a, ; - non-constant member S:fn, ; - arguments of S:fn. -; CHECK: "#unique source variables":9 +; CHECK: "#unique source variables":10 ; +1 extra inline i. -; CHECK: "#source variables":10 +; CHECK: "#source variables":11 ; -1 square::i -; CHECK: "#source variables with location":9 +; CHECK: "#source variables with location":10 ; CHECK: "sum_all_local_vars(#bytes in parent scope)":[[BYTES:[0-9]+]] ; Because of the dbg.value in the middle of the function, the pc range coverage ; must be below 100%. 
@@ -48,11 +54,11 @@ ; CHECK: "sum_all_local_vars(#bytes in parent scope covered by DW_AT_location)": ; CHECK: "#bytes witin functions":[[FUNCSIZE:[0-9]+]] ; CHECK: "#bytes witin inlined functions":[[INLINESIZE:[0-9]+]] -; CHECK: "#bytes in __debug_info":380 +; CHECK: "#bytes in __debug_info":459 ; CHECK: "#bytes in __debug_loc":35 -; CHECK: "#bytes in __debug_abbrev":303 -; CHECK: "#bytes in __debug_line":117 -; CHECK: "#bytes in __debug_str":204 +; CHECK: "#bytes in __debug_abbrev":384 +; CHECK: "#bytes in __debug_line":126 +; CHECK: "#bytes in __debug_str":231 ; ModuleID = '/tmp/quality.cpp' source_filename = "/tmp/quality.cpp" @@ -118,6 +124,17 @@ entry: ret i32 0, !dbg !58 } +%struct.T = type { i8 } + +define void @_ZN1T5emptyEv(%struct.T* %this) #2 !dbg !59 { +entry: + %this.addr = alloca %struct.T*, align 8 + store %struct.T* %this, %struct.T** %this.addr, align 8 + call void @llvm.dbg.declare(metadata %struct.T** %this.addr, metadata !67, metadata !DIExpression()), !dbg !69 + %this1 = load %struct.T*, %struct.T** %this.addr, align 8 + ret void, !dbg !70 +} + attributes #0 = { alwaysinline nounwind ssp uwtable } attributes #1 = { nounwind readnone speculatable } attributes #2 = { noinline nounwind optnone ssp uwtable } @@ -185,3 +202,16 @@ attributes #2 = { noinline nounwind optnone ssp uwtable } !56 = !DILocation(line: 10, column: 12, scope: !52) !57 = !DILocalVariable(arg: 2, scope: !52, file: !3, line: 10, type: !8) !58 = !DILocation(line: 10, column: 17, scope: !52) + +!59 = distinct !DISubprogram(name: "empty", linkageName: "_ZN1T5emptyEv", scope: !60, file: !3, line: 25, type: !63, scopeLine: 25, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, declaration: !62, retainedNodes: !4) +!60 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "T", file: !3, line: 22, size: 8, flags: DIFlagTypePassByValue, elements: !61, identifier: "_ZTS1T") +!61 = !{!62} +!62 = !DISubprogram(name: "empty", linkageName: "_ZN1T5emptyEv", scope: !60, 
file: !3, line: 23, type: !63, scopeLine: 23, flags: DIFlagPrototyped, spFlags: 0) +!63 = !DISubroutineType(types: !64) +!64 = !{!65, !66} +!65 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64) +!66 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !60, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer) +!67 = !DILocalVariable(name: "this", arg: 1, scope: !59, type: !68, flags: DIFlagArtificial | DIFlagObjectPointer) +!68 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !60, size: 64) +!69 = !DILocation(line: 0, scope: !59) +!70 = !DILocation(line: 25, column: 19, scope: !59) diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll new file mode 100644 index 00000000000000..e49ee40782bb77 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll @@ -0,0 +1,83 @@ +define amdgpu_kernel void @test_kernel() { + ret void +} + +; Test subtarget detection. Disassembly is only supported for GFX8 and beyond. 
+; +; ----------------------------------GFX10-------------------------------------- +; +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1030 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1012 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1011 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1010 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + + +; ----------------------------------GFX9--------------------------------------- +; +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx909 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx908 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx906 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -filetype=obj -O0 -o 
%t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx904 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx902 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx900 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + + +; ----------------------------------GFX8--------------------------------------- +; +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx810 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx803 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx802 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx801 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt diff --git a/llvm/test/tools/llvm-readobj/ELF/Inputs/got-over.exe.elf-mips b/llvm/test/tools/llvm-readobj/ELF/Inputs/got-over.exe.elf-mips deleted file mode 100644 index 27644bff3302a4..00000000000000 Binary files 
a/llvm/test/tools/llvm-readobj/ELF/Inputs/got-over.exe.elf-mips and /dev/null differ diff --git a/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test b/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test index 3ffdd57486a0e0..d73f55b5fe6a57 100644 --- a/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test +++ b/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test @@ -1,51 +1,75 @@ -# If the offset and/or size fields of the PT_DYNAMIC field become corrupted, -# we should report a sensible message. +## If the offset and/or size fields of the PT_DYNAMIC field become corrupted, +## we should report a sensible message. -# Creating such a malformed file is hard. The easiest way to simulate it is to -# truncate the file. Note that the section headers must first be stripped or -# llvm-readobj will fail to parse the file due to the section header table -# offset pointing outside the file. +## Case A: Test case where the size of the PT_DYNAMIC header is too large to fit in the file, +## but the start is within the file. -# RUN: yaml2obj %s -o %t.base -# RUN: llvm-objcopy --strip-sections %t.base %t.stripped +## Case A.1: the section header table is present in the object. Check that we report a warning about the +## broken PT_DYNAMIC header, check we dump the dynamic table. +# RUN: yaml2obj %s -DFILESIZE=0x131 -o %t1 +# RUN: llvm-readobj %t1 --dynamic-table 2>&1 | FileCheck -DFILE=%t1 %s --check-prefixes=WARN1,WARN1-LLVM +# RUN: llvm-readelf %t1 --dynamic-table 2>&1 | FileCheck -DFILE=%t1 %s --check-prefixes=WARN1,WARN1-GNU -# Test case where the size is too large to fit in the file, but the start is -# within the file. 
-# RUN: cp %t.stripped %t.truncated1 -# RUN: %python -c "with open(r'%t.truncated1', 'r+') as f: f.truncate(0x1001)" -# RUN: llvm-readobj %t.truncated1 --dynamic-table 2>&1 | \ -# RUN: FileCheck -DFILE=%t.truncated1 %s --check-prefix=WARN1 +# WARN1: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0x131) exceeds the size of the file (0x1130) -# WARN1: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0x10) exceeds the size of the file (0x1001) +# WARN1-LLVM: DynamicSection [ (1 entries) +# WARN1-LLVM-NEXT: Tag Type Name/Value +# WARN1-LLVM-NEXT: 0x0000000000000000 NULL 0x0 +# WARN1-LLVM-NEXT: ] -# Test case where the offset is too large to be in the file. -# RUN: cp %t.stripped %t.truncated2 -# RUN: %python -c "with open(r'%t.truncated2', 'r+') as f: f.truncate(0xFFF)" -# RUN: llvm-readobj %t.truncated2 --dynamic-table 2>&1 | \ -# RUN: FileCheck -DFILE=%t.truncated2 %s --check-prefix=WARN2 +# WARN1-GNU: Dynamic section at offset 0x1000 contains 1 entries: +# WARN1-GNU-NEXT: Tag Type Name/Value +# WARN1-GNU-NEXT: 0x0000000000000000 (NULL) 0x0 -# WARN2: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0x10) exceeds the size of the file (0xfff) +## Case A.2: in this case we drop section headers. The dynamic table is not dumped. +# RUN: yaml2obj %s -DFILESIZE=0x119 -DNOHEADERS=true -o %t1.noheaders +# RUN: llvm-readobj %t1.noheaders --dynamic-table 2>&1 | FileCheck -DFILE=%t1.noheaders %s \ +# RUN: --check-prefix=WARN1-NOHEADERS --implicit-check-not="DynamicSection [" +# RUN: llvm-readelf %t1.noheaders --dynamic-table 2>&1 | FileCheck -DFILE=%t1.noheaders %s \ +# RUN: --check-prefix=WARN1-NOHEADERS --implicit-check-not="Dynamic section" + +# WARN1-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0x119) exceeds the size of the file (0x1118) + +## Case B: Test case where the offset of the PT_DYNAMIC header is too large to be in the file. 
+ +## Case B.1: the section header table is present in the object. Check that we report a warning about the +## broken PT_DYNAMIC header, but document that we do not dump the dynamic table, because +## return an error earlier. +# RUN: yaml2obj %s -DOFFSET=0x1131 -o %t2 +# RUN: not llvm-readobj %t2 --dynamic-table 2>&1 | FileCheck -DFILE=%t2 %s --check-prefix=WARN2 +# RUN: not llvm-readelf %t2 --dynamic-table 2>&1 | FileCheck -DFILE=%t2 %s --check-prefix=WARN2 + +# WARN2: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1131) + file size (0x10) exceeds the size of the file (0x1130) +# WARN2: error: '[[FILE]]': Invalid data was encountered while parsing the file + +## Case B.2: in this case we drop section headers. The dynamic table is not dumped. +# RUN: yaml2obj %s -DOFFSET=0x1119 -DNOHEADERS=true -o %t2.noheaders +# RUN: llvm-readobj %t2.noheaders --dynamic-table 2>&1 | FileCheck -DFILE=%t2.noheaders %s \ +# RUN: --check-prefix=WARN2-NOHEADERS --implicit-check-not="DynamicSection [" +# RUN: llvm-readelf %t2.noheaders --dynamic-table 2>&1 | FileCheck -DFILE=%t2.noheaders %s \ +# RUN: --check-prefix=WARN2-NOHEADERS --implicit-check-not="Dynamic section" + +# WARN2-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1119) + file size (0x10) exceeds the size of the file (0x1118) --- !ELF FileHeader: Class: ELFCLASS64 Data: ELFDATA2LSB Type: ET_EXEC - Machine: EM_X86_64 + Machine: EM_NONE Sections: - - Name: .dynamic - Type: SHT_DYNAMIC - Address: 0x1000 - AddressAlign: 0x1000 + - Name: .dynamic + Type: SHT_DYNAMIC + Address: 0x1000 + Offset: 0x1000 + ShOffset: [[OFFSET=]] Entries: - Tag: DT_NULL Value: 0 ProgramHeaders: - - Type: PT_LOAD - VAddr: 0x1000 - Sections: - - Section: .dynamic - - Type: PT_DYNAMIC - VAddr: 0x1000 + - Type: PT_DYNAMIC + FileSize: [[FILESIZE=]] Sections: - Section: .dynamic +SectionHeaderTable: + NoHeaders: [[NOHEADERS=false]] diff --git a/llvm/test/tools/llvm-readobj/ELF/mips-got-overlapped.test 
b/llvm/test/tools/llvm-readobj/ELF/mips-got-overlapped.test deleted file mode 100644 index c8f81ccf9d2804..00000000000000 --- a/llvm/test/tools/llvm-readobj/ELF/mips-got-overlapped.test +++ /dev/null @@ -1,45 +0,0 @@ -# Check that llvm-readobj -A correctly shows .got section -# content if there are some other zero-sized sections with the same -# address as the .got. got-over.exe.elf-mips has zero-sized .data -# section at the same offset .got section. - -# RUN: llvm-readobj -A %p/Inputs/got-over.exe.elf-mips | FileCheck %s - -# GOT-OBJ: Cannot find PLTGOT dynamic table tag. - -# CHECK: Primary GOT { -# CHECK-NEXT: Canonical gp value: 0x418270 -# CHECK-NEXT: Reserved entries [ -# CHECK-NEXT: Entry { -# CHECK-NEXT: Address: 0x410280 -# CHECK-NEXT: Access: -32752 -# CHECK-NEXT: Initial: 0x0 -# CHECK-NEXT: Purpose: Lazy resolver -# CHECK-NEXT: } -# CHECK-NEXT: Entry { -# CHECK-NEXT: Address: 0x410284 -# CHECK-NEXT: Access: -32748 -# CHECK-NEXT: Initial: 0x80000000 -# CHECK-NEXT: Purpose: Module pointer (GNU extension) -# CHECK-NEXT: } -# CHECK-NEXT: ] -# CHECK-NEXT: Local entries [ -# CHECK-NEXT: Entry { -# CHECK-NEXT: Address: 0x410288 -# CHECK-NEXT: Access: -32744 -# CHECK-NEXT: Initial: 0x4001B8 -# CHECK-NEXT: } -# CHECK-NEXT: ] -# CHECK-NEXT: Global entries [ -# CHECK-NEXT: Entry { -# CHECK-NEXT: Address: 0x41028C -# CHECK-NEXT: Access: -32740 -# CHECK-NEXT: Initial: 0x0 -# CHECK-NEXT: Value: 0x0 -# CHECK-NEXT: Type: None -# CHECK-NEXT: Section: Undefined -# CHECK-NEXT: Name: _foo -# CHECK-NEXT: } -# CHECK-NEXT: ] -# CHECK-NEXT: Number of TLS and multi-GOT entries: 0 -# CHECK-NEXT: } diff --git a/llvm/test/tools/llvm-readobj/ELF/mips-got.test b/llvm/test/tools/llvm-readobj/ELF/mips-got.test index 7475a6d57d578d..24a06dd2b3bbd7 100644 --- a/llvm/test/tools/llvm-readobj/ELF/mips-got.test +++ b/llvm/test/tools/llvm-readobj/ELF/mips-got.test @@ -579,3 +579,75 @@ Sections: # RUN: llvm-readobj -A %t.err7.o 2>&1 | FileCheck %s -DFILE=%t.err7.o 
--check-prefix=NAME-ERR-NOTFOUND --implicit-check-not=warning: # NAME-ERR-NOTFOUND: warning: '[[FILE]]': unable to read the name of SHT_PROGBITS section with index 2: a section [index 2] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table + +## Check that we correctly show .got section content when there are some other zero-sized +## sections with the same address as the .got section. +## In this test the empty .data section has the same address as the .got section. + +# RUN: yaml2obj --docnum=4 %s -o %t.err7.o +# RUN: llvm-readobj -A %t.err7.o 2>&1 | FileCheck %s -DFILE=%t.err7.o --check-prefix=SAME-ADDR-LLVM +# RUN: llvm-readelf -A %t.err7.o 2>&1 | FileCheck %s -DFILE=%t.err7.o --check-prefix=SAME-ADDR-GNU + +# SAME-ADDR-LLVM: Primary GOT { +# SAME-ADDR-LLVM-NEXT: Canonical gp value: 0x9112 +# SAME-ADDR-LLVM-NEXT: Reserved entries [ +# SAME-ADDR-LLVM-NEXT: Entry { +# SAME-ADDR-LLVM-NEXT: Address: 0x1122 +# SAME-ADDR-LLVM-NEXT: Access: -32752 +# SAME-ADDR-LLVM-NEXT: Initial: 0x0 +# SAME-ADDR-LLVM-NEXT: Purpose: Lazy resolver +# SAME-ADDR-LLVM-NEXT: } +# SAME-ADDR-LLVM-NEXT: ] +# SAME-ADDR-LLVM-NEXT: Local entries [ +# SAME-ADDR-LLVM-NEXT: ] +# SAME-ADDR-LLVM-NEXT: Global entries [ +# SAME-ADDR-LLVM-NEXT: Entry { +# SAME-ADDR-LLVM-NEXT: Address: 0x112A +# SAME-ADDR-LLVM-NEXT: Access: -32744 +# SAME-ADDR-LLVM-NEXT: Initial: 0x0 +# SAME-ADDR-LLVM-NEXT: Value: 0x0 +# SAME-ADDR-LLVM-NEXT: Type: None (0x0) +# SAME-ADDR-LLVM-NEXT: Section: Undefined (0x0) +# SAME-ADDR-LLVM-NEXT: Name: foo (1) +# SAME-ADDR-LLVM-NEXT: } +# SAME-ADDR-LLVM-NEXT: ] +# SAME-ADDR-LLVM-NEXT: Number of TLS and multi-GOT entries: 0 +# SAME-ADDR-LLVM-NEXT: } + +# SAME-ADDR-GNU: Primary GOT: +# SAME-ADDR-GNU-NEXT: Canonical gp value: 0000000000009112 +# SAME-ADDR-GNU-EMPTY: +# SAME-ADDR-GNU-NEXT: Reserved entries: +# SAME-ADDR-GNU-NEXT: Address Access Initial Purpose +# SAME-ADDR-GNU-NEXT: 0000000000001122 -32752(gp) 0000000000000000 Lazy resolver +# 
SAME-ADDR-GNU-EMPTY: +# SAME-ADDR-GNU-NEXT: Global entries: +# SAME-ADDR-GNU-NEXT: Address Access Initial Sym.Val. Type Ndx Name +# SAME-ADDR-GNU-NEXT: 000000000000112a -32744(gp) 0000000000000000 0000000000000000 NOTYPE UND foo + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_MIPS +Sections: + - Name: .data + Type: SHT_PROGBITS + Address: 0x1122 + Size: 0 + - Name: .got + Type: SHT_PROGBITS + Address: 0x1122 + Size: 16 + - Name: .dynamic + Type: SHT_DYNAMIC + Entries: + - Tag: DT_MIPS_LOCAL_GOTNO + Value: 1 + - Tag: DT_MIPS_GOTSYM + Value: 1 + - Tag: DT_PLTGOT + Value: 0x1122 +DynamicSymbols: + - Name: foo diff --git a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error1.s b/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error1.s deleted file mode 100644 index 07fbd78b09ece2..00000000000000 --- a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error1.s +++ /dev/null @@ -1,8 +0,0 @@ -// REQUIRES: x86-registered-target -// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t -// RUN: llvm-readobj --relocations %t 2>&1 | FileCheck %s -DFILE=%t - -// CHECK: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 3: invalid packed relocation header - -.section .rela.dyn, "a", @0x60000001 -.ascii "APS9" diff --git a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error2.s b/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error2.s deleted file mode 100644 index ea14995e0ded11..00000000000000 --- a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error2.s +++ /dev/null @@ -1,8 +0,0 @@ -// REQUIRES: x86-registered-target -// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t -// RUN: llvm-readobj --relocations %t 2>&1 | FileCheck %s -DFILE=%t - -// CHECK: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 3: malformed sleb128, extends past end - -.section .rela.dyn, "a", @0x60000001 -.ascii "APS2" diff --git 
a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error3.s b/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error3.s deleted file mode 100644 index 766c551295ae62..00000000000000 --- a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error3.s +++ /dev/null @@ -1,10 +0,0 @@ -// REQUIRES: x86-registered-target -// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t -// RUN: llvm-readobj --relocations %t 2>&1 | FileCheck %s -DFILE=%t - -// CHECK: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 3: malformed sleb128, extends past end - -.section .rela.dyn, "a", @0x60000001 -.ascii "APS2" -.sleb128 4 // Number of relocations -.sleb128 0 // Initial offset diff --git a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error4.s b/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error4.s deleted file mode 100644 index 191e0b7885c46e..00000000000000 --- a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error4.s +++ /dev/null @@ -1,14 +0,0 @@ -// REQUIRES: x86-registered-target -// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t -// RUN: llvm-readobj --relocations %t 2>&1 | FileCheck %s -DFILE=%t - -// CHECK: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 3: malformed sleb128, extends past end - -.section .rela.dyn, "a", @0x60000001 -.ascii "APS2" -.sleb128 4 // Number of relocations -.sleb128 0 // Initial offset - -.sleb128 2 // Number of relocations in group -.sleb128 2 // RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG -.sleb128 8 // offset delta diff --git a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error5.s b/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error5.s deleted file mode 100644 index 8a6d6560f52056..00000000000000 --- a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error5.s +++ /dev/null @@ -1,14 +0,0 @@ -// REQUIRES: x86-registered-target -// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t -// RUN: llvm-readobj --relocations %t 2>&1 | FileCheck %s 
-DFILE=%t - -// CHECK: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 3: relocation group unexpectedly large - -.section .rela.dyn, "a", @0x60000001 -.ascii "APS2" -.sleb128 4 // Number of relocations -.sleb128 0 // Initial offset - -.sleb128 5 // Number of relocations in group -.sleb128 2 // RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG -.sleb128 8 // offset delta diff --git a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-errors.s b/llvm/test/tools/llvm-readobj/ELF/packed-relocs-errors.s new file mode 100644 index 00000000000000..4f2e65ed220f7a --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/packed-relocs-errors.s @@ -0,0 +1,66 @@ +# REQUIRES: x86-registered-target + +## Test that we report meaningful warnings when dumping +## broken Android's packed relocation sections. + +# RUN: split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %t/asm1.s -o %t1.o +# RUN: llvm-readobj --relocations %t1.o 2>&1 | FileCheck %s -DFILE=%t1.o --check-prefix=ERR-HEADER +# RUN: llvm-readelf --relocations %t1.o 2>&1 | FileCheck %s -DFILE=%t1.o --check-prefix=ERR-HEADER + +#--- asm1.s +.section .rela.dyn, "a", @0x60000001 +.ascii "APS9" + +# ERR-HEADER: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 3: invalid packed relocation header + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %t/asm2.s -o %t2.o +# RUN: llvm-readobj --relocations %t2.o 2>&1 | FileCheck %s -DFILE=%t2.o --check-prefix=ERR-PAST-END +# RUN: llvm-readelf --relocations %t2.o 2>&1 | FileCheck %s -DFILE=%t2.o --check-prefix=ERR-PAST-END + +#--- asm2.s +.section .rela.dyn, "a", @0x60000001 +.ascii "APS2" + +# ERR-PAST-END: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 3: malformed sleb128, extends past end + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %t/asm3.s -o %t3.o +# RUN: llvm-readobj --relocations %t3.o 2>&1 | FileCheck %s -DFILE=%t3.o 
--check-prefix=ERR-PAST-END +# RUN: llvm-readelf --relocations %t3.o 2>&1 | FileCheck %s -DFILE=%t3.o --check-prefix=ERR-PAST-END + +#--- asm3.s +.section .rela.dyn, "a", @0x60000001 +.ascii "APS2" +.sleb128 4 ## Number of relocations +.sleb128 0 ## Initial offset + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %t/asm4.s -o %t4.o +# RUN: llvm-readobj --relocations %t4.o 2>&1 | FileCheck %s -DFILE=%t4.o --check-prefix=ERR-PAST-END +# RUN: llvm-readelf --relocations %t4.o 2>&1 | FileCheck %s -DFILE=%t4.o --check-prefix=ERR-PAST-END + +#--- asm4.s +.section .rela.dyn, "a", @0x60000001 +.ascii "APS2" +.sleb128 4 ## Number of relocations +.sleb128 0 ## Initial offset + +.sleb128 2 ## Number of relocations in group +.sleb128 2 ## RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG +.sleb128 8 ## offset delta + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %t/asm5.s -o %t5.o +# RUN: llvm-readobj --relocations %t5.o 2>&1 | FileCheck %s -DFILE=%t5.o --check-prefix=ERR-LARGE +# RUN: llvm-readelf --relocations %t5.o 2>&1 | FileCheck %s -DFILE=%t5.o --check-prefix=ERR-LARGE + +# ERR-LARGE: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 3: relocation group unexpectedly large + +#--- asm5.s +.section .rela.dyn, "a", @0x60000001 +.ascii "APS2" +.sleb128 4 ## Number of relocations +.sleb128 0 ## Initial offset + +.sleb128 5 ## Number of relocations in group +.sleb128 2 ## RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG +.sleb128 8 ## offset delta diff --git a/llvm/test/tools/yaml2obj/ELF/eflags.yaml b/llvm/test/tools/yaml2obj/ELF/eflags.yaml new file mode 100644 index 00000000000000..8b90a2b2c94451 --- /dev/null +++ b/llvm/test/tools/yaml2obj/ELF/eflags.yaml @@ -0,0 +1,16 @@ +## Check how the 'Flags' key can be used to encode e_flags field values. + +## Check we are able to produce no flags for EM_NONE. EM_NONE is an arbitrary +## e_machine type that has no EF_* values defined for it. 
+# RUN: yaml2obj %s -o %t-no-flags +# RUN: llvm-readelf --file-headers %t-no-flags | FileCheck %s --check-prefix=NOFLAGS + +# NOFLAGS: Flags: 0x0{{$}} + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_NONE + Flags: [ ] diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index ecdd227d08bda0..6b3ecd9cef1939 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -2170,6 +2170,10 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { if (!AsmInfo) reportError(Obj->getFileName(), "no assembly info for target " + TripleName); + + if (MCPU.empty()) + MCPU = Obj->tryGetCPUName().getValueOr("").str(); + std::unique_ptr STI( TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString())); if (!STI) diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 3a9fb59cae6f03..bd54b6d7e35919 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -3051,4 +3051,85 @@ TEST_F(AArch64GISelMITest, MoreElementsFreeze) { EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; } +// Test fewer elements of G_INSERT_VECTOR_ELEMENT +TEST_F(AArch64GISelMITest, FewerElementsInsertVectorElt) { + setUp(); + if (!TM) + return; + + DefineLegalizerInfo(A, {}); + + LLT P0{LLT::pointer(0, 64)}; + LLT S64{LLT::scalar(64)}; + LLT S16{LLT::scalar(16)}; + LLT V2S16{LLT::vector(2, 16)}; + LLT V3S16{LLT::vector(3, 16)}; + LLT V8S16{LLT::vector(8, 16)}; + + auto Ptr0 = B.buildIntToPtr(P0, Copies[0]); + auto VectorV8 = B.buildLoad(V8S16, Ptr0, MachinePointerInfo(), Align(8)); + auto Value = B.buildTrunc(S16, Copies[1]); + + auto Seven = B.buildConstant(S64, 7); + auto InsertV8Constant7_0 = + B.buildInsertVectorElement(V8S16, VectorV8, Value, Seven); + auto InsertV8Constant7_1 = + 
B.buildInsertVectorElement(V8S16, VectorV8, Value, Seven); + + B.buildStore(InsertV8Constant7_0, Ptr0, MachinePointerInfo(), Align(8), + MachineMemOperand::MOVolatile); + B.buildStore(InsertV8Constant7_1, Ptr0, MachinePointerInfo(), Align(8), + MachineMemOperand::MOVolatile); + + AInfo Info(MF->getSubtarget()); + DummyGISelObserver Observer; + LegalizerHelper Helper(*MF, Info, Observer, B); + + // Perform Legalization + B.setInsertPt(*EntryMBB, InsertV8Constant7_0->getIterator()); + + // This should index the high element of the 4th piece of an unmerge. + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.fewerElementsVector(*InsertV8Constant7_0, 0, V2S16)); + + // This case requires extracting an intermediate vector type into the target + // v4s16. + B.setInsertPt(*EntryMBB, InsertV8Constant7_1->getIterator()); + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.fewerElementsVector(*InsertV8Constant7_1, 0, V3S16)); + + const auto *CheckStr = R"( + CHECK: [[COPY0:%[0-9]+]]:_(s64) = COPY + CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY + CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY + CHECK: [[PTR0:%[0-9]+]]:_(p0) = G_INTTOPTR [[COPY0]] + CHECK: [[VEC8:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR0]]:_(p0) :: (load 16, align 8) + CHECK: [[INSERT_VAL:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]] + + + CHECK: [[UNMERGE0:%[0-9]+]]:_(<2 x s16>), [[UNMERGE1:%[0-9]+]]:_(<2 x s16>), [[UNMERGE2:%[0-9]+]]:_(<2 x s16>), [[UNMERGE3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[VEC8]] + CHECK: [[ONE:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + CHECK: [[SUB_INSERT_7:%[0-9]+]]:_(<2 x s16>) = G_INSERT_VECTOR_ELT [[UNMERGE3]]:_, [[INSERT_VAL]]:_(s16), [[ONE]] + CHECK: [[INSERT_V8_7_0:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UNMERGE0]]:_(<2 x s16>), [[UNMERGE1]]:_(<2 x s16>), [[UNMERGE2]]:_(<2 x s16>), [[SUB_INSERT_7]]:_(<2 x s16>) + + + CHECK: [[UNMERGE1_0:%[0-9]+]]:_(s16), [[UNMERGE1_1:%[0-9]+]]:_(s16), [[UNMERGE1_2:%[0-9]+]]:_(s16), [[UNMERGE1_3:%[0-9]+]]:_(s16), 
[[UNMERGE1_4:%[0-9]+]]:_(s16), [[UNMERGE1_5:%[0-9]+]]:_(s16), [[UNMERGE1_6:%[0-9]+]]:_(s16), [[UNMERGE1_7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[VEC8]]:_(<8 x s16>) + CHECK: [[IMPDEF_S16:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + CHECK: [[BUILD0:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UNMERGE1_0]]:_(s16), [[UNMERGE1_1]]:_(s16), [[UNMERGE1_2]]:_(s16) + CHECK: [[BUILD1:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UNMERGE1_3]]:_(s16), [[UNMERGE1_4]]:_(s16), [[UNMERGE1_5]]:_(s16) + CHECK: [[BUILD2:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UNMERGE1_6]]:_(s16), [[UNMERGE1_7]]:_(s16), [[IMPDEF_S16]]:_(s16) + CHECK: [[IMPDEF_V3S16:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + CHECK: [[ONE_1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + CHECK: [[SUB_INSERT_7_V3S16:%[0-9]+]]:_(<3 x s16>) = G_INSERT_VECTOR_ELT [[BUILD2]]:_, [[INSERT_VAL]]:_(s16), [[ONE_1]] + CHECK: [[WIDE_CONCAT:%[0-9]+]]:_(<24 x s16>) = G_CONCAT_VECTORS [[BUILD0]]:_(<3 x s16>), [[BUILD1]]:_(<3 x s16>), [[SUB_INSERT_7_V3S16]]:_(<3 x s16>), [[IMPDEF_V3S16]]:_(<3 x s16>), [[IMPDEF_V3S16]]:_(<3 x s16>), [[IMPDEF_V3S16]]:_(<3 x s16>), [[IMPDEF_V3S16]]:_(<3 x s16>), [[IMPDEF_V3S16]]:_(<3 x s16>) + CHECK: [[INSERT_V8_7_1:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[WIDE_CONCAT]]:_(<24 x s16>), 0 + + CHECK: G_STORE [[INSERT_V8_7_0]] + CHECK: G_STORE [[INSERT_V8_7_1]] + )"; + + // Check + EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; +} + } // namespace diff --git a/llvm/unittests/Target/ARM/MachineInstrTest.cpp b/llvm/unittests/Target/ARM/MachineInstrTest.cpp index 792a15dcbfafd3..876e011e1ce8a6 100644 --- a/llvm/unittests/Target/ARM/MachineInstrTest.cpp +++ b/llvm/unittests/Target/ARM/MachineInstrTest.cpp @@ -382,7 +382,7 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { return false; case MVE_ASRLi: case MVE_ASRLr: - case MVE_LSRL: + case MVE_LSRL: case MVE_SQRSHR: case MVE_SQSHL: case MVE_SRSHR: @@ -393,7 +393,7 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VABDf32: case MVE_VABDs16: case MVE_VABDs32: - case 
MVE_VABDs8: + case MVE_VABDs8: case MVE_VABDu16: case MVE_VABDu32: case MVE_VABDu8: @@ -609,6 +609,42 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VIWDUPu16: case MVE_VIWDUPu32: case MVE_VIWDUPu8: + case MVE_VLD20_8: + case MVE_VLD21_8: + case MVE_VLD20_16: + case MVE_VLD21_16: + case MVE_VLD20_32: + case MVE_VLD21_32: + case MVE_VLD20_8_wb: + case MVE_VLD21_8_wb: + case MVE_VLD20_16_wb: + case MVE_VLD21_16_wb: + case MVE_VLD20_32_wb: + case MVE_VLD21_32_wb: + case MVE_VLD40_8: + case MVE_VLD41_8: + case MVE_VLD42_8: + case MVE_VLD43_8: + case MVE_VLD40_16: + case MVE_VLD41_16: + case MVE_VLD42_16: + case MVE_VLD43_16: + case MVE_VLD40_32: + case MVE_VLD41_32: + case MVE_VLD42_32: + case MVE_VLD43_32: + case MVE_VLD40_8_wb: + case MVE_VLD41_8_wb: + case MVE_VLD42_8_wb: + case MVE_VLD43_8_wb: + case MVE_VLD40_16_wb: + case MVE_VLD41_16_wb: + case MVE_VLD42_16_wb: + case MVE_VLD43_16_wb: + case MVE_VLD40_32_wb: + case MVE_VLD41_32_wb: + case MVE_VLD42_32_wb: + case MVE_VLD43_32_wb: case MVE_VLDRBS16: case MVE_VLDRBS16_post: case MVE_VLDRBS16_pre: @@ -657,9 +693,9 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VLDRWU32_rq_u: case MVE_VMOVimmf32: case MVE_VMOVimmi16: - case MVE_VMOVimmi32: + case MVE_VMOVimmi32: case MVE_VMOVimmi64: - case MVE_VMOVimmi8: + case MVE_VMOVimmi8: case MVE_VMOVNi16bh: case MVE_VMOVNi16th: case MVE_VMOVNi32bh: @@ -679,7 +715,7 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VMULLTs8: case MVE_VMULLTu16: case MVE_VMULLTu32: - case MVE_VMULLTu8: + case MVE_VMULLTu8: case MVE_VMUL_qr_f16: case MVE_VMUL_qr_f32: case MVE_VMUL_qr_i16: @@ -702,7 +738,7 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VORR: case MVE_VORRimmi16: case MVE_VORRimmi32: - case MVE_VPST: + case MVE_VPST: case MVE_VQABSs16: case MVE_VQABSs32: case MVE_VQABSs8: @@ -814,7 +850,7 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VRHADDs32: case MVE_VRHADDs8: case MVE_VRHADDu16: - case 
MVE_VRHADDu32: + case MVE_VRHADDu32: case MVE_VRHADDu8: case MVE_VRINTf16A: case MVE_VRINTf16M: @@ -825,12 +861,12 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VRINTf32A: case MVE_VRINTf32M: case MVE_VRINTf32N: - case MVE_VRINTf32P: - case MVE_VRINTf32X: + case MVE_VRINTf32P: + case MVE_VRINTf32X: case MVE_VRINTf32Z: case MVE_VRSHL_by_vecs16: case MVE_VRSHL_by_vecs32: - case MVE_VRSHL_by_vecs8: + case MVE_VRSHL_by_vecs8: case MVE_VRSHL_by_vecu16: case MVE_VRSHL_by_vecu32: case MVE_VRSHL_by_vecu8: @@ -887,7 +923,7 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VSTRB16_rq: case MVE_VSTRB32: case MVE_VSTRB32_post: - case MVE_VSTRB32_pre: + case MVE_VSTRB32_pre: case MVE_VSTRB32_rq: case MVE_VSTRB8_rq: case MVE_VSTRBU8: @@ -957,7 +993,9 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { for (auto &Op : Desc.operands()) { // Only check instructions that access the MQPR regs. if ((Op.OperandType & MCOI::OPERAND_REGISTER) == 0 || - Op.RegClass != ARM::MQPRRegClassID) + (Op.RegClass != ARM::MQPRRegClassID && + Op.RegClass != ARM::QQPRRegClassID && + Op.RegClass != ARM::QQQQPRRegClassID)) continue; uint64_t Flags = MII->get(i).TSFlags; diff --git a/llvm/utils/gn/build/BUILD.gn b/llvm/utils/gn/build/BUILD.gn index e29cdb678a361f..3c0b905991b50f 100644 --- a/llvm/utils/gn/build/BUILD.gn +++ b/llvm/utils/gn/build/BUILD.gn @@ -34,6 +34,10 @@ config("compiler_defaults") { defines += [ "NDEBUG" ] } + if (llvm_enable_expensive_checks) { + defines += [ "EXPENSIVE_CHECKS" ] + } + asmflags = target_flags cflags = target_flags ldflags = target_flags + target_ldflags diff --git a/llvm/utils/gn/build/buildflags.gni b/llvm/utils/gn/build/buildflags.gni index 4dcdc962b7d116..eb8ac55e48e01b 100644 --- a/llvm/utils/gn/build/buildflags.gni +++ b/llvm/utils/gn/build/buildflags.gni @@ -10,4 +10,7 @@ declare_args() { # Whether to enable assertions. llvm_enable_assertions = true + + # Whether to enable expensive checks. 
+ llvm_enable_expensive_checks = false } diff --git a/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn index c807389846b78f..6f3b5d43e673d5 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn @@ -43,6 +43,7 @@ unittest("ToolingTests") { "RecursiveASTVisitorTests/CXXOperatorCallExprTraverser.cpp", "RecursiveASTVisitorTests/Callbacks.cpp", "RecursiveASTVisitorTests/Class.cpp", + "RecursiveASTVisitorTests/Concept.cpp", "RecursiveASTVisitorTests/ConstructExpr.cpp", "RecursiveASTVisitorTests/DeclRefExpr.cpp", "RecursiveASTVisitorTests/ImplicitCtor.cpp", diff --git a/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn index 5f8058699d7293..32480e51a4c270 100644 --- a/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn @@ -1,6 +1,7 @@ import("//compiler-rt/target.gni") import("//compiler-rt/test/test.gni") import("//llvm/triples.gni") +import("//llvm/utils/gn/build/buildflags.gni") import("//llvm/utils/gn/build/libs/zlib/enable.gni") import("//llvm/utils/gn/build/toolchain/compiler.gni") import("//llvm/utils/gn/build/write_cmake_config.gni") @@ -51,12 +52,17 @@ write_cmake_config("lit_common_configured") { "SANITIZER_CAN_USE_CXXABI_PYBOOL=True", "COMPILER_RT_HAS_LLD_PYBOOL=True", "COMPILER_RT_HAS_GWP_ASAN_PYBOOL=False", - "LLVM_ENABLE_EXPENSIVE_CHECKS_PYBOOL=False", "HAVE_RPC_XDR_H=0", "ANDROID_NDK_VERSION=19", "ANDROID_SERIAL_FOR_TESTING=$android_serial_for_testing", ] + if (llvm_enable_expensive_checks) { + values += [ "LLVM_ENABLE_EXPENSIVE_CHECKS_PYBOOL=True" ] + } else { + values += [ "LLVM_ENABLE_EXPENSIVE_CHECKS_PYBOOL=False" ] + } + if (host_cpu == "x64") { values += [ "HOST_ARCH=x86_64" ] } else { diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn 
b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index c9e7c45fc118ba..35c6890efd6da9 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -284,9 +284,9 @@ write_cmake_config("config") { } if (llvm_enable_terminfo) { - values += [ "HAVE_TERMINFO=1" ] + values += [ "LLVM_ENABLE_TERMINFO=1" ] } else { - values += [ "HAVE_TERMINFO=" ] + values += [ "LLVM_ENABLE_TERMINFO=" ] } if (llvm_enable_dia_sdk) { diff --git a/mlir/docs/CAPI.md b/mlir/docs/CAPI.md index 6adb9db3331c5a..a8fcfbafb8b15a 100644 --- a/mlir/docs/CAPI.md +++ b/mlir/docs/CAPI.md @@ -75,6 +75,28 @@ check if an object is null by using `mlirXIsNull(MlirX)`. API functions do _not_ expect null objects as arguments unless explicitly stated otherwise. API functions _may_ return null objects. +### Type Hierarchies + +MLIR objects can form type hierarchies in C++. For example, all IR classes +representing types are derived from `mlir::Type`, some of them may also be also +derived from common base classes such as `mlir::ShapedType` or dialect-specific +base classes. Type hierarchies are exposed to C API through naming conventions +as follows. + +- Only the top-level class of each hierarchy is exposed, e.g. `MlirType` is + defined as a type but `MlirShapedType` is not. This avoids the need for + explicit upcasting when passing an object of a derived type to a function + that expects a base type (this happens more often in core/standard APIs, + while downcasting usually involves further checks anyway). +- A type `Y` that derives from `X` provides a function `int mlirXIsAY(MlirX)` + that returns a non-zero value if the given dynamic instance of `X` is also + an instance of `Y`. For example, `int MlirTypeIsAInteger(MlirType)`. +- A function that expects a derived type as its first argument takes the base + type instead and documents the expectation by using `Y` in its name + `MlirY<...>(MlirX, ...)`. 
This function asserts that the dynamic instance of + its first argument is `Y`, and it is the responsibility of the caller to + ensure it is indeed the case. + ### Conversion To String and Printing IR objects can be converted to a string representation, for example for @@ -96,11 +118,11 @@ allocation and avoid unnecessary allocation and copying inside the printer. For convenience, `mlirXDump(MlirX)` functions are provided to print the given object to the standard error stream. -### Common Patterns +## Common Patterns The API adopts the following patterns for recurrent functionality in MLIR. -#### Indexed Components +### Indexed Components An object has an _indexed component_ if it has fields accessible using a zero-based contiguous integer index, typically arrays. For example, an @@ -120,7 +142,7 @@ Note that the name of subobject in the function does not necessarily match the type of the subobject. For example, `mlirOperationGetOperand` returns a `MlirValue`. -#### Iterable Components +### Iterable Components An object has an _iterable component_ if it has iterators accessing its fields in some order other than integer indexing, typically linked lists. For example, @@ -146,3 +168,17 @@ for (iter = mlirXGetFirst(x); !mlirYIsNull(iter); /* User 'iter'. */ } ``` + +## Extending the API + +### Extensions for Dialect Attributes and Types + +Dialect attributes and types can follow the example of standard attrbutes and +types, provided that implementations live in separate directories, i.e. +`include/mlir-c/<...>Dialect/` and `lib/CAPI/<...>Dialect/`. The core APIs +provide implementation-private headers in `include/mlir/CAPI/IR` that allow one +to convert between opaque C structures for core IR components and their C++ +counterparts. `wrap` converts a C++ class into a C structure and `unwrap` does +the inverse conversion. 
Once the a C++ object is available, the API +implementation should rely on `isa` to implement `mlirXIsAY` and is expected to +use `cast` inside other API calls. diff --git a/mlir/docs/Tutorials/Toy/Ch-7.md b/mlir/docs/Tutorials/Toy/Ch-7.md index cbab1e1cadb0cf..c20b8d95617d76 100644 --- a/mlir/docs/Tutorials/Toy/Ch-7.md +++ b/mlir/docs/Tutorials/Toy/Ch-7.md @@ -190,11 +190,10 @@ public: assert(!elementTypes.empty() && "expected at least 1 element type"); // Call into a helper 'get' method in 'TypeBase' to get a uniqued instance - // of this type. The first two parameters are the context to unique in and - // the kind of the type. The parameters after the type kind are forwarded to - // the storage instance. + // of this type. The first parameter is the context to unique in. The + // parameters after the type kind are forwarded to the storage instance. mlir::MLIRContext *ctx = elementTypes.front().getContext(); - return Base::get(ctx, ToyTypes::Struct, elementTypes); + return Base::get(ctx, elementTypes); } /// Returns the element types of this struct type. 
diff --git a/mlir/examples/standalone/standalone-opt/CMakeLists.txt b/mlir/examples/standalone/standalone-opt/CMakeLists.txt index 854fd556ae868c..06bbb4712645a3 100644 --- a/mlir/examples/standalone/standalone-opt/CMakeLists.txt +++ b/mlir/examples/standalone/standalone-opt/CMakeLists.txt @@ -10,3 +10,5 @@ add_llvm_executable(standalone-opt standalone-opt.cpp) llvm_update_compile_flags(standalone-opt) target_link_libraries(standalone-opt PRIVATE ${LIBS}) + +mlir_check_all_link_libraries(standalone-opt) diff --git a/mlir/examples/standalone/standalone-opt/standalone-opt.cpp b/mlir/examples/standalone/standalone-opt/standalone-opt.cpp index b33dab26a71367..86cf6791844602 100644 --- a/mlir/examples/standalone/standalone-opt/standalone-opt.cpp +++ b/mlir/examples/standalone/standalone-opt/standalone-opt.cpp @@ -24,9 +24,16 @@ int main(int argc, char **argv) { mlir::registerAllDialects(); mlir::registerAllPasses(); - - mlir::registerDialect(); // TODO: Register standalone passes here. - return failed(mlir::MlirOptMain(argc, argv, "Standalone optimizer driver\n")); + mlir::DialectRegistry registry; + registry.insert(); + registry.insert(); + // Add the following to include *all* MLIR Core dialects, or selectively + // include what you need like above. 
You only need to register dialects that + // will be *parsed* by the tool, not the one generated + // registerAllDialects(registry); + + return failed( + mlir::MlirOptMain(argc, argv, "Standalone optimizer driver\n", registry)); } diff --git a/mlir/examples/standalone/test/Standalone/standalone-opt.mlir b/mlir/examples/standalone/test/Standalone/standalone-opt.mlir index fac08144ec39cd..1a78a9d8cb9eca 100644 --- a/mlir/examples/standalone/test/Standalone/standalone-opt.mlir +++ b/mlir/examples/standalone/test/Standalone/standalone-opt.mlir @@ -1,3 +1,3 @@ // RUN: standalone-opt --show-dialects | FileCheck %s -// CHECK: Registered Dialects: +// CHECK: Available Dialects: // CHECK: standalone diff --git a/mlir/examples/toy/Ch2/toyc.cpp b/mlir/examples/toy/Ch2/toyc.cpp index d0880ce0971b6e..99232d8f24a4a5 100644 --- a/mlir/examples/toy/Ch2/toyc.cpp +++ b/mlir/examples/toy/Ch2/toyc.cpp @@ -68,10 +68,9 @@ std::unique_ptr parseInputFile(llvm::StringRef filename) { } int dumpMLIR() { - // Register our Dialect with MLIR. - mlir::registerDialect(); - - mlir::MLIRContext context; + mlir::MLIRContext context(/*loadAllDialects=*/false); + // Load our Dialect in this MLIR Context. + context.getOrLoadDialect(); // Handle '.toy' input to the compiler. if (inputType != InputType::MLIR && diff --git a/mlir/examples/toy/Ch3/toyc.cpp b/mlir/examples/toy/Ch3/toyc.cpp index f9d5631719e8b6..d0430ce16e54a8 100644 --- a/mlir/examples/toy/Ch3/toyc.cpp +++ b/mlir/examples/toy/Ch3/toyc.cpp @@ -102,10 +102,10 @@ int loadMLIR(llvm::SourceMgr &sourceMgr, mlir::MLIRContext &context, } int dumpMLIR() { - // Register our Dialect with MLIR. - mlir::registerDialect(); + mlir::MLIRContext context(/*loadAllDialects=*/false); + // Load our Dialect in this MLIR Context. 
+ context.getOrLoadDialect(); - mlir::MLIRContext context; mlir::OwningModuleRef module; llvm::SourceMgr sourceMgr; mlir::SourceMgrDiagnosticHandler sourceMgrHandler(sourceMgr, &context); diff --git a/mlir/examples/toy/Ch4/toyc.cpp b/mlir/examples/toy/Ch4/toyc.cpp index e11f35c5f7e10c..9f95887d270738 100644 --- a/mlir/examples/toy/Ch4/toyc.cpp +++ b/mlir/examples/toy/Ch4/toyc.cpp @@ -103,10 +103,10 @@ int loadMLIR(llvm::SourceMgr &sourceMgr, mlir::MLIRContext &context, } int dumpMLIR() { - // Register our Dialect with MLIR. - mlir::registerDialect(); + mlir::MLIRContext context(/*loadAllDialects=*/false); + // Load our Dialect in this MLIR Context. + context.getOrLoadDialect(); - mlir::MLIRContext context; mlir::OwningModuleRef module; llvm::SourceMgr sourceMgr; mlir::SourceMgrDiagnosticHandler sourceMgrHandler(sourceMgr, &context); diff --git a/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp b/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp index 3097681ea3fad3..92fd246a135886 100644 --- a/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp +++ b/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp @@ -256,6 +256,9 @@ struct TransposeOpLowering : public ConversionPattern { namespace { struct ToyToAffineLoweringPass : public PassWrapper { + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } void runOnFunction() final; }; } // end anonymous namespace. diff --git a/mlir/examples/toy/Ch5/toyc.cpp b/mlir/examples/toy/Ch5/toyc.cpp index ed0496957093bb..16faac02fc60d0 100644 --- a/mlir/examples/toy/Ch5/toyc.cpp +++ b/mlir/examples/toy/Ch5/toyc.cpp @@ -106,10 +106,10 @@ int loadMLIR(llvm::SourceMgr &sourceMgr, mlir::MLIRContext &context, } int dumpMLIR() { - // Register our Dialect with MLIR. - mlir::registerDialect(); + mlir::MLIRContext context(/*loadAllDialects=*/false); + // Load our Dialect in this MLIR Context. 
+ context.getOrLoadDialect(); - mlir::MLIRContext context; mlir::OwningModuleRef module; llvm::SourceMgr sourceMgr; mlir::SourceMgrDiagnosticHandler sourceMgrHandler(sourceMgr, &context); diff --git a/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp b/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp index cac3415f48d68f..f3857f35e25c95 100644 --- a/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp +++ b/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp @@ -255,6 +255,9 @@ struct TransposeOpLowering : public ConversionPattern { namespace { struct ToyToAffineLoweringPass : public PassWrapper { + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } void runOnFunction() final; }; } // end anonymous namespace. diff --git a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp index 74b32dc0ca1102..19bf27e1864d18 100644 --- a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp +++ b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp @@ -159,6 +159,9 @@ class PrintOpLowering : public ConversionPattern { namespace { struct ToyToLLVMLoweringPass : public PassWrapper> { + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } void runOnOperation() final; }; } // end anonymous namespace diff --git a/mlir/examples/toy/Ch6/toyc.cpp b/mlir/examples/toy/Ch6/toyc.cpp index bdcdf1af7ea831..9504a38b8784c9 100644 --- a/mlir/examples/toy/Ch6/toyc.cpp +++ b/mlir/examples/toy/Ch6/toyc.cpp @@ -255,10 +255,10 @@ int main(int argc, char **argv) { // If we aren't dumping the AST, then we are compiling with/to MLIR. - // Register our Dialect with MLIR. - mlir::registerDialect(); + mlir::MLIRContext context(/*loadAllDialects=*/false); + // Load our Dialect in this MLIR Context. 
+ context.getOrLoadDialect(); - mlir::MLIRContext context; mlir::OwningModuleRef module; if (int error = loadAndProcessMLIR(context, module)) return error; diff --git a/mlir/examples/toy/Ch7/include/toy/Dialect.h b/mlir/examples/toy/Ch7/include/toy/Dialect.h index b695169924012a..4eceb422efa63d 100644 --- a/mlir/examples/toy/Ch7/include/toy/Dialect.h +++ b/mlir/examples/toy/Ch7/include/toy/Dialect.h @@ -63,13 +63,6 @@ class ToyDialect : public mlir::Dialect { // Toy Types //===----------------------------------------------------------------------===// -/// Create a local enumeration with all of the types that are defined by Toy. -namespace ToyTypes { -enum Types { - Struct = mlir::Type::FIRST_TOY_TYPE, -}; -} // end namespace ToyTypes - /// This class defines the Toy struct type. It represents a collection of /// element types. All derived types in MLIR must inherit from the CRTP class /// 'Type::TypeBase'. It takes as template parameters the concrete type diff --git a/mlir/examples/toy/Ch7/mlir/Dialect.cpp b/mlir/examples/toy/Ch7/mlir/Dialect.cpp index e233a554993450..04c796ce6d0b7c 100644 --- a/mlir/examples/toy/Ch7/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch7/mlir/Dialect.cpp @@ -474,11 +474,10 @@ StructType StructType::get(llvm::ArrayRef elementTypes) { assert(!elementTypes.empty() && "expected at least 1 element type"); // Call into a helper 'get' method in 'TypeBase' to get a uniqued instance - // of this type. The first two parameters are the context to unique in and the - // kind of the type. The parameters after the type kind are forwarded to the - // storage instance. + // of this type. The first parameter is the context to unique in. The + // parameters after the type kind are forwarded to the storage instance. mlir::MLIRContext *ctx = elementTypes.front().getContext(); - return Base::get(ctx, ToyTypes::Struct, elementTypes); + return Base::get(ctx, elementTypes); } /// Returns the element types of this struct type. 
diff --git a/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp b/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp index 3097681ea3fad3..92fd246a135886 100644 --- a/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp +++ b/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp @@ -256,6 +256,9 @@ struct TransposeOpLowering : public ConversionPattern { namespace { struct ToyToAffineLoweringPass : public PassWrapper { + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } void runOnFunction() final; }; } // end anonymous namespace. diff --git a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp index 74b32dc0ca1102..19bf27e1864d18 100644 --- a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp +++ b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp @@ -159,6 +159,9 @@ class PrintOpLowering : public ConversionPattern { namespace { struct ToyToLLVMLoweringPass : public PassWrapper> { + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } void runOnOperation() final; }; } // end anonymous namespace diff --git a/mlir/examples/toy/Ch7/toyc.cpp b/mlir/examples/toy/Ch7/toyc.cpp index c1cc207a406ce2..cb3b455dc7ecbe 100644 --- a/mlir/examples/toy/Ch7/toyc.cpp +++ b/mlir/examples/toy/Ch7/toyc.cpp @@ -256,10 +256,10 @@ int main(int argc, char **argv) { // If we aren't dumping the AST, then we are compiling with/to MLIR. - // Register our Dialect with MLIR. - mlir::registerDialect(); + mlir::MLIRContext context(/*loadAllDialects=*/false); + // Load our Dialect in this MLIR Context. 
+ context.getOrLoadDialect(); - mlir::MLIRContext context; mlir::OwningModuleRef module; if (int error = loadAndProcessMLIR(context, module)) return error; diff --git a/mlir/include/mlir-c/AffineMap.h b/mlir/include/mlir-c/AffineMap.h new file mode 100644 index 00000000000000..bef13fd0bfa84f --- /dev/null +++ b/mlir/include/mlir-c/AffineMap.h @@ -0,0 +1,25 @@ +/*===-- mlir-c/AffineMap.h - C API for MLIR Affine maps -----------*- C -*-===*\ +|* *| +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *| +|* Exceptions. *| +|* See https://llvm.org/LICENSE.txt for license information. *| +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef MLIR_C_AFFINEMAP_H +#define MLIR_C_AFFINEMAP_H + +#include "mlir-c/IR.h" + +#ifdef __cplusplus +extern "C" { +#endif + +DEFINE_C_API_STRUCT(MlirAffineMap, const void); + +#ifdef __cplusplus +} +#endif + +#endif // MLIR_C_AFFINEMAP_H diff --git a/mlir/include/mlir-c/IR.h b/mlir/include/mlir-c/IR.h index 6b5be2d0195b09..68546bf35625a2 100644 --- a/mlir/include/mlir-c/IR.h +++ b/mlir/include/mlir-c/IR.h @@ -56,8 +56,6 @@ DEFINE_C_API_STRUCT(MlirType, const void); DEFINE_C_API_STRUCT(MlirLocation, const void); DEFINE_C_API_STRUCT(MlirModule, const void); -#undef DEFINE_C_API_STRUCT - /** Named MLIR attribute. * * A named attribute is essentially a (name, attribute) pair where the name is @@ -314,6 +312,9 @@ void mlirValuePrint(MlirValue value, MlirPrintCallback callback, /** Parses a type. The type is owned by the context. */ MlirType mlirTypeParseGet(MlirContext context, const char *type); +/** Checks if two types are equal. */ +int mlirTypeEqual(MlirType t1, MlirType t2); + /** Prints a location by sending chunks of the string representation and * forwarding `userData to `callback`. Note that the callback may be called * several times with consecutive chunks of the string. 
*/ diff --git a/mlir/include/mlir-c/Registration.h b/mlir/include/mlir-c/Registration.h index 5e5aa0ed29a277..05d4aacdaa8a15 100644 --- a/mlir/include/mlir-c/Registration.h +++ b/mlir/include/mlir-c/Registration.h @@ -10,14 +10,16 @@ #ifndef MLIR_C_REGISTRATION_H #define MLIR_C_REGISTRATION_H +#include "mlir-c/IR.h" + #ifdef __cplusplus extern "C" { #endif -/** Registers all dialects known to core MLIR with the system. This must be - * called before creating an MlirContext if it needs access to the registered - * dialects. */ -void mlirRegisterAllDialects(); +/** Registers all dialects known to core MLIR with the provided Context. + * This is needed before creating IR for these Dialects. + */ +void mlirRegisterAllDialects(MlirContext context); #ifdef __cplusplus } diff --git a/mlir/include/mlir-c/StandardTypes.h b/mlir/include/mlir-c/StandardTypes.h new file mode 100644 index 00000000000000..ad28ea5467171a --- /dev/null +++ b/mlir/include/mlir-c/StandardTypes.h @@ -0,0 +1,249 @@ +/*===-- mlir-c/StandardTypes.h - C API for MLIR Standard types ----*- C -*-===*\ +|* *| +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *| +|* Exceptions. *| +|* See https://llvm.org/LICENSE.txt for license information. *| +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef MLIR_C_STANDARDTYPES_H +#define MLIR_C_STANDARDTYPES_H + +#include "mlir-c/AffineMap.h" +#include "mlir-c/IR.h" +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/*============================================================================*/ +/* Integer types. */ +/*============================================================================*/ + +/** Checks whether the given type is an integer type. */ +int mlirTypeIsAInteger(MlirType type); + +/** Creates a signless integer type of the given bitwidth in the context. The + * type is owned by the context. 
*/ +MlirType mlirIntegerTypeGet(MlirContext ctx, unsigned bitwidth); + +/** Creates a signed integer type of the given bitwidth in the context. The type + * is owned by the context. */ +MlirType mlirIntegerTypeSignedGet(MlirContext ctx, unsigned bitwidth); + +/** Creates an unsigned integer type of the given bitwidth in the context. The + * type is owned by the context. */ +MlirType mlirIntegerTypeUnsignedGet(MlirContext ctx, unsigned bitwidth); + +/** Returns the bitwidth of an integer type. */ +unsigned mlirIntegerTypeGetWidth(MlirType type); + +/** Checks whether the given integer type is signless. */ +int mlirIntegerTypeIsSignless(MlirType type); + +/** Checks whether the given integer type is signed. */ +int mlirIntegerTypeIsSigned(MlirType type); + +/** Checks whether the given integer type is unsigned. */ +int mlirIntegerTypeIsUnsigned(MlirType type); + +/*============================================================================*/ +/* Index type. */ +/*============================================================================*/ + +/** Checks whether the given type is an index type. */ +int mlirTypeIsAIndex(MlirType type); + +/** Creates an index type in the given context. The type is owned by the + * context. */ +MlirType mlirIndexTypeGet(MlirContext ctx); + +/*============================================================================*/ +/* Floating-point types. */ +/*============================================================================*/ + +/** Checks whether the given type is a bf16 type. */ +int mlirTypeIsABF16(MlirType type); + +/** Creates a bf16 type in the given context. The type is owned by the + * context. */ +MlirType mlirBF16TypeGet(MlirContext ctx); + +/** Checks whether the given type is an f16 type. */ +int mlirTypeIsAF16(MlirType type); + +/** Creates an f16 type in the given context. The type is owned by the + * context. */ +MlirType mlirF16TypeGet(MlirContext ctx); + +/** Checks whether the given type is an f32 type. 
*/ +int mlirTypeIsAF32(MlirType type); + +/** Creates an f32 type in the given context. The type is owned by the + * context. */ +MlirType mlirF32TypeGet(MlirContext ctx); + +/** Checks whether the given type is an f64 type. */ +int mlirTypeIsAF64(MlirType type); + +/** Creates a f64 type in the given context. The type is owned by the + * context. */ +MlirType mlirF64TypeGet(MlirContext ctx); + +/*============================================================================*/ +/* None type. */ +/*============================================================================*/ + +/** Checks whether the given type is a None type. */ +int mlirTypeIsANone(MlirType type); + +/** Creates a None type in the given context. The type is owned by the + * context. */ +MlirType mlirNoneTypeGet(MlirContext ctx); + +/*============================================================================*/ +/* Complex type. */ +/*============================================================================*/ + +/** Checks whether the given type is a Complex type. */ +int mlirTypeIsAComplex(MlirType type); + +/** Creates a complex type with the given element type in the same context as + * the element type. The type is owned by the context. */ +MlirType mlirComplexTypeGet(MlirType elementType); + +/** Returns the element type of the given complex type. */ +MlirType mlirComplexTypeGetElementType(MlirType type); + +/*============================================================================*/ +/* Shaped type. */ +/*============================================================================*/ + +/** Checks whether the given type is a Shaped type. */ +int mlirTypeIsAShaped(MlirType type); + +/** Returns the element type of the shaped type. */ +MlirType mlirShapedTypeGetElementType(MlirType type); + +/** Checks whether the given shaped type is ranked. */ +int mlirShapedTypeHasRank(MlirType type); + +/** Returns the rank of the given ranked shaped type. 
*/ +int64_t mlirShapedTypeGetRank(MlirType type); + +/** Checks whether the given shaped type has a static shape. */ +int mlirShapedTypeHasStaticShape(MlirType type); + +/** Checks whether the dim-th dimension of the given shaped type is dynamic. */ +int mlirShapedTypeIsDynamicDim(MlirType type, intptr_t dim); + +/** Returns the dim-th dimension of the given ranked shaped type. */ +int64_t mlirShapedTypeGetDimSize(MlirType type, intptr_t dim); + +/** Checks whether the given value is used as a placeholder for dynamic sizes + * in shaped types. */ +int mlirShapedTypeIsDynamicSize(int64_t size); + +/** Checks whether the given value is used as a placeholder for dynamic strides + * and offsets in shaped types. */ +int mlirShapedTypeIsDynamicStrideOrOffset(int64_t val); + +/*============================================================================*/ +/* Vector type. */ +/*============================================================================*/ + +/** Checks whether the given type is a Vector type. */ +int mlirTypeIsAVector(MlirType type); + +/** Creates a vector type of the shape identified by its rank and dimensions, + * with the given element type in the same context as the element type. The type + * is owned by the context. */ +MlirType mlirVectorTypeGet(intptr_t rank, int64_t *shape, MlirType elementType); + +/*============================================================================*/ +/* Ranked / Unranked Tensor type. */ +/*============================================================================*/ + +/** Checks whether the given type is a Tensor type. */ +int mlirTypeIsATensor(MlirType type); + +/** Checks whether the given type is a ranked tensor type. */ +int mlirTypeIsARankedTensor(MlirType type); + +/** Checks whether the given type is an unranked tensor type. */ +int mlirTypeIsAUnrankedTensor(MlirType type); + +/** Creates a tensor type of a fixed rank with the given shape and element type + * in the same context as the element type. 
The type is owned by the context. */ +MlirType mlirRankedTensorTypeGet(intptr_t rank, int64_t *shape, + MlirType elementType); + +/** Creates an unranked tensor type with the given element type in the same + * context as the element type. The type is owned by the context. */ +MlirType mlirUnrankedTensorTypeGet(MlirType elementType); + +/*============================================================================*/ +/* Ranked / Unranked MemRef type. */ +/*============================================================================*/ + +/** Checks whether the given type is a MemRef type. */ +int mlirTypeIsAMemRef(MlirType type); + +/** Checks whether the given type is an UnrankedMemRef type. */ +int mlirTypeIsAUnrankedMemRef(MlirType type); + +/** Creates a MemRef type with the given rank and shape, a potentially empty + * list of affine layout maps, the given memory space and element type, in the + * same context as element type. The type is owned by the context. */ +MlirType mlirMemRefTypeGet(MlirType elementType, intptr_t rank, int64_t *shape, + intptr_t numMaps, MlirAttribute *affineMaps, + unsigned memorySpace); + +/** Creates a MemRef type with the given rank, shape, memory space and element + * type in the same context as the element type. The type has no affine maps, + * i.e. represents a default row-major contiguous memref. The type is owned by + * the context. */ +MlirType mlirMemRefTypeContiguousGet(MlirType elementType, intptr_t rank, + int64_t *shape, unsigned memorySpace); + +/** Creates an Unranked MemRef type with the given element type and in the given + * memory space. The type is owned by the context of element type. */ +MlirType mlirUnrankedMemRefTypeGet(MlirType elementType, unsigned memorySpace); + +/** Returns the number of affine layout maps in the given MemRef type. */ +intptr_t mlirMemRefTypeGetNumAffineMaps(MlirType type); + +/** Returns the pos-th affine map of the given MemRef type. 
*/ +MlirAffineMap mlirMemRefTypeGetAffineMap(MlirType type, intptr_t pos); + +/** Returns the memory space of the given MemRef type. */ +unsigned mlirMemRefTypeGetMemorySpace(MlirType type); + +/** Returns the memory space of the given Unranked MemRef type. */ +unsigned mlirUnrankedMemrefGetMemorySpace(MlirType type); + +/*============================================================================*/ +/* Tuple type. */ +/*============================================================================*/ + +/** Checks whether the given type is a tuple type. */ +int mlirTypeIsATuple(MlirType type); + +/** Creates a tuple type that consists of the given list of elemental types. The + * type is owned by the context. */ +MlirType mlirTupleTypeGet(MlirContext ctx, intptr_t numElements, + MlirType *elements); + +/** Returns the number of types contained in a tuple. */ +intptr_t mlirTupleTypeGetNumTypes(MlirType type); + +/** Returns the pos-th type in the tuple type. */ +MlirType mlirTupleTypeGetType(MlirType type, intptr_t pos); + +#ifdef __cplusplus +} +#endif + +#endif // MLIR_C_STANDARDTYPES_H diff --git a/mlir/include/mlir/CAPI/AffineMap.h b/mlir/include/mlir/CAPI/AffineMap.h new file mode 100644 index 00000000000000..cea48ffae8b6d8 --- /dev/null +++ b/mlir/include/mlir/CAPI/AffineMap.h @@ -0,0 +1,24 @@ +//===- AffineMap.h - C API Utils for Affine Maps ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains declarations of implementation details of the C API for +// MLIR Affine maps. This file should not be included from C++ code other than +// C API implementation nor from C code. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CAPI_AFFINEMAP_H +#define MLIR_CAPI_AFFINEMAP_H + +#include "mlir-c/AffineMap.h" +#include "mlir/CAPI/Wrap.h" +#include "mlir/IR/AffineMap.h" + +DEFINE_C_API_METHODS(MlirAffineMap, mlir::AffineMap) + +#endif // MLIR_CAPI_AFFINEMAP_H diff --git a/mlir/include/mlir/CAPI/IR.h b/mlir/include/mlir/CAPI/IR.h new file mode 100644 index 00000000000000..9a60ecf04fc892 --- /dev/null +++ b/mlir/include/mlir/CAPI/IR.h @@ -0,0 +1,34 @@ +//===- IR.h - C API Utils for Core MLIR classes -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains declarations of implementation details of the C API for +// core MLIR classes. This file should not be included from C++ code other than +// C API implementation nor from C code. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_INCLUDE_MLIR_CAPI_IR_H +#define MLIR_INCLUDE_MLIR_CAPI_IR_H + +#include "mlir/CAPI/Wrap.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Module.h" +#include "mlir/IR/Operation.h" + +DEFINE_C_API_PTR_METHODS(MlirContext, mlir::MLIRContext) +DEFINE_C_API_PTR_METHODS(MlirOperation, mlir::Operation) +DEFINE_C_API_PTR_METHODS(MlirBlock, mlir::Block) +DEFINE_C_API_PTR_METHODS(MlirRegion, mlir::Region) + +DEFINE_C_API_METHODS(MlirAttribute, mlir::Attribute) +DEFINE_C_API_METHODS(MlirLocation, mlir::Location) +DEFINE_C_API_METHODS(MlirType, mlir::Type) +DEFINE_C_API_METHODS(MlirValue, mlir::Value) +DEFINE_C_API_METHODS(MlirModule, mlir::ModuleOp) + +#endif // MLIR_INCLUDE_MLIR_CAPI_IR_H diff --git a/mlir/include/mlir/CAPI/Wrap.h b/mlir/include/mlir/CAPI/Wrap.h new file mode 100644 index 00000000000000..940007caac060a --- /dev/null +++ b/mlir/include/mlir/CAPI/Wrap.h @@ -0,0 +1,56 @@ +//===- Wrap.h - C API Utilities ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains common definitions for wrapping opaque C++ pointers into +// C structures for the purpose of C API. This file should not be included from +// C++ code other than C API implementation nor from C code. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CAPI_WRAP_H +#define MLIR_CAPI_WRAP_H + +#include "mlir-c/IR.h" +#include "mlir/Support/LLVM.h" + +/* ========================================================================== */ +/* Definitions of methods for non-owning structures used in C API. 
*/ +/* ========================================================================== */ + +#define DEFINE_C_API_PTR_METHODS(name, cpptype) \ + static inline name wrap(cpptype *cpp) { return name{cpp}; } \ + static inline cpptype *unwrap(name c) { \ + return static_cast(c.ptr); \ + } + +#define DEFINE_C_API_METHODS(name, cpptype) \ + static inline name wrap(cpptype cpp) { \ + return name{cpp.getAsOpaquePointer()}; \ + } \ + static inline cpptype unwrap(name c) { \ + return cpptype::getFromOpaquePointer(c.ptr); \ + } + +template +static llvm::ArrayRef unwrapList(size_t size, CTy *first, + llvm::SmallVectorImpl &storage) { + static_assert( + std::is_same())), CppTy>::value, + "incompatible C and C++ types"); + + if (size == 0) + return llvm::None; + + assert(storage.empty() && "expected to populate storage"); + storage.reserve(size); + for (size_t i = 0; i < size; ++i) + storage.push_back(unwrap(*(first + i))); + return storage; +} + +#endif // MLIR_CAPI_WRAP_H diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 4ff23d71a5c0bf..0a043c01e98140 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -66,6 +66,11 @@ def ConvertAffineToStandard : Pass<"lower-affine"> { `affine.apply`. 
}]; let constructor = "mlir::createLowerAffinePass()"; + let dependentDialects = [ + "scf::SCFDialect", + "StandardOpsDialect", + "vector::VectorDialect" + ]; } //===----------------------------------------------------------------------===// @@ -76,6 +81,7 @@ def ConvertAVX512ToLLVM : Pass<"convert-avx512-to-llvm", "ModuleOp"> { let summary = "Convert the operations from the avx512 dialect into the LLVM " "dialect"; let constructor = "mlir::createConvertAVX512ToLLVMPass()"; + let dependentDialects = ["LLVM::LLVMDialect", "LLVM::LLVMAVX512Dialect"]; } //===----------------------------------------------------------------------===// @@ -98,6 +104,7 @@ def GpuToLLVMConversionPass : Pass<"gpu-to-llvm", "ModuleOp"> { def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> { let summary = "Generate NVVM operations for gpu operations"; let constructor = "mlir::createLowerGpuOpsToNVVMOpsPass()"; + let dependentDialects = ["NVVM::NVVMDialect"]; let options = [ Option<"indexBitwidth", "index-bitwidth", "unsigned", /*default=kDeriveIndexBitwidthFromDataLayout*/"0", @@ -112,6 +119,7 @@ def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> { def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> { let summary = "Generate ROCDL operations for gpu operations"; let constructor = "mlir::createLowerGpuOpsToROCDLOpsPass()"; + let dependentDialects = ["ROCDL::ROCDLDialect"]; let options = [ Option<"indexBitwidth", "index-bitwidth", "unsigned", /*default=kDeriveIndexBitwidthFromDataLayout*/"0", @@ -126,6 +134,7 @@ def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> { def ConvertGPUToSPIRV : Pass<"convert-gpu-to-spirv", "ModuleOp"> { let summary = "Convert GPU dialect to SPIR-V dialect"; let constructor = "mlir::createConvertGPUToSPIRVPass()"; + let dependentDialects = ["spirv::SPIRVDialect"]; } //===----------------------------------------------------------------------===// @@ -136,6 +145,7 @@ 
def ConvertGpuLaunchFuncToVulkanLaunchFunc : Pass<"convert-gpu-launch-to-vulkan-launch", "ModuleOp"> { let summary = "Convert gpu.launch_func to vulkanLaunch external call"; let constructor = "mlir::createConvertGpuLaunchFuncToVulkanLaunchFuncPass()"; + let dependentDialects = ["spirv::SPIRVDialect"]; } def ConvertVulkanLaunchFuncToVulkanCalls @@ -143,6 +153,7 @@ def ConvertVulkanLaunchFuncToVulkanCalls let summary = "Convert vulkanLaunch external call to Vulkan runtime external " "calls"; let constructor = "mlir::createConvertVulkanLaunchFuncToVulkanCallsPass()"; + let dependentDialects = ["LLVM::LLVMDialect"]; } //===----------------------------------------------------------------------===// @@ -153,6 +164,7 @@ def ConvertLinalgToLLVM : Pass<"convert-linalg-to-llvm", "ModuleOp"> { let summary = "Convert the operations from the linalg dialect into the LLVM " "dialect"; let constructor = "mlir::createConvertLinalgToLLVMPass()"; + let dependentDialects = ["scf::SCFDialect", "LLVM::LLVMDialect"]; } //===----------------------------------------------------------------------===// @@ -163,6 +175,7 @@ def ConvertLinalgToStandard : Pass<"convert-linalg-to-std", "ModuleOp"> { let summary = "Convert the operations from the linalg dialect into the " "Standard dialect"; let constructor = "mlir::createConvertLinalgToStandardPass()"; + let dependentDialects = ["StandardOpsDialect"]; } //===----------------------------------------------------------------------===// @@ -172,6 +185,7 @@ def ConvertLinalgToStandard : Pass<"convert-linalg-to-std", "ModuleOp"> { def ConvertLinalgToSPIRV : Pass<"convert-linalg-to-spirv", "ModuleOp"> { let summary = "Convert Linalg ops to SPIR-V ops"; let constructor = "mlir::createLinalgToSPIRVPass()"; + let dependentDialects = ["spirv::SPIRVDialect"]; } //===----------------------------------------------------------------------===// @@ -182,6 +196,7 @@ def SCFToStandard : Pass<"convert-scf-to-std"> { let summary = "Convert SCF dialect to Standard 
dialect, replacing structured" " control flow with a CFG"; let constructor = "mlir::createLowerToCFGPass()"; + let dependentDialects = ["StandardOpsDialect"]; } //===----------------------------------------------------------------------===// @@ -191,6 +206,7 @@ def SCFToStandard : Pass<"convert-scf-to-std"> { def ConvertAffineForToGPU : FunctionPass<"convert-affine-for-to-gpu"> { let summary = "Convert top-level AffineFor Ops to GPU kernels"; let constructor = "mlir::createAffineForToGPUPass()"; + let dependentDialects = ["gpu::GPUDialect"]; let options = [ Option<"numBlockDims", "gpu-block-dims", "unsigned", /*default=*/"1u", "Number of GPU block dimensions for mapping">, @@ -202,6 +218,7 @@ def ConvertAffineForToGPU : FunctionPass<"convert-affine-for-to-gpu"> { def ConvertParallelLoopToGpu : Pass<"convert-parallel-loops-to-gpu"> { let summary = "Convert mapped scf.parallel ops to gpu launch operations"; let constructor = "mlir::createParallelLoopToGpuPass()"; + let dependentDialects = ["AffineDialect", "gpu::GPUDialect"]; } //===----------------------------------------------------------------------===// @@ -212,6 +229,7 @@ def ConvertShapeToStandard : Pass<"convert-shape-to-std", "ModuleOp"> { let summary = "Convert operations from the shape dialect into the standard " "dialect"; let constructor = "mlir::createConvertShapeToStandardPass()"; + let dependentDialects = ["StandardOpsDialect"]; } //===----------------------------------------------------------------------===// @@ -221,6 +239,7 @@ def ConvertShapeToStandard : Pass<"convert-shape-to-std", "ModuleOp"> { def ConvertShapeToSCF : FunctionPass<"convert-shape-to-scf"> { let summary = "Convert operations from the shape dialect to the SCF dialect"; let constructor = "mlir::createConvertShapeToSCFPass()"; + let dependentDialects = ["scf::SCFDialect"]; } //===----------------------------------------------------------------------===// @@ -230,6 +249,7 @@ def ConvertShapeToSCF : FunctionPass<"convert-shape-to-scf"> 
{ def ConvertSPIRVToLLVM : Pass<"convert-spirv-to-llvm", "ModuleOp"> { let summary = "Convert SPIR-V dialect to LLVM dialect"; let constructor = "mlir::createConvertSPIRVToLLVMPass()"; + let dependentDialects = ["LLVM::LLVMDialect"]; } //===----------------------------------------------------------------------===// @@ -264,6 +284,7 @@ def ConvertStandardToLLVM : Pass<"convert-std-to-llvm", "ModuleOp"> { LLVM IR types. }]; let constructor = "mlir::createLowerToLLVMPass()"; + let dependentDialects = ["LLVM::LLVMDialect"]; let options = [ Option<"useAlignedAlloc", "use-aligned-alloc", "bool", /*default=*/"false", "Use aligned_alloc in place of malloc for heap allocations">, @@ -291,11 +312,13 @@ def ConvertStandardToLLVM : Pass<"convert-std-to-llvm", "ModuleOp"> { def LegalizeStandardForSPIRV : Pass<"legalize-std-for-spirv"> { let summary = "Legalize standard ops for SPIR-V lowering"; let constructor = "mlir::createLegalizeStdOpsForSPIRVLoweringPass()"; + let dependentDialects = ["spirv::SPIRVDialect"]; } def ConvertStandardToSPIRV : Pass<"convert-std-to-spirv", "ModuleOp"> { let summary = "Convert Standard Ops to SPIR-V dialect"; let constructor = "mlir::createConvertStandardToSPIRVPass()"; + let dependentDialects = ["spirv::SPIRVDialect"]; } //===----------------------------------------------------------------------===// @@ -306,6 +329,7 @@ def ConvertVectorToSCF : FunctionPass<"convert-vector-to-scf"> { let summary = "Lower the operations from the vector dialect into the SCF " "dialect"; let constructor = "mlir::createConvertVectorToSCFPass()"; + let dependentDialects = ["AffineDialect", "scf::SCFDialect"]; let options = [ Option<"fullUnroll", "full-unroll", "bool", /*default=*/"false", "Perform full unrolling when converting vector transfers to SCF">, @@ -320,6 +344,7 @@ def ConvertVectorToLLVM : Pass<"convert-vector-to-llvm", "ModuleOp"> { let summary = "Lower the operations from the vector dialect into the LLVM " "dialect"; let constructor = 
"mlir::createConvertVectorToLLVMPass()"; + let dependentDialects = ["LLVM::LLVMDialect"]; let options = [ Option<"reassociateFPReductions", "reassociate-fp-reductions", "bool", /*default=*/"false", @@ -335,6 +360,7 @@ def ConvertVectorToROCDL : Pass<"convert-vector-to-rocdl", "ModuleOp"> { let summary = "Lower the operations from the vector dialect into the ROCDL " "dialect"; let constructor = "mlir::createConvertVectorToROCDLPass()"; + let dependentDialects = ["ROCDL::ROCDLDialect"]; } #endif // MLIR_CONVERSION_PASSES diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td index 810640058155fb..f43fabd19aaefe 100644 --- a/mlir/include/mlir/Dialect/Affine/Passes.td +++ b/mlir/include/mlir/Dialect/Affine/Passes.td @@ -94,6 +94,7 @@ def AffineLoopUnrollAndJam : FunctionPass<"affine-loop-unroll-jam"> { def AffineVectorize : FunctionPass<"affine-super-vectorize"> { let summary = "Vectorize to a target independent n-D vector abstraction"; let constructor = "mlir::createSuperVectorizePass()"; + let dependentDialects = ["vector::VectorDialect"]; let options = [ ListOption<"vectorSizes", "virtual-vector-size", "int64_t", "Specify an n-D virtual vector size for vectorization", diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h index 04700f0aa17dbb..2f465f07a97e42 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h @@ -15,6 +15,7 @@ #define MLIR_DIALECT_LLVMIR_LLVMDIALECT_H_ #include "mlir/Dialect/LLVMIR/LLVMTypes.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/Function.h" #include "mlir/IR/OpDefinition.h" diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td index e824f97bc28544..226743587bd9d5 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td @@ 
-19,6 +19,11 @@ include "mlir/IR/OpBase.td" def LLVM_Dialect : Dialect { let name = "llvm"; let cppNamespace = "LLVM"; + + /// FIXME: at the moment this is a dependency of the translation to LLVM IR, + /// not really one of this dialect per-se. + let dependentDialects = ["omp::OpenMPDialect"]; + let hasRegionArgAttrVerify = 1; let hasOperationAttrVerify = 1; let extraClassDeclaration = [{ diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h index b71964b5d0f84d..e9a62cf5bac5c2 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h @@ -64,34 +64,6 @@ class LLVMIntegerType; /// structs, the entire type is the identifier) and are thread-safe. class LLVMType : public Type { public: - enum Kind { - // Keep non-parametric types contiguous in the enum. - VoidType = FIRST_LLVM_TYPE + 1, - HalfType, - BFloatType, - FloatType, - DoubleType, - FP128Type, - X86FP80Type, - PPCFP128Type, - X86MMXType, - LabelType, - TokenType, - MetadataType, - // End of non-parametric types. - FunctionType, - IntegerType, - PointerType, - FixedVectorType, - ScalableVectorType, - ArrayType, - StructType, - FIRST_NEW_LLVM_TYPE = VoidType, - LAST_NEW_LLVM_TYPE = StructType, - FIRST_TRIVIAL_TYPE = VoidType, - LAST_TRIVIAL_TYPE = MetadataType - }; - /// Inherit base constructors. using Type::Type; @@ -256,27 +228,24 @@ class LLVMType : public Type { //===----------------------------------------------------------------------===// // Batch-define trivial types. 
-#define DEFINE_TRIVIAL_LLVM_TYPE(ClassName, Kind) \ +#define DEFINE_TRIVIAL_LLVM_TYPE(ClassName) \ class ClassName : public Type::TypeBase { \ public: \ using Base::Base; \ - static ClassName get(MLIRContext *context) { \ - return Base::get(context, Kind); \ - } \ } -DEFINE_TRIVIAL_LLVM_TYPE(LLVMVoidType, LLVMType::VoidType); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMHalfType, LLVMType::HalfType); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMBFloatType, LLVMType::BFloatType); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMFloatType, LLVMType::FloatType); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMDoubleType, LLVMType::DoubleType); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMFP128Type, LLVMType::FP128Type); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMX86FP80Type, LLVMType::X86FP80Type); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMPPCFP128Type, LLVMType::PPCFP128Type); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMX86MMXType, LLVMType::X86MMXType); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMTokenType, LLVMType::TokenType); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMLabelType, LLVMType::LabelType); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMMetadataType, LLVMType::MetadataType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMVoidType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMHalfType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMBFloatType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMFloatType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMDoubleType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMFP128Type); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMX86FP80Type); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMPPCFP128Type); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMX86MMXType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMTokenType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMLabelType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMMetadataType); #undef DEFINE_TRIVIAL_LLVM_TYPE diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h index 86d437c9b561b7..9cc5314bdb901f 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h @@ -14,6 +14,7 @@ #ifndef MLIR_DIALECT_LLVMIR_NVVMDIALECT_H_ #define MLIR_DIALECT_LLVMIR_NVVMDIALECT_H_ +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" 
#include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" #include "mlir/Interfaces/SideEffectInterfaces.h" diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 5f022e32b801d6..7d47e5012ac9a0 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -23,6 +23,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td" def NVVM_Dialect : Dialect { let name = "nvvm"; let cppNamespace = "NVVM"; + let dependentDialects = ["LLVM::LLVMDialect"]; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h b/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h index bf761c357f9074..eb40373c3f1171 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h @@ -22,6 +22,7 @@ #ifndef MLIR_DIALECT_LLVMIR_ROCDLDIALECT_H_ #define MLIR_DIALECT_LLVMIR_ROCDLDIALECT_H_ +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" #include "mlir/Interfaces/SideEffectInterfaces.h" diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td index 0cd11690daa8ba..f85c4f02899b46 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td @@ -23,6 +23,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td" def ROCDL_Dialect : Dialect { let name = "rocdl"; let cppNamespace = "ROCDL"; + let dependentDialects = ["LLVM::LLVMDialect"]; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgTypes.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgTypes.h index 17e803db82114c..18b2c3aaa53d11 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgTypes.h +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgTypes.h @@ -16,11 +16,6 @@ namespace mlir { 
class MLIRContext; namespace linalg { -enum LinalgTypes { - Range = Type::FIRST_LINALG_TYPE, - LAST_USED_LINALG_TYPE = Range, -}; - #include "mlir/Dialect/Linalg/IR/LinalgOpsDialect.h.inc" /// A RangeType represents a minimal range abstraction (min, max, step). @@ -36,11 +31,6 @@ class RangeType : public Type::TypeBase { public: // Used for generic hooks in TypeBase. using Base::Base; - /// Construction hook. - static RangeType get(MLIRContext *context) { - /// Custom, uniq'ed construction in the MLIRContext. - return Base::get(context, LinalgTypes::Range); - } }; } // namespace linalg diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td index 11f12ad30eb6c0..dcf4b5ec06cb6f 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.td +++ b/mlir/include/mlir/Dialect/Linalg/Passes.td @@ -30,17 +30,20 @@ def LinalgFusion : FunctionPass<"linalg-fusion"> { def LinalgFusionOfTensorOps : Pass<"linalg-fusion-for-tensor-ops"> { let summary = "Fuse operations on RankedTensorType in linalg dialect"; let constructor = "mlir::createLinalgFusionOfTensorOpsPass()"; + let dependentDialects = ["AffineDialect"]; } def LinalgLowerToAffineLoops : FunctionPass<"convert-linalg-to-affine-loops"> { let summary = "Lower the operations from the linalg dialect into affine " "loops"; let constructor = "mlir::createConvertLinalgToAffineLoopsPass()"; + let dependentDialects = ["AffineDialect"]; } def LinalgLowerToLoops : FunctionPass<"convert-linalg-to-loops"> { let summary = "Lower the operations from the linalg dialect into loops"; let constructor = "mlir::createConvertLinalgToLoopsPass()"; + let dependentDialects = ["scf::SCFDialect", "AffineDialect"]; } def LinalgOnTensorsToBuffers : Pass<"convert-linalg-on-tensors-to-buffers", "ModuleOp"> { @@ -54,6 +57,7 @@ def LinalgLowerToParallelLoops let summary = "Lower the operations from the linalg dialect into parallel " "loops"; let constructor = "mlir::createConvertLinalgToParallelLoopsPass()"; + let 
dependentDialects = ["AffineDialect", "scf::SCFDialect"]; } def LinalgPromotion : FunctionPass<"linalg-promote-subviews"> { @@ -70,6 +74,9 @@ def LinalgPromotion : FunctionPass<"linalg-promote-subviews"> { def LinalgTiling : FunctionPass<"linalg-tile"> { let summary = "Tile operations in the linalg dialect"; let constructor = "mlir::createLinalgTilingPass()"; + let dependentDialects = [ + "AffineDialect", "scf::SCFDialect" + ]; let options = [ ListOption<"tileSizes", "linalg-tile-sizes", "int64_t", "Test generation of dynamic promoted buffers", @@ -86,6 +93,7 @@ def LinalgTilingToParallelLoops "Test generation of dynamic promoted buffers", "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated"> ]; + let dependentDialects = ["AffineDialect", "scf::SCFDialect"]; } #endif // MLIR_DIALECT_LINALG_PASSES diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index 794ebcbc264516..beef1a70096e67 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -198,19 +198,23 @@ enum class DistributionMethod { }; /// Callback function type used to get processor ID, and number of processors -/// used for distribution. +/// used for distribution for all parallel loops generated. struct ProcInfo { Value procId; Value nprocs; }; -using ProcInfoCallBackFn = - std::function; +using ProcInfoCallBackFn = std::function( + OpBuilder &b, Location loc, ArrayRef parallelLoopRanges)>; /// Options that allow distribution of loops generated in Linalg transforms to /// processors while generating the loops. struct LinalgLoopDistributionOptions { - /// Callback function that returns the Value for processor ID, and number of - /// processors used to execute a given loop. + /// Callback function that returns the Values for processor ID (`procId`), and + /// number of processors (`nprocs`) used to execute the parallel loops. 
The + /// number of `{procId, nprocs}` pairs returned must be equal to the number of + /// `parallelLoopRanges` passed into the callback, which in-turn is same as + /// the number of parallel loops for which the `distributionMethod` is + /// specified below. ProcInfoCallBackFn procInfo; /// Specification of how to distribute the `scf.parallel` loops that are /// generated. As the `scf.parallel` loop is generated, the elements of this diff --git a/mlir/include/mlir/Dialect/Quant/QuantTypes.h b/mlir/include/mlir/Dialect/Quant/QuantTypes.h index ccdc289a9a7c7b..567b63936dd371 100644 --- a/mlir/include/mlir/Dialect/Quant/QuantTypes.h +++ b/mlir/include/mlir/Dialect/Quant/QuantTypes.h @@ -31,15 +31,6 @@ struct UniformQuantizedPerAxisTypeStorage; } // namespace detail -namespace QuantizationTypes { -enum Kind { - Any = Type::FIRST_QUANTIZATION_TYPE, - UniformQuantized, - UniformQuantizedPerAxis, - LAST_USED_QUANTIZATION_TYPE = UniformQuantizedPerAxis, -}; -} // namespace QuantizationTypes - /// Enumeration of bit-mapped flags related to quantized types. 
namespace QuantizationFlags { enum FlagValue { diff --git a/mlir/include/mlir/Dialect/SCF/Passes.td b/mlir/include/mlir/Dialect/SCF/Passes.td index 483d0ba7c7be08..6f3cf0e1264235 100644 --- a/mlir/include/mlir/Dialect/SCF/Passes.td +++ b/mlir/include/mlir/Dialect/SCF/Passes.td @@ -36,6 +36,7 @@ def SCFParallelLoopTiling : FunctionPass<"parallel-loop-tiling"> { "Factors to tile parallel loops by", "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated"> ]; + let dependentDialects = ["AffineDialect"]; } #endif // MLIR_DIALECT_SCF_PASSES diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVAttributes.h b/mlir/include/mlir/Dialect/SPIRV/SPIRVAttributes.h index 6788d5952cd47b..b1909b36755358 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVAttributes.h +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVAttributes.h @@ -32,15 +32,6 @@ struct TargetEnvAttributeStorage; struct VerCapExtAttributeStorage; } // namespace detail -/// SPIR-V dialect-specific attribute kinds. -namespace AttrKind { -enum Kind { - InterfaceVarABI = Attribute::FIRST_SPIRV_ATTR, /// Interface var ABI - TargetEnv, /// Target environment - VerCapExt, /// (version, extension, capability) triple -}; -} // namespace AttrKind - /// An attribute that specifies the information regarding the interface /// variable: descriptor set, binding, storage class. class InterfaceVarABIAttr diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVTypes.h b/mlir/include/mlir/Dialect/SPIRV/SPIRVTypes.h index a9d120b5d114a0..2d224effdee356 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVTypes.h +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVTypes.h @@ -65,19 +65,6 @@ struct StructTypeStorage; } // namespace detail -namespace TypeKind { -enum Kind { - Array = Type::FIRST_SPIRV_TYPE, - CooperativeMatrix, - Image, - Matrix, - Pointer, - RuntimeArray, - Struct, - LAST_SPIRV_TYPE = Struct, -}; -} - // Base SPIR-V type for providing availability queries. 
class SPIRVType : public Type { public: diff --git a/mlir/include/mlir/Dialect/Shape/IR/Shape.h b/mlir/include/mlir/Dialect/Shape/IR/Shape.h index 3168e87b3df005..cc601bdedaca60 100644 --- a/mlir/include/mlir/Dialect/Shape/IR/Shape.h +++ b/mlir/include/mlir/Dialect/Shape/IR/Shape.h @@ -29,56 +29,28 @@ namespace shape { /// Alias type for extent tensors. RankedTensorType getExtentTensorType(MLIRContext *ctx); -namespace ShapeTypes { -enum Kind { - Component = Type::FIRST_SHAPE_TYPE, - Element, - Shape, - Size, - ValueShape, - Witness, - LAST_SHAPE_TYPE = Witness -}; -} // namespace ShapeTypes - /// The component type corresponding to shape, element type and attribute. class ComponentType : public Type::TypeBase { public: using Base::Base; - - static ComponentType get(MLIRContext *context) { - return Base::get(context, ShapeTypes::Kind::Component); - } }; /// The element type of the shaped type. class ElementType : public Type::TypeBase { public: using Base::Base; - - static ElementType get(MLIRContext *context) { - return Base::get(context, ShapeTypes::Kind::Element); - } }; /// The shape descriptor type represents rank and dimension sizes. class ShapeType : public Type::TypeBase { public: using Base::Base; - - static ShapeType get(MLIRContext *context) { - return Base::get(context, ShapeTypes::Kind::Shape); - } }; /// The type of a single dimension. class SizeType : public Type::TypeBase { public: using Base::Base; - - static SizeType get(MLIRContext *context) { - return Base::get(context, ShapeTypes::Kind::Size); - } }; /// The ValueShape represents a (potentially unknown) runtime value and shape. 
@@ -86,10 +58,6 @@ class ValueShapeType : public Type::TypeBase { public: using Base::Base; - - static ValueShapeType get(MLIRContext *context) { - return Base::get(context, ShapeTypes::Kind::ValueShape); - } }; /// The Witness represents a runtime constraint, to be used as shape related @@ -97,10 +65,6 @@ class ValueShapeType class WitnessType : public Type::TypeBase { public: using Base::Base; - - static WitnessType get(MLIRContext *context) { - return Base::get(context, ShapeTypes::Kind::Witness); - } }; #define GET_OP_CLASSES diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 088f262790d6c3..510d485d019f18 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -814,6 +814,39 @@ def CeilFOp : FloatUnaryOp<"ceilf"> { }]; } +//===----------------------------------------------------------------------===// +// FloorFOp +//===----------------------------------------------------------------------===// + +def FloorFOp : FloatUnaryOp<"floorf"> { + let summary = "floor of the specified value"; + let description = [{ + Syntax: + + ``` + operation ::= ssa-id `=` `std.floorf` ssa-use `:` type + ``` + + The `floorf` operation computes the floor of a given value. It takes one + operand and returns one result of the same type. This type may be a float + scalar type, a vector whose element type is float, or a tensor of floats. + It has no standard attributes. + + Example: + + ```mlir + // Scalar floor value. + %a = floorf %b : f64 + + // SIMD vector element-wise floor value. + %f = floorf %g : vector<4xf32> + + // Tensor element-wise floor value. 
+ %x = floorf %y : tensor<4x?xf8> + ``` + }]; +} + //===----------------------------------------------------------------------===// // CmpFOp //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h index d946c7591c2a05..dd4960a02c5c67 100644 --- a/mlir/include/mlir/IR/AffineMap.h +++ b/mlir/include/mlir/IR/AffineMap.h @@ -196,6 +196,14 @@ class AffineMap { friend ::llvm::hash_code hash_value(AffineMap arg); + /// Methods supporting C API. + const void *getAsOpaquePointer() const { + return static_cast(map); + } + static AffineMap getFromOpaquePointer(const void *pointer) { + return AffineMap(reinterpret_cast(const_cast(pointer))); + } + private: ImplType *map; diff --git a/mlir/include/mlir/IR/AttributeSupport.h b/mlir/include/mlir/IR/AttributeSupport.h index 31e6285164ab21..35084a20493f58 100644 --- a/mlir/include/mlir/IR/AttributeSupport.h +++ b/mlir/include/mlir/IR/AttributeSupport.h @@ -137,15 +137,23 @@ namespace detail { // MLIRContext. This class manages all creation and uniquing of attributes. class AttributeUniquer { public: - /// Get an uniqued instance of attribute T. + /// Get an uniqued instance of a parametric attribute T. template - static T get(MLIRContext *ctx, unsigned kind, Args &&... args) { + static typename std::enable_if_t< + !std::is_same::value, T> + get(MLIRContext *ctx, Args &&...args) { return ctx->getAttributeUniquer().get( - T::getTypeID(), [ctx](AttributeStorage *storage) { initializeAttributeStorage(storage, ctx, T::getTypeID()); }, - kind, std::forward(args)...); + T::getTypeID(), std::forward(args)...); + } + /// Get an uniqued instance of a singleton attribute T. 
+ template + static typename std::enable_if_t< + std::is_same::value, T> + get(MLIRContext *ctx) { + return ctx->getAttributeUniquer().get(T::getTypeID()); } template @@ -156,6 +164,26 @@ class AttributeUniquer { std::forward(args)...); } + /// Register a parametric attribute instance T with the uniquer. + template + static typename std::enable_if_t< + !std::is_same::value> + registerAttribute(MLIRContext *ctx) { + ctx->getAttributeUniquer() + .registerParametricStorageType(T::getTypeID()); + } + /// Register a singleton attribute instance T with the uniquer. + template + static typename std::enable_if_t< + std::is_same::value> + registerAttribute(MLIRContext *ctx) { + ctx->getAttributeUniquer() + .registerSingletonStorageType( + T::getTypeID(), [ctx](AttributeStorage *storage) { + initializeAttributeStorage(storage, ctx, T::getTypeID()); + }); + } + private: /// Initialize the given attribute storage instance. static void initializeAttributeStorage(AttributeStorage *storage, diff --git a/mlir/include/mlir/IR/Attributes.h b/mlir/include/mlir/IR/Attributes.h index 75ac2adc302c1b..aa8f2eafb896b0 100644 --- a/mlir/include/mlir/IR/Attributes.h +++ b/mlir/include/mlir/IR/Attributes.h @@ -54,14 +54,6 @@ struct SparseElementsAttributeStorage; /// passed by value. class Attribute { public: - /// Integer identifier for all the concrete attribute kinds. - enum Kind { - // Reserve attribute kinds for dialect specific extensions. -#define DEFINE_SYM_KIND_RANGE(Dialect) \ - FIRST_##Dialect##_ATTR, LAST_##Dialect##_ATTR = FIRST_##Dialect##_ATTR + 0xff, -#include "DialectSymbolRegistry.def" - }; - /// Utility class for implementing attributes. template class... Traits> @@ -94,9 +86,6 @@ class Attribute { // Support dyn_cast'ing Attribute to itself. static bool classof(Attribute) { return true; } - /// Return the classification for this attribute. - unsigned getKind() const { return impl->getKind(); } - /// Return a unique identifier for the concrete attribute type. 
This is used /// to support dynamic type casting. TypeID getTypeID() { return impl->getAbstractAttribute().getTypeID(); } @@ -173,54 +162,6 @@ class AttributeInterface friend InterfaceBase; }; -//===----------------------------------------------------------------------===// -// StandardAttributes -//===----------------------------------------------------------------------===// - -namespace StandardAttributes { -enum Kind { - AffineMap = Attribute::FIRST_STANDARD_ATTR, - Array, - Dictionary, - Float, - Integer, - IntegerSet, - Opaque, - String, - SymbolRef, - Type, - Unit, - - /// Elements Attributes. - DenseIntOrFPElements, - DenseStringElements, - OpaqueElements, - SparseElements, - FIRST_ELEMENTS_ATTR = DenseIntOrFPElements, - LAST_ELEMENTS_ATTR = SparseElements, - - /// Locations. - CallSiteLocation, - FileLineColLocation, - FusedLocation, - NameLocation, - OpaqueLocation, - UnknownLocation, - - // Represents a location as a 'void*' pointer to a front-end's opaque - // location information, which must live longer than the MLIR objects that - // refer to it. OpaqueLocation's are never serialized. - // - // TODO: OpaqueLocation, - - // Represents a value inlined through a function call. 
- // TODO: InlinedLocation, - - FIRST_LOCATION_ATTR = CallSiteLocation, - LAST_LOCATION_ATTR = UnknownLocation, -}; -} // namespace StandardAttributes - //===----------------------------------------------------------------------===// // AffineMapAttr //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/IR/Dialect.h b/mlir/include/mlir/IR/Dialect.h index 4f9e4cb3618b65..0fe1a7f29724d5 100644 --- a/mlir/include/mlir/IR/Dialect.h +++ b/mlir/include/mlir/IR/Dialect.h @@ -16,6 +16,8 @@ #include "mlir/IR/OperationSupport.h" #include "mlir/Support/TypeID.h" +#include + namespace mlir { class DialectAsmParser; class DialectAsmPrinter; @@ -23,7 +25,7 @@ class DialectInterface; class OpBuilder; class Type; -using DialectAllocatorFunction = std::function; +using DialectAllocatorFunction = std::function; /// Dialects are groups of MLIR operations and behavior associated with the /// entire group. For example, hooks into other systems for constant folding, @@ -154,21 +156,15 @@ class Dialect { void addOperation(AbstractOperation opInfo); - /// This method is used by derived classes to add their types to the set. + /// Register a set of type classes with this dialect. template void addTypes() { - (void)std::initializer_list{ - 0, (addType(Args::getTypeID(), AbstractType::get(*this)), 0)...}; + (void)std::initializer_list{0, (addType(), 0)...}; } - void addType(TypeID typeID, AbstractType &&typeInfo); - /// This method is used by derived classes to add their attributes to the set. + /// Register a set of attribute classes with this dialect. template void addAttributes() { - (void)std::initializer_list{ - 0, - (addAttribute(Args::getTypeID(), AbstractAttribute::get(*this)), - 0)...}; + (void)std::initializer_list{0, (addAttribute(), 0)...}; } - void addAttribute(TypeID typeID, AbstractAttribute &&attrInfo); /// Enable support for unregistered operations. 
void allowUnknownOperations(bool allow = true) { unknownOpsAllowed = allow; } @@ -189,6 +185,22 @@ class Dialect { Dialect(const Dialect &) = delete; void operator=(Dialect &) = delete; + /// Register an attribute instance with this dialect. + template void addAttribute() { + // Add this attribute to the dialect and register it with the uniquer. + addAttribute(T::getTypeID(), AbstractAttribute::get(*this)); + detail::AttributeUniquer::registerAttribute(context); + } + void addAttribute(TypeID typeID, AbstractAttribute &&attrInfo); + + /// Register a type instance with this dialect. + template void addType() { + // Add this type to the dialect and register it with the uniquer. + addType(T::getTypeID(), AbstractType::get(*this)); + detail::TypeUniquer::registerType(context); + } + void addType(TypeID typeID, AbstractType &&typeInfo); + /// The namespace of this dialect. StringRef name; @@ -212,30 +224,87 @@ class Dialect { /// A collection of registered dialect interfaces. DenseMap> registeredInterfaces; - /// Registers a specific dialect creation function with the global registry. - /// Used through the registerDialect template. - /// Registrations are deduplicated by dialect TypeID and only the first - /// registration will be used. - static void - registerDialectAllocator(TypeID typeID, - const DialectAllocatorFunction &function); - template friend void registerDialect(); friend class MLIRContext; }; -/// Registers all dialects and hooks from the global registries with the -/// specified MLIRContext. +/// The DialectRegistry maps a dialect namespace to a constructor for the +/// matching dialect. +/// This allows for decoupling the list of dialects "available" from the +/// dialects loaded in the Context. The parser in particular will lazily load +/// dialects in the Context as operations are encountered. 
+class DialectRegistry { + using MapTy = + std::map>; + +public: + template + void insert() { + insert(TypeID::get(), + ConcreteDialect::getDialectNamespace(), + static_cast(([](MLIRContext *ctx) { + // Just allocate the dialect, the context + // takes ownership of it. + return ctx->getOrLoadDialect(); + }))); + } + + template + void insert() { + insert(); + insert(); + } + + /// Add a new dialect constructor to the registry. + void insert(TypeID typeID, StringRef name, DialectAllocatorFunction ctor); + + /// Load a dialect for this namespace in the provided context. + Dialect *loadByName(StringRef name, MLIRContext *context); + + // Register all dialects available in the current registry with the registry + // in the provided context. + void appendTo(DialectRegistry &destination) { + for (const auto &nameAndRegistrationIt : registry) + destination.insert(nameAndRegistrationIt.second.first, + nameAndRegistrationIt.first, + nameAndRegistrationIt.second.second); + } + // Load all dialects available in the registry in the provided context. + void loadAll(MLIRContext *context) { + for (const auto &nameAndRegistrationIt : registry) + nameAndRegistrationIt.second.second(context); + } + + MapTy::const_iterator begin() const { return registry.begin(); } + MapTy::const_iterator end() const { return registry.end(); } + +private: + MapTy registry; +}; + +/// Deprecated: this provides a global registry for convenience, while we're +/// transitioning the registration mechanism to a stateless approach. +DialectRegistry &getGlobalDialectRegistry(); + +/// Registers all dialects from the global registries with the +/// specified MLIRContext. This won't load the dialects in the context, +/// but only make them available for lazy loading by name. /// Note: This method is not thread-safe. void registerAllDialects(MLIRContext *context); +/// Register and return the dialect with the given namespace in the provided +/// context. 
Returns nullptr if there is no constructor registered for this +/// dialect. +inline Dialect *registerDialect(StringRef name, MLIRContext *context) { + return getGlobalDialectRegistry().loadByName(name, context); +} + /// Utility to register a dialect. Client can register their dialect with the /// global registry by calling registerDialect(); /// Note: This method is not thread-safe. template void registerDialect() { - Dialect::registerDialectAllocator( - TypeID::get(), - [](MLIRContext *ctx) { ctx->getOrCreateDialect(); }); + getGlobalDialectRegistry().insert(); } /// DialectRegistration provides a global initializer that registers a Dialect diff --git a/mlir/include/mlir/IR/DialectSymbolRegistry.def b/mlir/include/mlir/IR/DialectSymbolRegistry.def deleted file mode 100644 index acba383e911354..00000000000000 --- a/mlir/include/mlir/IR/DialectSymbolRegistry.def +++ /dev/null @@ -1,44 +0,0 @@ -//===- DialectSymbolRegistry.def - MLIR Dialect Symbol Registry -*- C++ -*-===// -// -// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file enumerates the different dialects that define custom classes -// within the attribute or type system. 
-// -//===----------------------------------------------------------------------===// - -DEFINE_SYM_KIND_RANGE(STANDARD) -DEFINE_SYM_KIND_RANGE(TENSORFLOW_CONTROL) -DEFINE_SYM_KIND_RANGE(TENSORFLOW_EXECUTOR) -DEFINE_SYM_KIND_RANGE(TENSORFLOW) -DEFINE_SYM_KIND_RANGE(LLVM) -DEFINE_SYM_KIND_RANGE(QUANTIZATION) -DEFINE_SYM_KIND_RANGE(IREE) // IREE stands for IR Execution Engine -DEFINE_SYM_KIND_RANGE(LINALG) // Linear Algebra Dialect -DEFINE_SYM_KIND_RANGE(FIR) // Flang Fortran IR Dialect -DEFINE_SYM_KIND_RANGE(OPENACC) // OpenACC IR Dialect -DEFINE_SYM_KIND_RANGE(OPENMP) // OpenMP IR Dialect -DEFINE_SYM_KIND_RANGE(TOY) // Toy language (tutorial) Dialect -DEFINE_SYM_KIND_RANGE(SPIRV) // SPIR-V dialect -DEFINE_SYM_KIND_RANGE(XLA_HLO) // XLA HLO dialect -DEFINE_SYM_KIND_RANGE(SHAPE) // Shape dialect -DEFINE_SYM_KIND_RANGE(TF_FRAMEWORK) // TF Framework dialect - -// The following ranges are reserved for experimenting with MLIR dialects in a -// private context without having to register them here. 
-DEFINE_SYM_KIND_RANGE(PRIVATE_EXPERIMENTAL_0) -DEFINE_SYM_KIND_RANGE(PRIVATE_EXPERIMENTAL_1) -DEFINE_SYM_KIND_RANGE(PRIVATE_EXPERIMENTAL_2) -DEFINE_SYM_KIND_RANGE(PRIVATE_EXPERIMENTAL_3) -DEFINE_SYM_KIND_RANGE(PRIVATE_EXPERIMENTAL_4) -DEFINE_SYM_KIND_RANGE(PRIVATE_EXPERIMENTAL_5) -DEFINE_SYM_KIND_RANGE(PRIVATE_EXPERIMENTAL_6) -DEFINE_SYM_KIND_RANGE(PRIVATE_EXPERIMENTAL_7) -DEFINE_SYM_KIND_RANGE(PRIVATE_EXPERIMENTAL_8) -DEFINE_SYM_KIND_RANGE(PRIVATE_EXPERIMENTAL_9) - -#undef DEFINE_SYM_KIND_RANGE diff --git a/mlir/include/mlir/IR/FunctionSupport.h b/mlir/include/mlir/IR/FunctionSupport.h index 7e281f393af946..3d467cd4f3642f 100644 --- a/mlir/include/mlir/IR/FunctionSupport.h +++ b/mlir/include/mlir/IR/FunctionSupport.h @@ -428,7 +428,7 @@ LogicalResult FunctionLike::verifyTrait(Operation *op) { if (!attr.first.strref().contains('.')) return funcOp.emitOpError("arguments may only have dialect attributes"); auto dialectNamePair = attr.first.strref().split('.'); - if (auto *dialect = ctx->getRegisteredDialect(dialectNamePair.first)) { + if (auto *dialect = ctx->getLoadedDialect(dialectNamePair.first)) { if (failed(dialect->verifyRegionArgAttribute(op, /*regionIndex=*/0, /*argIndex=*/i, attr))) return failure(); @@ -444,7 +444,7 @@ LogicalResult FunctionLike::verifyTrait(Operation *op) { if (!attr.first.strref().contains('.')) return funcOp.emitOpError("results may only have dialect attributes"); auto dialectNamePair = attr.first.strref().split('.'); - if (auto *dialect = ctx->getRegisteredDialect(dialectNamePair.first)) { + if (auto *dialect = ctx->getLoadedDialect(dialectNamePair.first)) { if (failed(dialect->verifyRegionResultAttribute(op, /*regionIndex=*/0, /*resultIndex=*/i, attr))) diff --git a/mlir/include/mlir/IR/MLIRContext.h b/mlir/include/mlir/IR/MLIRContext.h index 0192a8ae06af87..e8a5d6e6d2368b 100644 --- a/mlir/include/mlir/IR/MLIRContext.h +++ b/mlir/include/mlir/IR/MLIRContext.h @@ -19,10 +19,12 @@ namespace mlir { class AbstractOperation; class 
DiagnosticEngine; class Dialect; +class DialectRegistry; class InFlightDiagnostic; class Location; class MLIRContextImpl; class StorageUniquer; +DialectRegistry &getGlobalDialectRegistry(); /// MLIRContext is the top-level object for a collection of MLIR modules. It /// holds immortal uniqued objects like types, and the tables used to unique @@ -34,34 +36,69 @@ class StorageUniquer; /// class MLIRContext { public: - explicit MLIRContext(); + /// Create a new Context. + /// The loadAllDialects parameter allows loading all dialects from the global + /// registry on Context construction. It is deprecated and will be removed + /// soon. + explicit MLIRContext(bool loadAllDialects = true); ~MLIRContext(); - /// Return information about all registered IR dialects. - std::vector getRegisteredDialects(); + /// Return information about all IR dialects loaded in the context. + std::vector getLoadedDialects(); + + /// Return the dialect registry associated with this context. + DialectRegistry &getDialectRegistry(); + + /// Return information about all available dialects in the registry in this + /// context. + std::vector getAvailableDialects(); /// Get a registered IR dialect with the given namespace. If an exact match is /// not found, then return nullptr. - Dialect *getRegisteredDialect(StringRef name); + Dialect *getLoadedDialect(StringRef name); /// Get a registered IR dialect for the given derived dialect type. The /// derived type must provide a static 'getDialectNamespace' method. - template T *getRegisteredDialect() { - return static_cast(getRegisteredDialect(T::getDialectNamespace())); + template + T *getLoadedDialect() { + return static_cast(getLoadedDialect(T::getDialectNamespace())); } /// Get (or create) a dialect for the given derived dialect type. The derived /// type must provide a static 'getDialectNamespace' method. 
template - T *getOrCreateDialect() { - return static_cast(getOrCreateDialect( - T::getDialectNamespace(), TypeID::get(), [this]() { + T *getOrLoadDialect() { + return static_cast( + getOrLoadDialect(T::getDialectNamespace(), TypeID::get(), [this]() { std::unique_ptr dialect(new T(this)); - dialect->dialectID = TypeID::get(); return dialect; })); } + /// Load a dialect in the context. + template + void loadDialect() { + getOrLoadDialect(); + } + + /// Load a list of dialects in the context. + template + void loadDialect() { + getOrLoadDialect(); + loadDialect(); + } + + /// Deprecated: load all globally registered dialects into this context. + /// This method will be removed soon, it can be used temporarily as we're + /// phasing out the global registry. + void loadAllGloballyRegisteredDialects(); + + /// Get (or create) a dialect for the given derived dialect name. + /// The dialect will be loaded from the registry if no dialect is found. + /// If no dialect is loaded for this name and none is available in the + /// registry, returns nullptr. + Dialect *getOrLoadDialect(StringRef name); + /// Return true if we allow to create operation for unregistered dialects. bool allowsUnregisteredDialects(); @@ -123,10 +160,12 @@ class MLIRContext { const std::unique_ptr impl; /// Get a dialect for the provided namespace and TypeID: abort the program if - /// a dialect exist for this namespace with different TypeID. Returns a - /// pointer to the dialect owned by the context. - Dialect *getOrCreateDialect(StringRef dialectNamespace, TypeID dialectID, - function_ref()> ctor); + /// a dialect exists for this namespace with different TypeID. If a dialect has + /// not been loaded for this namespace/TypeID yet, use the provided ctor to + /// create one on the fly and load it. Returns a pointer to the dialect owned + /// by the context. 
+ Dialect *getOrLoadDialect(StringRef dialectNamespace, TypeID dialectID, + function_ref()> ctor); MLIRContext(const MLIRContext &) = delete; void operator=(const MLIRContext &) = delete; diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index 9cc57a61728949..a28410f028d5f0 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -244,6 +244,11 @@ class Dialect { // The description of the dialect. string description = ?; + // A list of dialects this dialect will load on construction as dependencies. + // These are dialects that this dialect may be involved in canonicalization + // patterns or interfaces. + list dependentDialects = []; + // The C++ namespace that ops of this dialect should be placed into. // // By default, uses the name of the dialect as the only namespace. To avoid diff --git a/mlir/include/mlir/IR/OpImplementation.h b/mlir/include/mlir/IR/OpImplementation.h index 0124ef5f7c0a4e..df54919ade1e49 100644 --- a/mlir/include/mlir/IR/OpImplementation.h +++ b/mlir/include/mlir/IR/OpImplementation.h @@ -756,7 +756,7 @@ class OpAsmDialectInterface /// all attributes of the given kind in the form : [0-9]+. These /// aliases must not contain `.`. virtual void getAttributeKindAliases( - SmallVectorImpl> &aliases) const {} + SmallVectorImpl> &aliases) const {} /// Hook for defining Attribute aliases. These aliases must not contain `.` or /// end with a numeric digit([0-9]+). virtual void getAttributeAliases( diff --git a/mlir/include/mlir/IR/StandardTypes.h b/mlir/include/mlir/IR/StandardTypes.h index 6ceddec5337749..e309595415d14c 100644 --- a/mlir/include/mlir/IR/StandardTypes.h +++ b/mlir/include/mlir/IR/StandardTypes.h @@ -38,33 +38,6 @@ struct TupleTypeStorage; } // namespace detail -namespace StandardTypes { -enum Kind { - // Floating point. 
- BF16 = Type::Kind::FIRST_STANDARD_TYPE, - F16, - F32, - F64, - FIRST_FLOATING_POINT_TYPE = BF16, - LAST_FLOATING_POINT_TYPE = F64, - - // Target pointer sized integer, used (e.g.) in affine mappings. - Index, - - // Derived types. - Integer, - Vector, - RankedTensor, - UnrankedTensor, - MemRef, - UnrankedMemRef, - Complex, - Tuple, - None, -}; - -} // namespace StandardTypes - //===----------------------------------------------------------------------===// // ComplexType //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/IR/StorageUniquerSupport.h b/mlir/include/mlir/IR/StorageUniquerSupport.h index 48026c219082a5..75bc40abdaeff9 100644 --- a/mlir/include/mlir/IR/StorageUniquerSupport.h +++ b/mlir/include/mlir/IR/StorageUniquerSupport.h @@ -82,29 +82,29 @@ class StorageUserBase : public BaseT, public Traits... { return detail::InterfaceMap::template get...>(); } -protected: /// Get or create a new ConcreteT instance within the ctx. This /// function is guaranteed to return a non null object and will assert if /// the arguments provided are invalid. template - static ConcreteT get(MLIRContext *ctx, unsigned kind, Args... args) { + static ConcreteT get(MLIRContext *ctx, Args... args) { // Ensure that the invariants are correct for construction. assert(succeeded(ConcreteT::verifyConstructionInvariants( generateUnknownStorageLocation(ctx), args...))); - return UniquerT::template get(ctx, kind, args...); + return UniquerT::template get(ctx, args...); } /// Get or create a new ConcreteT instance within the ctx, defined at /// the given, potentially unknown, location. If the arguments provided are /// invalid then emit errors and return a null object. template - static ConcreteT getChecked(LocationT loc, unsigned kind, Args... args) { + static ConcreteT getChecked(LocationT loc, Args... args) { // If the construction invariants fail then we return a null attribute. 
if (failed(ConcreteT::verifyConstructionInvariants(loc, args...))) return ConcreteT(); - return UniquerT::template get(loc.getContext(), kind, args...); + return UniquerT::template get(loc.getContext(), args...); } +protected: /// Mutate the current storage instance. This will not change the unique key. /// The arguments are forwarded to 'ConcreteT::mutate'. template LogicalResult mutate(Args &&...args) { diff --git a/mlir/include/mlir/IR/TypeSupport.h b/mlir/include/mlir/IR/TypeSupport.h index aa2daefd26c462..ace5eaa733454d 100644 --- a/mlir/include/mlir/IR/TypeSupport.h +++ b/mlir/include/mlir/IR/TypeSupport.h @@ -121,15 +121,23 @@ namespace detail { /// A utility class to get, or create, unique instances of types within an /// MLIRContext. This class manages all creation and uniquing of types. struct TypeUniquer { - /// Get an uniqued instance of a type T. + /// Get an uniqued instance of a parametric type T. template - static T get(MLIRContext *ctx, unsigned kind, Args &&... args) { + static typename std::enable_if_t< + !std::is_same::value, T> + get(MLIRContext *ctx, Args &&...args) { return ctx->getTypeUniquer().get( - T::getTypeID(), [&](TypeStorage *storage) { storage->initialize(AbstractType::lookup(T::getTypeID(), ctx)); }, - kind, std::forward(args)...); + T::getTypeID(), std::forward(args)...); + } + /// Get an uniqued instance of a singleton type T. + template + static typename std::enable_if_t< + std::is_same::value, T> + get(MLIRContext *ctx) { + return ctx->getTypeUniquer().get(T::getTypeID()); } /// Change the mutable component of the given type instance in the provided @@ -141,6 +149,25 @@ struct TypeUniquer { return ctx->getTypeUniquer().mutate(T::getTypeID(), impl, std::forward(args)...); } + + /// Register a parametric type instance T with the uniquer. 
+ template + static typename std::enable_if_t< + !std::is_same::value> + registerType(MLIRContext *ctx) { + ctx->getTypeUniquer().registerParametricStorageType( + T::getTypeID()); + } + /// Register a singleton type instance T with the uniquer. + template + static typename std::enable_if_t< + std::is_same::value> + registerType(MLIRContext *ctx) { + ctx->getTypeUniquer().registerSingletonStorageType( + T::getTypeID(), [&](TypeStorage *storage) { + storage->initialize(AbstractType::lookup(T::getTypeID(), ctx)); + }); + } }; } // namespace detail diff --git a/mlir/include/mlir/IR/Types.h b/mlir/include/mlir/IR/Types.h index 8101690daeb648..ad7e436068bc54 100644 --- a/mlir/include/mlir/IR/Types.h +++ b/mlir/include/mlir/IR/Types.h @@ -34,11 +34,11 @@ struct OpaqueTypeStorage; /// /// Some types are "primitives" meaning they do not have any parameters, for /// example the Index type. Parametric types have additional information that -/// differentiates the types of the same kind between them, for example the -/// Integer type has bitwidth, making i8 and i16 belong to the same kind by be -/// different instances of the IntegerType. Type parameters are part of the -/// unique immutable key. The mutable component of the type can be modified -/// after the type is created, but cannot affect the identity of the type. +/// differentiates the types of the same class, for example the Integer type has +/// bitwidth, making i8 and i16 belong to the same kind but be different +/// instances of the IntegerType. Type parameters are part of the unique +/// immutable key. The mutable component of the type can be modified after the +/// type is created, but cannot affect the identity of the type. /// /// Types are constructed and uniqued via the 'detail::TypeUniquer' class. /// @@ -53,20 +53,19 @@ struct OpaqueTypeStorage; /// * This method is expected to return failure if a type cannot be /// constructed with 'args', success otherwise. 
/// * 'args' must correspond with the arguments passed into the -/// 'TypeBase::get' call after the type kind. +/// 'TypeBase::get' call. /// /// /// Type storage objects inherit from TypeStorage and contain the following: -/// - The type kind (for LLVM-style RTTI). /// - The dialect that defined the type. /// - Any parameters of the type. /// - An optional mutable component. /// For non-parametric types, a convenience DefaultTypeStorage is provided. /// Parametric storage types must derive TypeStorage and respect the following: /// - Define a type alias, KeyTy, to a type that uniquely identifies the -/// instance of the type within its kind. +/// instance of the type. /// * The key type must be constructible from the values passed into the -/// detail::TypeUniquer::get call after the type kind. +/// detail::TypeUniquer::get call. /// * If the KeyTy does not have an llvm::DenseMapInfo specialization, the /// storage class must define a hashing method: /// 'static unsigned hashKey(const KeyTy &)' @@ -84,23 +83,6 @@ struct OpaqueTypeStorage; // the key. class Type { public: - /// Integer identifier for all the concrete type kinds. - /// Note: This is not an enum class as each dialect will likely define a - /// separate enumeration for the specific types that they define. Not being an - /// enum class also simplifies the handling of type kinds by not requiring - /// casts for each use. - enum Kind { - // Builtin types. - Function, - Opaque, - LAST_BUILTIN_TYPE = Opaque, - - // Reserve type kinds for dialect specific type system extensions. -#define DEFINE_SYM_KIND_RANGE(Dialect) \ - FIRST_##Dialect##_TYPE, LAST_##Dialect##_TYPE = FIRST_##Dialect##_TYPE + 0xff, -#include "DialectSymbolRegistry.def" - }; - /// Utility class for implementing types. template class... Traits> @@ -136,9 +118,6 @@ class Type { /// dynamic type casting. TypeID getTypeID() { return impl->getAbstractType().getTypeID(); } - /// Return the classification for this type. 
- unsigned getKind() const; - /// Return the LLVMContext in which this type was uniqued. MLIRContext *getContext() const; diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h index b76b26fe348346..147ececc4c5a96 100644 --- a/mlir/include/mlir/InitAllDialects.h +++ b/mlir/include/mlir/InitAllDialects.h @@ -35,30 +35,35 @@ namespace mlir { +// Add all the MLIR dialects to the provided registry. +inline void registerAllDialects(DialectRegistry &registry) { + // clang-format off + registry.insert(); + // clang-format on +} + // This function should be called before creating any MLIRContext if one expect // all the possible dialects to be made available to the context automatically. inline void registerAllDialects() { - static bool init_once = []() { - registerDialect(); - registerDialect(); - registerDialect(); - registerDialect(); - registerDialect(); - registerDialect(); - registerDialect(); - registerDialect(); - registerDialect(); - registerDialect(); - registerDialect(); - registerDialect(); - registerDialect(); - registerDialect(); - registerDialect(); - registerDialect(); - registerDialect(); - return true; - }(); - (void)init_once; + static bool initOnce = + ([]() { registerAllDialects(getGlobalDialectRegistry()); }(), true); + (void)initOnce; } } // namespace mlir diff --git a/mlir/include/mlir/InitAllTranslations.h b/mlir/include/mlir/InitAllTranslations.h index 31ca0254cf8999..a1771dab144c04 100644 --- a/mlir/include/mlir/InitAllTranslations.h +++ b/mlir/include/mlir/InitAllTranslations.h @@ -28,7 +28,7 @@ void registerAVX512ToLLVMIRTranslation(); // expects all the possible translations to be made available to the context // automatically. 
inline void registerAllTranslations() { - static bool init_once = []() { + static bool initOnce = []() { registerFromLLVMIRTranslation(); registerFromSPIRVTranslation(); registerToLLVMIRTranslation(); @@ -38,7 +38,7 @@ inline void registerAllTranslations() { registerAVX512ToLLVMIRTranslation(); return true; }(); - (void)init_once; + (void)initOnce; } } // namespace mlir diff --git a/mlir/include/mlir/Pass/Pass.h b/mlir/include/mlir/Pass/Pass.h index 8de31d9443190d..cd4c06acd070b4 100644 --- a/mlir/include/mlir/Pass/Pass.h +++ b/mlir/include/mlir/Pass/Pass.h @@ -9,6 +9,7 @@ #ifndef MLIR_PASS_PASS_H #define MLIR_PASS_PASS_H +#include "mlir/IR/Dialect.h" #include "mlir/IR/Function.h" #include "mlir/Pass/AnalysisManager.h" #include "mlir/Pass/PassRegistry.h" @@ -57,6 +58,13 @@ class Pass { /// Returns the derived pass name. virtual StringRef getName() const = 0; + /// Register dependent dialects for the current pass. + /// A pass is expected to register the dialects it will create entities for + /// (Operations, Types, Attributes), other than dialect that exists in the + /// input. For example, a pass that converts from Linalg to Affine would + /// register the Affine dialect but does not need to register Linalg. + virtual void getDependentDialects(DialectRegistry &registry) const {} + /// Returns the command line argument used when registering this pass. Return /// an empty string if one does not exist. virtual StringRef getArgument() const { diff --git a/mlir/include/mlir/Pass/PassBase.td b/mlir/include/mlir/Pass/PassBase.td index 54b44031559e72..7a2feff4fe0454 100644 --- a/mlir/include/mlir/Pass/PassBase.td +++ b/mlir/include/mlir/Pass/PassBase.td @@ -78,6 +78,9 @@ class PassBase { // A C++ constructor call to create an instance of this pass. code constructor = [{}]; + // A list of dialects this pass may produce entities in. + list dependentDialects = []; + // A set of options provided by this pass. list