From 5627ae6c507d62ef52d30fe80a0120f2ee033123 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 9 Oct 2020 15:17:26 +0200 Subject: [PATCH 01/16] [clangd] Support CodeActionParams.only Differential Revision: https://reviews.llvm.org/D89126 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 28 +++++++----- clang-tools-extra/clangd/Protocol.cpp | 5 ++- clang-tools-extra/clangd/Protocol.h | 13 +++++- .../clangd/test/code-action-request.test | 43 ++++++++++++++++++- 4 files changed, 76 insertions(+), 13 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 99c2465a579c01..3164b6cbfb146e 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -993,12 +993,24 @@ void ClangdLSPServer::onCodeAction(const CodeActionParams &Params, if (!Code) return Reply(llvm::make_error( "onCodeAction called for non-added file", ErrorCode::InvalidParams)); + + // Checks whether a particular CodeActionKind is included in the response. + auto KindAllowed = [Only(Params.context.only)](llvm::StringRef Kind) { + if (Only.empty()) + return true; + return llvm::any_of(Only, [&](llvm::StringRef Base) { + return Kind.consume_front(Base) && (Kind.empty() || Kind.startswith(".")); + }); + }; + // We provide a code action for Fixes on the specified diagnostics. 
std::vector FixIts; - for (const Diagnostic &D : Params.context.diagnostics) { - for (auto &F : getFixes(File.file(), D)) { - FixIts.push_back(toCodeAction(F, Params.textDocument.uri)); - FixIts.back().diagnostics = {D}; + if (KindAllowed(CodeAction::QUICKFIX_KIND)) { + for (const Diagnostic &D : Params.context.diagnostics) { + for (auto &F : getFixes(File.file(), D)) { + FixIts.push_back(toCodeAction(F, Params.textDocument.uri)); + FixIts.back().diagnostics = {D}; + } } } @@ -1038,14 +1050,10 @@ void ClangdLSPServer::onCodeAction(const CodeActionParams &Params, } return Reply(llvm::json::Array(Commands)); }; - Server->enumerateTweaks( File.file(), Params.range, - [&](const Tweak &T) { - if (!Opts.TweakFilter(T)) - return false; - // FIXME: also consider CodeActionContext.only - return true; + [this, KindAllowed(std::move(KindAllowed))](const Tweak &T) { + return Opts.TweakFilter(T) && KindAllowed(T.kind()); }, std::move(ConsumeActions)); } diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp index 0103a06ff95102..d11307d04cee93 100644 --- a/clang-tools-extra/clangd/Protocol.cpp +++ b/clang-tools-extra/clangd/Protocol.cpp @@ -599,7 +599,10 @@ llvm::json::Value toJSON(const PublishDiagnosticsParams &PDP) { bool fromJSON(const llvm::json::Value &Params, CodeActionContext &R, llvm::json::Path P) { llvm::json::ObjectMapper O(Params, P); - return O && O.map("diagnostics", R.diagnostics); + if (!O || !O.map("diagnostics", R.diagnostics)) + return false; + O.map("only", R.only); + return true; } llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Diagnostic &D) { diff --git a/clang-tools-extra/clangd/Protocol.h b/clang-tools-extra/clangd/Protocol.h index 165a4a89e1cd0d..f846accbdde7c7 100644 --- a/clang-tools-extra/clangd/Protocol.h +++ b/clang-tools-extra/clangd/Protocol.h @@ -863,8 +863,19 @@ struct PublishDiagnosticsParams { llvm::json::Value toJSON(const PublishDiagnosticsParams &); struct CodeActionContext { - /// An array 
of diagnostics. + /// An array of diagnostics known on the client side overlapping the range + /// provided to the `textDocument/codeAction` request. They are provided so + /// that the server knows which errors are currently presented to the user for + /// the given range. There is no guarantee that these accurately reflect the + /// error state of the resource. The primary parameter to compute code actions + /// is the provided range. std::vector diagnostics; + + /// Requested kind of actions to return. + /// + /// Actions not of this kind are filtered out by the client before being + /// shown. So servers can omit computing them. + std::vector only; }; bool fromJSON(const llvm::json::Value &, CodeActionContext &, llvm::json::Path); diff --git a/clang-tools-extra/clangd/test/code-action-request.test b/clang-tools-extra/clangd/test/code-action-request.test index 78e90ce6c42395..f16f77989b473a 100644 --- a/clang-tools-extra/clangd/test/code-action-request.test +++ b/clang-tools-extra/clangd/test/code-action-request.test @@ -51,6 +51,47 @@ # CHECK-NEXT: } # CHECK-NEXT: ] --- +{ + "jsonrpc": "2.0", + "id": 2, + "method": "textDocument/codeAction", + "params": { + "textDocument": { "uri": "test:///main.cpp" }, + "range": { + "start": {"line": 0, "character": 0}, + "end": {"line": 0, "character": 4} + }, + "context": { + "diagnostics": [], + "only": ["quickfix"] + } + } +} +# CHECK: "id": 2, +# CHECK-NEXT: "jsonrpc": "2.0", +# CHECK-NEXT: "result": [] +--- +{ + "jsonrpc": "2.0", + "id": 3, + "method": "textDocument/codeAction", + "params": { + "textDocument": { "uri": "test:///main.cpp" }, + "range": { + "start": {"line": 0, "character": 0}, + "end": {"line": 0, "character": 4} + }, + "context": { + "diagnostics": [], + "only": ["refactor"] + } + } +} +# CHECK: "id": 3, +# CHECK-NEXT: "jsonrpc": "2.0", +# CHECK-NEXT: "result": [ +# CHECK-NEXT: { +--- 
{"jsonrpc":"2.0","id":4,"method":"workspace/executeCommand","params":{"command":"clangd.applyTweak","arguments":[{"file":"test:///main.cpp","selection":{"end":{"character":4,"line":0},"start":{"character":0,"line":0}},"tweakID":"ExpandAutoType"}]}} # CHECK: "newText": "int", # CHECK-NEXT: "range": { @@ -64,7 +105,7 @@ # CHECK-NEXT: } # CHECK-NEXT: } --- -{"jsonrpc":"2.0","id":4,"method":"shutdown"} +{"jsonrpc":"2.0","id":5,"method":"shutdown"} --- {"jsonrpc":"2.0","method":"exit"} --- From a5b2e795c3b26fae16d774a48694e7419ad652f1 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Thu, 29 Oct 2020 15:27:21 +0700 Subject: [PATCH 02/16] [NFC][SCEV] Refactor monotonic predicate checks to return enums instead of bools This patch gets rid of output parameter which is not needed for most users and prepares this API for further refactoring. --- llvm/include/llvm/Analysis/ScalarEvolution.h | 23 +++++--- llvm/lib/Analysis/ScalarEvolution.cpp | 57 ++++++++++--------- .../lib/Transforms/Scalar/LoopPredication.cpp | 3 +- llvm/lib/Transforms/Utils/LoopPeel.cpp | 4 +- 4 files changed, 46 insertions(+), 41 deletions(-) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 017efb994f57b1..37f4ad43639f95 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -939,17 +939,23 @@ class ScalarEvolution { bool isKnownOnEveryIteration(ICmpInst::Predicate Pred, const SCEVAddRecExpr *LHS, const SCEV *RHS); - /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X" - /// is monotonically increasing or decreasing. In the former case set - /// `Increasing` to true and in the latter case set `Increasing` to false. - /// /// A predicate is said to be monotonically increasing if may go from being /// false to being true as the loop iterates, but never the other way /// around. 
A predicate is said to be monotonically decreasing if may go /// from being true to being false as the loop iterates, but never the other /// way around. - bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred, - bool &Increasing); + enum MonotonicPredicateType { + MonotonicallyIncreasing, + MonotonicallyDecreasing + }; + + /// If, for all loop invariant X, the predicate "LHS `Pred` X" is + /// monotonically increasing or decreasing, returns + /// Some(MonotonicallyIncreasing) and Some(MonotonicallyDecreasing) + /// respectively. If we could not prove either of these facts, returns None. + Optional + getMonotonicPredicateType(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred); /// Return true if the result of the predicate LHS `Pred` RHS is loop /// invariant with respect to L. Set InvariantPred, InvariantLHS and @@ -1881,8 +1887,9 @@ class ScalarEvolution { /// Try to prove NSW or NUW on \p AR relying on ConstantRange manipulation. SCEV::NoWrapFlags proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR); - bool isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS, - ICmpInst::Predicate Pred, bool &Increasing); + Optional + getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred); /// Return SCEV no-wrap flags that can be proven based on reasoning about /// how poison produced from no-wrap flags on this value (e.g. 
a nuw add) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index bca8e28849e766..8bc2595d52612e 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -9236,31 +9236,30 @@ bool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred, isLoopBackedgeGuardedByCond(L, Pred, LHS->getPostIncExpr(*this), RHS); } -bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS, - ICmpInst::Predicate Pred, - bool &Increasing) { - bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing); +Optional +ScalarEvolution::getMonotonicPredicateType(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred) { + auto Result = getMonotonicPredicateTypeImpl(LHS, Pred); #ifndef NDEBUG // Verify an invariant: inverting the predicate should turn a monotonically // increasing change to a monotonically decreasing one, and vice versa. - bool IncreasingSwapped; - bool ResultSwapped = isMonotonicPredicateImpl( - LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped); + if (Result) { + auto ResultSwapped = + getMonotonicPredicateTypeImpl(LHS, ICmpInst::getSwappedPredicate(Pred)); - assert(Result == ResultSwapped && "should be able to analyze both!"); - if (ResultSwapped) - assert(Increasing == !IncreasingSwapped && + assert(ResultSwapped.hasValue() && "should be able to analyze both!"); + assert(ResultSwapped.getValue() != Result.getValue() && "monotonicity should flip as we flip the predicate"); + } #endif return Result; } -bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS, - ICmpInst::Predicate Pred, - bool &Increasing) { - +Optional +ScalarEvolution::getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred) { // A zero step value for LHS means the induction variable is essentially a // loop invariant value. 
We don't really depend on the predicate actually // flipping from false to true (for increasing predicates, and the other way @@ -9273,38 +9272,41 @@ bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS, switch (Pred) { default: - return false; // Conservative answer + return None; // Conservative answer case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: if (!LHS->hasNoUnsignedWrap()) - return false; + return None; - Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE; - return true; + return Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE + ? MonotonicallyIncreasing + : MonotonicallyDecreasing; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: { if (!LHS->hasNoSignedWrap()) - return false; + return None; const SCEV *Step = LHS->getStepRecurrence(*this); if (isKnownNonNegative(Step)) { - Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE; - return true; + return Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE + ? MonotonicallyIncreasing + : MonotonicallyDecreasing; } if (isKnownNonPositive(Step)) { - Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE; - return true; + return Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE + ? 
MonotonicallyIncreasing + : MonotonicallyDecreasing; } - return false; + return None; } } @@ -9330,10 +9332,9 @@ bool ScalarEvolution::isLoopInvariantPredicate( if (!ArLHS || ArLHS->getLoop() != L) return false; - bool Increasing; - if (!isMonotonicPredicate(ArLHS, Pred, Increasing)) + auto MonotonicType = getMonotonicPredicateType(ArLHS, Pred); + if (!MonotonicType) return false; - // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to // true as the loop iterates, and the backedge is control dependent on // "ArLHS `Pred` RHS" == true then we can reason as follows: @@ -9351,7 +9352,7 @@ bool ScalarEvolution::isLoopInvariantPredicate( // // A similar reasoning applies for a monotonically decreasing predicate, by // replacing true with false and false with true in the above two bullets. - + bool Increasing = *MonotonicType == ScalarEvolution::MonotonicallyIncreasing; auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred); if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS)) diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp index 27df56f6ea96c0..3ca5b985c365ee 100644 --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -454,8 +454,7 @@ static bool isSafeToTruncateWideIVType(const DataLayout &DL, // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the // IV wraps around, and the truncation of the IV would lose the range of // iterations between 2^32 and 2^64. - bool Increasing; - if (!SE.isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing)) + if (!SE.getMonotonicPredicateType(LatchCheck.IV, LatchCheck.Pred)) return false; // The active bits should be less than the bits in the RangeCheckType. 
This // guarantees that truncating the latch check to RangeCheckType is a safe diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index a08b5781ac3e0f..27a61a2078684a 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -227,11 +227,9 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, // consider AddRecs of the loop we are trying to peel. if (!LeftAR->isAffine() || LeftAR->getLoop() != &L) continue; - bool Increasing; if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) && - !SE.isMonotonicPredicate(LeftAR, Pred, Increasing)) + !SE.getMonotonicPredicateType(LeftAR, Pred)) continue; - (void)Increasing; // Check if extending the current DesiredPeelCount lets us evaluate Pred // or !Pred in the loop body statically. From 1d773a4ff05d0dcfab112719b82b2bd5d0c93ff5 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 27 Oct 2020 10:58:34 +0100 Subject: [PATCH 03/16] [CMake] Support inter-proto dependencies in generate_protos. Differential Revision: https://reviews.llvm.org/D90215 --- .../clangd/index/remote/CMakeLists.txt | 11 +++------ llvm/cmake/modules/FindGRPC.cmake | 23 ++++++++++++++++++- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/clang-tools-extra/clangd/index/remote/CMakeLists.txt b/clang-tools-extra/clangd/index/remote/CMakeLists.txt index 554288df0bcbb6..a07dd994b5a377 100644 --- a/clang-tools-extra/clangd/index/remote/CMakeLists.txt +++ b/clang-tools-extra/clangd/index/remote/CMakeLists.txt @@ -1,13 +1,8 @@ if (CLANGD_ENABLE_REMOTE) - generate_protos(RemoteIndexServiceProto "Service.proto" GRPC) generate_protos(RemoteIndexProto "Index.proto") - # Ensure dependency headers are generated before dependent protos are built. - # FIXME: this should be encapsulated in generate_protos. - # FIXME: CMake docs say OBJECT_DEPENDS isn't needed, but I can't get the - # recommended add_dependencies() approach to work. 
- set_source_files_properties( - ${CMAKE_CURRENT_BINARY_DIR}/Service.pb.cc - PROPERTIES OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Index.pb.h) + generate_protos(RemoteIndexServiceProto "Service.proto" + DEPENDS "Index.proto" + GRPC) include_directories(${CMAKE_CURRENT_BINARY_DIR}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../) diff --git a/llvm/cmake/modules/FindGRPC.cmake b/llvm/cmake/modules/FindGRPC.cmake index 9e837a80661c20..f2c9bee38c93b1 100644 --- a/llvm/cmake/modules/FindGRPC.cmake +++ b/llvm/cmake/modules/FindGRPC.cmake @@ -84,8 +84,10 @@ endif() # Proto headers are generated in ${CMAKE_CURRENT_BINARY_DIR}. # Libraries that use these headers should adjust the include path. # If the "GRPC" argument is given, services are also generated. +# The DEPENDS list should name *.proto source files that are imported. +# They may be relative to the source dir or absolute (for generated protos). function(generate_protos LibraryName ProtoFile) - cmake_parse_arguments(PARSE_ARGV 2 PROTO "GRPC" "" "") + cmake_parse_arguments(PARSE_ARGV 2 PROTO "GRPC" "" "DEPENDS") get_filename_component(ProtoSourceAbsolutePath "${CMAKE_CURRENT_SOURCE_DIR}/${ProtoFile}" ABSOLUTE) get_filename_component(ProtoSourcePath ${ProtoSourceAbsolutePath} PATH) get_filename_component(Basename ${ProtoSourceAbsolutePath} NAME_WLE) @@ -111,4 +113,23 @@ function(generate_protos LibraryName ProtoFile) add_clang_library(${LibraryName} ${GeneratedProtoSource} PARTIAL_SOURCES_INTENDED LINK_LIBS grpc++ protobuf) + + # Ensure dependency headers are generated before dependent protos are built. + # DEPENDS arg is a list of "Foo.proto". While they're logically relative to + # the source dir, the generated headers we need are in the binary dir. 
+ foreach(ImportedProto IN LISTS PROTO_DEPENDS) + # Foo.proto -> Foo.pb.h + STRING(REGEX REPLACE "\\.proto$" ".pb.h" ImportedHeader "${ImportedProto}") + # Foo.pb.h -> ${CMAKE_CURRENT_BINARY_DIR}/Foo.pb.h + get_filename_component(ImportedHeader "${ImportedHeader}" + ABSOLUTE + BASE_DIR "${CMAKE_CURRENT_BINARY_DIR}") + # Compilation of each generated source depends on ${BINARY}/Foo.pb.h. + foreach(Generated IN LISTS GeneratedProtoSource) + # FIXME: CMake docs suggest OBJECT_DEPENDS isn't needed, but I can't get + # the recommended add_dependencies() approach to work. + set_source_files_properties("${Generated}" + PROPERTIES OBJECT_DEPENDS "${ImportedHeader}") + endforeach(Generated) + endforeach(ImportedProto) endfunction() From a4b6b1e1c83fdfc5954e0fb631c2e6237236589e Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 29 Oct 2020 09:13:23 +0000 Subject: [PATCH 04/16] [InterleaveAccess] Recognise Interleave loads through binary operations Instcombine will currently sink identical shuffles through vector binary operations. This is probably generally useful, but can break up the code pattern we use to represent an interleaving load group. This patch reverses that in the InterleaveAccessPass to re-recognise the pattern of shuffles sunk past binary operations and folds them back if an interleave group can be created. 
Differential Revision: https://reviews.llvm.org/D89489 --- llvm/lib/CodeGen/InterleavedAccessPass.cpp | 99 +++++++++--- llvm/test/CodeGen/AArch64/vldn_shuffle.ll | 84 +++------- llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll | 147 ++++++------------ .../AArch64/binopshuffles.ll | 101 +++++++----- 4 files changed, 209 insertions(+), 222 deletions(-) diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index c4d83547a06c63..73771609a79235 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -66,6 +66,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/Local.h" #include #include @@ -118,6 +119,14 @@ class InterleavedAccess : public FunctionPass { /// replacements are also performed. bool tryReplaceExtracts(ArrayRef Extracts, ArrayRef Shuffles); + + /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them + /// to binop(shuffle(x), shuffle(y)) to allow the formation of an + /// interleaving load. Any newly created shuffles that operate on \p LI will + /// be added to \p Shuffles. + bool tryReplaceBinOpShuffles(ArrayRef BinOpShuffles, + SmallVectorImpl &Shuffles, + LoadInst *LI); }; } // end anonymous namespace. @@ -283,61 +292,85 @@ bool InterleavedAccess::lowerInterleavedLoad( if (!LI->isSimple() || isa(LI->getType())) return false; + // Check if all users of this load are shufflevectors. If we encounter any + // users that are extractelement instructions or binary operators, we save + // them to later check if they can be modified to extract from one of the + // shufflevectors instead of the load. + SmallVector Shuffles; SmallVector Extracts; + // BinOpShuffles need to be handled a single time in case both operands of the + // binop are the same load. + SmallSetVector BinOpShuffles; - // Check if all users of this load are shufflevectors. 
If we encounter any - // users that are extractelement instructions, we save them to later check if - // they can be modifed to extract from one of the shufflevectors instead of - // the load. - for (auto UI = LI->user_begin(), E = LI->user_end(); UI != E; UI++) { - auto *Extract = dyn_cast(*UI); + for (auto *User : LI->users()) { + auto *Extract = dyn_cast(User); if (Extract && isa(Extract->getIndexOperand())) { Extracts.push_back(Extract); continue; } - ShuffleVectorInst *SVI = dyn_cast(*UI); + auto *BI = dyn_cast(User); + if (BI && BI->hasOneUse()) { + if (auto *SVI = dyn_cast(*BI->user_begin())) { + BinOpShuffles.insert(SVI); + continue; + } + } + auto *SVI = dyn_cast(User); if (!SVI || !isa(SVI->getOperand(1))) return false; Shuffles.push_back(SVI); } - if (Shuffles.empty()) + if (Shuffles.empty() && BinOpShuffles.empty()) return false; unsigned Factor, Index; unsigned NumLoadElements = cast(LI->getType())->getNumElements(); + auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0]; // Check if the first shufflevector is DE-interleave shuffle. - if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index, - MaxFactor, NumLoadElements)) + if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor, + NumLoadElements)) return false; // Holds the corresponding index for each DE-interleave shuffle. SmallVector Indices; - Indices.push_back(Index); - Type *VecTy = Shuffles[0]->getType(); + Type *VecTy = FirstSVI->getType(); // Check if other shufflevectors are also DE-interleaved of the same type // and factor as the first shufflevector. 
- for (unsigned i = 1; i < Shuffles.size(); i++) { - if (Shuffles[i]->getType() != VecTy) + for (auto *Shuffle : Shuffles) { + if (Shuffle->getType() != VecTy) return false; - - if (!isDeInterleaveMaskOfFactor(Shuffles[i]->getShuffleMask(), Factor, + if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor, Index)) return false; Indices.push_back(Index); } + for (auto *Shuffle : BinOpShuffles) { + if (Shuffle->getType() != VecTy) + return false; + if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor, + Index)) + return false; + + if (cast(Shuffle->getOperand(0))->getOperand(0) == LI) + Indices.push_back(Index); + if (cast(Shuffle->getOperand(0))->getOperand(1) == LI) + Indices.push_back(Index); + } // Try and modify users of the load that are extractelement instructions to // use the shufflevector instructions instead of the load. if (!tryReplaceExtracts(Extracts, Shuffles)) return false; + if (!tryReplaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI)) + return false; LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n"); @@ -352,6 +385,34 @@ bool InterleavedAccess::lowerInterleavedLoad( return true; } +bool InterleavedAccess::tryReplaceBinOpShuffles( + ArrayRef BinOpShuffles, + SmallVectorImpl &Shuffles, LoadInst *LI) { + for (auto *SVI : BinOpShuffles) { + BinaryOperator *BI = cast(SVI->getOperand(0)); + ArrayRef Mask = SVI->getShuffleMask(); + + auto *NewSVI1 = new ShuffleVectorInst( + BI->getOperand(0), UndefValue::get(BI->getOperand(0)->getType()), Mask, + SVI->getName(), SVI); + auto *NewSVI2 = new ShuffleVectorInst( + BI->getOperand(1), UndefValue::get(BI->getOperand(1)->getType()), Mask, + SVI->getName(), SVI); + Value *NewBI = BinaryOperator::Create(BI->getOpcode(), NewSVI1, NewSVI2, + BI->getName(), SVI); + SVI->replaceAllUsesWith(NewBI); + LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI + << "\n With : " << *NewSVI1 << "\n And : " + << *NewSVI2 << "\n And : " << *NewBI << "\n"); + 
RecursivelyDeleteTriviallyDeadInstructions(SVI); + if (NewSVI1->getOperand(0) == LI) + Shuffles.push_back(NewSVI1); + if (NewSVI2->getOperand(0) == LI) + Shuffles.push_back(NewSVI2); + } + return true; +} + bool InterleavedAccess::tryReplaceExtracts( ArrayRef Extracts, ArrayRef Shuffles) { @@ -421,7 +482,7 @@ bool InterleavedAccess::lowerInterleavedStore( if (!SI->isSimple()) return false; - ShuffleVectorInst *SVI = dyn_cast(SI->getValueOperand()); + auto *SVI = dyn_cast(SI->getValueOperand()); if (!SVI || !SVI->hasOneUse() || isa(SVI->getType())) return false; @@ -461,10 +522,10 @@ bool InterleavedAccess::runOnFunction(Function &F) { bool Changed = false; for (auto &I : instructions(F)) { - if (LoadInst *LI = dyn_cast(&I)) + if (auto *LI = dyn_cast(&I)) Changed |= lowerInterleavedLoad(LI, DeadInsts); - if (StoreInst *SI = dyn_cast(&I)) + if (auto *SI = dyn_cast(&I)) Changed |= lowerInterleavedStore(SI, DeadInsts); } diff --git a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll index 4c501e5403f971..99100a2ab4c829 100644 --- a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll +++ b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll @@ -7,13 +7,10 @@ define void @vld2(float* nocapture readonly %pSrc, float* noalias nocapture %pDs ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: .LBB0_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp q1, q0, [x0], #32 -; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s -; CHECK-NEXT: fmul v1.4s, v1.4s, v1.4s -; CHECK-NEXT: uzp1 v2.4s, v1.4s, v0.4s -; CHECK-NEXT: uzp2 v0.4s, v1.4s, v0.4s -; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s -; CHECK-NEXT: str q0, [x1, x8] +; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x0], #32 +; CHECK-NEXT: fmul v2.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v2.4s, v1.4s, v1.4s +; CHECK-NEXT: str q2, [x1, x8] ; CHECK-NEXT: add x8, x8, #16 // =16 ; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096 ; CHECK-NEXT: b.ne .LBB0_1 @@ -50,27 +47,11 @@ define void @vld3(float* nocapture readonly %pSrc, float* 
noalias nocapture %pDs ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: .LBB1_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp q2, q0, [x0, #16] -; CHECK-NEXT: ldr q1, [x0], #48 -; CHECK-NEXT: fmul v2.4s, v2.4s, v2.4s -; CHECK-NEXT: fmul v1.4s, v1.4s, v1.4s -; CHECK-NEXT: ext v3.16b, v2.16b, v1.16b, #8 -; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s -; CHECK-NEXT: ext v5.16b, v1.16b, v3.16b, #12 -; CHECK-NEXT: ext v3.16b, v3.16b, v2.16b, #4 -; CHECK-NEXT: dup v4.4s, v0.s[1] -; CHECK-NEXT: mov v2.s[0], v1.s[2] -; CHECK-NEXT: dup v1.4s, v0.s[2] -; CHECK-NEXT: mov v0.s[2], v0.s[0] -; CHECK-NEXT: ext v5.16b, v5.16b, v5.16b, #12 -; CHECK-NEXT: ext v3.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: ext v0.16b, v0.16b, v2.16b, #8 -; CHECK-NEXT: mov v5.s[3], v4.s[3] -; CHECK-NEXT: mov v3.s[3], v1.s[3] -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: fadd v1.4s, v3.4s, v5.4s -; CHECK-NEXT: fadd v0.4s, v1.4s, v0.4s -; CHECK-NEXT: str q0, [x1, x8] +; CHECK-NEXT: ld3 { v0.4s, v1.4s, v2.4s }, [x0], #48 +; CHECK-NEXT: fmul v3.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v3.4s, v1.4s, v1.4s +; CHECK-NEXT: fmla v3.4s, v2.4s, v2.4s +; CHECK-NEXT: str q3, [x1, x8] ; CHECK-NEXT: add x8, x8, #16 // =16 ; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096 ; CHECK-NEXT: b.ne .LBB1_1 @@ -110,37 +91,15 @@ define void @vld4(float* nocapture readonly %pSrc, float* noalias nocapture %pDs ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: .LBB2_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp q2, q3, [x0, #32] -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0], #64 ; CHECK-NEXT: add x9, x1, x8 ; CHECK-NEXT: add x8, x8, #32 // =32 -; CHECK-NEXT: fmul v3.4s, v3.4s, v3.4s -; CHECK-NEXT: fmul v2.4s, v2.4s, v2.4s -; CHECK-NEXT: fmul v1.4s, v1.4s, v1.4s -; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s -; CHECK-NEXT: zip1 v5.4s, v2.4s, v3.4s -; CHECK-NEXT: trn2 v7.4s, v2.4s, v3.4s -; CHECK-NEXT: zip1 v4.4s, v0.4s, v1.4s -; CHECK-NEXT: 
trn2 v6.4s, v0.4s, v1.4s -; CHECK-NEXT: ext v5.16b, v2.16b, v5.16b, #8 -; CHECK-NEXT: ext v7.16b, v2.16b, v7.16b, #8 -; CHECK-NEXT: zip2 v1.4s, v0.4s, v1.4s -; CHECK-NEXT: ext v4.16b, v5.16b, v4.16b, #8 -; CHECK-NEXT: zip2 v5.4s, v2.4s, v3.4s -; CHECK-NEXT: ext v0.16b, v6.16b, v0.16b, #8 -; CHECK-NEXT: ext v6.16b, v7.16b, v6.16b, #8 -; CHECK-NEXT: mov v2.s[3], v3.s[2] -; CHECK-NEXT: ext v0.16b, v5.16b, v0.16b, #8 -; CHECK-NEXT: ext v3.16b, v4.16b, v4.16b, #8 -; CHECK-NEXT: ext v4.16b, v6.16b, v6.16b, #8 -; CHECK-NEXT: ext v1.16b, v2.16b, v1.16b, #8 -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: fadd v2.4s, v4.4s, v3.4s -; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8 ; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192 -; CHECK-NEXT: fadd v3.4s, v0.4s, v1.4s -; CHECK-NEXT: add x0, x0, #64 // =64 -; CHECK-NEXT: st2 { v2.4s, v3.4s }, [x9] +; CHECK-NEXT: fmul v4.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v4.4s, v1.4s, v1.4s +; CHECK-NEXT: fmul v5.4s, v2.4s, v2.4s +; CHECK-NEXT: fmla v5.4s, v3.4s, v3.4s +; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x9] ; CHECK-NEXT: b.ne .LBB2_1 ; CHECK-NEXT: // %bb.2: // %while.end ; CHECK-NEXT: ret @@ -184,16 +143,13 @@ define void @twosrc(float* nocapture readonly %pSrc, float* nocapture readonly % ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x9, x0, x8 ; CHECK-NEXT: add x10, x1, x8 -; CHECK-NEXT: ldp q0, q1, [x9] -; CHECK-NEXT: ldp q3, q2, [x10] +; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x9] +; CHECK-NEXT: ld2 { v2.4s, v3.4s }, [x10] ; CHECK-NEXT: add x8, x8, #32 // =32 ; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192 -; CHECK-NEXT: fmul v1.4s, v2.4s, v1.4s -; CHECK-NEXT: fmul v0.4s, v3.4s, v0.4s -; CHECK-NEXT: uzp1 v2.4s, v0.4s, v1.4s -; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s -; CHECK-NEXT: str q0, [x2], #16 +; CHECK-NEXT: fmul v4.4s, v2.4s, v0.4s +; CHECK-NEXT: fmla v4.4s, v1.4s, v3.4s +; CHECK-NEXT: str q4, [x2], #16 ; CHECK-NEXT: b.ne .LBB3_1 ; CHECK-NEXT: // %bb.2: // %while.end ; 
CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll b/llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll index 7c4fef3c71c581..c04243ee575453 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll @@ -4,94 +4,49 @@ define void @arm_cmplx_mag_squared_f16(half* nocapture readonly %pSrc, half* nocapture %pDst, i32 %numSamples) { ; CHECK-LABEL: arm_cmplx_mag_squared_f16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: beq.w .LBB0_9 +; CHECK-NEXT: beq .LBB0_8 ; CHECK-NEXT: @ %bb.1: @ %while.body.preheader ; CHECK-NEXT: cmp r2, #8 -; CHECK-NEXT: blo.w .LBB0_6 +; CHECK-NEXT: blo .LBB0_9 ; CHECK-NEXT: @ %bb.2: @ %vector.memcheck ; CHECK-NEXT: add.w r3, r0, r2, lsl #2 ; CHECK-NEXT: cmp r3, r1 ; CHECK-NEXT: itt hi ; CHECK-NEXT: addhi.w r3, r1, r2, lsl #1 ; CHECK-NEXT: cmphi r3, r0 -; CHECK-NEXT: bhi .LBB0_6 +; CHECK-NEXT: bhi .LBB0_9 ; CHECK-NEXT: @ %bb.3: @ %vector.ph -; CHECK-NEXT: bic r5, r2, #7 -; CHECK-NEXT: movs r4, #1 -; CHECK-NEXT: sub.w r3, r5, #8 -; CHECK-NEXT: and r8, r2, #7 -; CHECK-NEXT: add.w r12, r1, r5, lsl #1 -; CHECK-NEXT: add.w r3, r4, r3, lsr #3 -; CHECK-NEXT: mov r7, r3 -; CHECK-NEXT: add.w r3, r0, r5, lsl #2 +; CHECK-NEXT: bic r4, r2, #7 +; CHECK-NEXT: movs r3, #1 +; CHECK-NEXT: sub.w r12, r4, #8 +; CHECK-NEXT: and r7, r2, #7 +; CHECK-NEXT: add.w r3, r3, r12, lsr #3 +; CHECK-NEXT: add.w r12, r1, r4, lsl #1 +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: add.w r3, r0, r4, lsl #2 ; CHECK-NEXT: .LBB0_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrh.u16 q0, [r0], #32 -; CHECK-NEXT: mov lr, r7 +; CHECK-NEXT: vld20.16 {q0, q1}, [r0] +; CHECK-NEXT: mov lr, r5 ; CHECK-NEXT: subs.w lr, lr, #1 -; CHECK-NEXT: vmul.f16 q1, q0, q0 -; CHECK-NEXT: mov r7, lr -; CHECK-NEXT: vmovx.f16 s0, s5 
-; CHECK-NEXT: vmovx.f16 s8, s6 -; CHECK-NEXT: vmov r4, s0 -; CHECK-NEXT: vmovx.f16 s0, s4 -; CHECK-NEXT: vmov r6, s0 -; CHECK-NEXT: vmov.16 q0[0], r6 -; CHECK-NEXT: vmov r6, s4 -; CHECK-NEXT: vmov.16 q0[1], r4 -; CHECK-NEXT: vmov r4, s8 -; CHECK-NEXT: vmovx.f16 s8, s7 -; CHECK-NEXT: vmov.16 q0[2], r4 -; CHECK-NEXT: vmov r4, s8 -; CHECK-NEXT: vldrh.u16 q2, [r0, #-16] -; CHECK-NEXT: vmov.16 q0[3], r4 -; CHECK-NEXT: vmul.f16 q2, q2, q2 -; CHECK-NEXT: vmovx.f16 s12, s8 -; CHECK-NEXT: vmov r4, s12 -; CHECK-NEXT: vmovx.f16 s12, s9 -; CHECK-NEXT: vmov.16 q0[4], r4 -; CHECK-NEXT: vmov r4, s12 -; CHECK-NEXT: vmovx.f16 s12, s10 -; CHECK-NEXT: vmov.16 q0[5], r4 -; CHECK-NEXT: vmov r4, s12 -; CHECK-NEXT: vmov.16 q3[0], r6 -; CHECK-NEXT: vmov.16 q0[6], r4 -; CHECK-NEXT: vmov r4, s5 -; CHECK-NEXT: vmov.16 q3[1], r4 -; CHECK-NEXT: vmov r4, s6 -; CHECK-NEXT: vmov.16 q3[2], r4 -; CHECK-NEXT: vmov r4, s7 -; CHECK-NEXT: vmov.16 q3[3], r4 -; CHECK-NEXT: vmov r4, s8 -; CHECK-NEXT: vmov.16 q3[4], r4 -; CHECK-NEXT: vmov r4, s9 -; CHECK-NEXT: vmov.16 q3[5], r4 -; CHECK-NEXT: vmov r4, s10 -; CHECK-NEXT: vmov.16 q3[6], r4 -; CHECK-NEXT: vmov r4, s11 -; CHECK-NEXT: vmovx.f16 s4, s11 -; CHECK-NEXT: vmov.16 q3[7], r4 -; CHECK-NEXT: vmov r4, s4 -; CHECK-NEXT: vmov.16 q0[7], r4 -; CHECK-NEXT: vadd.f16 q0, q0, q3 -; CHECK-NEXT: vstrb.8 q0, [r1], #16 +; CHECK-NEXT: vld21.16 {q0, q1}, [r0]! 
+; CHECK-NEXT: mov r5, lr +; CHECK-NEXT: vmul.f16 q2, q0, q0 +; CHECK-NEXT: vfma.f16 q2, q1, q1 +; CHECK-NEXT: vstrb.8 q2, [r1], #16 ; CHECK-NEXT: bne .LBB0_4 ; CHECK-NEXT: b .LBB0_5 ; CHECK-NEXT: .LBB0_5: @ %middle.block -; CHECK-NEXT: cmp r5, r2 -; CHECK-NEXT: mov lr, r8 -; CHECK-NEXT: bne .LBB0_7 -; CHECK-NEXT: b .LBB0_9 -; CHECK-NEXT: .LBB0_6: -; CHECK-NEXT: mov r3, r0 -; CHECK-NEXT: mov r12, r1 -; CHECK-NEXT: mov lr, r2 -; CHECK-NEXT: .LBB0_7: @ %while.body.preheader26 +; CHECK-NEXT: cmp r4, r2 +; CHECK-NEXT: mov lr, r7 +; CHECK-NEXT: it eq +; CHECK-NEXT: popeq {r4, r5, r7, pc} +; CHECK-NEXT: .LBB0_6: @ %while.body.preheader26 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB0_8: @ %while.body +; CHECK-NEXT: .LBB0_7: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldr.16 s0, [r3] ; CHECK-NEXT: vldr.16 s2, [r3, #2] @@ -100,9 +55,14 @@ define void @arm_cmplx_mag_squared_f16(half* nocapture readonly %pSrc, half* noc ; CHECK-NEXT: vfma.f16 s0, s2, s2 ; CHECK-NEXT: vstr.16 s0, [r12] ; CHECK-NEXT: add.w r12, r12, #2 -; CHECK-NEXT: le lr, .LBB0_8 -; CHECK-NEXT: .LBB0_9: @ %while.end -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: le lr, .LBB0_7 +; CHECK-NEXT: .LBB0_8: @ %while.end +; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: .LBB0_9: +; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: mov r12, r1 +; CHECK-NEXT: mov lr, r2 +; CHECK-NEXT: b .LBB0_6 entry: %cmp.not11 = icmp eq i32 %numSamples, 0 br i1 %cmp.not11, label %while.end, label %while.body.preheader @@ -195,37 +155,28 @@ define void @arm_cmplx_mag_squared_f32(float* nocapture readonly %pSrc, float* n ; CHECK-NEXT: cmphi r3, r0 ; CHECK-NEXT: bhi .LBB1_9 ; CHECK-NEXT: @ %bb.3: @ %vector.ph -; CHECK-NEXT: bic r5, r2, #3 -; CHECK-NEXT: movs r4, #1 -; CHECK-NEXT: subs r3, r5, #4 +; CHECK-NEXT: bic r4, r2, #3 +; CHECK-NEXT: movs r3, #1 +; CHECK-NEXT: sub.w r12, r4, #4 ; CHECK-NEXT: and r7, r2, #3 -; CHECK-NEXT: add.w r12, r1, r5, lsl #2 -; CHECK-NEXT: add.w r3, r4, r3, lsr #2 -; 
CHECK-NEXT: mov r4, r3 -; CHECK-NEXT: add.w r3, r0, r5, lsl #3 +; CHECK-NEXT: add.w r3, r3, r12, lsr #2 +; CHECK-NEXT: add.w r12, r1, r4, lsl #2 +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: add.w r3, r0, r4, lsl #3 ; CHECK-NEXT: .LBB1_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r0, #16] -; CHECK-NEXT: vldrw.u32 q1, [r0], #32 -; CHECK-NEXT: mov lr, r4 -; CHECK-NEXT: vmul.f32 q1, q1, q1 -; CHECK-NEXT: vmul.f32 q0, q0, q0 -; CHECK-NEXT: vmov.f64 d4, d2 +; CHECK-NEXT: vld20.32 {q0, q1}, [r0] +; CHECK-NEXT: mov lr, r5 ; CHECK-NEXT: subs.w lr, lr, #1 -; CHECK-NEXT: mov r4, lr -; CHECK-NEXT: vmov.f32 s12, s5 -; CHECK-NEXT: vmov.f32 s9, s6 -; CHECK-NEXT: vmov.f32 s13, s7 -; CHECK-NEXT: vmov.f32 s10, s0 -; CHECK-NEXT: vmov.f32 s14, s1 -; CHECK-NEXT: vmov.f32 s11, s2 -; CHECK-NEXT: vmov.f32 s15, s3 -; CHECK-NEXT: vadd.f32 q0, q3, q2 -; CHECK-NEXT: vstrb.8 q0, [r1], #16 +; CHECK-NEXT: vld21.32 {q0, q1}, [r0]! +; CHECK-NEXT: mov r5, lr +; CHECK-NEXT: vmul.f32 q2, q0, q0 +; CHECK-NEXT: vfma.f32 q2, q1, q1 +; CHECK-NEXT: vstrb.8 q2, [r1], #16 ; CHECK-NEXT: bne .LBB1_4 ; CHECK-NEXT: b .LBB1_5 ; CHECK-NEXT: .LBB1_5: @ %middle.block -; CHECK-NEXT: cmp r5, r2 +; CHECK-NEXT: cmp r4, r2 ; CHECK-NEXT: mov lr, r7 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r7, pc} diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll index a1e1b4dbe748c3..47114092bfb1a7 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll @@ -7,12 +7,15 @@ target triple = "aarch64--linux-gnu" define <4 x float> @vld2(<8 x float>* %pSrc) { ; CHECK-LABEL: @vld2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x float>, <8 x float>* [[PSRC:%.*]], align 4 -; CHECK-NEXT: [[L2:%.*]] = fmul fast <8 x float> [[WIDE_VEC]], [[WIDE_VEC]] -; CHECK-NEXT: [[L3:%.*]] = shufflevector <8 
x float> [[L2]], <8 x float> undef, <4 x i32> -; CHECK-NEXT: [[L4:%.*]] = fmul fast <8 x float> [[WIDE_VEC]], [[WIDE_VEC]] -; CHECK-NEXT: [[L5:%.*]] = shufflevector <8 x float> [[L4]], <8 x float> undef, <4 x i32> -; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L5]], [[L3]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x float>* [[PSRC:%.*]] to <4 x float>* +; CHECK-NEXT: [[LDN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 +; CHECK-NEXT: [[L26:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L43]], [[L26]] ; CHECK-NEXT: ret <4 x float> [[L6]] ; entry: @@ -28,15 +31,19 @@ entry: define <4 x float> @vld3(<12 x float>* %pSrc) { ; CHECK-LABEL: @vld3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x float>, <12 x float>* [[PSRC:%.*]], align 4 -; CHECK-NEXT: [[L2:%.*]] = fmul fast <12 x float> [[WIDE_VEC]], [[WIDE_VEC]] -; CHECK-NEXT: [[L3:%.*]] = shufflevector <12 x float> [[L2]], <12 x float> undef, <4 x i32> -; CHECK-NEXT: [[L4:%.*]] = fmul fast <12 x float> [[WIDE_VEC]], [[WIDE_VEC]] -; CHECK-NEXT: [[L5:%.*]] = shufflevector <12 x float> [[L4]], <12 x float> undef, <4 x i32> -; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L5]], [[L3]] -; CHECK-NEXT: [[L7:%.*]] = fmul fast <12 x float> [[WIDE_VEC]], [[WIDE_VEC]] -; CHECK-NEXT: [[L8:%.*]] = shufflevector <12 x float> [[L7]], <12 x float> undef, <4 x i32> -; CHECK-NEXT: [[L9:%.*]] = fadd fast <4 x float> [[L6]], [[L8]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <12 x float>* [[PSRC:%.*]] to <4 x float>* +; 
CHECK-NEXT: [[LDN:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0v4f32(<4 x float>* [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 +; CHECK-NEXT: [[L29:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L46]], [[L29]] +; CHECK-NEXT: [[L73:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L9:%.*]] = fadd fast <4 x float> [[L6]], [[L73]] ; CHECK-NEXT: ret <4 x float> [[L9]] ; entry: @@ -55,17 +62,22 @@ entry: define <4 x float> @vld4(<16 x float>* %pSrc) { ; CHECK-LABEL: @vld4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, <16 x float>* [[PSRC:%.*]], align 4 -; CHECK-NEXT: [[L3:%.*]] = fmul fast <16 x float> [[WIDE_VEC]], [[WIDE_VEC]] -; CHECK-NEXT: [[L4:%.*]] = shufflevector <16 x float> [[L3]], <16 x float> undef, <4 x i32> -; CHECK-NEXT: [[L5:%.*]] = fmul fast <16 x float> [[WIDE_VEC]], [[WIDE_VEC]] -; CHECK-NEXT: [[L6:%.*]] = shufflevector <16 x float> [[L5]], <16 x float> undef, <4 x i32> -; CHECK-NEXT: [[L7:%.*]] = fadd fast <4 x float> [[L6]], [[L4]] -; CHECK-NEXT: [[L8:%.*]] = fmul fast <16 x float> [[WIDE_VEC]], [[WIDE_VEC]] -; CHECK-NEXT: [[L9:%.*]] = shufflevector <16 x float> [[L8]], <16 x float> undef, <4 x i32> -; CHECK-NEXT: [[L10:%.*]] = fmul fast <16 x float> [[WIDE_VEC]], [[WIDE_VEC]] -; CHECK-NEXT: [[L11:%.*]] = shufflevector <16 x 
float> [[L10]], <16 x float> undef, <4 x i32> -; CHECK-NEXT: [[L12:%.*]] = fadd fast <4 x float> [[L11]], [[L9]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x float>* [[PSRC:%.*]] to <4 x float>* +; CHECK-NEXT: [[LDN:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0v4f32(<4 x float>* [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 +; CHECK-NEXT: [[L312:%.*]] = fmul <4 x float> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[L59:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[L7:%.*]] = fadd fast <4 x float> [[L59]], [[L312]] +; CHECK-NEXT: [[L86:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[L103:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L12:%.*]] = fadd fast <4 x float> [[L103]], [[L86]] ; CHECK-NEXT: ret <4 x float> [[L12]] ; entry: @@ -86,13 +98,17 @@ entry: define <4 x float> @twosrc(<8 x float>* %pSrc1, <8 x float>* %pSrc2) { ; CHECK-LABEL: @twosrc( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x float>, <8 x float>* [[PSRC1:%.*]], align 4 -; CHECK-NEXT: [[WIDE_VEC26:%.*]] = load <8 x float>, <8 x float>* [[PSRC2:%.*]], align 4 -; CHECK-NEXT: 
[[L4:%.*]] = fmul fast <8 x float> [[WIDE_VEC26]], [[WIDE_VEC]] -; CHECK-NEXT: [[L5:%.*]] = shufflevector <8 x float> [[L4]], <8 x float> undef, <4 x i32> -; CHECK-NEXT: [[L6:%.*]] = fmul fast <8 x float> [[WIDE_VEC26]], [[WIDE_VEC]] -; CHECK-NEXT: [[L7:%.*]] = shufflevector <8 x float> [[L6]], <8 x float> undef, <4 x i32> -; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L7]], [[L5]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x float>* [[PSRC1:%.*]] to <4 x float>* +; CHECK-NEXT: [[LDN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x float>* [[PSRC2:%.*]] to <4 x float>* +; CHECK-NEXT: [[LDN7:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]]) +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 1 +; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[L63:%.*]] = fmul <4 x float> [[TMP5]], [[TMP1]] +; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L63]], [[L46]] ; CHECK-NEXT: ret <4 x float> [[L8]] ; entry: @@ -109,14 +125,17 @@ entry: define <4 x float> @twosrc2(<8 x float>* %pSrc1, <8 x float>* %pSrc2) { ; CHECK-LABEL: @twosrc2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x float>, <8 x float>* [[PSRC1:%.*]], align 4 -; CHECK-NEXT: [[WIDE_VEC26:%.*]] = load <8 x float>, <8 x float>* [[PSRC2:%.*]], align 4 -; CHECK-NEXT: [[L4:%.*]] = fmul fast <8 x float> [[WIDE_VEC26]], [[WIDE_VEC]] -; CHECK-NEXT: [[L5:%.*]] = shufflevector <8 x float> [[L4]], <8 x float> undef, <4 x i32> -; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[WIDE_VEC26]], <8 x float> undef, <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = 
shufflevector <8 x float> [[WIDE_VEC]], <8 x float> undef, <4 x i32> -; CHECK-NEXT: [[L6:%.*]] = fmul fast <4 x float> [[S1]], [[S2]] -; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L6]], [[L5]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x float>* [[PSRC1:%.*]] to <4 x float>* +; CHECK-NEXT: [[LDN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x float>* [[PSRC2:%.*]] to <4 x float>* +; CHECK-NEXT: [[LDN4:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]]) +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 1 +; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[L6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]] +; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L6]], [[L43]] ; CHECK-NEXT: ret <4 x float> [[L8]] ; entry: From fcf62879163ab28b4bd72cbc5715b49133f38427 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Wed, 28 Oct 2020 13:34:28 +0300 Subject: [PATCH 05/16] [yaml2obj] - Improve handling of SectionHeaderTable::NoHeaders flag. When `NoHeaders` is set, we still have the following issues: 1) We emit the `.shstrtab` implicit section of size 1 (empty string table). 2) We still align the start of the section header table, which affects the output size. 3) We still write section header table bytes. This patch fixes all of these issues.
Differential revision: https://reviews.llvm.org/D90295 --- llvm/lib/ObjectYAML/ELFEmitter.cpp | 44 +++++++++++-------- .../ELF/malformed-pt-dynamic.test | 16 +++---- .../tools/yaml2obj/ELF/section-headers.yaml | 34 ++++++++++++++ 3 files changed, 67 insertions(+), 27 deletions(-) diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index cbd76c7aad1b38..a1acd2a69da85d 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -232,7 +232,7 @@ template class ELFState { ArrayRef SHeaders); void finalizeStrings(); - void writeELFHeader(raw_ostream &OS, uint64_t SHOff); + void writeELFHeader(raw_ostream &OS, Optional SHOff); void writeSectionContent(Elf_Shdr &SHeader, const ELFYAML::NoBitsSection &Section, ContiguousBlobAccumulator &CBA); @@ -363,7 +363,9 @@ ELFState::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH) std::string SecName = ("." + DebugSecName).str(); ImplicitSections.push_back(StringRef(SecName).copy(StringAlloc)); } - ImplicitSections.insert(ImplicitSections.end(), {".strtab", ".shstrtab"}); + ImplicitSections.insert(ImplicitSections.end(), {".strtab"}); + if (!Doc.SectionHeaders || !Doc.SectionHeaders->NoHeaders.getValueOr(false)) + ImplicitSections.insert(ImplicitSections.end(), {".shstrtab"}); // Insert placeholders for implicit sections that are not // defined explicitly in YAML. @@ -379,7 +381,7 @@ ELFState::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH) } template -void ELFState::writeELFHeader(raw_ostream &OS, uint64_t SHOff) { +void ELFState::writeELFHeader(raw_ostream &OS, Optional SHOff) { using namespace llvm::ELF; Elf_Ehdr Header; @@ -429,22 +431,19 @@ void ELFState::writeELFHeader(raw_ostream &OS, uint64_t SHOff) { Header.e_shentsize = Doc.Header.EShEntSize ? 
(uint16_t)*Doc.Header.EShEntSize : sizeof(Elf_Shdr); - const bool NoShdrs = - Doc.SectionHeaders && Doc.SectionHeaders->NoHeaders.getValueOr(false); - if (Doc.Header.EShOff) Header.e_shoff = *Doc.Header.EShOff; - else if (NoShdrs) - Header.e_shoff = 0; + else if (SHOff) + Header.e_shoff = *SHOff; else - Header.e_shoff = SHOff; + Header.e_shoff = 0; if (Doc.Header.EShNum) Header.e_shnum = *Doc.Header.EShNum; else if (!Doc.SectionHeaders || (Doc.SectionHeaders->NoHeaders && !*Doc.SectionHeaders->NoHeaders)) Header.e_shnum = Doc.getSections().size(); - else if (NoShdrs) + else if (!SHOff) Header.e_shnum = 0; else Header.e_shnum = @@ -454,10 +453,10 @@ void ELFState::writeELFHeader(raw_ostream &OS, uint64_t SHOff) { if (Doc.Header.EShStrNdx) Header.e_shstrndx = *Doc.Header.EShStrNdx; - else if (NoShdrs || ExcludedSectionHeaders.count(".shstrtab")) - Header.e_shstrndx = 0; - else + else if (SHOff && !ExcludedSectionHeaders.count(".shstrtab")) Header.e_shstrndx = SN2I.get(".shstrtab"); + else + Header.e_shstrndx = 0; OS.write((const char *)&Header, sizeof(Header)); } @@ -1884,11 +1883,17 @@ bool ELFState::writeELF(raw_ostream &OS, ELFYAML::Object &Doc, // Now we can decide segment offsets. State.setProgramHeaderLayout(PHeaders, SHeaders); - // Align the start of the section header table, which is written after all - // section data. - uint64_t SHOff = - State.alignToOffset(CBA, sizeof(typename ELFT::uint), /*Offset=*/None); - bool ReachedLimit = SHOff + arrayDataSize(makeArrayRef(SHeaders)) > MaxSize; + // If needed, align the start of the section header table, which is written + // after all section data. 
+ const bool HasSectionHeaders = + !Doc.SectionHeaders || !Doc.SectionHeaders->NoHeaders.getValueOr(false); + Optional SHOff; + if (HasSectionHeaders) + SHOff = State.alignToOffset(CBA, sizeof(typename ELFT::uint), + /*Offset=*/None); + bool ReachedLimit = SHOff.getValueOr(CBA.getOffset()) + + arrayDataSize(makeArrayRef(SHeaders)) > + MaxSize; if (Error E = CBA.takeLimitError()) { // We report a custom error message instead below. consumeError(std::move(E)); @@ -1906,7 +1911,8 @@ bool ELFState::writeELF(raw_ostream &OS, ELFYAML::Object &Doc, State.writeELFHeader(OS, SHOff); writeArrayData(OS, makeArrayRef(PHeaders)); CBA.writeBlobToStream(OS); - writeArrayData(OS, makeArrayRef(SHeaders)); + if (HasSectionHeaders) + writeArrayData(OS, makeArrayRef(SHeaders)); return true; } diff --git a/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test b/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test index 4c22b318432c25..68da95b29ce03b 100644 --- a/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test +++ b/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test @@ -22,13 +22,13 @@ # WARN1-GNU-NEXT: 0x0000000000000000 (NULL) 0x0 ## Case A.2: in this case we drop section headers. The dynamic table is not dumped. 
-# RUN: yaml2obj %s -DFILESIZE=0x119 -DNOHEADERS=true -o %t1.noheaders +# RUN: yaml2obj %s -DFILESIZE=0x12 -DNOHEADERS=true -o %t1.noheaders # RUN: llvm-readobj %t1.noheaders --dynamic-table 2>&1 | FileCheck -DFILE=%t1.noheaders %s \ # RUN: --check-prefix=WARN1-NOHEADERS --implicit-check-not="DynamicSection [" # RUN: llvm-readelf %t1.noheaders --dynamic-table 2>&1 | FileCheck -DFILE=%t1.noheaders %s \ # RUN: --check-prefix=WARN1-NOHEADERS --implicit-check-not="Dynamic section" -# WARN1-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0x119) exceeds the size of the file (0x1118) +# WARN1-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0x12) exceeds the size of the file (0x1011) ## Case B: Test case where the offset of the PT_DYNAMIC header is too large to be in the file. @@ -45,13 +45,13 @@ # WARN2: warning: '[[FILE]]': no valid dynamic table was found ## Case B.2: in this case we drop section headers. The dynamic table is not dumped. -# RUN: yaml2obj %s -DOFFSET=0x1119 -DNOHEADERS=true -o %t2.noheaders +# RUN: yaml2obj %s -DOFFSET=0x1112 -DNOHEADERS=true -o %t2.noheaders # RUN: llvm-readobj %t2.noheaders --dynamic-table 2>&1 | FileCheck -DFILE=%t2.noheaders %s \ # RUN: --check-prefix=WARN2-NOHEADERS --implicit-check-not="DynamicSection [" # RUN: llvm-readelf %t2.noheaders --dynamic-table 2>&1 | FileCheck -DFILE=%t2.noheaders %s \ # RUN: --check-prefix=WARN2-NOHEADERS --implicit-check-not="Dynamic section" -# WARN2-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1119) + file size (0x10) exceeds the size of the file (0x1118) +# WARN2-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1112) + file size (0x10) exceeds the size of the file (0x1011) ## Case C: test we report a warning when the offset + the file size of the PT_DYNAMIC is so large a ## value that it overflows the platform address size type. 
Check we also report a warning about @@ -73,7 +73,7 @@ # RUN: llvm-readelf %t3.noheaders --dynamic-table 2>&1 | \ # RUN: FileCheck -DFILE=%t3.noheaders %s --check-prefix=WARN3-NOHEADERS -# WARN3-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0xffffffffffffffff) + file size (0x10) exceeds the size of the file (0x1118) +# WARN3-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0xffffffffffffffff) + file size (0x10) exceeds the size of the file (0x1011) # RUN: yaml2obj %s -DFILESIZE=0xffffffffffffffff -o %t4 # RUN: llvm-readobj %t4 --dynamic-table 2>&1 | FileCheck -DFILE=%t4 %s --check-prefix=WARN4 @@ -87,7 +87,7 @@ # RUN: llvm-readelf %t4.noheaders --dynamic-table 2>&1 | \ # RUN: FileCheck -DFILE=%t4.noheaders %s --check-prefix=WARN4-NOHEADERS -# WARN4-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0xffffffffffffffff) exceeds the size of the file (0x1118) +# WARN4-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0xffffffffffffffff) exceeds the size of the file (0x1011) ## Case D: the same as "Case C", but for a 32-bit object. 
@@ -107,7 +107,7 @@ # RUN: llvm-readelf %t5.noheaders --dynamic-table 2>&1 | \ # RUN: FileCheck -DFILE=%t5.noheaders %s --check-prefix=WARN5-NOHEADERS -# WARN5-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0xffffffff) + file size (0x8) exceeds the size of the file (0x10ac) +# WARN5-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0xffffffff) + file size (0x8) exceeds the size of the file (0x1009) # RUN: yaml2obj %s -DBITS=32 -DFILESIZE=0xffffffff -o %t6 # RUN: llvm-readobj %t6 --dynamic-table 2>&1 | FileCheck -DFILE=%t6 %s --check-prefix=WARN6 @@ -121,7 +121,7 @@ # RUN: llvm-readelf %t6.noheaders --dynamic-table 2>&1 | \ # RUN: FileCheck -DFILE=%t6.noheaders %s --check-prefix=WARN6-NOHEADERS -# WARN6-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0xffffffff) exceeds the size of the file (0x10ac) +# WARN6-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0xffffffff) exceeds the size of the file (0x1009) --- !ELF FileHeader: diff --git a/llvm/test/tools/yaml2obj/ELF/section-headers.yaml b/llvm/test/tools/yaml2obj/ELF/section-headers.yaml index 87d6ebe57c3053..c90ffe38122877 100644 --- a/llvm/test/tools/yaml2obj/ELF/section-headers.yaml +++ b/llvm/test/tools/yaml2obj/ELF/section-headers.yaml @@ -191,6 +191,10 @@ FileHeader: Sections: - Name: .foo Type: SHT_PROGBITS +## FIXME: we have to set an arbitrary size to create a +## piece of dummy data to make llvm-readelf happy. +## See: https://bugs.llvm.org/show_bug.cgi?id=40804 + Size: 0x100 SectionHeaderTable: NoHeaders: true @@ -250,3 +254,33 @@ Symbols: Section: .foo - Name: bar Section: .bar + +## Check that when "NoHeaders" is set to "true" then we don't emit +## the .shstrtab section implicitly and don't write the data of the +## section header table to the file. 
+ +# RUN: yaml2obj %s --docnum=8 -o %t8 +# RUN: wc -c < %t8 | FileCheck %s --check-prefix=SIZE + +# SIZE: 511{{$}} + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL +Sections: +## We don't want any implicit sections to be added after the .foo section, +## so add them here explicitly. + - Name: .strtab + Type: SHT_STRTAB +## Nothing should be emitted after the following section. +## So we know that the expected file size is 0x100 + 0xFF == 0x1FF == 511. + - Name: .foo + Type: SHT_PROGBITS +## Unaligned size. Used to make sure that we don't try to align the file offset +## for writing the section header table. + Size: 0xFF + Offset: 0x100 +SectionHeaderTable: + NoHeaders: true From ef129f01e9053871fdf97ad48dd26857d3af925d Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Thu, 29 Oct 2020 16:29:45 +0700 Subject: [PATCH 06/16] [SCEV][NFC] Use general predicate checkers in monotonicity check This makes the code more compact and readable. --- llvm/lib/Analysis/ScalarEvolution.cpp | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 8bc2595d52612e..ec39180cec3956 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -9270,25 +9270,20 @@ ScalarEvolution::getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS, // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be // as general as possible. - switch (Pred) { - default: - return None; // Conservative answer + // Only handle LE/LT/GE/GT predicates. + if (!ICmpInst::isRelational(Pred)) + return None; - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: + // Check that AR does not wrap. + if (ICmpInst::isUnsigned(Pred)) { if (!LHS->hasNoUnsignedWrap()) return None; - return Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE ? 
MonotonicallyIncreasing : MonotonicallyDecreasing; - - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: { + } else { + assert(ICmpInst::isSigned(Pred) && + "Relational predicate is either signed or unsigned!"); if (!LHS->hasNoSignedWrap()) return None; @@ -9308,10 +9303,6 @@ ScalarEvolution::getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS, return None; } - - } - - llvm_unreachable("switch has default clause!"); } bool ScalarEvolution::isLoopInvariantPredicate( From 075f661d01f856192c236b6256f18bc697e28e1b Mon Sep 17 00:00:00 2001 From: David Zarzycki Date: Thu, 29 Oct 2020 05:53:47 -0400 Subject: [PATCH 07/16] [lldb] Unbreak the build after a recent PowerPC change 40dd4d5233d9f81705a24d91b48d2620e487b89d introduced two new types. --- lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 3f6da8ef49d6c3..6a5c5cb69ac6cb 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -4819,6 +4819,12 @@ lldb::Encoding TypeSystemClang::GetEncoding(lldb::opaque_compiler_type_t type, case clang::BuiltinType::OCLIntelSubgroupAVCImeDualRefStreamin: break; + // PowerPC -- Matrix Multiply Assist + case clang::BuiltinType::VectorPair: + case clang::BuiltinType::VectorQuad: + break; + + // ARM -- Scalable Vector Extension case clang::BuiltinType::SveBool: case clang::BuiltinType::SveInt8: case clang::BuiltinType::SveInt8x2: From 749f13e7635aebcda978ee2e42f5569ab7f0fafc Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 27 Oct 2020 11:55:27 +0000 Subject: [PATCH 08/16] [lldb] Correct --help output for qemu rootfs script It was printing "Usage:" twice. 
Reviewed By: omjavaid Differential Revision: https://reviews.llvm.org/D90225 --- lldb/scripts/lldb-test-qemu/rootfs.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/lldb/scripts/lldb-test-qemu/rootfs.sh b/lldb/scripts/lldb-test-qemu/rootfs.sh index 0491f4be0bc2b0..33ff278c1702a2 100644 --- a/lldb/scripts/lldb-test-qemu/rootfs.sh +++ b/lldb/scripts/lldb-test-qemu/rootfs.sh @@ -3,7 +3,6 @@ set -e print_usage() { - echo "Usage:" echo "Usage: $(basename $0) [options]" echo -e "Creates a Ubuntu root file system image.\n" echo -e " --help\t\t\tDisplay this information." From 9c82944b2dc5dcb5e9100a76647fcc1aaa6333b9 Mon Sep 17 00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Wed, 28 Oct 2020 20:11:40 +0900 Subject: [PATCH 09/16] [VE] Add vector control instructions Add LVL/SVL/SMVL/LVIX instructions. Add regression tests too. Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D90355 --- llvm/lib/Target/VE/VEInstrVec.td | 24 ++++++++++++++++++++++++ llvm/test/MC/VE/LVIX.s | 16 ++++++++++++++++ llvm/test/MC/VE/LVL.s | 16 ++++++++++++++++ llvm/test/MC/VE/SMVL.s | 16 ++++++++++++++++ llvm/test/MC/VE/SVL.s | 16 ++++++++++++++++ 5 files changed, 88 insertions(+) create mode 100644 llvm/test/MC/VE/LVIX.s create mode 100644 llvm/test/MC/VE/LVL.s create mode 100644 llvm/test/MC/VE/SMVL.s create mode 100644 llvm/test/MC/VE/SVL.s diff --git a/llvm/lib/Target/VE/VEInstrVec.td b/llvm/lib/Target/VE/VEInstrVec.td index 77de72dc5cd803..ffff1826b59445 100644 --- a/llvm/lib/Target/VE/VEInstrVec.td +++ b/llvm/lib/Target/VE/VEInstrVec.td @@ -1427,3 +1427,27 @@ defm LZVM : RVMSm<"lzvm", 0xa5, VM>; // Section 8.17.12 - TOVM (Trailing One of VM) defm TOVM : RVMSm<"tovm", 0xa6, VM>; + +//----------------------------------------------------------------------------- +// Section 8.18 - Vector Control Instructions +//----------------------------------------------------------------------------- + +// Section 8.18.1 - LVL (Load VL) +let sx = 0, cz = 0, sz = 0, hasSideEffects = 0,
Defs = [VL] in { + def LVLr : RR<0xbf, (outs), (ins I64:$sy), "lvl $sy">; + let cy = 0 in def LVLi : RR<0xbf, (outs), (ins simm7:$sy), "lvl $sy">; +} + +// Section 8.18.2 - SVL (Save VL) +let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0, Uses = [VL] in +def SVL : RR<0x2f, (outs I64:$sx), (ins), "svl $sx">; + +// Section 8.18.3 - SMVL (Save Maximum Vector Length) +let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 in +def SMVL : RR<0x2e, (outs I64:$sx), (ins), "smvl $sx">; + +// Section 8.18.4 - LVIX (Load Vector Data Index) +let sx = 0, cz = 0, sz = 0, hasSideEffects = 0, Defs = [VIX] in { + def LVIXr : RR<0xaf, (outs), (ins I64:$sy), "lvix $sy">; + let cy = 0 in def LVIXi : RR<0xaf, (outs), (ins uimm6:$sy), "lvix $sy">; +} diff --git a/llvm/test/MC/VE/LVIX.s b/llvm/test/MC/VE/LVIX.s new file mode 100644 index 00000000000000..61594cf69c9355 --- /dev/null +++ b/llvm/test/MC/VE/LVIX.s @@ -0,0 +1,16 @@ +# RUN: llvm-mc -triple=ve --show-encoding < %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc -triple=ve -filetype=obj < %s | llvm-objdump -d - \ +# RUN: | FileCheck %s --check-prefixes=CHECK-INST + +# CHECK-INST: lvix %s11 +# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x8b,0x00,0xaf] +lvix %s11 + +# CHECK-INST: lvix 63 +# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x3f,0x00,0xaf] +lvix 63 + +# CHECK-INST: lvix %s63 +# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0xbf,0x00,0xaf] +lvix %s63 diff --git a/llvm/test/MC/VE/LVL.s b/llvm/test/MC/VE/LVL.s new file mode 100644 index 00000000000000..50eec02d5ad328 --- /dev/null +++ b/llvm/test/MC/VE/LVL.s @@ -0,0 +1,16 @@ +# RUN: llvm-mc -triple=ve --show-encoding < %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc -triple=ve -filetype=obj < %s | llvm-objdump -d - \ +# RUN: | FileCheck %s --check-prefixes=CHECK-INST + +# CHECK-INST: lvl %s11 +# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x8b,0x00,0xbf] +lvl %s11 
+ +# CHECK-INST: lvl 63 +# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x3f,0x00,0xbf] +lvl 63 + +# CHECK-INST: lvl -64 +# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x40,0x00,0xbf] +lvl -64 diff --git a/llvm/test/MC/VE/SMVL.s b/llvm/test/MC/VE/SMVL.s new file mode 100644 index 00000000000000..c1fb3628db8185 --- /dev/null +++ b/llvm/test/MC/VE/SMVL.s @@ -0,0 +1,16 @@ +# RUN: llvm-mc -triple=ve --show-encoding < %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc -triple=ve -filetype=obj < %s | llvm-objdump -d - \ +# RUN: | FileCheck %s --check-prefixes=CHECK-INST + +# CHECK-INST: smvl %s11 +# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x0b,0x2e] +smvl %s11 + +# CHECK-INST: smvl %s0 +# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2e] +smvl %s0 + +# CHECK-INST: smvl %s63 +# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x3f,0x2e] +smvl %s63 diff --git a/llvm/test/MC/VE/SVL.s b/llvm/test/MC/VE/SVL.s new file mode 100644 index 00000000000000..a825dcb1dec20d --- /dev/null +++ b/llvm/test/MC/VE/SVL.s @@ -0,0 +1,16 @@ +# RUN: llvm-mc -triple=ve --show-encoding < %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc -triple=ve -filetype=obj < %s | llvm-objdump -d - \ +# RUN: | FileCheck %s --check-prefixes=CHECK-INST + +# CHECK-INST: svl %s11 +# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x0b,0x2f] +svl %s11 + +# CHECK-INST: svl %s0 +# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2f] +svl %s0 + +# CHECK-INST: svl %s63 +# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x3f,0x2f] +svl %s63 From b3761765d00bb4dafb5ecdf17502f7143432371b Mon Sep 17 00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Wed, 28 Oct 2020 20:13:45 +0900 Subject: [PATCH 10/16] [VE] Add missing vector regression test I forgot to add a regression test for VMAXX instruction when I added it. So, I'm adding it now. 
Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D90356 --- llvm/test/MC/VE/VMAXX.s | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 llvm/test/MC/VE/VMAXX.s diff --git a/llvm/test/MC/VE/VMAXX.s b/llvm/test/MC/VE/VMAXX.s new file mode 100644 index 00000000000000..1c16066016b2be --- /dev/null +++ b/llvm/test/MC/VE/VMAXX.s @@ -0,0 +1,28 @@ +# RUN: llvm-mc -triple=ve --show-encoding < %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc -triple=ve -filetype=obj < %s | llvm-objdump -d - \ +# RUN: | FileCheck %s --check-prefixes=CHECK-INST + +# CHECK-INST: vrmaxs.l.fst %v11, %v12 +# CHECK-ENCODING: encoding: [0x00,0x00,0x0c,0x0b,0x00,0x00,0x00,0xab] +vrmaxs.l.fst %v11, %v12 + +# CHECK-INST: vrmaxs.l.fst %v11, %vix, %vm11 +# CHECK-ENCODING: encoding: [0x00,0x00,0xff,0x0b,0x00,0x00,0x0b,0xab] +vrmaxs.l.fst %v11, %vix, %vm11 + +# CHECK-INST: vrmaxs.l.lst %vix, %v22, %vm15 +# CHECK-ENCODING: encoding: [0x00,0x00,0x16,0xff,0x00,0x00,0x2f,0xab] +vrmaxs.l.lst %vix, %v22, %vm15 + +# CHECK-INST: vrmins.l.lst %v63, %v60, %vm2 +# CHECK-ENCODING: encoding: [0x00,0x00,0x3c,0x3f,0x00,0x00,0x32,0xab] +vrmins.l.lst %v63, %v60, %vm2 + +# CHECK-INST: vrmins.l.fst %vix, %vix +# CHECK-ENCODING: encoding: [0x00,0x00,0xff,0xff,0x00,0x00,0x10,0xab] +vrmins.l.fst %vix, %vix, %vm0 + +# CHECK-INST: vrmins.l.lst %vix, %vix, %vm2 +# CHECK-ENCODING: encoding: [0x00,0x00,0xff,0xff,0x00,0x00,0x32,0xab] +vrmins.l.lst %vix, %vix, %vm2 From df00267f1fdb0b098dc42f1caa8a59b29c8e0e5f Mon Sep 17 00:00:00 2001 From: Krasimir Georgiev Date: Thu, 29 Oct 2020 11:27:54 +0100 Subject: [PATCH 11/16] clang-format: Add a consumer to diagnostics engine Contributed by dmikis (Kirill Dmitrenko)! Otherwise problems like trying to format readonly file in-place led to crashes. I've added reviewers by looking at `git blame` and other reviews to the changed file, so may have missed someone. 
Reviewed By: krasimir Differential Revision: https://reviews.llvm.org/D90121 --- clang/tools/clang-format/CMakeLists.txt | 1 + clang/tools/clang-format/ClangFormat.cpp | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/clang/tools/clang-format/CMakeLists.txt b/clang/tools/clang-format/CMakeLists.txt index 35ecdb11253ce0..28ac4fb5913e73 100644 --- a/clang/tools/clang-format/CMakeLists.txt +++ b/clang/tools/clang-format/CMakeLists.txt @@ -7,6 +7,7 @@ add_clang_tool(clang-format set(CLANG_FORMAT_LIB_DEPS clangBasic clangFormat + clangFrontend clangRewrite clangToolingCore ) diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp index 3a7247deab46df..d7b768329bcc63 100644 --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -18,6 +18,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/Version.h" #include "clang/Format/Format.h" +#include "clang/Frontend/TextDiagnosticPrinter.h" #include "clang/Rewrite/Core/Rewriter.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" @@ -423,9 +424,11 @@ static bool format(StringRef FileName) { IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem( new llvm::vfs::InMemoryFileSystem); FileManager Files(FileSystemOptions(), InMemoryFileSystem); + IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts(new DiagnosticOptions()); + TextDiagnosticPrinter DiagnosticsConsumer(errs(), &*DiagOpts); DiagnosticsEngine Diagnostics( - IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), - new DiagnosticOptions); + IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), &*DiagOpts, + &DiagnosticsConsumer, false); SourceManager Sources(Diagnostics, Files); FileID ID = createInMemoryFile(AssumedFileName, Code.get(), Sources, Files, InMemoryFileSystem.get()); From 840737fc82277f5303708ff062cdedd084248631 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Wed, 28 Oct 2020 15:05:35 +0300 Subject: [PATCH 12/16] [yaml2obj][test] - Merge dynsymtab-shlink.yaml to dynsym-section.yaml This
simplifies the dynsymtab-shlink.yaml test (with use of macros) and merges it into the dynsym-section.yaml test. Differential revision: https://reviews.llvm.org/D90301 --- .../tools/yaml2obj/ELF/dynsym-section.yaml | 47 ++++++++++ .../tools/yaml2obj/ELF/dynsymtab-shlink.yaml | 93 ------------------- 2 files changed, 47 insertions(+), 93 deletions(-) delete mode 100644 llvm/test/tools/yaml2obj/ELF/dynsymtab-shlink.yaml diff --git a/llvm/test/tools/yaml2obj/ELF/dynsym-section.yaml b/llvm/test/tools/yaml2obj/ELF/dynsym-section.yaml index 1c6f3efe3cfdd0..5c461132fc9d01 100644 --- a/llvm/test/tools/yaml2obj/ELF/dynsym-section.yaml +++ b/llvm/test/tools/yaml2obj/ELF/dynsym-section.yaml @@ -31,3 +31,50 @@ Sections: # OFFSET-NEXT: [Nr] Name Type Address Off Size ES Flg Lk Inf Al # OFFSET-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 # OFFSET-NEXT: [ 1] .dynsym DYNSYM 0000000000000000 000100 000018 18 A 0 1 0 + +## Check we are able to set Link = 0 for the .dynsym section explicitly. + +# RUN: yaml2obj %s --docnum=2 -DLINK="Link: 0" -o %t2 +# RUN: llvm-readelf --section-headers %t2 | FileCheck %s --check-prefix=LINK-NULL + +# LINK-NULL: [Nr] Name {{.*}} Flg Lk Inf +# LINK-NULL: [ 1] .dynsym {{.*}} A 0 1 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN +Sections: + - Name: .dynsym + Type: SHT_DYNSYM + [[LINK]] + - Name: .dynstr + Type: SHT_STRTAB + - Name: .foo + Type: SHT_PROGBITS + +## Check that by default the .dynsym section will be linked to the .dynstr section, +## when the latter one exists. + +# RUN: yaml2obj %s --docnum=2 -DLINK="" -o %t3 +# RUN: llvm-readelf --section-headers %t3 | FileCheck %s --check-prefix=LINK-DEFAULT + +# LINK-DEFAULT: [Nr] Name {{.*}} Flg Lk Inf +# LINK-DEFAULT: [ 1] .dynsym {{.*}} A 2 1 +# LINK-DEFAULT: [ 2] .dynstr {{.*}} A 0 0 + +## Even when the .dynstr section exists, we can explicitly link the .dynsym section +## to another section. 
+ +# RUN: yaml2obj %s --docnum=2 -DLINK="Link: 3" -o %t4 +# RUN: llvm-readelf --section-headers %t4 | FileCheck %s --check-prefix=LINK-FOO + +# LINK-FOO: [Nr] Name {{.*}} Flg Lk Inf +# LINK-FOO: [ 1] .dynsym {{.*}} A 3 1 +# LINK-FOO: [ 3] .foo + +## Check we can use a section name as a Link value for .dynsym. + +# RUN: yaml2obj %s --docnum=2 -DLINK="Link: .foo" -o %t5 +# RUN: llvm-readelf --section-headers %t5 | FileCheck %s --check-prefix=LINK-FOO diff --git a/llvm/test/tools/yaml2obj/ELF/dynsymtab-shlink.yaml b/llvm/test/tools/yaml2obj/ELF/dynsymtab-shlink.yaml deleted file mode 100644 index b69413e7136bfb..00000000000000 --- a/llvm/test/tools/yaml2obj/ELF/dynsymtab-shlink.yaml +++ /dev/null @@ -1,93 +0,0 @@ -## For implicit dynamic symbol table sections, `Link` field can also -## be specified in YAML. Here we test the behavior in different cases. - -## Check we are able to set Link = 0 for .dynsym explicitly. - -# RUN: yaml2obj --docnum=1 %s -o %t1 -# RUN: llvm-readobj %t1 -S | FileCheck %s --check-prefix=CASE1 - -# CASE1: Name: .dynsym -# CASE1: Link: -# CASE1-SAME: 0 - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN -Sections: - - Name: .dynsym - Type: SHT_DYNSYM - Link: 0 - -## Check that by default .dynsym will be linked to .dynstr -## if the latter exists. - -# RUN: yaml2obj --docnum=2 %s -o %t2 -# RUN: llvm-readobj %t2 -S | FileCheck %s --check-prefix=CASE2 - -# CASE2: .dynsym -# CASE2: Link: -# CASE2-SAME: 2 -# CASE2: Index: 2 -# CASE2-NEXT: Name: .dynstr - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN -Sections: - - Name: .dynsym - Type: SHT_DYNSYM - - Name: .dynstr - Type: SHT_STRTAB - -## Even if .dynstr exists, we can explicitly link .dynsym -## to another section. 
- -# RUN: yaml2obj --docnum=3 %s -o %t3 -# RUN: llvm-readobj %t3 -S | FileCheck %s --check-prefix=CASE3 - -# CASE3: .dynsym -# CASE3: Link: -# CASE3-SAME: 3 -# CASE3: Index: 3 -# CASE3-NEXT: Name: .foo - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN -Sections: - - Name: .dynsym - Type: SHT_DYNSYM - Link: 3 - - Name: .dynstr - Type: SHT_STRTAB - - Name: .foo - Type: SHT_PROGBITS - -## Check we can use a section name as a Link value for .dynsym. - -# RUN: yaml2obj --docnum=4 %s -o %t4 -# RUN: llvm-readobj %t4 -S | FileCheck %s --check-prefix=CASE4 - -# CASE4: .dynsym -# CASE4: Link: -# CASE4-SAME: 2 -# CASE4: Index: 2 -# CASE4-NEXT: Name: .foo - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN -Sections: - - Name: .dynsym - Type: SHT_DYNSYM - Link: .foo - - Name: .foo - Type: SHT_PROGBITS From 9dbffe4a28f46a9fd5dab36bf443a090588ac6de Mon Sep 17 00:00:00 2001 From: Tres Popp Date: Thu, 29 Oct 2020 11:01:05 +0100 Subject: [PATCH 13/16] [mlir] Reorder shape assuming bufferization. The previous ordering continued to use the original assuming after replacing it which is not allowed. Now, inline the region from the old into the new before the replacement. 
Differential Revision: https://reviews.llvm.org/D90375 --- .../lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp b/mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp index 61e862836a7331..041b54b3bd1445 100644 --- a/mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp +++ b/mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp @@ -32,10 +32,9 @@ class ConvertAssumingOpTypes : public OpConversionPattern<AssumingOp> { auto newAssumingOp = rewriter.create<AssumingOp>(op.getLoc(), newResultTypes, op.witness()); - - rewriter.replaceOp(op, newAssumingOp.getResults()); rewriter.inlineRegionBefore(op.doRegion(), newAssumingOp.doRegion(), newAssumingOp.doRegion().end()); + rewriter.replaceOp(op, newAssumingOp.getResults()); return success(); } From 79c5b4c546bb528bd51003a10f0a5aecab74ffbe Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Thu, 29 Oct 2020 17:34:58 +0700 Subject: [PATCH 14/16] [NFC] Add some new util functions to ICmpInst --- llvm/include/llvm/IR/Instructions.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 7b41dced564d4c..ea1a60d52c877a 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -1290,6 +1290,30 @@ class ICmpInst: public CmpInst { return !isEquality(P); } + /// Return true if the predicate is SGT or UGT. + /// + static bool isGT(Predicate P) { + return P == ICMP_SGT || P == ICMP_UGT; + } + + /// Return true if the predicate is SLT or ULT. + /// + static bool isLT(Predicate P) { + return P == ICMP_SLT || P == ICMP_ULT; + } + + /// Return true if the predicate is SGE or UGE. + /// + static bool isGE(Predicate P) { + return P == ICMP_SGE || P == ICMP_UGE; + } + + /// Return true if the predicate is SLE or ULE.
+ /// + static bool isLE(Predicate P) { + return P == ICMP_SLE || P == ICMP_ULE; + } + /// Exchange the two operands to this instruction in such a way that it does /// not modify the semantics of the instruction. The predicate value may be /// changed to retain the same result if the predicate is order dependent From 88d6421e4c439582ca4ca5e3744f8cc4498bb48e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 29 Oct 2020 09:30:37 +0000 Subject: [PATCH 15/16] [SCEV] Match 'zext (trunc A to iB) to iY' as URem. URem operations with constant power-of-2 second operands are modeled as such. This patch on its own has very little impact (e.g. no changes in CodeGen for MultiSource/SPEC2000/SPEC2006 on X86 -O3 -flto), but I'll soon post follow-up patches that make use of it to more accurately determine the trip multiple. Reviewed By: mkazantsev Differential Revision: https://reviews.llvm.org/D89821 --- llvm/lib/Analysis/ScalarEvolution.cpp | 15 ++++- .../Analysis/ScalarEvolutionTest.cpp | 58 +++++++++++++++++++ 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index ec39180cec3956..2911b2e424af38 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -12850,11 +12850,24 @@ void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const { } // Match the mathematical pattern A - (A / B) * B, where A and B can be -// arbitrary expressions. +// arbitrary expressions. Also match zext (trunc A to iB) to iY, which is used +// for URem with constant power-of-2 second operands. // It's not always easy, as A and B can be folded (imagine A is X / 2, and B is // 4, A / B becomes X / 8). bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS) { + // Try to match 'zext (trunc A to iB) to iY', which is used + // for URem with constant power-of-2 second operands. 
Make sure the size of + // the operand A matches the size of the whole expressions. + if (const auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(Expr)) + if (const auto *Trunc = dyn_cast<SCEVTruncateExpr>(ZExt->getOperand(0))) { + LHS = Trunc->getOperand(); + if (LHS->getType() != Expr->getType()) + LHS = getZeroExtendExpr(LHS, Expr->getType()); + RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1) + << getTypeSizeInBits(Trunc->getType())); + return true; + } const auto *Add = dyn_cast<SCEVAddExpr>(Expr); if (Add == nullptr || Add->getNumOperands() != 2) return false; diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp index 909a140296ac62..7fa588566c55cc 100644 --- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp +++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp @@ -63,6 +63,11 @@ class ScalarEvolutionsTest : public testing::Test { const SCEV *RHS) { return SE.computeConstantDifference(LHS, RHS); } + + static bool matchURem(ScalarEvolution &SE, const SCEV *Expr, const SCEV *&LHS, + const SCEV *&RHS) { + return SE.matchURem(Expr, LHS, RHS); + } }; TEST_F(ScalarEvolutionsTest, SCEVUnknownRAUW) { @@ -1363,4 +1368,57 @@ TEST_F(ScalarEvolutionsTest, ProveImplicationViaNarrowing) { }); } +TEST_F(ScalarEvolutionsTest, MatchURem) { + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr<Module> M = parseAssemblyString( + "target datalayout = \"e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128\" " + " " + "define void @test(i32 %a, i32 %b, i16 %c, i64 %d) {" + "entry: " + " %rem1 = urem i32 %a, 2" + " %rem2 = urem i32 %a, 5" + " %rem3 = urem i32 %a, %b" + " %c.ext = zext i16 %c to i32" + " %rem4 = urem i32 %c.ext, 2" + " %ext = zext i32 %rem4 to i64" + " %rem5 = urem i64 %d, 17179869184" + " ret void " + "} ", + Err, C); + + assert(M && "Could not parse module?"); + assert(!verifyModule(*M) && "Must have been well formed!"); + + runWithSE(*M, "test", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + for (auto *N : {"rem1", "rem2", "rem3", "rem5"}) { + auto *URemI
= getInstructionByName(F, N); + auto *S = SE.getSCEV(URemI); + const SCEV *LHS, *RHS; + EXPECT_TRUE(matchURem(SE, S, LHS, RHS)); + EXPECT_EQ(LHS, SE.getSCEV(URemI->getOperand(0))); + EXPECT_EQ(RHS, SE.getSCEV(URemI->getOperand(1))); + EXPECT_EQ(LHS->getType(), S->getType()); + EXPECT_EQ(RHS->getType(), S->getType()); + } + + // Check the case where the urem operand is zero-extended. Make sure the + // match results are extended to the size of the input expression. + auto *Ext = getInstructionByName(F, "ext"); + auto *URem1 = getInstructionByName(F, "rem4"); + auto *S = SE.getSCEV(Ext); + const SCEV *LHS, *RHS; + EXPECT_TRUE(matchURem(SE, S, LHS, RHS)); + EXPECT_NE(LHS, SE.getSCEV(URem1->getOperand(0))); + // RHS and URem1->getOperand(1) have different widths, so compare the + // integer values. + EXPECT_EQ(cast<SCEVConstant>(RHS)->getValue()->getZExtValue(), + cast<SCEVConstant>(SE.getSCEV(URem1->getOperand(1))) + ->getValue() + ->getZExtValue()); + EXPECT_EQ(LHS->getType(), S->getType()); + EXPECT_EQ(RHS->getType(), S->getType()); + }); +} + } // end namespace llvm From 930a8c60b60805567e3cc0c7958be3ceeafd01f9 Mon Sep 17 00:00:00 2001 From: Alok Kumar Sharma Date: Thu, 29 Oct 2020 16:08:51 +0530 Subject: [PATCH 16/16] [DebugInfo] [NFCI] Adding a missed out line in support for DW_TAG_generic_subrange. This commit adds a missed out line in earlier commit for DW_TAG_generic_subrange. Previous commit ID: a6dd01afa3d5902203d04a72e0b478078f796a35 Differential Revision: https://reviews.llvm.org/D89218 Thanks markus for pointing this out.
--- llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index f2560982d1bfca..9f111dc30f7602 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -543,6 +543,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, break; case dwarf::DW_OP_consts: assert(!isRegisterLocation()); + emitOp(dwarf::DW_OP_consts); emitSigned(Op->getArg(0)); break; case dwarf::DW_OP_LLVM_convert: {