From 5627ae6c507d62ef52d30fe80a0120f2ee033123 Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Fri, 9 Oct 2020 15:17:26 +0200
Subject: [PATCH 01/16] [clangd] Support CodeActionParams.only

Differential Revision: https://reviews.llvm.org/D89126
---
 clang-tools-extra/clangd/ClangdLSPServer.cpp  | 28 +++++++-----
 clang-tools-extra/clangd/Protocol.cpp         |  5 ++-
 clang-tools-extra/clangd/Protocol.h           | 13 +++++-
 .../clangd/test/code-action-request.test      | 43 ++++++++++++++++++-
 4 files changed, 76 insertions(+), 13 deletions(-)
diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp
index 99c2465a579c0..3164b6cbfb146 100644
--- a/clang-tools-extra/clangd/ClangdLSPServer.cpp
+++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp
@@ -993,12 +993,24 @@ void ClangdLSPServer::onCodeAction(const CodeActionParams &Params,
   if (!Code)
     return Reply(llvm::make_error<LSPError>(
         "onCodeAction called for non-added file", ErrorCode::InvalidParams));
+
+  // Checks whether a particular CodeActionKind is included in the response.
+  // An empty `only` list places no restriction on the kinds returned.
+  auto KindAllowed = [Only(Params.context.only)](llvm::StringRef Kind) {
+    return Only.empty() || llvm::any_of(Only, [&](llvm::StringRef Base) {
+      llvm::StringRef Rest = Kind; // Copy: consume_front() mutates in place.
+      return Rest.consume_front(Base) && (Rest.empty() || Rest.startswith("."));
+    });
+  };
+
   // We provide a code action for Fixes on the specified diagnostics.
   std::vector<CodeAction> FixIts;
-  for (const Diagnostic &D : Params.context.diagnostics) {
-    for (auto &F : getFixes(File.file(), D)) {
-      FixIts.push_back(toCodeAction(F, Params.textDocument.uri));
-      FixIts.back().diagnostics = {D};
+  if (KindAllowed(CodeAction::QUICKFIX_KIND)) {
+    for (const Diagnostic &D : Params.context.diagnostics) {
+      for (auto &F : getFixes(File.file(), D)) {
+        FixIts.push_back(toCodeAction(F, Params.textDocument.uri));
+        FixIts.back().diagnostics = {D};
+      }
     }
   }
 
@@ -1038,14 +1050,10 @@ void ClangdLSPServer::onCodeAction(const CodeActionParams &Params,
         }
         return Reply(llvm::json::Array(Commands));
       };
-
   Server->enumerateTweaks(
       File.file(), Params.range,
-      [&](const Tweak &T) {
-        if (!Opts.TweakFilter(T))
-          return false;
-        // FIXME: also consider CodeActionContext.only
-        return true;
+      [this, KindAllowed(std::move(KindAllowed))](const Tweak &T) {
+        return Opts.TweakFilter(T) && KindAllowed(T.kind());
       },
       std::move(ConsumeActions));
 }
diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp
index 0103a06ff9510..d11307d04cee9 100644
--- a/clang-tools-extra/clangd/Protocol.cpp
+++ b/clang-tools-extra/clangd/Protocol.cpp
@@ -599,7 +599,10 @@ llvm::json::Value toJSON(const PublishDiagnosticsParams &PDP) {
 bool fromJSON(const llvm::json::Value &Params, CodeActionContext &R,
               llvm::json::Path P) {
   llvm::json::ObjectMapper O(Params, P);
-  return O && O.map("diagnostics", R.diagnostics);
+  if (!O || !O.map("diagnostics", R.diagnostics))
+    return false;
+  O.map("only", R.only);
+  return true;
 }
 
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Diagnostic &D) {
diff --git a/clang-tools-extra/clangd/Protocol.h b/clang-tools-extra/clangd/Protocol.h
index 165a4a89e1cd0..f846accbdde7c 100644
--- a/clang-tools-extra/clangd/Protocol.h
+++ b/clang-tools-extra/clangd/Protocol.h
@@ -863,8 +863,19 @@ struct PublishDiagnosticsParams {
 llvm::json::Value toJSON(const PublishDiagnosticsParams &);
 
 struct CodeActionContext {
-  /// An array of diagnostics.
+  /// An array of diagnostics known on the client side overlapping the range
+  /// provided to the `textDocument/codeAction` request. They are provided so
+  /// that the server knows which errors are currently presented to the user for
+  /// the given range. There is no guarantee that these accurately reflect the
+  /// error state of the resource. The primary parameter to compute code actions
+  /// is the provided range.
   std::vector<Diagnostic> diagnostics;
+
+  /// Requested kind of actions to return.
+  ///
+  /// Actions not of this kind are filtered out by the client before being
+  /// shown. So servers can omit computing them.
+  std::vector<std::string> only;
 };
 bool fromJSON(const llvm::json::Value &, CodeActionContext &, llvm::json::Path);
 
diff --git a/clang-tools-extra/clangd/test/code-action-request.test b/clang-tools-extra/clangd/test/code-action-request.test
index 78e90ce6c4239..f16f77989b473 100644
--- a/clang-tools-extra/clangd/test/code-action-request.test
+++ b/clang-tools-extra/clangd/test/code-action-request.test
@@ -51,6 +51,47 @@
 # CHECK-NEXT:    }
 # CHECK-NEXT:  ]
 ---
+{
+  "jsonrpc": "2.0",
+  "id": 2,
+  "method": "textDocument/codeAction",
+  "params": {
+    "textDocument": { "uri": "test:///main.cpp" },
+        "range": {
+            "start": {"line": 0, "character": 0},
+            "end": {"line": 0, "character": 4}
+        },
+        "context": {
+            "diagnostics": [],
+            "only": ["quickfix"]
+        }
+    }
+}
+#      CHECK:  "id": 2,
+# CHECK-NEXT:  "jsonrpc": "2.0",
+# CHECK-NEXT:  "result": []
+---
+{
+  "jsonrpc": "2.0",
+  "id": 3,
+  "method": "textDocument/codeAction",
+  "params": {
+    "textDocument": { "uri": "test:///main.cpp" },
+        "range": {
+            "start": {"line": 0, "character": 0},
+            "end": {"line": 0, "character": 4}
+        },
+        "context": {
+            "diagnostics": [],
+            "only": ["refactor"]
+        }
+    }
+}
+#      CHECK:  "id": 3,
+# CHECK-NEXT:  "jsonrpc": "2.0",
+# CHECK-NEXT:  "result": [
+# CHECK-NEXT:    {
+---
 {"jsonrpc":"2.0","id":4,"method":"workspace/executeCommand","params":{"command":"clangd.applyTweak","arguments":[{"file":"test:///main.cpp","selection":{"end":{"character":4,"line":0},"start":{"character":0,"line":0}},"tweakID":"ExpandAutoType"}]}}
 #      CHECK:    "newText": "int",
 # CHECK-NEXT:    "range": {
@@ -64,7 +105,7 @@
 # CHECK-NEXT:      }
 # CHECK-NEXT:    }
 ---
-{"jsonrpc":"2.0","id":4,"method":"shutdown"}
+{"jsonrpc":"2.0","id":5,"method":"shutdown"}
 ---
 {"jsonrpc":"2.0","method":"exit"}
 ---

From a5b2e795c3b26fae16d774a48694e7419ad652f1 Mon Sep 17 00:00:00 2001
From: Max Kazantsev <mkazantsev@azul.com>
Date: Thu, 29 Oct 2020 15:27:21 +0700
Subject: [PATCH 02/16] [NFC][SCEV] Refactor monotonic predicate checks to
 return enums instead of bools

This patch gets rid of the output parameter, which is not needed by most users,
and prepares this API for further refactoring.
---
 llvm/include/llvm/Analysis/ScalarEvolution.h  | 23 +++++---
 llvm/lib/Analysis/ScalarEvolution.cpp         | 57 ++++++++++---------
 .../lib/Transforms/Scalar/LoopPredication.cpp |  3 +-
 llvm/lib/Transforms/Utils/LoopPeel.cpp        |  4 +-
 4 files changed, 46 insertions(+), 41 deletions(-)

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 017efb994f57b..37f4ad43639f9 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -939,17 +939,23 @@ class ScalarEvolution {
   bool isKnownOnEveryIteration(ICmpInst::Predicate Pred,
                                const SCEVAddRecExpr *LHS, const SCEV *RHS);
 
-  /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X"
-  /// is monotonically increasing or decreasing.  In the former case set
-  /// `Increasing` to true and in the latter case set `Increasing` to false.
-  ///
   /// A predicate is said to be monotonically increasing if may go from being
   /// false to being true as the loop iterates, but never the other way
   /// around.  A predicate is said to be monotonically decreasing if may go
   /// from being true to being false as the loop iterates, but never the other
   /// way around.
-  bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred,
-                            bool &Increasing);
+  enum MonotonicPredicateType {
+    MonotonicallyIncreasing,
+    MonotonicallyDecreasing
+  };
+
+  /// If, for all loop invariant X, the predicate "LHS `Pred` X" is
+  /// monotonically increasing or decreasing, returns
+  /// Some(MonotonicallyIncreasing) and Some(MonotonicallyDecreasing)
+  /// respectively. If we could not prove either of these facts, returns None.
+  Optional<MonotonicPredicateType>
+  getMonotonicPredicateType(const SCEVAddRecExpr *LHS,
+                            ICmpInst::Predicate Pred);
 
   /// Return true if the result of the predicate LHS `Pred` RHS is loop
   /// invariant with respect to L.  Set InvariantPred, InvariantLHS and
@@ -1881,8 +1887,9 @@ class ScalarEvolution {
   /// Try to prove NSW or NUW on \p AR relying on ConstantRange manipulation.
   SCEV::NoWrapFlags proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR);
 
-  bool isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
-                                ICmpInst::Predicate Pred, bool &Increasing);
+  Optional<MonotonicPredicateType>
+  getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS,
+                                ICmpInst::Predicate Pred);
 
   /// Return SCEV no-wrap flags that can be proven based on reasoning about
   /// how poison produced from no-wrap flags on this value (e.g. a nuw add)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index bca8e28849e76..8bc2595d52612 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9236,31 +9236,30 @@ bool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred,
          isLoopBackedgeGuardedByCond(L, Pred, LHS->getPostIncExpr(*this), RHS);
 }
 
-bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS,
-                                           ICmpInst::Predicate Pred,
-                                           bool &Increasing) {
-  bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing);
+Optional<ScalarEvolution::MonotonicPredicateType>
+ScalarEvolution::getMonotonicPredicateType(const SCEVAddRecExpr *LHS,
+                                           ICmpInst::Predicate Pred) {
+  auto Result = getMonotonicPredicateTypeImpl(LHS, Pred);
 
 #ifndef NDEBUG
   // Verify an invariant: inverting the predicate should turn a monotonically
   // increasing change to a monotonically decreasing one, and vice versa.
-  bool IncreasingSwapped;
-  bool ResultSwapped = isMonotonicPredicateImpl(
-      LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped);
+  if (Result) {
+    auto ResultSwapped =
+        getMonotonicPredicateTypeImpl(LHS, ICmpInst::getSwappedPredicate(Pred));
 
-  assert(Result == ResultSwapped && "should be able to analyze both!");
-  if (ResultSwapped)
-    assert(Increasing == !IncreasingSwapped &&
+    assert(ResultSwapped.hasValue() && "should be able to analyze both!");
+    assert(ResultSwapped.getValue() != Result.getValue() &&
            "monotonicity should flip as we flip the predicate");
+  }
 #endif
 
   return Result;
 }
 
-bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
-                                               ICmpInst::Predicate Pred,
-                                               bool &Increasing) {
-
+Optional<ScalarEvolution::MonotonicPredicateType>
+ScalarEvolution::getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS,
+                                               ICmpInst::Predicate Pred) {
   // A zero step value for LHS means the induction variable is essentially a
   // loop invariant value. We don't really depend on the predicate actually
   // flipping from false to true (for increasing predicates, and the other way
@@ -9273,38 +9272,41 @@ bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
 
   switch (Pred) {
   default:
-    return false; // Conservative answer
+    return None; // Conservative answer
 
   case ICmpInst::ICMP_UGT:
   case ICmpInst::ICMP_UGE:
   case ICmpInst::ICMP_ULT:
   case ICmpInst::ICMP_ULE:
     if (!LHS->hasNoUnsignedWrap())
-      return false;
+      return None;
 
-    Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE;
-    return true;
+    return Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE
+               ? MonotonicallyIncreasing
+               : MonotonicallyDecreasing;
 
   case ICmpInst::ICMP_SGT:
   case ICmpInst::ICMP_SGE:
   case ICmpInst::ICMP_SLT:
   case ICmpInst::ICMP_SLE: {
     if (!LHS->hasNoSignedWrap())
-      return false;
+      return None;
 
     const SCEV *Step = LHS->getStepRecurrence(*this);
 
     if (isKnownNonNegative(Step)) {
-      Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE;
-      return true;
+      return Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE
+                 ? MonotonicallyIncreasing
+                 : MonotonicallyDecreasing;
     }
 
     if (isKnownNonPositive(Step)) {
-      Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE;
-      return true;
+      return Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE
+                 ? MonotonicallyIncreasing
+                 : MonotonicallyDecreasing;
     }
 
-    return false;
+    return None;
   }
 
   }
@@ -9330,10 +9332,9 @@ bool ScalarEvolution::isLoopInvariantPredicate(
   if (!ArLHS || ArLHS->getLoop() != L)
     return false;
 
-  bool Increasing;
-  if (!isMonotonicPredicate(ArLHS, Pred, Increasing))
+  auto MonotonicType = getMonotonicPredicateType(ArLHS, Pred);
+  if (!MonotonicType)
     return false;
-
   // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to
   // true as the loop iterates, and the backedge is control dependent on
   // "ArLHS `Pred` RHS" == true then we can reason as follows:
@@ -9351,7 +9352,7 @@ bool ScalarEvolution::isLoopInvariantPredicate(
   //
   // A similar reasoning applies for a monotonically decreasing predicate, by
   // replacing true with false and false with true in the above two bullets.
-
+  bool Increasing = *MonotonicType == ScalarEvolution::MonotonicallyIncreasing;
   auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred);
 
   if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS))
diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index 27df56f6ea96c..3ca5b985c365e 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -454,8 +454,7 @@ static bool isSafeToTruncateWideIVType(const DataLayout &DL,
   // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
   // IV wraps around, and the truncation of the IV would lose the range of
   // iterations between 2^32 and 2^64.
-  bool Increasing;
-  if (!SE.isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing))
+  if (!SE.getMonotonicPredicateType(LatchCheck.IV, LatchCheck.Pred))
     return false;
   // The active bits should be less than the bits in the RangeCheckType. This
   // guarantees that truncating the latch check to RangeCheckType is a safe
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index a08b5781ac3e0..27a61a2078684 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -227,11 +227,9 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
     // consider AddRecs of the loop we are trying to peel.
     if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
       continue;
-    bool Increasing;
     if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
-        !SE.isMonotonicPredicate(LeftAR, Pred, Increasing))
+        !SE.getMonotonicPredicateType(LeftAR, Pred))
       continue;
-    (void)Increasing;
 
     // Check if extending the current DesiredPeelCount lets us evaluate Pred
     // or !Pred in the loop body statically.

From 1d773a4ff05d0dcfab112719b82b2bd5d0c93ff5 Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Tue, 27 Oct 2020 10:58:34 +0100
Subject: [PATCH 03/16] [CMake] Support inter-proto dependencies in
 generate_protos.

Differential Revision: https://reviews.llvm.org/D90215
---
 .../clangd/index/remote/CMakeLists.txt        | 11 +++------
 llvm/cmake/modules/FindGRPC.cmake             | 23 ++++++++++++++++++-
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/clang-tools-extra/clangd/index/remote/CMakeLists.txt b/clang-tools-extra/clangd/index/remote/CMakeLists.txt
index 554288df0bcbb..a07dd994b5a37 100644
--- a/clang-tools-extra/clangd/index/remote/CMakeLists.txt
+++ b/clang-tools-extra/clangd/index/remote/CMakeLists.txt
@@ -1,13 +1,8 @@
 if (CLANGD_ENABLE_REMOTE)
-  generate_protos(RemoteIndexServiceProto "Service.proto" GRPC)
   generate_protos(RemoteIndexProto "Index.proto")
-  # Ensure dependency headers are generated before dependent protos are built.
-  # FIXME: this should be encapsulated in generate_protos.
-  # FIXME: CMake docs say OBJECT_DEPENDS isn't needed, but I can't get the
-  #        recommended add_dependencies() approach to work.
-  set_source_files_properties(
-    ${CMAKE_CURRENT_BINARY_DIR}/Service.pb.cc
-    PROPERTIES OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Index.pb.h)
+  generate_protos(RemoteIndexServiceProto "Service.proto"
+    DEPENDS "Index.proto"
+    GRPC)
   include_directories(${CMAKE_CURRENT_BINARY_DIR})
   include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../)
 
diff --git a/llvm/cmake/modules/FindGRPC.cmake b/llvm/cmake/modules/FindGRPC.cmake
index 9e837a80661c2..f2c9bee38c93b 100644
--- a/llvm/cmake/modules/FindGRPC.cmake
+++ b/llvm/cmake/modules/FindGRPC.cmake
@@ -84,8 +84,10 @@ endif()
 # Proto headers are generated in ${CMAKE_CURRENT_BINARY_DIR}.
 # Libraries that use these headers should adjust the include path.
 # If the "GRPC" argument is given, services are also generated.
+# The DEPENDS list should name *.proto source files that are imported.
+# They may be relative to the source dir or absolute (for generated protos).
 function(generate_protos LibraryName ProtoFile)
-  cmake_parse_arguments(PARSE_ARGV 2 PROTO "GRPC" "" "")
+  cmake_parse_arguments(PARSE_ARGV 2 PROTO "GRPC" "" "DEPENDS")
   get_filename_component(ProtoSourceAbsolutePath "${CMAKE_CURRENT_SOURCE_DIR}/${ProtoFile}" ABSOLUTE)
   get_filename_component(ProtoSourcePath ${ProtoSourceAbsolutePath} PATH)
   get_filename_component(Basename ${ProtoSourceAbsolutePath} NAME_WLE)
@@ -111,4 +113,23 @@ function(generate_protos LibraryName ProtoFile)
   add_clang_library(${LibraryName} ${GeneratedProtoSource}
     PARTIAL_SOURCES_INTENDED
     LINK_LIBS grpc++ protobuf)
+
+  # Ensure dependency headers are generated before dependent protos are built.
+  # DEPENDS arg is a list of "Foo.proto". While they're logically relative to
+  # the source dir, the generated headers we need are in the binary dir.
+  foreach(ImportedProto IN LISTS PROTO_DEPENDS)
+    # Foo.proto -> Foo.pb.h
+    STRING(REGEX REPLACE "\\.proto$" ".pb.h" ImportedHeader "${ImportedProto}")
+    # Foo.pb.h -> ${CMAKE_CURRENT_BINARY_DIR}/Foo.pb.h
+    get_filename_component(ImportedHeader "${ImportedHeader}"
+      ABSOLUTE BASE_DIR "${CMAKE_CURRENT_BINARY_DIR}")
+    # Compilation of each generated source depends on ${BINARY}/Foo.pb.h.
+    # APPEND so that several DEPENDS entries accumulate instead of overwriting.
+    foreach(Generated IN LISTS GeneratedProtoSource)
+      # FIXME: CMake docs suggest OBJECT_DEPENDS isn't needed, but I can't get
+      #        the recommended add_dependencies() approach to work.
+      set_property(SOURCE "${Generated}"
+        APPEND PROPERTY OBJECT_DEPENDS "${ImportedHeader}")
+    endforeach(Generated)
+  endforeach(ImportedProto)
 endfunction()

From a4b6b1e1c83fdfc5954e0fb631c2e6237236589e Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Thu, 29 Oct 2020 09:13:23 +0000
Subject: [PATCH 04/16] [InterleaveAccess] Recognise Interleave loads through
 binary operations

Instcombine will currently sink identical shuffles through vector binary
operations. This is probably generally useful, but can break up the code
pattern we use to represent an interleaving load group. This patch
reverses that in the InterleaveAccessPass to re-recognise the pattern of
shuffles sunk past binary operations and folds them back if an
interleave group can be created.

Differential Revision: https://reviews.llvm.org/D89489
---
 llvm/lib/CodeGen/InterleavedAccessPass.cpp    |  99 +++++++++---
 llvm/test/CodeGen/AArch64/vldn_shuffle.ll     |  84 +++-------
 llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll    | 147 ++++++------------
 .../AArch64/binopshuffles.ll                  | 101 +++++++-----
 4 files changed, 209 insertions(+), 222 deletions(-)

diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index c4d83547a06c6..73771609a7923 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -66,6 +66,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <utility>
 
@@ -118,6 +119,14 @@ class InterleavedAccess : public FunctionPass {
   /// replacements are also performed.
   bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
                           ArrayRef<ShuffleVectorInst *> Shuffles);
+
+  /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them
+  /// to binop(shuffle(x), shuffle(y)) to allow the formation of an
+  /// interleaving load. Any newly created shuffles that operate on \p LI will
+  /// be added to \p Shuffles.
+  bool tryReplaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
+                               SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
+                               LoadInst *LI);
 };
 
 } // end anonymous namespace.
@@ -283,61 +292,85 @@ bool InterleavedAccess::lowerInterleavedLoad(
   if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType()))
     return false;
 
+  // Check if all users of this load are shufflevectors. If we encounter any
+  // users that are extractelement instructions or binary operators, we save
+  // them to later check if they can be modified to extract from one of the
+  // shufflevectors instead of the load.
+
   SmallVector<ShuffleVectorInst *, 4> Shuffles;
   SmallVector<ExtractElementInst *, 4> Extracts;
+  // BinOpShuffles need to be handled a single time in case both operands of the
+  // binop are the same load.
+  SmallSetVector<ShuffleVectorInst *, 4> BinOpShuffles;
 
-  // Check if all users of this load are shufflevectors. If we encounter any
-  // users that are extractelement instructions, we save them to later check if
-  // they can be modifed to extract from one of the shufflevectors instead of
-  // the load.
-  for (auto UI = LI->user_begin(), E = LI->user_end(); UI != E; UI++) {
-    auto *Extract = dyn_cast<ExtractElementInst>(*UI);
+  for (auto *User : LI->users()) {
+    auto *Extract = dyn_cast<ExtractElementInst>(User);
     if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
       Extracts.push_back(Extract);
       continue;
     }
-    ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(*UI);
+    auto *BI = dyn_cast<BinaryOperator>(User);
+    if (BI && BI->hasOneUse()) {
+      if (auto *SVI = dyn_cast<ShuffleVectorInst>(*BI->user_begin())) {
+        BinOpShuffles.insert(SVI);
+        continue;
+      }
+    }
+    auto *SVI = dyn_cast<ShuffleVectorInst>(User);
     if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
       return false;
 
     Shuffles.push_back(SVI);
   }
 
-  if (Shuffles.empty())
+  if (Shuffles.empty() && BinOpShuffles.empty())
     return false;
 
   unsigned Factor, Index;
 
   unsigned NumLoadElements =
       cast<FixedVectorType>(LI->getType())->getNumElements();
+  auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0];
   // Check if the first shufflevector is DE-interleave shuffle.
-  if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index,
-                          MaxFactor, NumLoadElements))
+  if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor,
+                          NumLoadElements))
     return false;
 
   // Holds the corresponding index for each DE-interleave shuffle.
   SmallVector<unsigned, 4> Indices;
-  Indices.push_back(Index);
 
-  Type *VecTy = Shuffles[0]->getType();
+  Type *VecTy = FirstSVI->getType();
 
   // Check if other shufflevectors are also DE-interleaved of the same type
   // and factor as the first shufflevector.
-  for (unsigned i = 1; i < Shuffles.size(); i++) {
-    if (Shuffles[i]->getType() != VecTy)
+  for (auto *Shuffle : Shuffles) {
+    if (Shuffle->getType() != VecTy)
       return false;
-
-    if (!isDeInterleaveMaskOfFactor(Shuffles[i]->getShuffleMask(), Factor,
+    if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
                                     Index))
       return false;
 
     Indices.push_back(Index);
   }
+  for (auto *Shuffle : BinOpShuffles) {
+    // Only handle shuffle(binop, undef): the binop is read as operand 0 below.
+    auto *BI = dyn_cast<BinaryOperator>(Shuffle->getOperand(0));
+    if (!BI || !isa<UndefValue>(Shuffle->getOperand(1)) ||
+        Shuffle->getType() != VecTy ||
+        !isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor, Index))
+      return false;
+    if (BI->getOperand(0) == LI)
+      Indices.push_back(Index);
+    if (BI->getOperand(1) == LI)
+      Indices.push_back(Index);
+  }
 
   // Try and modify users of the load that are extractelement instructions to
   // use the shufflevector instructions instead of the load.
   if (!tryReplaceExtracts(Extracts, Shuffles))
     return false;
+  if (!tryReplaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI))
+    return false;
 
   LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
 
@@ -352,6 +385,34 @@ bool InterleavedAccess::lowerInterleavedLoad(
   return true;
 }
 
+bool InterleavedAccess::tryReplaceBinOpShuffles(
+    ArrayRef<ShuffleVectorInst *> BinOpShuffles,
+    SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) {
+  for (auto *SVI : BinOpShuffles) {
+    BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
+    ArrayRef<int> Mask = SVI->getShuffleMask();
+
+    auto *NewSVI1 = new ShuffleVectorInst(
+        BI->getOperand(0), UndefValue::get(BI->getOperand(0)->getType()), Mask,
+        SVI->getName(), SVI);
+    auto *NewSVI2 = new ShuffleVectorInst(
+        BI->getOperand(1), UndefValue::get(BI->getOperand(1)->getType()), Mask,
+        SVI->getName(), SVI);
+    Value *NewBI = BinaryOperator::Create(BI->getOpcode(), NewSVI1, NewSVI2,
+                                          BI->getName(), SVI);
+    SVI->replaceAllUsesWith(NewBI);
+    LLVM_DEBUG(dbgs() << "  Replaced: " << *BI << "\n    And   : " << *SVI
+                      << "\n  With    : " << *NewSVI1 << "\n    And   : "
+                      << *NewSVI2 << "\n    And   : " << *NewBI << "\n");
+    RecursivelyDeleteTriviallyDeadInstructions(SVI);
+    if (NewSVI1->getOperand(0) == LI)
+      Shuffles.push_back(NewSVI1);
+    if (NewSVI2->getOperand(0) == LI)
+      Shuffles.push_back(NewSVI2);
+  }
+  return true;
+}
+
 bool InterleavedAccess::tryReplaceExtracts(
     ArrayRef<ExtractElementInst *> Extracts,
     ArrayRef<ShuffleVectorInst *> Shuffles) {
@@ -421,7 +482,7 @@ bool InterleavedAccess::lowerInterleavedStore(
   if (!SI->isSimple())
     return false;
 
-  ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
+  auto *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
   if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
     return false;
 
@@ -461,10 +522,10 @@ bool InterleavedAccess::runOnFunction(Function &F) {
   bool Changed = false;
 
   for (auto &I : instructions(F)) {
-    if (LoadInst *LI = dyn_cast<LoadInst>(&I))
+    if (auto *LI = dyn_cast<LoadInst>(&I))
       Changed |= lowerInterleavedLoad(LI, DeadInsts);
 
-    if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+    if (auto *SI = dyn_cast<StoreInst>(&I))
       Changed |= lowerInterleavedStore(SI, DeadInsts);
   }
 
diff --git a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll
index 4c501e5403f97..99100a2ab4c82 100644
--- a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll
@@ -7,13 +7,10 @@ define void @vld2(float* nocapture readonly %pSrc, float* noalias nocapture %pDs
 ; CHECK-NEXT:    mov x8, xzr
 ; CHECK-NEXT:  .LBB0_1: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldp q1, q0, [x0], #32
-; CHECK-NEXT:    fmul v0.4s, v0.4s, v0.4s
-; CHECK-NEXT:    fmul v1.4s, v1.4s, v1.4s
-; CHECK-NEXT:    uzp1 v2.4s, v1.4s, v0.4s
-; CHECK-NEXT:    uzp2 v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    fadd v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    str q0, [x1, x8]
+; CHECK-NEXT:    ld2 { v0.4s, v1.4s }, [x0], #32
+; CHECK-NEXT:    fmul v2.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v2.4s, v1.4s, v1.4s
+; CHECK-NEXT:    str q2, [x1, x8]
 ; CHECK-NEXT:    add x8, x8, #16 // =16
 ; CHECK-NEXT:    cmp x8, #1, lsl #12 // =4096
 ; CHECK-NEXT:    b.ne .LBB0_1
@@ -50,27 +47,11 @@ define void @vld3(float* nocapture readonly %pSrc, float* noalias nocapture %pDs
 ; CHECK-NEXT:    mov x8, xzr
 ; CHECK-NEXT:  .LBB1_1: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldp q2, q0, [x0, #16]
-; CHECK-NEXT:    ldr q1, [x0], #48
-; CHECK-NEXT:    fmul v2.4s, v2.4s, v2.4s
-; CHECK-NEXT:    fmul v1.4s, v1.4s, v1.4s
-; CHECK-NEXT:    ext v3.16b, v2.16b, v1.16b, #8
-; CHECK-NEXT:    fmul v0.4s, v0.4s, v0.4s
-; CHECK-NEXT:    ext v5.16b, v1.16b, v3.16b, #12
-; CHECK-NEXT:    ext v3.16b, v3.16b, v2.16b, #4
-; CHECK-NEXT:    dup v4.4s, v0.s[1]
-; CHECK-NEXT:    mov v2.s[0], v1.s[2]
-; CHECK-NEXT:    dup v1.4s, v0.s[2]
-; CHECK-NEXT:    mov v0.s[2], v0.s[0]
-; CHECK-NEXT:    ext v5.16b, v5.16b, v5.16b, #12
-; CHECK-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT:    ext v0.16b, v0.16b, v2.16b, #8
-; CHECK-NEXT:    mov v5.s[3], v4.s[3]
-; CHECK-NEXT:    mov v3.s[3], v1.s[3]
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    fadd v1.4s, v3.4s, v5.4s
-; CHECK-NEXT:    fadd v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    str q0, [x1, x8]
+; CHECK-NEXT:    ld3 { v0.4s, v1.4s, v2.4s }, [x0], #48
+; CHECK-NEXT:    fmul v3.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v3.4s, v1.4s, v1.4s
+; CHECK-NEXT:    fmla v3.4s, v2.4s, v2.4s
+; CHECK-NEXT:    str q3, [x1, x8]
 ; CHECK-NEXT:    add x8, x8, #16 // =16
 ; CHECK-NEXT:    cmp x8, #1, lsl #12 // =4096
 ; CHECK-NEXT:    b.ne .LBB1_1
@@ -110,37 +91,15 @@ define void @vld4(float* nocapture readonly %pSrc, float* noalias nocapture %pDs
 ; CHECK-NEXT:    mov x8, xzr
 ; CHECK-NEXT:  .LBB2_1: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldp q2, q3, [x0, #32]
-; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0], #64
 ; CHECK-NEXT:    add x9, x1, x8
 ; CHECK-NEXT:    add x8, x8, #32 // =32
-; CHECK-NEXT:    fmul v3.4s, v3.4s, v3.4s
-; CHECK-NEXT:    fmul v2.4s, v2.4s, v2.4s
-; CHECK-NEXT:    fmul v1.4s, v1.4s, v1.4s
-; CHECK-NEXT:    fmul v0.4s, v0.4s, v0.4s
-; CHECK-NEXT:    zip1 v5.4s, v2.4s, v3.4s
-; CHECK-NEXT:    trn2 v7.4s, v2.4s, v3.4s
-; CHECK-NEXT:    zip1 v4.4s, v0.4s, v1.4s
-; CHECK-NEXT:    trn2 v6.4s, v0.4s, v1.4s
-; CHECK-NEXT:    ext v5.16b, v2.16b, v5.16b, #8
-; CHECK-NEXT:    ext v7.16b, v2.16b, v7.16b, #8
-; CHECK-NEXT:    zip2 v1.4s, v0.4s, v1.4s
-; CHECK-NEXT:    ext v4.16b, v5.16b, v4.16b, #8
-; CHECK-NEXT:    zip2 v5.4s, v2.4s, v3.4s
-; CHECK-NEXT:    ext v0.16b, v6.16b, v0.16b, #8
-; CHECK-NEXT:    ext v6.16b, v7.16b, v6.16b, #8
-; CHECK-NEXT:    mov v2.s[3], v3.s[2]
-; CHECK-NEXT:    ext v0.16b, v5.16b, v0.16b, #8
-; CHECK-NEXT:    ext v3.16b, v4.16b, v4.16b, #8
-; CHECK-NEXT:    ext v4.16b, v6.16b, v6.16b, #8
-; CHECK-NEXT:    ext v1.16b, v2.16b, v1.16b, #8
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    fadd v2.4s, v4.4s, v3.4s
-; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
 ; CHECK-NEXT:    cmp x8, #2, lsl #12 // =8192
-; CHECK-NEXT:    fadd v3.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add x0, x0, #64 // =64
-; CHECK-NEXT:    st2 { v2.4s, v3.4s }, [x9]
+; CHECK-NEXT:    fmul v4.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v4.4s, v1.4s, v1.4s
+; CHECK-NEXT:    fmul v5.4s, v2.4s, v2.4s
+; CHECK-NEXT:    fmla v5.4s, v3.4s, v3.4s
+; CHECK-NEXT:    st2 { v4.4s, v5.4s }, [x9]
 ; CHECK-NEXT:    b.ne .LBB2_1
 ; CHECK-NEXT:  // %bb.2: // %while.end
 ; CHECK-NEXT:    ret
@@ -184,16 +143,13 @@ define void @twosrc(float* nocapture readonly %pSrc, float* nocapture readonly %
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    add x9, x0, x8
 ; CHECK-NEXT:    add x10, x1, x8
-; CHECK-NEXT:    ldp q0, q1, [x9]
-; CHECK-NEXT:    ldp q3, q2, [x10]
+; CHECK-NEXT:    ld2 { v0.4s, v1.4s }, [x9]
+; CHECK-NEXT:    ld2 { v2.4s, v3.4s }, [x10]
 ; CHECK-NEXT:    add x8, x8, #32 // =32
 ; CHECK-NEXT:    cmp x8, #2, lsl #12 // =8192
-; CHECK-NEXT:    fmul v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    fmul v0.4s, v3.4s, v0.4s
-; CHECK-NEXT:    uzp1 v2.4s, v0.4s, v1.4s
-; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    fadd v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    str q0, [x2], #16
+; CHECK-NEXT:    fmul v4.4s, v2.4s, v0.4s
+; CHECK-NEXT:    fmla v4.4s, v1.4s, v3.4s
+; CHECK-NEXT:    str q4, [x2], #16
 ; CHECK-NEXT:    b.ne .LBB3_1
 ; CHECK-NEXT:  // %bb.2: // %while.end
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll b/llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll
index 7c4fef3c71c58..c04243ee57545 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll
@@ -4,94 +4,49 @@
 define void @arm_cmplx_mag_squared_f16(half* nocapture readonly %pSrc, half* nocapture %pDst, i32 %numSamples) {
 ; CHECK-LABEL: arm_cmplx_mag_squared_f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    beq.w .LBB0_9
+; CHECK-NEXT:    beq .LBB0_8
 ; CHECK-NEXT:  @ %bb.1: @ %while.body.preheader
 ; CHECK-NEXT:    cmp r2, #8
-; CHECK-NEXT:    blo.w .LBB0_6
+; CHECK-NEXT:    blo .LBB0_9
 ; CHECK-NEXT:  @ %bb.2: @ %vector.memcheck
 ; CHECK-NEXT:    add.w r3, r0, r2, lsl #2
 ; CHECK-NEXT:    cmp r3, r1
 ; CHECK-NEXT:    itt hi
 ; CHECK-NEXT:    addhi.w r3, r1, r2, lsl #1
 ; CHECK-NEXT:    cmphi r3, r0
-; CHECK-NEXT:    bhi .LBB0_6
+; CHECK-NEXT:    bhi .LBB0_9
 ; CHECK-NEXT:  @ %bb.3: @ %vector.ph
-; CHECK-NEXT:    bic r5, r2, #7
-; CHECK-NEXT:    movs r4, #1
-; CHECK-NEXT:    sub.w r3, r5, #8
-; CHECK-NEXT:    and r8, r2, #7
-; CHECK-NEXT:    add.w r12, r1, r5, lsl #1
-; CHECK-NEXT:    add.w r3, r4, r3, lsr #3
-; CHECK-NEXT:    mov r7, r3
-; CHECK-NEXT:    add.w r3, r0, r5, lsl #2
+; CHECK-NEXT:    bic r4, r2, #7
+; CHECK-NEXT:    movs r3, #1
+; CHECK-NEXT:    sub.w r12, r4, #8
+; CHECK-NEXT:    and r7, r2, #7
+; CHECK-NEXT:    add.w r3, r3, r12, lsr #3
+; CHECK-NEXT:    add.w r12, r1, r4, lsl #1
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    add.w r3, r0, r4, lsl #2
 ; CHECK-NEXT:  .LBB0_4: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vldrh.u16 q0, [r0], #32
-; CHECK-NEXT:    mov lr, r7
+; CHECK-NEXT:    vld20.16 {q0, q1}, [r0]
+; CHECK-NEXT:    mov lr, r5
 ; CHECK-NEXT:    subs.w lr, lr, #1
-; CHECK-NEXT:    vmul.f16 q1, q0, q0
-; CHECK-NEXT:    mov r7, lr
-; CHECK-NEXT:    vmovx.f16 s0, s5
-; CHECK-NEXT:    vmovx.f16 s8, s6
-; CHECK-NEXT:    vmov r4, s0
-; CHECK-NEXT:    vmovx.f16 s0, s4
-; CHECK-NEXT:    vmov r6, s0
-; CHECK-NEXT:    vmov.16 q0[0], r6
-; CHECK-NEXT:    vmov r6, s4
-; CHECK-NEXT:    vmov.16 q0[1], r4
-; CHECK-NEXT:    vmov r4, s8
-; CHECK-NEXT:    vmovx.f16 s8, s7
-; CHECK-NEXT:    vmov.16 q0[2], r4
-; CHECK-NEXT:    vmov r4, s8
-; CHECK-NEXT:    vldrh.u16 q2, [r0, #-16]
-; CHECK-NEXT:    vmov.16 q0[3], r4
-; CHECK-NEXT:    vmul.f16 q2, q2, q2
-; CHECK-NEXT:    vmovx.f16 s12, s8
-; CHECK-NEXT:    vmov r4, s12
-; CHECK-NEXT:    vmovx.f16 s12, s9
-; CHECK-NEXT:    vmov.16 q0[4], r4
-; CHECK-NEXT:    vmov r4, s12
-; CHECK-NEXT:    vmovx.f16 s12, s10
-; CHECK-NEXT:    vmov.16 q0[5], r4
-; CHECK-NEXT:    vmov r4, s12
-; CHECK-NEXT:    vmov.16 q3[0], r6
-; CHECK-NEXT:    vmov.16 q0[6], r4
-; CHECK-NEXT:    vmov r4, s5
-; CHECK-NEXT:    vmov.16 q3[1], r4
-; CHECK-NEXT:    vmov r4, s6
-; CHECK-NEXT:    vmov.16 q3[2], r4
-; CHECK-NEXT:    vmov r4, s7
-; CHECK-NEXT:    vmov.16 q3[3], r4
-; CHECK-NEXT:    vmov r4, s8
-; CHECK-NEXT:    vmov.16 q3[4], r4
-; CHECK-NEXT:    vmov r4, s9
-; CHECK-NEXT:    vmov.16 q3[5], r4
-; CHECK-NEXT:    vmov r4, s10
-; CHECK-NEXT:    vmov.16 q3[6], r4
-; CHECK-NEXT:    vmov r4, s11
-; CHECK-NEXT:    vmovx.f16 s4, s11
-; CHECK-NEXT:    vmov.16 q3[7], r4
-; CHECK-NEXT:    vmov r4, s4
-; CHECK-NEXT:    vmov.16 q0[7], r4
-; CHECK-NEXT:    vadd.f16 q0, q0, q3
-; CHECK-NEXT:    vstrb.8 q0, [r1], #16
+; CHECK-NEXT:    vld21.16 {q0, q1}, [r0]!
+; CHECK-NEXT:    mov r5, lr
+; CHECK-NEXT:    vmul.f16 q2, q0, q0
+; CHECK-NEXT:    vfma.f16 q2, q1, q1
+; CHECK-NEXT:    vstrb.8 q2, [r1], #16
 ; CHECK-NEXT:    bne .LBB0_4
 ; CHECK-NEXT:    b .LBB0_5
 ; CHECK-NEXT:  .LBB0_5: @ %middle.block
-; CHECK-NEXT:    cmp r5, r2
-; CHECK-NEXT:    mov lr, r8
-; CHECK-NEXT:    bne .LBB0_7
-; CHECK-NEXT:    b .LBB0_9
-; CHECK-NEXT:  .LBB0_6:
-; CHECK-NEXT:    mov r3, r0
-; CHECK-NEXT:    mov r12, r1
-; CHECK-NEXT:    mov lr, r2
-; CHECK-NEXT:  .LBB0_7: @ %while.body.preheader26
+; CHECK-NEXT:    cmp r4, r2
+; CHECK-NEXT:    mov lr, r7
+; CHECK-NEXT:    it eq
+; CHECK-NEXT:    popeq {r4, r5, r7, pc}
+; CHECK-NEXT:  .LBB0_6: @ %while.body.preheader26
 ; CHECK-NEXT:    dls lr, lr
-; CHECK-NEXT:  .LBB0_8: @ %while.body
+; CHECK-NEXT:  .LBB0_7: @ %while.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldr.16 s0, [r3]
 ; CHECK-NEXT:    vldr.16 s2, [r3, #2]
@@ -100,9 +55,14 @@ define void @arm_cmplx_mag_squared_f16(half* nocapture readonly %pSrc, half* noc
 ; CHECK-NEXT:    vfma.f16 s0, s2, s2
 ; CHECK-NEXT:    vstr.16 s0, [r12]
 ; CHECK-NEXT:    add.w r12, r12, #2
-; CHECK-NEXT:    le lr, .LBB0_8
-; CHECK-NEXT:  .LBB0_9: @ %while.end
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT:    le lr, .LBB0_7
+; CHECK-NEXT:  .LBB0_8: @ %while.end
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:  .LBB0_9:
+; CHECK-NEXT:    mov r3, r0
+; CHECK-NEXT:    mov r12, r1
+; CHECK-NEXT:    mov lr, r2
+; CHECK-NEXT:    b .LBB0_6
 entry:
   %cmp.not11 = icmp eq i32 %numSamples, 0
   br i1 %cmp.not11, label %while.end, label %while.body.preheader
@@ -195,37 +155,28 @@ define void @arm_cmplx_mag_squared_f32(float* nocapture readonly %pSrc, float* n
 ; CHECK-NEXT:    cmphi r3, r0
 ; CHECK-NEXT:    bhi .LBB1_9
 ; CHECK-NEXT:  @ %bb.3: @ %vector.ph
-; CHECK-NEXT:    bic r5, r2, #3
-; CHECK-NEXT:    movs r4, #1
-; CHECK-NEXT:    subs r3, r5, #4
+; CHECK-NEXT:    bic r4, r2, #3
+; CHECK-NEXT:    movs r3, #1
+; CHECK-NEXT:    sub.w r12, r4, #4
 ; CHECK-NEXT:    and r7, r2, #3
-; CHECK-NEXT:    add.w r12, r1, r5, lsl #2
-; CHECK-NEXT:    add.w r3, r4, r3, lsr #2
-; CHECK-NEXT:    mov r4, r3
-; CHECK-NEXT:    add.w r3, r0, r5, lsl #3
+; CHECK-NEXT:    add.w r3, r3, r12, lsr #2
+; CHECK-NEXT:    add.w r12, r1, r4, lsl #2
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    add.w r3, r0, r4, lsl #3
 ; CHECK-NEXT:  .LBB1_4: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vldrw.u32 q0, [r0, #16]
-; CHECK-NEXT:    vldrw.u32 q1, [r0], #32
-; CHECK-NEXT:    mov lr, r4
-; CHECK-NEXT:    vmul.f32 q1, q1, q1
-; CHECK-NEXT:    vmul.f32 q0, q0, q0
-; CHECK-NEXT:    vmov.f64 d4, d2
+; CHECK-NEXT:    vld20.32 {q0, q1}, [r0]
+; CHECK-NEXT:    mov lr, r5
 ; CHECK-NEXT:    subs.w lr, lr, #1
-; CHECK-NEXT:    mov r4, lr
-; CHECK-NEXT:    vmov.f32 s12, s5
-; CHECK-NEXT:    vmov.f32 s9, s6
-; CHECK-NEXT:    vmov.f32 s13, s7
-; CHECK-NEXT:    vmov.f32 s10, s0
-; CHECK-NEXT:    vmov.f32 s14, s1
-; CHECK-NEXT:    vmov.f32 s11, s2
-; CHECK-NEXT:    vmov.f32 s15, s3
-; CHECK-NEXT:    vadd.f32 q0, q3, q2
-; CHECK-NEXT:    vstrb.8 q0, [r1], #16
+; CHECK-NEXT:    vld21.32 {q0, q1}, [r0]!
+; CHECK-NEXT:    mov r5, lr
+; CHECK-NEXT:    vmul.f32 q2, q0, q0
+; CHECK-NEXT:    vfma.f32 q2, q1, q1
+; CHECK-NEXT:    vstrb.8 q2, [r1], #16
 ; CHECK-NEXT:    bne .LBB1_4
 ; CHECK-NEXT:    b .LBB1_5
 ; CHECK-NEXT:  .LBB1_5: @ %middle.block
-; CHECK-NEXT:    cmp r5, r2
+; CHECK-NEXT:    cmp r4, r2
 ; CHECK-NEXT:    mov lr, r7
 ; CHECK-NEXT:    it eq
 ; CHECK-NEXT:    popeq {r4, r5, r7, pc}
diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll
index a1e1b4dbe748c..47114092bfb1a 100644
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll
@@ -7,12 +7,15 @@ target triple = "aarch64--linux-gnu"
 define <4 x float> @vld2(<8 x float>* %pSrc) {
 ; CHECK-LABEL: @vld2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x float>, <8 x float>* [[PSRC:%.*]], align 4
-; CHECK-NEXT:    [[L2:%.*]] = fmul fast <8 x float> [[WIDE_VEC]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[L3:%.*]] = shufflevector <8 x float> [[L2]], <8 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[L4:%.*]] = fmul fast <8 x float> [[WIDE_VEC]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[L5:%.*]] = shufflevector <8 x float> [[L4]], <8 x float> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[L6:%.*]] = fadd fast <4 x float> [[L5]], [[L3]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x float>* [[PSRC:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
+; CHECK-NEXT:    [[L26:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[L43:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[L6:%.*]] = fadd fast <4 x float> [[L43]], [[L26]]
 ; CHECK-NEXT:    ret <4 x float> [[L6]]
 ;
 entry:
@@ -28,15 +31,19 @@ entry:
 define <4 x float> @vld3(<12 x float>* %pSrc) {
 ; CHECK-LABEL: @vld3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <12 x float>, <12 x float>* [[PSRC:%.*]], align 4
-; CHECK-NEXT:    [[L2:%.*]] = fmul fast <12 x float> [[WIDE_VEC]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[L3:%.*]] = shufflevector <12 x float> [[L2]], <12 x float> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT:    [[L4:%.*]] = fmul fast <12 x float> [[WIDE_VEC]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[L5:%.*]] = shufflevector <12 x float> [[L4]], <12 x float> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-; CHECK-NEXT:    [[L6:%.*]] = fadd fast <4 x float> [[L5]], [[L3]]
-; CHECK-NEXT:    [[L7:%.*]] = fmul fast <12 x float> [[WIDE_VEC]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[L8:%.*]] = shufflevector <12 x float> [[L7]], <12 x float> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; CHECK-NEXT:    [[L9:%.*]] = fadd fast <4 x float> [[L6]], [[L8]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <12 x float>* [[PSRC:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0v4f32(<4 x float>* [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
+; CHECK-NEXT:    [[L29:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[L46:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[L6:%.*]] = fadd fast <4 x float> [[L46]], [[L29]]
+; CHECK-NEXT:    [[L73:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[L9:%.*]] = fadd fast <4 x float> [[L6]], [[L73]]
 ; CHECK-NEXT:    ret <4 x float> [[L9]]
 ;
 entry:
@@ -55,17 +62,22 @@ entry:
 define <4 x float> @vld4(<16 x float>* %pSrc) {
 ; CHECK-LABEL: @vld4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x float>, <16 x float>* [[PSRC:%.*]], align 4
-; CHECK-NEXT:    [[L3:%.*]] = fmul fast <16 x float> [[WIDE_VEC]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[L4:%.*]] = shufflevector <16 x float> [[L3]], <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-NEXT:    [[L5:%.*]] = fmul fast <16 x float> [[WIDE_VEC]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[L6:%.*]] = shufflevector <16 x float> [[L5]], <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-; CHECK-NEXT:    [[L7:%.*]] = fadd fast <4 x float> [[L6]], [[L4]]
-; CHECK-NEXT:    [[L8:%.*]] = fmul fast <16 x float> [[WIDE_VEC]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[L9:%.*]] = shufflevector <16 x float> [[L8]], <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-; CHECK-NEXT:    [[L10:%.*]] = fmul fast <16 x float> [[WIDE_VEC]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[L11:%.*]] = shufflevector <16 x float> [[L10]], <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
-; CHECK-NEXT:    [[L12:%.*]] = fadd fast <4 x float> [[L11]], [[L9]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x float>* [[PSRC:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0v4f32(<4 x float>* [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 3
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 2
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
+; CHECK-NEXT:    [[L312:%.*]] = fmul <4 x float> [[TMP7]], [[TMP8]]
+; CHECK-NEXT:    [[L59:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[L7:%.*]] = fadd fast <4 x float> [[L59]], [[L312]]
+; CHECK-NEXT:    [[L86:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[L103:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[L12:%.*]] = fadd fast <4 x float> [[L103]], [[L86]]
 ; CHECK-NEXT:    ret <4 x float> [[L12]]
 ;
 entry:
@@ -86,13 +98,17 @@ entry:
 define <4 x float> @twosrc(<8 x float>* %pSrc1, <8 x float>* %pSrc2) {
 ; CHECK-LABEL: @twosrc(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x float>, <8 x float>* [[PSRC1:%.*]], align 4
-; CHECK-NEXT:    [[WIDE_VEC26:%.*]] = load <8 x float>, <8 x float>* [[PSRC2:%.*]], align 4
-; CHECK-NEXT:    [[L4:%.*]] = fmul fast <8 x float> [[WIDE_VEC26]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[L5:%.*]] = shufflevector <8 x float> [[L4]], <8 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[L6:%.*]] = fmul fast <8 x float> [[WIDE_VEC26]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[L7:%.*]] = shufflevector <8 x float> [[L6]], <8 x float> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[L8:%.*]] = fadd fast <4 x float> [[L7]], [[L5]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x float>* [[PSRC1:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x float>* [[PSRC2:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[LDN7:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]])
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 1
+; CHECK-NEXT:    [[L46:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    [[L63:%.*]] = fmul <4 x float> [[TMP5]], [[TMP1]]
+; CHECK-NEXT:    [[L8:%.*]] = fadd fast <4 x float> [[L63]], [[L46]]
 ; CHECK-NEXT:    ret <4 x float> [[L8]]
 ;
 entry:
@@ -109,14 +125,17 @@ entry:
 define <4 x float> @twosrc2(<8 x float>* %pSrc1, <8 x float>* %pSrc2) {
 ; CHECK-LABEL: @twosrc2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x float>, <8 x float>* [[PSRC1:%.*]], align 4
-; CHECK-NEXT:    [[WIDE_VEC26:%.*]] = load <8 x float>, <8 x float>* [[PSRC2:%.*]], align 4
-; CHECK-NEXT:    [[L4:%.*]] = fmul fast <8 x float> [[WIDE_VEC26]], [[WIDE_VEC]]
-; CHECK-NEXT:    [[L5:%.*]] = shufflevector <8 x float> [[L4]], <8 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <8 x float> [[WIDE_VEC26]], <8 x float> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <8 x float> [[WIDE_VEC]], <8 x float> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[L6:%.*]] = fmul fast <4 x float> [[S1]], [[S2]]
-; CHECK-NEXT:    [[L8:%.*]] = fadd fast <4 x float> [[L6]], [[L5]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x float>* [[PSRC1:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x float>* [[PSRC2:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[LDN4:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]])
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 1
+; CHECK-NEXT:    [[L43:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    [[L6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]]
+; CHECK-NEXT:    [[L8:%.*]] = fadd fast <4 x float> [[L6]], [[L43]]
 ; CHECK-NEXT:    ret <4 x float> [[L8]]
 ;
 entry:

From fcf62879163ab28b4bd72cbc5715b49133f38427 Mon Sep 17 00:00:00 2001
From: Georgii Rymar <grimar@accesssoftek.com>
Date: Wed, 28 Oct 2020 13:34:28 +0300
Subject: [PATCH 05/16] [yaml2obj] - Improve handling of
 SectionHeaderTable::NoHeaders flag.

When `NoHeaders` is set, we still have the following issues:
1) We emit the `.shstrtab` implicit section of size 1 (empty string table).
2) We still align the start of the section header table, which affects the output size.
3) We still write section header table bytes.

This patch fixes all of these issues.

Differential revision: https://reviews.llvm.org/D90295
---
 llvm/lib/ObjectYAML/ELFEmitter.cpp            | 44 +++++++++++--------
 .../ELF/malformed-pt-dynamic.test             | 16 +++----
 .../tools/yaml2obj/ELF/section-headers.yaml   | 34 ++++++++++++++
 3 files changed, 67 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp
index cbd76c7aad1b3..a1acd2a69da85 100644
--- a/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -232,7 +232,7 @@ template <class ELFT> class ELFState {
                    ArrayRef<typename ELFT::Shdr> SHeaders);
 
   void finalizeStrings();
-  void writeELFHeader(raw_ostream &OS, uint64_t SHOff);
+  void writeELFHeader(raw_ostream &OS, Optional<uint64_t> SHOff);
   void writeSectionContent(Elf_Shdr &SHeader,
                            const ELFYAML::NoBitsSection &Section,
                            ContiguousBlobAccumulator &CBA);
@@ -363,7 +363,9 @@ ELFState<ELFT>::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH)
       std::string SecName = ("." + DebugSecName).str();
       ImplicitSections.push_back(StringRef(SecName).copy(StringAlloc));
     }
-  ImplicitSections.insert(ImplicitSections.end(), {".strtab", ".shstrtab"});
+  ImplicitSections.insert(ImplicitSections.end(), {".strtab"});
+  if (!Doc.SectionHeaders || !Doc.SectionHeaders->NoHeaders.getValueOr(false))
+    ImplicitSections.insert(ImplicitSections.end(), {".shstrtab"});
 
   // Insert placeholders for implicit sections that are not
   // defined explicitly in YAML.
@@ -379,7 +381,7 @@ ELFState<ELFT>::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH)
 }
 
 template <class ELFT>
-void ELFState<ELFT>::writeELFHeader(raw_ostream &OS, uint64_t SHOff) {
+void ELFState<ELFT>::writeELFHeader(raw_ostream &OS, Optional<uint64_t> SHOff) {
   using namespace llvm::ELF;
 
   Elf_Ehdr Header;
@@ -429,22 +431,19 @@ void ELFState<ELFT>::writeELFHeader(raw_ostream &OS, uint64_t SHOff) {
   Header.e_shentsize = Doc.Header.EShEntSize ? (uint16_t)*Doc.Header.EShEntSize
                                              : sizeof(Elf_Shdr);
 
-  const bool NoShdrs =
-      Doc.SectionHeaders && Doc.SectionHeaders->NoHeaders.getValueOr(false);
-
   if (Doc.Header.EShOff)
     Header.e_shoff = *Doc.Header.EShOff;
-  else if (NoShdrs)
-    Header.e_shoff = 0;
+  else if (SHOff)
+    Header.e_shoff = *SHOff;
   else
-    Header.e_shoff = SHOff;
+    Header.e_shoff = 0;
 
   if (Doc.Header.EShNum)
     Header.e_shnum = *Doc.Header.EShNum;
   else if (!Doc.SectionHeaders ||
            (Doc.SectionHeaders->NoHeaders && !*Doc.SectionHeaders->NoHeaders))
     Header.e_shnum = Doc.getSections().size();
-  else if (NoShdrs)
+  else if (!SHOff)
     Header.e_shnum = 0;
   else
     Header.e_shnum =
@@ -454,10 +453,10 @@ void ELFState<ELFT>::writeELFHeader(raw_ostream &OS, uint64_t SHOff) {
 
   if (Doc.Header.EShStrNdx)
     Header.e_shstrndx = *Doc.Header.EShStrNdx;
-  else if (NoShdrs || ExcludedSectionHeaders.count(".shstrtab"))
-    Header.e_shstrndx = 0;
-  else
+  else if (SHOff && !ExcludedSectionHeaders.count(".shstrtab"))
     Header.e_shstrndx = SN2I.get(".shstrtab");
+  else
+    Header.e_shstrndx = 0;
 
   OS.write((const char *)&Header, sizeof(Header));
 }
@@ -1884,11 +1883,17 @@ bool ELFState<ELFT>::writeELF(raw_ostream &OS, ELFYAML::Object &Doc,
   // Now we can decide segment offsets.
   State.setProgramHeaderLayout(PHeaders, SHeaders);
 
-  // Align the start of the section header table, which is written after all
-  // section data.
-  uint64_t SHOff =
-      State.alignToOffset(CBA, sizeof(typename ELFT::uint), /*Offset=*/None);
-  bool ReachedLimit = SHOff + arrayDataSize(makeArrayRef(SHeaders)) > MaxSize;
+  // If needed, align the start of the section header table, which is written
+  // after all section data.
+  const bool HasSectionHeaders =
+      !Doc.SectionHeaders || !Doc.SectionHeaders->NoHeaders.getValueOr(false);
+  Optional<uint64_t> SHOff;
+  if (HasSectionHeaders)
+    SHOff = State.alignToOffset(CBA, sizeof(typename ELFT::uint),
+                                /*Offset=*/None);
+  bool ReachedLimit = SHOff.getValueOr(CBA.getOffset()) +
+                          arrayDataSize(makeArrayRef(SHeaders)) >
+                      MaxSize;
   if (Error E = CBA.takeLimitError()) {
     // We report a custom error message instead below.
     consumeError(std::move(E));
@@ -1906,7 +1911,8 @@ bool ELFState<ELFT>::writeELF(raw_ostream &OS, ELFYAML::Object &Doc,
   State.writeELFHeader(OS, SHOff);
   writeArrayData(OS, makeArrayRef(PHeaders));
   CBA.writeBlobToStream(OS);
-  writeArrayData(OS, makeArrayRef(SHeaders));
+  if (HasSectionHeaders)
+    writeArrayData(OS, makeArrayRef(SHeaders));
   return true;
 }
 
diff --git a/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test b/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test
index 4c22b318432c2..68da95b29ce03 100644
--- a/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test
+++ b/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test
@@ -22,13 +22,13 @@
 # WARN1-GNU-NEXT:   0x0000000000000000 (NULL) 0x0
 
 ## Case A.2: in this case we drop section headers. The dynamic table is not dumped.
-# RUN: yaml2obj %s -DFILESIZE=0x119 -DNOHEADERS=true -o %t1.noheaders
+# RUN: yaml2obj %s -DFILESIZE=0x12 -DNOHEADERS=true -o %t1.noheaders
 # RUN: llvm-readobj %t1.noheaders --dynamic-table 2>&1 | FileCheck -DFILE=%t1.noheaders %s \
 # RUN:   --check-prefix=WARN1-NOHEADERS --implicit-check-not="DynamicSection ["
 # RUN: llvm-readelf %t1.noheaders --dynamic-table 2>&1 | FileCheck -DFILE=%t1.noheaders %s \
 # RUN:   --check-prefix=WARN1-NOHEADERS --implicit-check-not="Dynamic section"
 
-# WARN1-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0x119) exceeds the size of the file (0x1118)
+# WARN1-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0x12) exceeds the size of the file (0x1011)
 
 ## Case B: Test case where the offset of the PT_DYNAMIC header is too large to be in the file.
 
@@ -45,13 +45,13 @@
 # WARN2: warning: '[[FILE]]': no valid dynamic table was found
 
 ## Case B.2: in this case we drop section headers. The dynamic table is not dumped.
-# RUN: yaml2obj %s -DOFFSET=0x1119 -DNOHEADERS=true -o %t2.noheaders
+# RUN: yaml2obj %s -DOFFSET=0x1112 -DNOHEADERS=true -o %t2.noheaders
 # RUN: llvm-readobj %t2.noheaders --dynamic-table 2>&1 | FileCheck -DFILE=%t2.noheaders %s \
 # RUN:   --check-prefix=WARN2-NOHEADERS --implicit-check-not="DynamicSection ["
 # RUN: llvm-readelf %t2.noheaders --dynamic-table 2>&1 | FileCheck -DFILE=%t2.noheaders %s \
 # RUN:   --check-prefix=WARN2-NOHEADERS --implicit-check-not="Dynamic section"
 
-# WARN2-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1119) + file size (0x10) exceeds the size of the file (0x1118)
+# WARN2-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1112) + file size (0x10) exceeds the size of the file (0x1011)
 
 ## Case C: test we report a warning when the offset + the file size of the PT_DYNAMIC is so large a
 ##         value that it overflows the platform address size type. Check we also report a warning about
@@ -73,7 +73,7 @@
 # RUN: llvm-readelf %t3.noheaders --dynamic-table 2>&1 | \
 # RUN:   FileCheck -DFILE=%t3.noheaders %s --check-prefix=WARN3-NOHEADERS
 
-# WARN3-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0xffffffffffffffff) + file size (0x10) exceeds the size of the file (0x1118)
+# WARN3-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0xffffffffffffffff) + file size (0x10) exceeds the size of the file (0x1011)
 
 # RUN: yaml2obj %s -DFILESIZE=0xffffffffffffffff -o %t4
 # RUN: llvm-readobj %t4 --dynamic-table 2>&1 | FileCheck -DFILE=%t4 %s --check-prefix=WARN4
@@ -87,7 +87,7 @@
 # RUN: llvm-readelf %t4.noheaders --dynamic-table 2>&1 | \
 # RUN:   FileCheck -DFILE=%t4.noheaders %s --check-prefix=WARN4-NOHEADERS
 
-# WARN4-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0xffffffffffffffff) exceeds the size of the file (0x1118)
+# WARN4-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0xffffffffffffffff) exceeds the size of the file (0x1011)
 
 ## Case D: the same as "Case C", but for a 32-bit object.
 
@@ -107,7 +107,7 @@
 # RUN: llvm-readelf %t5.noheaders --dynamic-table 2>&1 | \
 # RUN:   FileCheck -DFILE=%t5.noheaders %s --check-prefix=WARN5-NOHEADERS
 
-# WARN5-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0xffffffff) + file size (0x8) exceeds the size of the file (0x10ac)
+# WARN5-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0xffffffff) + file size (0x8) exceeds the size of the file (0x1009)
 
 # RUN: yaml2obj %s -DBITS=32 -DFILESIZE=0xffffffff -o %t6
 # RUN: llvm-readobj %t6 --dynamic-table 2>&1 | FileCheck -DFILE=%t6 %s --check-prefix=WARN6
@@ -121,7 +121,7 @@
 # RUN: llvm-readelf %t6.noheaders --dynamic-table 2>&1 | \
 # RUN:   FileCheck -DFILE=%t6.noheaders %s --check-prefix=WARN6-NOHEADERS
 
-# WARN6-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0xffffffff) exceeds the size of the file (0x10ac)
+# WARN6-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0xffffffff) exceeds the size of the file (0x1009)
 
 --- !ELF
 FileHeader:
diff --git a/llvm/test/tools/yaml2obj/ELF/section-headers.yaml b/llvm/test/tools/yaml2obj/ELF/section-headers.yaml
index 87d6ebe57c305..c90ffe3812287 100644
--- a/llvm/test/tools/yaml2obj/ELF/section-headers.yaml
+++ b/llvm/test/tools/yaml2obj/ELF/section-headers.yaml
@@ -191,6 +191,10 @@ FileHeader:
 Sections:
   - Name: .foo
     Type: SHT_PROGBITS
+## FIXME: we have to set an arbitrary size to create a
+## piece of dummy data to make llvm-readelf happy.
+## See: https://bugs.llvm.org/show_bug.cgi?id=40804
+    Size: 0x100
 SectionHeaderTable:
   NoHeaders: true
 
@@ -250,3 +254,33 @@ Symbols:
     Section: .foo
   - Name:    bar
     Section: .bar
+
+## Check that when "NoHeaders" is set to "true" then we don't emit
+## the .shstrtab section implicitly and don't write the data of the
+## section header table to the file.
+
+# RUN: yaml2obj %s --docnum=8 -o %t8
+# RUN: wc -c < %t8 | FileCheck %s --check-prefix=SIZE
+
+# SIZE: 511{{$}}
+
+--- !ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data:  ELFDATA2LSB
+  Type:  ET_REL
+Sections:
+## We don't want any implicit sections to be added after the .foo section,
+## so add them here explicitly.
+  - Name: .strtab
+    Type: SHT_STRTAB
+## Nothing should be emitted after the following section.
+## So we know that the expected file size is 0x100 + 0xFF == 0x1FF == 511.
+  - Name:   .foo
+    Type:   SHT_PROGBITS
+## Unaligned size. Used to make sure that we don't try to align the file offset
+## for writing the section header table.
+    Size:   0xFF
+    Offset: 0x100
+SectionHeaderTable:
+  NoHeaders: true

From ef129f01e9053871fdf97ad48dd26857d3af925d Mon Sep 17 00:00:00 2001
From: Max Kazantsev <mkazantsev@azul.com>
Date: Thu, 29 Oct 2020 16:29:45 +0700
Subject: [PATCH 06/16] [SCEV][NFC] Use general predicate checkers in
 monotonicity check

This makes the code more compact and readable.
---
 llvm/lib/Analysis/ScalarEvolution.cpp | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 8bc2595d52612..ec39180cec395 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9270,25 +9270,20 @@ ScalarEvolution::getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS,
   // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be
   // as general as possible.
 
-  switch (Pred) {
-  default:
-    return None; // Conservative answer
+  // Only handle LE/LT/GE/GT predicates.
+  if (!ICmpInst::isRelational(Pred))
+    return None;
 
-  case ICmpInst::ICMP_UGT:
-  case ICmpInst::ICMP_UGE:
-  case ICmpInst::ICMP_ULT:
-  case ICmpInst::ICMP_ULE:
+  // Check that AR does not wrap.
+  if (ICmpInst::isUnsigned(Pred)) {
     if (!LHS->hasNoUnsignedWrap())
       return None;
-
     return Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE
                ? MonotonicallyIncreasing
                : MonotonicallyDecreasing;
-
-  case ICmpInst::ICMP_SGT:
-  case ICmpInst::ICMP_SGE:
-  case ICmpInst::ICMP_SLT:
-  case ICmpInst::ICMP_SLE: {
+  } else {
+    assert(ICmpInst::isSigned(Pred) &&
+           "Relational predicate is either signed or unsigned!");
     if (!LHS->hasNoSignedWrap())
       return None;
 
@@ -9308,10 +9303,6 @@ ScalarEvolution::getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS,
 
     return None;
   }
-
-  }
-
-  llvm_unreachable("switch has default clause!");
 }
 
 bool ScalarEvolution::isLoopInvariantPredicate(

From 075f661d01f856192c236b6256f18bc697e28e1b Mon Sep 17 00:00:00 2001
From: David Zarzycki <dave@znu.io>
Date: Thu, 29 Oct 2020 05:53:47 -0400
Subject: [PATCH 07/16] [lldb] Unbreak the build after a recent PowerPC change

40dd4d5233d9f81705a24d91b48d2620e487b89d introduced two new types.
---
 lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
index 3f6da8ef49d6c..6a5c5cb69ac6c 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -4819,6 +4819,12 @@ lldb::Encoding TypeSystemClang::GetEncoding(lldb::opaque_compiler_type_t type,
     case clang::BuiltinType::OCLIntelSubgroupAVCImeDualRefStreamin:
       break;
 
+    // PowerPC -- Matrix Multiply Assist
+    case clang::BuiltinType::VectorPair:
+    case clang::BuiltinType::VectorQuad:
+      break;
+
+    // ARM -- Scalable Vector Extension
     case clang::BuiltinType::SveBool:
     case clang::BuiltinType::SveInt8:
     case clang::BuiltinType::SveInt8x2:

From 749f13e7635aebcda978ee2e42f5569ab7f0fafc Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Tue, 27 Oct 2020 11:55:27 +0000
Subject: [PATCH 08/16] [lldb] Correct --help output for qemu rootfs script

It was printing "Usage:" twice.

Reviewed By: omjavaid

Differential Revision: https://reviews.llvm.org/D90225
---
 lldb/scripts/lldb-test-qemu/rootfs.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lldb/scripts/lldb-test-qemu/rootfs.sh b/lldb/scripts/lldb-test-qemu/rootfs.sh
index 0491f4be0bc2b..33ff278c1702a 100644
--- a/lldb/scripts/lldb-test-qemu/rootfs.sh
+++ b/lldb/scripts/lldb-test-qemu/rootfs.sh
@@ -3,7 +3,6 @@
 set -e
 
 print_usage() {
-  echo "Usage:"
   echo "Usage: $(basename $0) [options]"
   echo -e "Creates a Ubuntu root file system image.\n"
   echo -e "  --help\t\t\tDisplay this information."

From 9c82944b2dc5dcb5e9100a76647fcc1aaa6333b9 Mon Sep 17 00:00:00 2001
From: "Kazushi (Jam) Marukawa" <marukawa@nec.com>
Date: Wed, 28 Oct 2020 20:11:40 +0900
Subject: [PATCH 09/16] [VE] Add vector control instructions

Add LVL/SVL/SMVL/LVIX instructions.  Add regression tests too.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D90355
---
 llvm/lib/Target/VE/VEInstrVec.td | 24 ++++++++++++++++++++++++
 llvm/test/MC/VE/LVIX.s           | 16 ++++++++++++++++
 llvm/test/MC/VE/LVL.s            | 16 ++++++++++++++++
 llvm/test/MC/VE/SMVL.s           | 16 ++++++++++++++++
 llvm/test/MC/VE/SVL.s            | 16 ++++++++++++++++
 5 files changed, 88 insertions(+)
 create mode 100644 llvm/test/MC/VE/LVIX.s
 create mode 100644 llvm/test/MC/VE/LVL.s
 create mode 100644 llvm/test/MC/VE/SMVL.s
 create mode 100644 llvm/test/MC/VE/SVL.s

diff --git a/llvm/lib/Target/VE/VEInstrVec.td b/llvm/lib/Target/VE/VEInstrVec.td
index 77de72dc5cd80..ffff1826b5944 100644
--- a/llvm/lib/Target/VE/VEInstrVec.td
+++ b/llvm/lib/Target/VE/VEInstrVec.td
@@ -1427,3 +1427,27 @@ defm LZVM : RVMSm<"lzvm", 0xa5, VM>;
 
 // Section 8.17.12 - TOVM (Trailing One of VM)
 defm TOVM : RVMSm<"tovm", 0xa6, VM>;
+
+//-----------------------------------------------------------------------------
+// Section 8.18 - Vector Control Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.18.1 - LVL (Load VL)
+let sx = 0, cz = 0, sz = 0, hasSideEffects = 0, Defs = [VL] in {
+  def LVLr : RR<0xbf, (outs), (ins I64:$sy), "lvl $sy">;
+  let cy = 0 in def LVLi : RR<0xbf, (outs), (ins simm7:$sy), "lvl $sy">;
+}
+
+// Section 8.18.2 - SVL (Save VL)
+let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0, Uses = [VL] in
+def SVL : RR<0x2f, (outs I64:$sx), (ins), "svl $sx">;
+
+// Section 8.18.3 - SMVL (Save Maximum Vector Length)
+let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 in
+def SMVL : RR<0x2e, (outs I64:$sx), (ins), "smvl $sx">;
+
+// Section 8.18.4 - LVIX (Load Vector Data Index)
+let sx = 0, cz = 0, sz = 0, hasSideEffects = 0, Defs = [VIX] in {
+  def LVIXr : RR<0xaf, (outs), (ins I64:$sy), "lvix $sy">;
+  let cy = 0 in def LVIXi : RR<0xaf, (outs), (ins uimm6:$sy), "lvix $sy">;
+}
diff --git a/llvm/test/MC/VE/LVIX.s b/llvm/test/MC/VE/LVIX.s
new file mode 100644
index 0000000000000..61594cf69c935
--- /dev/null
+++ b/llvm/test/MC/VE/LVIX.s
@@ -0,0 +1,16 @@
+# RUN: llvm-mc -triple=ve --show-encoding < %s \
+# RUN:     | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc -triple=ve -filetype=obj < %s | llvm-objdump -d - \
+# RUN:     | FileCheck %s --check-prefixes=CHECK-INST
+
+# CHECK-INST: lvix %s11
+# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x8b,0x00,0xaf]
+lvix %s11
+
+# CHECK-INST: lvix 63
+# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x3f,0x00,0xaf]
+lvix 63
+
+# CHECK-INST: lvix %s63
+# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0xbf,0x00,0xaf]
+lvix %s63
diff --git a/llvm/test/MC/VE/LVL.s b/llvm/test/MC/VE/LVL.s
new file mode 100644
index 0000000000000..50eec02d5ad32
--- /dev/null
+++ b/llvm/test/MC/VE/LVL.s
@@ -0,0 +1,16 @@
+# RUN: llvm-mc -triple=ve --show-encoding < %s \
+# RUN:     | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc -triple=ve -filetype=obj < %s | llvm-objdump -d - \
+# RUN:     | FileCheck %s --check-prefixes=CHECK-INST
+
+# CHECK-INST: lvl %s11
+# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x8b,0x00,0xbf]
+lvl %s11
+
+# CHECK-INST: lvl 63
+# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x3f,0x00,0xbf]
+lvl 63
+
+# CHECK-INST: lvl -64
+# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x40,0x00,0xbf]
+lvl -64
diff --git a/llvm/test/MC/VE/SMVL.s b/llvm/test/MC/VE/SMVL.s
new file mode 100644
index 0000000000000..c1fb3628db818
--- /dev/null
+++ b/llvm/test/MC/VE/SMVL.s
@@ -0,0 +1,16 @@
+# RUN: llvm-mc -triple=ve --show-encoding < %s \
+# RUN:     | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc -triple=ve -filetype=obj < %s | llvm-objdump -d - \
+# RUN:     | FileCheck %s --check-prefixes=CHECK-INST
+
+# CHECK-INST: smvl %s11
+# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x0b,0x2e]
+smvl %s11
+
+# CHECK-INST: smvl %s0
+# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2e]
+smvl %s0
+
+# CHECK-INST: smvl %s63
+# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x3f,0x2e]
+smvl %s63
diff --git a/llvm/test/MC/VE/SVL.s b/llvm/test/MC/VE/SVL.s
new file mode 100644
index 0000000000000..a825dcb1dec20
--- /dev/null
+++ b/llvm/test/MC/VE/SVL.s
@@ -0,0 +1,16 @@
+# RUN: llvm-mc -triple=ve --show-encoding < %s \
+# RUN:     | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc -triple=ve -filetype=obj < %s | llvm-objdump -d - \
+# RUN:     | FileCheck %s --check-prefixes=CHECK-INST
+
+# CHECK-INST: svl %s11
+# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x0b,0x2f]
+svl %s11
+
+# CHECK-INST: svl %s0
+# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2f]
+svl %s0
+
+# CHECK-INST: svl %s63
+# CHECK-ENCODING: encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x3f,0x2f]
+svl %s63

From b3761765d00bb4dafb5ecdf17502f7143432371b Mon Sep 17 00:00:00 2001
From: "Kazushi (Jam) Marukawa" <marukawa@nec.com>
Date: Wed, 28 Oct 2020 20:13:45 +0900
Subject: [PATCH 10/16] [VE] Add missing vector regression test

I forgot to add a regression test for VMAXX instruction when I added
it.  So, I'm adding it now.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D90356
---
 llvm/test/MC/VE/VMAXX.s | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 llvm/test/MC/VE/VMAXX.s

diff --git a/llvm/test/MC/VE/VMAXX.s b/llvm/test/MC/VE/VMAXX.s
new file mode 100644
index 0000000000000..1c16066016b2b
--- /dev/null
+++ b/llvm/test/MC/VE/VMAXX.s
@@ -0,0 +1,28 @@
+# RUN: llvm-mc -triple=ve --show-encoding < %s \
+# RUN:     | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc -triple=ve -filetype=obj < %s | llvm-objdump -d - \
+# RUN:     | FileCheck %s --check-prefixes=CHECK-INST
+
+# CHECK-INST: vrmaxs.l.fst %v11, %v12
+# CHECK-ENCODING: encoding: [0x00,0x00,0x0c,0x0b,0x00,0x00,0x00,0xab]
+vrmaxs.l.fst %v11, %v12
+
+# CHECK-INST: vrmaxs.l.fst %v11, %vix, %vm11
+# CHECK-ENCODING: encoding: [0x00,0x00,0xff,0x0b,0x00,0x00,0x0b,0xab]
+vrmaxs.l.fst %v11, %vix, %vm11
+
+# CHECK-INST: vrmaxs.l.lst %vix, %v22, %vm15
+# CHECK-ENCODING: encoding: [0x00,0x00,0x16,0xff,0x00,0x00,0x2f,0xab]
+vrmaxs.l.lst %vix, %v22, %vm15
+
+# CHECK-INST: vrmins.l.lst %v63, %v60, %vm2
+# CHECK-ENCODING: encoding: [0x00,0x00,0x3c,0x3f,0x00,0x00,0x32,0xab]
+vrmins.l.lst %v63, %v60, %vm2
+
+# CHECK-INST: vrmins.l.fst %vix, %vix
+# CHECK-ENCODING: encoding: [0x00,0x00,0xff,0xff,0x00,0x00,0x10,0xab]
+vrmins.l.fst %vix, %vix, %vm0
+
+# CHECK-INST: vrmins.l.lst %vix, %vix, %vm2
+# CHECK-ENCODING: encoding: [0x00,0x00,0xff,0xff,0x00,0x00,0x32,0xab]
+vrmins.l.lst %vix, %vix, %vm2

From df00267f1fdb0b098dc42f1caa8a59b29c8e0e5f Mon Sep 17 00:00:00 2001
From: Krasimir Georgiev <krasimir@google.com>
Date: Thu, 29 Oct 2020 11:27:54 +0100
Subject: [PATCH 11/16] clang-format: Add a consumer to diagnostics engine

Contributed by dmikis (Kirill Dmitrenko)!

Otherwise, problems such as trying to format a read-only file in place led to crashes.

I've added reviewers by looking at `git blame` and other reviews to the changed file, so may have missed someone.

Reviewed By: krasimir

Differential Revision: https://reviews.llvm.org/D90121
---
 clang/tools/clang-format/CMakeLists.txt  | 1 +
 clang/tools/clang-format/ClangFormat.cpp | 7 +++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/clang/tools/clang-format/CMakeLists.txt b/clang/tools/clang-format/CMakeLists.txt
index 35ecdb11253ce..28ac4fb5913e7 100644
--- a/clang/tools/clang-format/CMakeLists.txt
+++ b/clang/tools/clang-format/CMakeLists.txt
@@ -7,6 +7,7 @@ add_clang_tool(clang-format
 set(CLANG_FORMAT_LIB_DEPS
   clangBasic
   clangFormat
+  clangFrontend
   clangRewrite
   clangToolingCore
   )
diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp
index 3a7247deab46d..d7b768329bcc6 100644
--- a/clang/tools/clang-format/ClangFormat.cpp
+++ b/clang/tools/clang-format/ClangFormat.cpp
@@ -18,6 +18,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Version.h"
 #include "clang/Format/Format.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/Rewrite/Core/Rewriter.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
@@ -423,9 +424,11 @@ static bool format(StringRef FileName) {
     IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
         new llvm::vfs::InMemoryFileSystem);
     FileManager Files(FileSystemOptions(), InMemoryFileSystem);
+    IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts(new DiagnosticOptions());
+    TextDiagnosticPrinter DiagnosticsConsumer(errs(), &*DiagOpts);
     DiagnosticsEngine Diagnostics(
-        IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
-        new DiagnosticOptions);
+        IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), &*DiagOpts,
+        &DiagnosticsConsumer, false);
     SourceManager Sources(Diagnostics, Files);
     FileID ID = createInMemoryFile(AssumedFileName, Code.get(), Sources, Files,
                                    InMemoryFileSystem.get());

From 840737fc82277f5303708ff062cdedd084248631 Mon Sep 17 00:00:00 2001
From: Georgii Rymar <grimar@accesssoftek.com>
Date: Wed, 28 Oct 2020 15:05:35 +0300
Subject: [PATCH 12/16] [yaml2obj][test] - Merge dynsymtab-shlink.yaml to
 dynsym-section.yaml

This simplifies the dynsymtab-shlink.yaml test (with use of macros)
and merges it into the dynsym-section.yaml test.

Differential revision: https://reviews.llvm.org/D90301
---
 .../tools/yaml2obj/ELF/dynsym-section.yaml    | 47 ++++++++++
 .../tools/yaml2obj/ELF/dynsymtab-shlink.yaml  | 93 -------------------
 2 files changed, 47 insertions(+), 93 deletions(-)
 delete mode 100644 llvm/test/tools/yaml2obj/ELF/dynsymtab-shlink.yaml

diff --git a/llvm/test/tools/yaml2obj/ELF/dynsym-section.yaml b/llvm/test/tools/yaml2obj/ELF/dynsym-section.yaml
index 1c6f3efe3cfdd..5c461132fc9d0 100644
--- a/llvm/test/tools/yaml2obj/ELF/dynsym-section.yaml
+++ b/llvm/test/tools/yaml2obj/ELF/dynsym-section.yaml
@@ -31,3 +31,50 @@ Sections:
 # OFFSET-NEXT:   [Nr] Name    Type   Address          Off    Size   ES Flg Lk Inf Al
 # OFFSET-NEXT:   [ 0]         NULL   0000000000000000 000000 000000 00     0   0  0
 # OFFSET-NEXT:   [ 1] .dynsym DYNSYM 0000000000000000 000100 000018 18   A 0   1  0
+
+## Check we are able to set Link = 0 for the .dynsym section explicitly.
+
+# RUN: yaml2obj %s --docnum=2 -DLINK="Link: 0" -o %t2
+# RUN: llvm-readelf --section-headers %t2 | FileCheck %s --check-prefix=LINK-NULL
+
+# LINK-NULL: [Nr] Name    {{.*}} Flg Lk Inf
+# LINK-NULL: [ 1] .dynsym {{.*}}   A 0   1
+
+--- !ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data:  ELFDATA2LSB
+  Type:  ET_DYN
+Sections:
+  - Name: .dynsym
+    Type: SHT_DYNSYM
+    [[LINK]]
+  - Name: .dynstr
+    Type: SHT_STRTAB
+  - Name: .foo
+    Type: SHT_PROGBITS
+
+## Check that by default the .dynsym section will be linked to the .dynstr section,
+## when the latter one exists.
+
+# RUN: yaml2obj %s --docnum=2 -DLINK="" -o %t3
+# RUN: llvm-readelf --section-headers %t3 | FileCheck %s --check-prefix=LINK-DEFAULT
+
+# LINK-DEFAULT: [Nr] Name    {{.*}} Flg Lk Inf
+# LINK-DEFAULT: [ 1] .dynsym {{.*}}   A 2   1
+# LINK-DEFAULT: [ 2] .dynstr {{.*}}   A 0   0
+
+## Even when the .dynstr section exists, we can explicitly link the .dynsym section
+## to another section.
+
+# RUN: yaml2obj %s --docnum=2 -DLINK="Link: 3" -o %t4
+# RUN: llvm-readelf --section-headers %t4 | FileCheck %s --check-prefix=LINK-FOO
+
+# LINK-FOO: [Nr] Name     {{.*}} Flg Lk Inf
+# LINK-FOO: [ 1] .dynsym  {{.*}}   A 3   1
+# LINK-FOO: [ 3] .foo
+
+## Check we can use a section name as a Link value for .dynsym.
+
+# RUN: yaml2obj %s --docnum=2 -DLINK="Link: .foo" -o %t5
+# RUN: llvm-readelf --section-headers %t5 | FileCheck %s --check-prefix=LINK-FOO
diff --git a/llvm/test/tools/yaml2obj/ELF/dynsymtab-shlink.yaml b/llvm/test/tools/yaml2obj/ELF/dynsymtab-shlink.yaml
deleted file mode 100644
index b69413e7136bf..0000000000000
--- a/llvm/test/tools/yaml2obj/ELF/dynsymtab-shlink.yaml
+++ /dev/null
@@ -1,93 +0,0 @@
-## For implicit dynamic symbol table sections, `Link` field can also
-## be specified in YAML. Here we test the behavior in different cases.
-
-## Check we are able to set Link = 0 for .dynsym explicitly.
-
-# RUN: yaml2obj --docnum=1 %s -o %t1
-# RUN: llvm-readobj %t1 -S | FileCheck %s --check-prefix=CASE1
-
-# CASE1: Name: .dynsym
-# CASE1: Link:
-# CASE1-SAME: 0
-
---- !ELF
-FileHeader:
-  Class: ELFCLASS64
-  Data:  ELFDATA2LSB
-  Type:  ET_DYN
-Sections:
-  - Name: .dynsym
-    Type: SHT_DYNSYM
-    Link: 0
-
-## Check that by default .dynsym will be linked to .dynstr
-## if the latter exists.
-
-# RUN: yaml2obj --docnum=2 %s -o %t2
-# RUN: llvm-readobj %t2 -S | FileCheck %s --check-prefix=CASE2
-
-# CASE2:      .dynsym
-# CASE2:      Link:
-# CASE2-SAME: 2
-# CASE2:      Index: 2
-# CASE2-NEXT: Name: .dynstr
-
---- !ELF
-FileHeader:
-  Class: ELFCLASS64
-  Data:  ELFDATA2LSB
-  Type:  ET_DYN
-Sections:
-  - Name: .dynsym
-    Type: SHT_DYNSYM
-  - Name: .dynstr
-    Type: SHT_STRTAB
-
-## Even if .dynstr exists, we can explicitly link .dynsym
-## to another section.
-
-# RUN: yaml2obj --docnum=3 %s -o %t3
-# RUN: llvm-readobj %t3 -S | FileCheck %s --check-prefix=CASE3
-
-# CASE3:      .dynsym
-# CASE3:      Link:
-# CASE3-SAME: 3
-# CASE3:      Index: 3
-# CASE3-NEXT: Name: .foo
-
---- !ELF
-FileHeader:
-  Class: ELFCLASS64
-  Data:  ELFDATA2LSB
-  Type:  ET_DYN
-Sections:
-  - Name: .dynsym
-    Type: SHT_DYNSYM
-    Link: 3
-  - Name: .dynstr
-    Type: SHT_STRTAB
-  - Name: .foo
-    Type: SHT_PROGBITS
-
-## Check we can use a section name as a Link value for .dynsym.
-
-# RUN: yaml2obj --docnum=4 %s -o %t4
-# RUN: llvm-readobj %t4 -S | FileCheck %s --check-prefix=CASE4
-
-# CASE4:      .dynsym
-# CASE4:      Link:
-# CASE4-SAME: 2
-# CASE4:      Index: 2
-# CASE4-NEXT: Name: .foo
-
---- !ELF
-FileHeader:
-  Class: ELFCLASS64
-  Data:  ELFDATA2LSB
-  Type:  ET_DYN
-Sections:
-  - Name: .dynsym
-    Type: SHT_DYNSYM
-    Link: .foo
-  - Name: .foo
-    Type: SHT_PROGBITS

From 9dbffe4a28f46a9fd5dab36bf443a090588ac6de Mon Sep 17 00:00:00 2001
From: Tres Popp <tpopp@google.com>
Date: Thu, 29 Oct 2020 11:01:05 +0100
Subject: [PATCH 13/16] [mlir] Reorder shape assuming bufferization.

The previous ordering continued to use the original assuming op after
replacing it, which is not allowed. Now, inline the region from the old
op into the new one before the replacement.

Differential Revision: https://reviews.llvm.org/D90375
---
 .../lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp b/mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp
index 61e862836a733..041b54b3bd144 100644
--- a/mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp
+++ b/mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp
@@ -32,10 +32,9 @@ class ConvertAssumingOpTypes : public OpConversionPattern<AssumingOp> {
 
     auto newAssumingOp =
         rewriter.create<AssumingOp>(op.getLoc(), newResultTypes, op.witness());
-
-    rewriter.replaceOp(op, newAssumingOp.getResults());
     rewriter.inlineRegionBefore(op.doRegion(), newAssumingOp.doRegion(),
                                 newAssumingOp.doRegion().end());
+    rewriter.replaceOp(op, newAssumingOp.getResults());
 
     return success();
   }

From 79c5b4c546bb528bd51003a10f0a5aecab74ffbe Mon Sep 17 00:00:00 2001
From: Max Kazantsev <mkazantsev@azul.com>
Date: Thu, 29 Oct 2020 17:34:58 +0700
Subject: [PATCH 14/16] [NFC] Add some new util functions to ICmpInst

---
 llvm/include/llvm/IR/Instructions.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index 7b41dced564d4..ea1a60d52c877 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -1290,6 +1290,30 @@ class ICmpInst: public CmpInst {
     return !isEquality(P);
   }
 
+  /// Return true if the predicate is SGT or UGT.
+  ///
+  static bool isGT(Predicate P) {
+    return P == ICMP_SGT || P == ICMP_UGT;
+  }
+
+  /// Return true if the predicate is SLT or ULT.
+  ///
+  static bool isLT(Predicate P) {
+    return P == ICMP_SLT || P == ICMP_ULT;
+  }
+
+  /// Return true if the predicate is SGE or UGE.
+  ///
+  static bool isGE(Predicate P) {
+    return P == ICMP_SGE || P == ICMP_UGE;
+  }
+
+  /// Return true if the predicate is SLE or ULE.
+  ///
+  static bool isLE(Predicate P) {
+    return P == ICMP_SLE || P == ICMP_ULE;
+  }
+
   /// Exchange the two operands to this instruction in such a way that it does
   /// not modify the semantics of the instruction. The predicate value may be
   /// changed to retain the same result if the predicate is order dependent

From 88d6421e4c439582ca4ca5e3744f8cc4498bb48e Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Thu, 29 Oct 2020 09:30:37 +0000
Subject: [PATCH 15/16] [SCEV] Match 'zext (trunc A to iB) to iY' as URem.

URem operations with constant power-of-2 second operands are modeled as
such. This patch on its own has very little impact (e.g. no changes in
CodeGen for MultiSource/SPEC2000/SPEC2006 on X86 -O3 -flto), but I'll
soon post follow-up patches that make use of it to more accurately
determine the trip multiple.

Reviewed By: mkazantsev

Differential Revision: https://reviews.llvm.org/D89821
---
 llvm/lib/Analysis/ScalarEvolution.cpp         | 15 ++++-
 .../Analysis/ScalarEvolutionTest.cpp          | 58 +++++++++++++++++++
 2 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index ec39180cec395..2911b2e424af3 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -12850,11 +12850,24 @@ void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const {
 }
 
 // Match the mathematical pattern A - (A / B) * B, where A and B can be
-// arbitrary expressions.
+// arbitrary expressions. Also match zext (trunc A to iB) to iY, which is used
+// for URem with constant power-of-2 second operands.
 // It's not always easy, as A and B can be folded (imagine A is X / 2, and B is
 // 4, A / B becomes X / 8).
 bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,
                                 const SCEV *&RHS) {
+  // Try to match 'zext (trunc A to iB) to iY', which is used
+  // for URem with constant power-of-2 second operands. Make sure the size of
+  // the operand A matches the size of the whole expression.
+  if (const auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(Expr))
+    if (const auto *Trunc = dyn_cast<SCEVTruncateExpr>(ZExt->getOperand(0))) {
+      LHS = Trunc->getOperand();
+      if (LHS->getType() != Expr->getType())
+        LHS = getZeroExtendExpr(LHS, Expr->getType());
+      RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1)
+                        << getTypeSizeInBits(Trunc->getType()));
+      return true;
+    }
   const auto *Add = dyn_cast<SCEVAddExpr>(Expr);
   if (Add == nullptr || Add->getNumOperands() != 2)
     return false;
diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp
index 909a140296ac6..7fa588566c55c 100644
--- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp
+++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp
@@ -63,6 +63,11 @@ class ScalarEvolutionsTest : public testing::Test {
                                                    const SCEV *RHS) {
     return SE.computeConstantDifference(LHS, RHS);
   }
+
+  static bool matchURem(ScalarEvolution &SE, const SCEV *Expr, const SCEV *&LHS,
+                        const SCEV *&RHS) {
+    return SE.matchURem(Expr, LHS, RHS);
+  }
 };
 
 TEST_F(ScalarEvolutionsTest, SCEVUnknownRAUW) {
@@ -1363,4 +1368,57 @@ TEST_F(ScalarEvolutionsTest, ProveImplicationViaNarrowing) {
   });
 }
 
+TEST_F(ScalarEvolutionsTest, MatchURem) {
+  LLVMContext C;
+  SMDiagnostic Err;
+  std::unique_ptr<Module> M = parseAssemblyString(
+      "target datalayout = \"e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128\" "
+      " "
+      "define void @test(i32 %a, i32 %b, i16 %c, i64 %d) {"
+      "entry: "
+      "  %rem1 = urem i32 %a, 2"
+      "  %rem2 = urem i32 %a, 5"
+      "  %rem3 = urem i32 %a, %b"
+      "  %c.ext = zext i16 %c to i32"
+      "  %rem4 = urem i32 %c.ext, 2"
+      "  %ext = zext i32 %rem4 to i64"
+      "  %rem5 = urem i64 %d, 17179869184"
+      "  ret void "
+      "} ",
+      Err, C);
+
+  assert(M && "Could not parse module?");
+  assert(!verifyModule(*M) && "Must have been well formed!");
+
+  runWithSE(*M, "test", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+    for (auto *N : {"rem1", "rem2", "rem3", "rem5"}) {
+      auto *URemI = getInstructionByName(F, N);
+      auto *S = SE.getSCEV(URemI);
+      const SCEV *LHS, *RHS;
+      EXPECT_TRUE(matchURem(SE, S, LHS, RHS));
+      EXPECT_EQ(LHS, SE.getSCEV(URemI->getOperand(0)));
+      EXPECT_EQ(RHS, SE.getSCEV(URemI->getOperand(1)));
+      EXPECT_EQ(LHS->getType(), S->getType());
+      EXPECT_EQ(RHS->getType(), S->getType());
+    }
+
+    // Check the case where the urem operand is zero-extended. Make sure the
+    // match results are extended to the size of the input expression.
+    auto *Ext = getInstructionByName(F, "ext");
+    auto *URem1 = getInstructionByName(F, "rem4");
+    auto *S = SE.getSCEV(Ext);
+    const SCEV *LHS, *RHS;
+    EXPECT_TRUE(matchURem(SE, S, LHS, RHS));
+    EXPECT_NE(LHS, SE.getSCEV(URem1->getOperand(0)));
+    // RHS and URem1->getOperand(1) have different widths, so compare the
+    // integer values.
+    EXPECT_EQ(cast<SCEVConstant>(RHS)->getValue()->getZExtValue(),
+              cast<SCEVConstant>(SE.getSCEV(URem1->getOperand(1)))
+                  ->getValue()
+                  ->getZExtValue());
+    EXPECT_EQ(LHS->getType(), S->getType());
+    EXPECT_EQ(RHS->getType(), S->getType());
+  });
+}
+
 }  // end namespace llvm

From 930a8c60b60805567e3cc0c7958be3ceeafd01f9 Mon Sep 17 00:00:00 2001
From: Alok Kumar Sharma <AlokKumar.Sharma@amd.com>
Date: Thu, 29 Oct 2020 16:08:51 +0530
Subject: [PATCH 16/16] [DebugInfo] [NFCI] Adding a missed out line in support
 for DW_TAG_generic_subrange.

This commit adds a line that was missed in an earlier commit for DW_TAG_generic_subrange.
Previous commit ID: a6dd01afa3d5902203d04a72e0b478078f796a35
Differential Revision: https://reviews.llvm.org/D89218
Thanks markus for pointing this out.
---
 llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index f2560982d1bfc..9f111dc30f760 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -543,6 +543,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
       break;
     case dwarf::DW_OP_consts:
       assert(!isRegisterLocation());
+      emitOp(dwarf::DW_OP_consts);
       emitSigned(Op->getArg(0));
       break;
     case dwarf::DW_OP_LLVM_convert: {