diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index ba9bd439c4a39f..cb935becaef190 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -764,10 +764,7 @@ class CompilerInstance : public ModuleLoader {
   static bool InitializeSourceManager(const FrontendInputFile &Input,
                                       DiagnosticsEngine &Diags,
                                       FileManager &FileMgr,
-                                      SourceManager &SourceMgr,
-                                      HeaderSearch *HS,
-                                      DependencyOutputOptions &DepOpts,
-                                      const FrontendOptions &Opts);
+                                      SourceManager &SourceMgr);
 
   /// }
 
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 387b91abb537d0..d0394492cad6dd 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -130,8 +130,26 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
         "exec_hi", "tma_lo", "tma_hi", "tba_lo", "tba_hi",
     });
 
+    switch (*Name) {
+    case 'I':
+      Info.setRequiresImmediate(-16, 64);
+      return true;
+    case 'J':
+      Info.setRequiresImmediate(-32768, 32767);
+      return true;
+    case 'A':
+    case 'B':
+    case 'C':
+      Info.setRequiresImmediate();
+      return true;
+    default:
+      break;
+    }
+
     StringRef S(Name);
-    if (S == "A") {
+
+    if (S == "DA" || S == "DB") {
+      Name++;
       Info.setRequiresImmediate();
       return true;
     }
@@ -203,6 +221,12 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
   // the constraint.  In practice, it won't be changed unless the
   // constraint is longer than one character.
   std::string convertConstraint(const char *&Constraint) const override {
+
+    StringRef S(Constraint);
+    if (S == "DA" || S == "DB") {
+      return std::string("^") + std::string(Constraint++, 2);
+    }
+
     const char *Begin = Constraint;
     TargetInfo::ConstraintInfo Info("", "");
     if (validateAsmConstraint(Constraint, Info))
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 0140a756b7dde8..9dc9c42297eda1 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -815,17 +815,15 @@ std::unique_ptr<llvm::raw_pwrite_stream> CompilerInstance::createOutputFile(
 // Initialization Utilities
 
 bool CompilerInstance::InitializeSourceManager(const FrontendInputFile &Input){
-  return InitializeSourceManager(
-      Input, getDiagnostics(), getFileManager(), getSourceManager(),
-      hasPreprocessor() ? &getPreprocessor().getHeaderSearchInfo() : nullptr,
-      getDependencyOutputOpts(), getFrontendOpts());
+  return InitializeSourceManager(Input, getDiagnostics(), getFileManager(),
+                                 getSourceManager());
 }
 
 // static
-bool CompilerInstance::InitializeSourceManager(
-    const FrontendInputFile &Input, DiagnosticsEngine &Diags,
-    FileManager &FileMgr, SourceManager &SourceMgr, HeaderSearch *HS,
-    DependencyOutputOptions &DepOpts, const FrontendOptions &Opts) {
+bool CompilerInstance::InitializeSourceManager(const FrontendInputFile &Input,
+                                               DiagnosticsEngine &Diags,
+                                               FileManager &FileMgr,
+                                               SourceManager &SourceMgr) {
   SrcMgr::CharacteristicKind Kind =
       Input.getKind().getFormat() == InputKind::ModuleMap
           ? Input.isSystem() ? SrcMgr::C_System_ModuleMap
diff --git a/clang/test/CodeGenOpenCL/inline-asm-amdgcn.cl b/clang/test/CodeGenOpenCL/inline-asm-amdgcn.cl
index 37090772f66461..259c12384f2c8d 100644
--- a/clang/test/CodeGenOpenCL/inline-asm-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/inline-asm-amdgcn.cl
@@ -33,3 +33,17 @@ kernel void test_agpr() {
          : "={a1}"(reg_a)
          : "{a1}"(reg_b));
 }
+
+kernel void test_constraint_DA() {
+  const long x = 0x200000001;
+  int res;
+  // CHECK: call i32 asm sideeffect "v_mov_b32 $0, $1 & 0xFFFFFFFF", "=v,^DA"(i64 8589934593)
+  __asm volatile("v_mov_b32 %0, %1 & 0xFFFFFFFF" : "=v"(res) : "DA"(x));
+}
+
+kernel void test_constraint_DB() {
+  const long x = 0x200000001;
+  int res;
+  // CHECK: call i32 asm sideeffect "v_mov_b32 $0, $1 & 0xFFFFFFFF", "=v,^DB"(i64 8589934593)
+  __asm volatile("v_mov_b32 %0, %1 & 0xFFFFFFFF" : "=v"(res) : "DB"(x));
+}
diff --git a/clang/test/Sema/inline-asm-validate-amdgpu.cl b/clang/test/Sema/inline-asm-validate-amdgpu.cl
index 3d6488227ef299..418952c0e7272b 100644
--- a/clang/test/Sema/inline-asm-validate-amdgpu.cl
+++ b/clang/test/Sema/inline-asm-validate-amdgpu.cl
@@ -18,9 +18,35 @@ kernel void test () {
   // vgpr constraints
   __asm__ ("v_mov_b32 %0, %1" : "=v" (vgpr) : "v" (imm) : );
 
-  // 'A' constraint
+  // 'I' constraint (an immediate integer in the range -16 to 64)
+  __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (imm) : );
+  __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (-16) : );
+  __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (64) : );
+  __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (-17) : ); // expected-error {{value '-17' out of range for constraint 'I'}}
+  __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (65) : ); // expected-error {{value '65' out of range for constraint 'I'}}
+
+  // 'J' constraint (an immediate 16-bit signed integer)
+  __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (imm) : );
+  __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (-32768) : );
+  __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (32767) : );
+  __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (-32769) : ); // expected-error {{value '-32769' out of range for constraint 'J'}}
+  __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (32768) : ); // expected-error {{value '32768' out of range for constraint 'J'}}
+
+  // 'A' constraint (an immediate constant that can be inlined)
   __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "A" (imm) : );
 
+  // 'B' constraint (an immediate 32-bit signed integer)
+  __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "B" (imm) : );
+
+  // 'C' constraint (an immediate 32-bit unsigned integer or 'A' constraint)
+  __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "C" (imm) : );
+
+  // 'DA' constraint (an immediate 64-bit constant that can be split into two 'A' constants)
+  __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "DA" (imm) : );
+
+  // 'DB' constraint (an immediate 64-bit constant that can be split into two 'B' constants)
+  __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "DB" (imm) : );
+
 }
 
 __kernel void
diff --git a/flang/runtime/file.cpp b/flang/runtime/file.cpp
index 677b9675f47aa7..fdd7ea64c20185 100644
--- a/flang/runtime/file.cpp
+++ b/flang/runtime/file.cpp
@@ -352,7 +352,7 @@ bool OpenFile::RawSeekToEnd() {
 
 int OpenFile::PendingResult(const Terminator &terminator, int iostat) {
   int id{nextId_++};
-  pending_.reset(&New<Pending>{}(terminator, id, iostat, std::move(pending_)));
+  pending_ = New<Pending>{terminator}(id, iostat, std::move(pending_));
   return id;
 }
 } // namespace Fortran::runtime::io
diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp
index db228e5bbf21c7..bbb7eb2863c613 100644
--- a/flang/runtime/io-api.cpp
+++ b/flang/runtime/io-api.cpp
@@ -28,9 +28,10 @@ Cookie BeginInternalArrayListIO(const Descriptor &descriptor,
     void ** /*scratchArea*/, std::size_t /*scratchBytes*/,
     const char *sourceFile, int sourceLine) {
   Terminator oom{sourceFile, sourceLine};
-  return &New<InternalListIoStatementState<DIR>>{}(
-      oom, descriptor, sourceFile, sourceLine)
-              .ioStatementState();
+  return &New<InternalListIoStatementState<DIR>>{oom}(
+      descriptor, sourceFile, sourceLine)
+              .release()
+              ->ioStatementState();
 }
 
 Cookie IONAME(BeginInternalArrayListOutput)(const Descriptor &descriptor,
@@ -52,9 +53,10 @@ Cookie BeginInternalArrayFormattedIO(const Descriptor &descriptor,
     const char *format, std::size_t formatLength, void ** /*scratchArea*/,
     std::size_t /*scratchBytes*/, const char *sourceFile, int sourceLine) {
   Terminator oom{sourceFile, sourceLine};
-  return &New<InternalFormattedIoStatementState<DIR>>{}(
-      oom, descriptor, format, formatLength, sourceFile, sourceLine)
-              .ioStatementState();
+  return &New<InternalFormattedIoStatementState<DIR>>{oom}(
+      descriptor, format, formatLength, sourceFile, sourceLine)
+              .release()
+              ->ioStatementState();
 }
 
 Cookie IONAME(BeginInternalArrayFormattedOutput)(const Descriptor &descriptor,
@@ -78,9 +80,10 @@ Cookie BeginInternalFormattedIO(
     void ** /*scratchArea*/, std::size_t /*scratchBytes*/,
     const char *sourceFile, int sourceLine) {
   Terminator oom{sourceFile, sourceLine};
-  return &New<InternalFormattedIoStatementState<DIR>>{}(oom, internal,
-      internalLength, format, formatLength, sourceFile, sourceLine)
-              .ioStatementState();
+  return &New<InternalFormattedIoStatementState<DIR>>{oom}(
+      internal, internalLength, format, formatLength, sourceFile, sourceLine)
+              .release()
+              ->ioStatementState();
 }
 
 Cookie IONAME(BeginInternalFormattedOutput)(char *internal,
@@ -234,8 +237,9 @@ Cookie IONAME(BeginClose)(
   } else {
     // CLOSE(UNIT=bad unit) is just a no-op
     Terminator oom{sourceFile, sourceLine};
-    return &New<NoopCloseStatementState>{}(oom, sourceFile, sourceLine)
-                .ioStatementState();
+    return &New<NoopCloseStatementState>{oom}(sourceFile, sourceLine)
+                .release()
+                ->ioStatementState();
   }
 }
 
diff --git a/flang/runtime/memory.h b/flang/runtime/memory.h
index b8e84952a99f44..f21b237f3905b7 100644
--- a/flang/runtime/memory.h
+++ b/flang/runtime/memory.h
@@ -32,20 +32,32 @@ template <typename A> void FreeMemoryAndNullify(A *&p) {
   p = nullptr;
 }
 
-template <typename A> struct New {
-  template <typename... X>
-  [[nodiscard]] A &operator()(const Terminator &terminator, X &&... x) {
-    return *new (AllocateMemoryOrCrash(terminator, sizeof(A)))
-        A{std::forward<X>(x)...};
-  }
-};
-
 template <typename A> struct OwningPtrDeleter {
   void operator()(A *p) { FreeMemory(p); }
 };
 
 template <typename A> using OwningPtr = std::unique_ptr<A, OwningPtrDeleter<A>>;
 
+template <typename A> class SizedNew {
+public:
+  explicit SizedNew(const Terminator &terminator) : terminator_{terminator} {}
+  template <typename... X>
+  [[nodiscard]] OwningPtr<A> operator()(std::size_t bytes, X &&... x) {
+    return OwningPtr<A>{new (AllocateMemoryOrCrash(terminator_, bytes))
+            A{std::forward<X>(x)...}};
+  }
+
+private:
+  const Terminator &terminator_;
+};
+
+template <typename A> struct New : public SizedNew<A> {
+  using SizedNew<A>::SizedNew;
+  template <typename... X> [[nodiscard]] OwningPtr<A> operator()(X &&... x) {
+    return SizedNew<A>::operator()(sizeof(A), std::forward<X>(x)...);
+  }
+};
+
 template <typename A> struct Allocator {
   using value_type = A;
   explicit Allocator(const Terminator &t) : terminator{t} {}
diff --git a/flang/runtime/unit-map.cpp b/flang/runtime/unit-map.cpp
index 4e58cf590c2aca..5cbbf059d5f882 100644
--- a/flang/runtime/unit-map.cpp
+++ b/flang/runtime/unit-map.cpp
@@ -64,7 +64,7 @@ void UnitMap::CloseAll(IoErrorHandler &handler) {
 }
 
 ExternalFileUnit &UnitMap::Create(int n, const Terminator &terminator) {
-  Chain &chain{New<Chain>{}(terminator, n)};
+  Chain &chain{*New<Chain>{terminator}(n).release()};
   chain.next.reset(&chain);
   bucket_[Hash(n)].swap(chain.next); // pushes new node as list head
   return chain.unit;
diff --git a/flang/runtime/unit.cpp b/flang/runtime/unit.cpp
index 81035ab6fcde83..2eee142081a526 100644
--- a/flang/runtime/unit.cpp
+++ b/flang/runtime/unit.cpp
@@ -95,7 +95,7 @@ UnitMap &ExternalFileUnit::GetUnitMap() {
     return *unitMap;
   }
   Terminator terminator{__FILE__, __LINE__};
-  unitMap = &New<UnitMap>{}(terminator);
+  unitMap = New<UnitMap>{terminator}().release();
   ExternalFileUnit &out{ExternalFileUnit::LookUpOrCreate(6, terminator)};
   out.Predefine(1);
   out.set_mayRead(false);
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 4bf5796ed59d68..0bb69eb91362b4 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -2741,31 +2741,21 @@ void DWARFASTParserClang::ParseSingleMember(
 
           if (TypeSystemClang::IsCXXClassType(member_clang_type) &&
               !member_clang_type.GetCompleteType()) {
-            if (die.GetCU()->GetProducer() == eProducerClang)
-              module_sp->ReportError(
-                  "DWARF DIE at 0x%8.8x (class %s) has a member variable "
-                  "0x%8.8x (%s) whose type is a forward declaration, not a "
-                  "complete definition.\nTry compiling the source file "
-                  "with -fstandalone-debug",
-                  parent_die.GetOffset(), parent_die.GetName(), die.GetOffset(),
-                  name);
-            else
-              module_sp->ReportError(
-                  "DWARF DIE at 0x%8.8x (class %s) has a member variable "
-                  "0x%8.8x (%s) whose type is a forward declaration, not a "
-                  "complete definition.\nPlease file a bug against the "
-                  "compiler and include the preprocessed output for %s",
-                  parent_die.GetOffset(), parent_die.GetName(), die.GetOffset(),
-                  name, GetUnitName(parent_die).c_str());
-            // We have no choice other than to pretend that the member
-            // class is complete. If we don't do this, clang will crash
-            // when trying to layout the class. Since we provide layout
-            // assistance, all ivars in this class and other classes will
-            // be fine, this is the best we can do short of crashing.
+            // Mark the class as complete, ut we make a note of the fact that
+            // this class is not _really_ complete so we can later search for a
+            // definition in a different module.
+            // Since we provide layout assistance, all ivars in this class and
+            // other classes will be fine even if we are not able to find the
+            // definition elsewhere.
             if (TypeSystemClang::StartTagDeclarationDefinition(
                     member_clang_type)) {
               TypeSystemClang::CompleteTagDeclarationDefinition(
                   member_clang_type);
+              const auto *td = TypeSystemClang::GetQualType(
+                                   member_clang_type.GetOpaqueQualType())
+                                   .getTypePtr()
+                                   ->getAsTagDecl();
+              m_ast.GetMetadata(td)->SetIsForcefullyCompleted();
             } else {
               module_sp->ReportError(
                   "DWARF DIE at 0x%8.8x (class %s) has a member variable "
diff --git a/lldb/source/Utility/Scalar.cpp b/lldb/source/Utility/Scalar.cpp
index 7bd42f3b953550..c36ccab21a39fb 100644
--- a/lldb/source/Utility/Scalar.cpp
+++ b/lldb/source/Utility/Scalar.cpp
@@ -23,6 +23,60 @@
 using namespace lldb;
 using namespace lldb_private;
 
+namespace {
+enum class Category { Void, Integral, Float };
+}
+
+static Category GetCategory(Scalar::Type type) {
+  switch (type) {
+  case Scalar::e_void:
+    return Category::Void;
+  case Scalar::e_float:
+  case Scalar::e_double:
+  case Scalar::e_long_double:
+    return Category::Float;
+  case Scalar::e_sint:
+  case Scalar::e_slong:
+  case Scalar::e_slonglong:
+  case Scalar::e_sint128:
+  case Scalar::e_sint256:
+  case Scalar::e_sint512:
+  case Scalar::e_uint:
+  case Scalar::e_ulong:
+  case Scalar::e_ulonglong:
+  case Scalar::e_uint128:
+  case Scalar::e_uint256:
+  case Scalar::e_uint512:
+    return Category::Integral;
+  }
+  llvm_unreachable("Unhandled type!");
+}
+
+static bool IsSigned(Scalar::Type type) {
+  switch (type) {
+  case Scalar::e_void:
+  case Scalar::e_uint:
+  case Scalar::e_ulong:
+  case Scalar::e_ulonglong:
+  case Scalar::e_uint128:
+  case Scalar::e_uint256:
+  case Scalar::e_uint512:
+    return false;
+  case Scalar::e_sint:
+  case Scalar::e_slong:
+  case Scalar::e_slonglong:
+  case Scalar::e_sint128:
+  case Scalar::e_sint256:
+  case Scalar::e_sint512:
+  case Scalar::e_float:
+  case Scalar::e_double:
+  case Scalar::e_long_double:
+    return true;
+  }
+  llvm_unreachable("Unhandled type!");
+}
+
+
 // Promote to max type currently follows the ANSI C rule for type promotion in
 // expressions.
 static Scalar::Type PromoteToMaxType(
@@ -103,40 +157,19 @@ bool Scalar::GetData(DataExtractor &data, size_t limit_byte_size) const {
 void Scalar::GetBytes(llvm::MutableArrayRef<uint8_t> storage) const {
   assert(storage.size() >= GetByteSize());
 
-  switch (m_type) {
-  case e_void:
-    break;
-  case e_sint:
-  case e_uint:
-  case e_slong:
-  case e_ulong:
-  case e_slonglong:
-  case e_ulonglong:
-  case e_sint128:
-  case e_uint128:
-  case e_sint256:
-  case e_uint256:
-  case e_sint512:
-  case e_uint512:
-    StoreIntToMemory(m_integer, storage.data(),
-                     (m_integer.getBitWidth() + 7) / 8);
-    break;
-  case e_float: {
-    float val = m_float.convertToFloat();
-    memcpy(storage.data(), &val, sizeof(val));
+  const auto &store = [&](const llvm::APInt val) {
+    StoreIntToMemory(val, storage.data(), (val.getBitWidth() + 7) / 8);
+  };
+  switch (GetCategory(m_type)) {
+  case Category::Void:
     break;
-  }
-  case e_double: {
-    double val = m_float.convertToDouble();
-    memcpy(storage.data(), &val, sizeof(double));
+  case Category::Integral:
+    store(m_integer);
     break;
-  }
-  case e_long_double: {
-    llvm::APInt val = m_float.bitcastToAPInt();
-    StoreIntToMemory(val, storage.data(), storage.size());
+  case Category::Float:
+    store(m_float.bitcastToAPInt());
     break;
   }
-  }
 }
 
 size_t Scalar::GetByteSize() const {
@@ -167,26 +200,12 @@ size_t Scalar::GetByteSize() const {
 }
 
 bool Scalar::IsZero() const {
-  llvm::APInt zero_int = llvm::APInt::getNullValue(m_integer.getBitWidth() / 8);
-  switch (m_type) {
-  case e_void:
+  switch (GetCategory(m_type)) {
+  case Category::Void:
     break;
-  case e_sint:
-  case e_uint:
-  case e_slong:
-  case e_ulong:
-  case e_slonglong:
-  case e_ulonglong:
-  case e_sint128:
-  case e_uint128:
-  case e_sint256:
-  case e_uint256:
-  case e_uint512:
-  case e_sint512:
-    return llvm::APInt::isSameValue(zero_int, m_integer);
-  case e_float:
-  case e_double:
-  case e_long_double:
+  case Category::Integral:
+    return m_integer.isNullValue();
+  case Category::Float:
     return m_float.isZero();
   }
   return false;
@@ -196,31 +215,16 @@ void Scalar::GetValue(Stream *s, bool show_type) const {
   if (show_type)
     s->Printf("(%s) ", GetTypeAsCString());
 
-  switch (m_type) {
-  case e_void:
-    break;
-  case e_sint:
-  case e_slong:
-  case e_slonglong:
-  case e_sint128:
-  case e_sint256:
-  case e_sint512:
-    s->PutCString(m_integer.toString(10, true));
+  switch (GetCategory(m_type)) {
+  case Category::Void:
     break;
-  case e_uint:
-  case e_ulong:
-  case e_ulonglong:
-  case e_uint128:
-  case e_uint256:
-  case e_uint512:
-    s->PutCString(m_integer.toString(10, false));
+  case Category::Integral:
+    s->PutCString(m_integer.toString(10, IsSigned(m_type)));
     break;
-  case e_float:
-  case e_double:
-  case e_long_double:
+  case Category::Float:
     llvm::SmallString<24> string;
     m_float.toString(string);
-    s->Printf("%s", string.c_str());
+    s->PutCString(string);
     break;
   }
 }
@@ -325,59 +329,6 @@ static size_t GetBitSize(Scalar::Type type) {
   llvm_unreachable("Unhandled type!");
 }
 
-static bool IsSigned(Scalar::Type type) {
-  switch (type) {
-  case Scalar::e_void:
-  case Scalar::e_uint:
-  case Scalar::e_ulong:
-  case Scalar::e_ulonglong:
-  case Scalar::e_uint128:
-  case Scalar::e_uint256:
-  case Scalar::e_uint512:
-    return false;
-  case Scalar::e_sint:
-  case Scalar::e_slong:
-  case Scalar::e_slonglong:
-  case Scalar::e_sint128:
-  case Scalar::e_sint256:
-  case Scalar::e_sint512:
-  case Scalar::e_float:
-  case Scalar::e_double:
-  case Scalar::e_long_double:
-    return true;
-  }
-  llvm_unreachable("Unhandled type!");
-}
-
-namespace {
-enum class Category { Void, Integral, Float };
-}
-
-static Category GetCategory(Scalar::Type type) {
-  switch (type) {
-  case Scalar::e_void:
-    return Category::Void;
-  case Scalar::e_float:
-  case Scalar::e_double:
-  case Scalar::e_long_double:
-    return Category::Float;
-  case Scalar::e_sint:
-  case Scalar::e_slong:
-  case Scalar::e_slonglong:
-  case Scalar::e_sint128:
-  case Scalar::e_sint256:
-  case Scalar::e_sint512:
-  case Scalar::e_uint:
-  case Scalar::e_ulong:
-  case Scalar::e_ulonglong:
-  case Scalar::e_uint128:
-  case Scalar::e_uint256:
-  case Scalar::e_uint512:
-    return Category::Integral;
-  }
-  llvm_unreachable("Unhandled type!");
-}
-
 static const llvm::fltSemantics &GetFltSemantics(Scalar::Type type) {
   switch (type) {
   case Scalar::e_void:
@@ -849,27 +800,14 @@ Scalar &Scalar::operator+=(const Scalar &rhs) {
   const Scalar *b;
   if ((m_type = PromoteToMaxType(*this, rhs, temp_value, a, b)) !=
       Scalar::e_void) {
-    switch (m_type) {
-    case e_void:
+    switch (GetCategory(m_type)) {
+    case Category::Void:
       break;
-    case e_sint:
-    case e_uint:
-    case e_slong:
-    case e_ulong:
-    case e_slonglong:
-    case e_ulonglong:
-    case e_sint128:
-    case e_uint128:
-    case e_sint256:
-    case e_uint256:
-    case e_sint512:
-    case e_uint512:
+    case Category::Integral:
       m_integer = a->m_integer + b->m_integer;
       break;
 
-    case e_float:
-    case e_double:
-    case e_long_double:
+    case Category::Float:
       m_float = a->m_float + b->m_float;
       break;
     }
@@ -878,54 +816,25 @@ Scalar &Scalar::operator+=(const Scalar &rhs) {
 }
 
 Scalar &Scalar::operator<<=(const Scalar &rhs) {
-  switch (m_type) {
-  case e_void:
-  case e_float:
-  case e_double:
-  case e_long_double:
+  if (GetCategory(m_type) == Category::Integral &&
+      GetCategory(rhs.m_type) == Category::Integral)
+    m_integer <<= rhs.m_integer;
+  else
     m_type = e_void;
-    break;
-
-  case e_sint:
-  case e_uint:
-  case e_slong:
-  case e_ulong:
-  case e_slonglong:
-  case e_ulonglong:
-  case e_sint128:
-  case e_uint128:
-  case e_sint256:
-  case e_uint256:
-  case e_sint512:
-  case e_uint512:
-    switch (rhs.m_type) {
-    case e_void:
-    case e_float:
-    case e_double:
-    case e_long_double:
-      m_type = e_void;
-      break;
-    case e_sint:
-    case e_uint:
-    case e_slong:
-    case e_ulong:
-    case e_slonglong:
-    case e_ulonglong:
-    case e_sint128:
-    case e_uint128:
-    case e_sint256:
-    case e_uint256:
-    case e_sint512:
-    case e_uint512:
-      m_integer = m_integer << rhs.m_integer;
-      break;
-    }
-    break;
-  }
   return *this;
 }
 
 bool Scalar::ShiftRightLogical(const Scalar &rhs) {
+  if (GetCategory(m_type) == Category::Integral &&
+      GetCategory(rhs.m_type) == Category::Integral) {
+    m_integer = m_integer.lshr(rhs.m_integer);
+    return true;
+  }
+  m_type = e_void;
+  return false;
+}
+
+Scalar &Scalar::operator>>=(const Scalar &rhs) {
   switch (m_type) {
   case e_void:
   case e_float:
@@ -965,111 +874,24 @@ bool Scalar::ShiftRightLogical(const Scalar &rhs) {
     case e_uint256:
     case e_sint512:
     case e_uint512:
-      m_integer = m_integer.lshr(rhs.m_integer);
+      m_integer = m_integer.ashr(rhs.m_integer);
       break;
     }
     break;
   }
-  return m_type != e_void;
+  return *this;
 }
 
-Scalar &Scalar::operator>>=(const Scalar &rhs) {
-  switch (m_type) {
-  case e_void:
-  case e_float:
-  case e_double:
-  case e_long_double:
-    m_type = e_void;
-    break;
-
-  case e_sint:
-  case e_uint:
-  case e_slong:
-  case e_ulong:
-  case e_slonglong:
-  case e_ulonglong:
-  case e_sint128:
-  case e_uint128:
-  case e_sint256:
-  case e_uint256:
-  case e_sint512:
-  case e_uint512:
-    switch (rhs.m_type) {
-    case e_void:
-    case e_float:
-    case e_double:
-    case e_long_double:
-      m_type = e_void;
-      break;
-    case e_sint:
-    case e_uint:
-    case e_slong:
-    case e_ulong:
-    case e_slonglong:
-    case e_ulonglong:
-    case e_sint128:
-    case e_uint128:
-    case e_sint256:
-    case e_uint256:
-    case e_sint512:
-    case e_uint512:
-      m_integer = m_integer.ashr(rhs.m_integer);
-      break;
-    }
-    break;
-  }
-  return *this;
-}
-
-Scalar &Scalar::operator&=(const Scalar &rhs) {
-  switch (m_type) {
-  case e_void:
-  case e_float:
-  case e_double:
-  case e_long_double:
-    m_type = e_void;
-    break;
-
-  case e_sint:
-  case e_uint:
-  case e_slong:
-  case e_ulong:
-  case e_slonglong:
-  case e_ulonglong:
-  case e_sint128:
-  case e_uint128:
-  case e_sint256:
-  case e_uint256:
-  case e_sint512:
-  case e_uint512:
-    switch (rhs.m_type) {
-    case e_void:
-    case e_float:
-    case e_double:
-    case e_long_double:
-      m_type = e_void;
-      break;
-    case e_sint:
-    case e_uint:
-    case e_slong:
-    case e_ulong:
-    case e_slonglong:
-    case e_ulonglong:
-    case e_sint128:
-    case e_uint128:
-    case e_sint256:
-    case e_uint256:
-    case e_sint512:
-    case e_uint512:
-      m_integer &= rhs.m_integer;
-      break;
-    }
-    break;
-  }
-  return *this;
-}
-
-bool Scalar::AbsoluteValue() {
+Scalar &Scalar::operator&=(const Scalar &rhs) {
+  if (GetCategory(m_type) == Category::Integral &&
+      GetCategory(rhs.m_type) == Category::Integral)
+    m_integer &= rhs.m_integer;
+  else
+    m_type = e_void;
+  return *this;
+}
+
+bool Scalar::AbsoluteValue() {
   switch (m_type) {
   case e_void:
     break;
@@ -1101,26 +923,13 @@ bool Scalar::AbsoluteValue() {
 }
 
 bool Scalar::UnaryNegate() {
-  switch (m_type) {
-  case e_void:
+  switch (GetCategory(m_type)) {
+  case Category::Void:
     break;
-  case e_sint:
-  case e_uint:
-  case e_slong:
-  case e_ulong:
-  case e_slonglong:
-  case e_ulonglong:
-  case e_sint128:
-  case e_uint128:
-  case e_sint256:
-  case e_uint256:
-  case e_sint512:
-  case e_uint512:
+  case Category::Integral:
     m_integer = -m_integer;
     return true;
-  case e_float:
-  case e_double:
-  case e_long_double:
+  case Category::Float:
     m_float.changeSign();
     return true;
   }
@@ -1128,62 +937,17 @@ bool Scalar::UnaryNegate() {
 }
 
 bool Scalar::OnesComplement() {
-  switch (m_type) {
-  case e_sint:
-  case e_uint:
-  case e_slong:
-  case e_ulong:
-  case e_slonglong:
-  case e_ulonglong:
-  case e_sint128:
-  case e_uint128:
-  case e_sint256:
-  case e_uint256:
-  case e_sint512:
-  case e_uint512:
+  if (GetCategory(m_type) == Category::Integral) {
     m_integer = ~m_integer;
     return true;
-
-  case e_void:
-  case e_float:
-  case e_double:
-  case e_long_double:
-    break;
   }
+
   return false;
 }
 
 const Scalar lldb_private::operator+(const Scalar &lhs, const Scalar &rhs) {
-  Scalar result;
-  Scalar temp_value;
-  const Scalar *a;
-  const Scalar *b;
-  if ((result.m_type = PromoteToMaxType(lhs, rhs, temp_value, a, b)) !=
-      Scalar::e_void) {
-    switch (result.m_type) {
-    case Scalar::e_void:
-      break;
-    case Scalar::e_sint:
-    case Scalar::e_uint:
-    case Scalar::e_slong:
-    case Scalar::e_ulong:
-    case Scalar::e_slonglong:
-    case Scalar::e_ulonglong:
-    case Scalar::e_sint128:
-    case Scalar::e_uint128:
-    case Scalar::e_sint256:
-    case Scalar::e_uint256:
-    case Scalar::e_sint512:
-    case Scalar::e_uint512:
-      result.m_integer = a->m_integer + b->m_integer;
-      break;
-    case Scalar::e_float:
-    case Scalar::e_double:
-    case Scalar::e_long_double:
-      result.m_float = a->m_float + b->m_float;
-      break;
-    }
-  }
+  Scalar result = lhs;
+  result += rhs;
   return result;
 }
 
@@ -1194,26 +958,13 @@ const Scalar lldb_private::operator-(const Scalar &lhs, const Scalar &rhs) {
   const Scalar *b;
   if ((result.m_type = PromoteToMaxType(lhs, rhs, temp_value, a, b)) !=
       Scalar::e_void) {
-    switch (result.m_type) {
-    case Scalar::e_void:
+    switch (GetCategory(result.m_type)) {
+    case Category::Void:
       break;
-    case Scalar::e_sint:
-    case Scalar::e_uint:
-    case Scalar::e_slong:
-    case Scalar::e_ulong:
-    case Scalar::e_slonglong:
-    case Scalar::e_ulonglong:
-    case Scalar::e_sint128:
-    case Scalar::e_uint128:
-    case Scalar::e_sint256:
-    case Scalar::e_uint256:
-    case Scalar::e_sint512:
-    case Scalar::e_uint512:
+    case Category::Integral:
       result.m_integer = a->m_integer - b->m_integer;
       break;
-    case Scalar::e_float:
-    case Scalar::e_double:
-    case Scalar::e_long_double:
+    case Category::Float:
       result.m_float = a->m_float - b->m_float;
       break;
     }
@@ -1227,40 +978,20 @@ const Scalar lldb_private::operator/(const Scalar &lhs, const Scalar &rhs) {
   const Scalar *a;
   const Scalar *b;
   if ((result.m_type = PromoteToMaxType(lhs, rhs, temp_value, a, b)) !=
-      Scalar::e_void) {
-    switch (result.m_type) {
-    case Scalar::e_void:
+          Scalar::e_void &&
+      !b->IsZero()) {
+    switch (GetCategory(result.m_type)) {
+    case Category::Void:
       break;
-    case Scalar::e_sint:
-    case Scalar::e_slong:
-    case Scalar::e_slonglong:
-    case Scalar::e_sint128:
-    case Scalar::e_sint256:
-    case Scalar::e_sint512:
-      if (b->m_integer != 0) {
+    case Category::Integral:
+      if (IsSigned(result.m_type))
         result.m_integer = a->m_integer.sdiv(b->m_integer);
-        return result;
-      }
-      break;
-    case Scalar::e_uint:
-    case Scalar::e_ulong:
-    case Scalar::e_ulonglong:
-    case Scalar::e_uint128:
-    case Scalar::e_uint256:
-    case Scalar::e_uint512:
-      if (b->m_integer != 0) {
+      else 
         result.m_integer = a->m_integer.udiv(b->m_integer);
-        return result;
-      }
-      break;
-    case Scalar::e_float:
-    case Scalar::e_double:
-    case Scalar::e_long_double:
-      if (!b->m_float.isZero()) {
-        result.m_float = a->m_float / b->m_float;
-        return result;
-      }
-      break;
+      return result;
+    case Category::Float:
+      result.m_float = a->m_float / b->m_float;
+      return result;
     }
   }
   // For division only, the only way it should make it here is if a promotion
@@ -1276,26 +1007,13 @@ const Scalar lldb_private::operator*(const Scalar &lhs, const Scalar &rhs) {
   const Scalar *b;
   if ((result.m_type = PromoteToMaxType(lhs, rhs, temp_value, a, b)) !=
       Scalar::e_void) {
-    switch (result.m_type) {
-    case Scalar::e_void:
+    switch (GetCategory(result.m_type)) {
+    case Category::Void:
       break;
-    case Scalar::e_sint:
-    case Scalar::e_uint:
-    case Scalar::e_slong:
-    case Scalar::e_ulong:
-    case Scalar::e_slonglong:
-    case Scalar::e_ulonglong:
-    case Scalar::e_sint128:
-    case Scalar::e_uint128:
-    case Scalar::e_sint256:
-    case Scalar::e_uint256:
-    case Scalar::e_sint512:
-    case Scalar::e_uint512:
+    case Category::Integral:
       result.m_integer = a->m_integer * b->m_integer;
       break;
-    case Scalar::e_float:
-    case Scalar::e_double:
-    case Scalar::e_long_double:
+    case Category::Float:
       result.m_float = a->m_float * b->m_float;
       break;
     }
@@ -1310,29 +1028,10 @@ const Scalar lldb_private::operator&(const Scalar &lhs, const Scalar &rhs) {
   const Scalar *b;
   if ((result.m_type = PromoteToMaxType(lhs, rhs, temp_value, a, b)) !=
       Scalar::e_void) {
-    switch (result.m_type) {
-    case Scalar::e_sint:
-    case Scalar::e_uint:
-    case Scalar::e_slong:
-    case Scalar::e_ulong:
-    case Scalar::e_slonglong:
-    case Scalar::e_ulonglong:
-    case Scalar::e_sint128:
-    case Scalar::e_uint128:
-    case Scalar::e_sint256:
-    case Scalar::e_uint256:
-    case Scalar::e_sint512:
-    case Scalar::e_uint512:
+    if (GetCategory(result.m_type) == Category::Integral)
       result.m_integer = a->m_integer & b->m_integer;
-      break;
-    case Scalar::e_void:
-    case Scalar::e_float:
-    case Scalar::e_double:
-    case Scalar::e_long_double:
-      // No bitwise AND on floats, doubles of long doubles
+    else
       result.m_type = Scalar::e_void;
-      break;
-    }
   }
   return result;
 }
@@ -1344,30 +1043,10 @@ const Scalar lldb_private::operator|(const Scalar &lhs, const Scalar &rhs) {
   const Scalar *b;
   if ((result.m_type = PromoteToMaxType(lhs, rhs, temp_value, a, b)) !=
       Scalar::e_void) {
-    switch (result.m_type) {
-    case Scalar::e_sint:
-    case Scalar::e_uint:
-    case Scalar::e_slong:
-    case Scalar::e_ulong:
-    case Scalar::e_slonglong:
-    case Scalar::e_ulonglong:
-    case Scalar::e_sint128:
-    case Scalar::e_uint128:
-    case Scalar::e_sint256:
-    case Scalar::e_uint256:
-    case Scalar::e_sint512:
-    case Scalar::e_uint512:
+    if (GetCategory(result.m_type) == Category::Integral)
       result.m_integer = a->m_integer | b->m_integer;
-      break;
-
-    case Scalar::e_void:
-    case Scalar::e_float:
-    case Scalar::e_double:
-    case Scalar::e_long_double:
-      // No bitwise AND on floats, doubles of long doubles
+    else
       result.m_type = Scalar::e_void;
-      break;
-    }
   }
   return result;
 }
@@ -1379,33 +1058,12 @@ const Scalar lldb_private::operator%(const Scalar &lhs, const Scalar &rhs) {
   const Scalar *b;
   if ((result.m_type = PromoteToMaxType(lhs, rhs, temp_value, a, b)) !=
       Scalar::e_void) {
-    switch (result.m_type) {
-    default:
-      break;
-    case Scalar::e_void:
-      break;
-    case Scalar::e_sint:
-    case Scalar::e_slong:
-    case Scalar::e_slonglong:
-    case Scalar::e_sint128:
-    case Scalar::e_sint256:
-    case Scalar::e_sint512:
-      if (b->m_integer != 0) {
+    if (!b->IsZero() && GetCategory(result.m_type) == Category::Integral) {
+      if (IsSigned(result.m_type))
         result.m_integer = a->m_integer.srem(b->m_integer);
-        return result;
-      }
-      break;
-    case Scalar::e_uint:
-    case Scalar::e_ulong:
-    case Scalar::e_ulonglong:
-    case Scalar::e_uint128:
-    case Scalar::e_uint256:
-    case Scalar::e_uint512:
-      if (b->m_integer != 0) {
+      else
         result.m_integer = a->m_integer.urem(b->m_integer);
-        return result;
-      }
-      break;
+      return result;
     }
   }
   result.m_type = Scalar::e_void;
@@ -1419,30 +1077,10 @@ const Scalar lldb_private::operator^(const Scalar &lhs, const Scalar &rhs) {
   const Scalar *b;
   if ((result.m_type = PromoteToMaxType(lhs, rhs, temp_value, a, b)) !=
       Scalar::e_void) {
-    switch (result.m_type) {
-    case Scalar::e_sint:
-    case Scalar::e_uint:
-    case Scalar::e_slong:
-    case Scalar::e_ulong:
-    case Scalar::e_slonglong:
-    case Scalar::e_ulonglong:
-    case Scalar::e_sint128:
-    case Scalar::e_uint128:
-    case Scalar::e_sint256:
-    case Scalar::e_uint256:
-    case Scalar::e_sint512:
-    case Scalar::e_uint512:
+    if (GetCategory(result.m_type) == Category::Integral)
       result.m_integer = a->m_integer ^ b->m_integer;
-      break;
-
-    case Scalar::e_void:
-    case Scalar::e_float:
-    case Scalar::e_double:
-    case Scalar::e_long_double:
-      // No bitwise AND on floats, doubles of long doubles
+    else
       result.m_type = Scalar::e_void;
-      break;
-    }
   }
   return result;
 }
diff --git a/lldb/test/API/functionalities/limit-debug-info/TestLimitDebugInfo.py b/lldb/test/API/functionalities/limit-debug-info/TestLimitDebugInfo.py
index d22aeaace7bf08..396861f5eb762a 100644
--- a/lldb/test/API/functionalities/limit-debug-info/TestLimitDebugInfo.py
+++ b/lldb/test/API/functionalities/limit-debug-info/TestLimitDebugInfo.py
@@ -44,11 +44,16 @@ def test_one_and_two_debug(self):
         # all members.
         self.expect_expr("inherits_from_one.member", result_value="47")
         self.expect_expr("inherits_from_one.one", result_value="142")
-
         self.expect_expr("inherits_from_two.member", result_value="47")
         self.expect_expr("inherits_from_two.one", result_value="142")
         self.expect_expr("inherits_from_two.two", result_value="242")
 
+        self.expect_expr("one_as_member.member", result_value="47")
+        self.expect_expr("one_as_member.one.member", result_value="147")
+        self.expect_expr("two_as_member.member", result_value="47")
+        self.expect_expr("two_as_member.two.one.member", result_value="147")
+        self.expect_expr("two_as_member.two.member", result_value="247")
+
     @skipIf(bugnumber="pr46284", debug_info="gmodules")
     @skipIfWindows # Clang emits type info even with -flimit-debug-info
     def test_two_debug(self):
@@ -63,12 +68,19 @@ def test_two_debug(self):
         self.expect_expr("inherits_from_one.member", result_value="47")
         self.expect("expr inherits_from_one.one", error=True,
             substrs=["no member named 'one' in 'InheritsFromOne'"])
-
         self.expect_expr("inherits_from_two.member", result_value="47")
         self.expect("expr inherits_from_two.one", error=True,
             substrs=["no member named 'one' in 'InheritsFromTwo'"])
         self.expect_expr("inherits_from_two.two", result_value="242")
 
+        self.expect_expr("one_as_member.member", result_value="47")
+        self.expect("expr one_as_member.one.member", error=True,
+                substrs=["no member named 'member' in 'member::One'"])
+        self.expect_expr("two_as_member.member", result_value="47")
+        self.expect("expr two_as_member.two.one.member", error=True,
+                substrs=["no member named 'member' in 'member::One'"])
+        self.expect_expr("two_as_member.two.member", result_value="247")
+
     @skipIf(bugnumber="pr46284", debug_info="gmodules")
     @skipIfWindows # Clang emits type info even with -flimit-debug-info
     def test_one_debug(self):
@@ -85,9 +97,16 @@ def test_one_debug(self):
         # "One".
         self.expect_expr("inherits_from_one.member", result_value="47")
         self.expect_expr("inherits_from_one.one", result_value="142")
-
         self.expect_expr("inherits_from_two.member", result_value="47")
         self.expect("expr inherits_from_two.one", error=True,
             substrs=["no member named 'one' in 'InheritsFromTwo'"])
         self.expect("expr inherits_from_two.two", error=True,
             substrs=["no member named 'two' in 'InheritsFromTwo'"])
+
+        self.expect_expr("one_as_member.member", result_value="47")
+        self.expect_expr("one_as_member.one.member", result_value="147")
+        self.expect_expr("two_as_member.member", result_value="47")
+        self.expect("expr two_as_member.two.one.member", error=True,
+                substrs=["no member named 'one' in 'member::Two'"])
+        self.expect("expr two_as_member.two.member", error=True,
+                substrs=["no member named 'member' in 'member::Two'"])
diff --git a/lldb/test/API/functionalities/limit-debug-info/main.cpp b/lldb/test/API/functionalities/limit-debug-info/main.cpp
index e3049ed74489c1..886b3feec434dd 100644
--- a/lldb/test/API/functionalities/limit-debug-info/main.cpp
+++ b/lldb/test/API/functionalities/limit-debug-info/main.cpp
@@ -10,4 +10,16 @@ struct InheritsFromTwo : Two {
   int member = 47;
 } inherits_from_two;
 
+struct OneAsMember {
+  constexpr OneAsMember() = default;
+  member::One one;
+  int member = 47;
+} one_as_member;
+
+struct TwoAsMember {
+  constexpr TwoAsMember() = default;
+  member::Two two;
+  int member = 47;
+} two_as_member;
+
 int main() { return 0; }
diff --git a/lldb/test/API/functionalities/limit-debug-info/one.cpp b/lldb/test/API/functionalities/limit-debug-info/one.cpp
index 728875dd9e5572..ee275e3321e419 100644
--- a/lldb/test/API/functionalities/limit-debug-info/one.cpp
+++ b/lldb/test/API/functionalities/limit-debug-info/one.cpp
@@ -1,3 +1,4 @@
 #include "onetwo.h"
 
 One::~One() = default;
+member::One::~One() = default;
diff --git a/lldb/test/API/functionalities/limit-debug-info/onetwo.h b/lldb/test/API/functionalities/limit-debug-info/onetwo.h
index 82df76c64b584d..6822d84803fe43 100644
--- a/lldb/test/API/functionalities/limit-debug-info/onetwo.h
+++ b/lldb/test/API/functionalities/limit-debug-info/onetwo.h
@@ -9,3 +9,18 @@ struct Two : One {
   constexpr Two() = default;
   ~Two() override;
 };
+
+namespace member {
+struct One {
+  int member = 147;
+  constexpr One() = default;
+  virtual ~One();
+};
+
+struct Two {
+  One one;
+  int member = 247;
+  constexpr Two() = default;
+  virtual ~Two();
+};
+} // namespace member
diff --git a/lldb/test/API/functionalities/limit-debug-info/two.cpp b/lldb/test/API/functionalities/limit-debug-info/two.cpp
index 928b091728c38f..db98c5e8d3dccc 100644
--- a/lldb/test/API/functionalities/limit-debug-info/two.cpp
+++ b/lldb/test/API/functionalities/limit-debug-info/two.cpp
@@ -1,3 +1,4 @@
 #include "onetwo.h"
 
 Two::~Two() = default;
+member::Two::~Two() = default;
diff --git a/lldb/test/Shell/SymbolFile/DWARF/forward-declarations.s b/lldb/test/Shell/SymbolFile/DWARF/forward-declarations.s
deleted file mode 100644
index 952c4ee7f87ed7..00000000000000
--- a/lldb/test/Shell/SymbolFile/DWARF/forward-declarations.s
+++ /dev/null
@@ -1,111 +0,0 @@
-# Test handling of the situation (including the error message) where a structure
-# has a incomplete member.
-
-# REQUIRES: x86
-
-# RUN: llvm-mc -triple x86_64-pc-linux -filetype=obj %s -o %t
-# RUN: %lldb %t -o "target var b" -b 2>&1 | FileCheck %s
-
-# CHECK: error: {{.*}} DWARF DIE at 0x0000002b (class B) has a member variable 0x00000030 (a) whose type is a forward declaration, not a complete definition.
-# CHECK-NEXT: Please file a bug against the compiler and include the preprocessed output for /tmp/a.cc
-
-# CHECK: b = (a = A @ 0x0000000000000001)
-
-        .type   b,@object               # @b
-        .comm   b,1,1
-        .section        .debug_str,"MS",@progbits,1
-.Linfo_string0:
-        .asciz  "Hand-written DWARF"
-.Lcu_name:
-        .asciz  "/tmp/a.cc"
-.Lcu_compdir:
-        .asciz  "/foo/bar"
-.Lb:
-        .asciz  "b"
-.La:
-        .asciz  "a"
-.LA:
-        .asciz  "A"
-.LB:
-        .asciz  "B"
-
-        .section        .debug_abbrev,"",@progbits
-        .byte   1                       # Abbreviation Code
-        .byte   17                      # DW_TAG_compile_unit
-        .byte   1                       # DW_CHILDREN_yes
-        .byte   37                      # DW_AT_producer
-        .byte   14                      # DW_FORM_strp
-        .byte   3                       # DW_AT_name
-        .byte   14                      # DW_FORM_strp
-        .byte   27                      # DW_AT_comp_dir
-        .byte   14                      # DW_FORM_strp
-        .byte   0                       # EOM(1)
-        .byte   0                       # EOM(2)
-        .byte   2                       # Abbreviation Code
-        .byte   52                      # DW_TAG_variable
-        .byte   0                       # DW_CHILDREN_no
-        .byte   3                       # DW_AT_name
-        .byte   14                      # DW_FORM_strp
-        .byte   73                      # DW_AT_type
-        .byte   19                      # DW_FORM_ref4
-        .byte   2                       # DW_AT_location
-        .byte   24                      # DW_FORM_exprloc
-        .byte   0                       # EOM(1)
-        .byte   0                       # EOM(2)
-        .byte   3                       # Abbreviation Code
-        .byte   19                      # DW_TAG_structure_type
-        .byte   1                       # DW_CHILDREN_yes
-        .byte   3                       # DW_AT_name
-        .byte   14                      # DW_FORM_strp
-        .byte   0                       # EOM(1)
-        .byte   0                       # EOM(2)
-        .byte   4                       # Abbreviation Code
-        .byte   13                      # DW_TAG_member
-        .byte   0                       # DW_CHILDREN_no
-        .byte   3                       # DW_AT_name
-        .byte   14                      # DW_FORM_strp
-        .byte   73                      # DW_AT_type
-        .byte   19                      # DW_FORM_ref4
-        .byte   0                       # EOM(1)
-        .byte   0                       # EOM(2)
-        .byte   5                       # Abbreviation Code
-        .byte   19                      # DW_TAG_structure_type
-        .byte   0                       # DW_CHILDREN_no
-        .byte   60                      # DW_AT_declaration
-        .byte   25                      # DW_FORM_flag_present
-        .byte   3                       # DW_AT_name
-        .byte   14                      # DW_FORM_strp
-        .byte   0                       # EOM(1)
-        .byte   0                       # EOM(2)
-        .byte   0                       # EOM(3)
-
-        .section        .debug_info,"",@progbits
-.Lcu_begin0:
-        .long   .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
-.Ldebug_info_start0:
-        .short  4                       # DWARF version number
-        .long   .debug_abbrev           # Offset Into Abbrev. Section
-        .byte   8                       # Address Size (in bytes)
-        .byte   1                       # Abbrev [1] 0xb:0x46 DW_TAG_compile_unit
-        .long   .Linfo_string0          # DW_AT_producer
-        .long   .Lcu_name               # DW_AT_name
-        .long   .Lcu_compdir            # DW_AT_comp_dir
-        .byte   2                       # Abbrev [2] 0x1e:0x15 DW_TAG_variable
-        .long   .Lb                     # DW_AT_name
-        .long   .LB_die-.Lcu_begin0     # DW_AT_type
-        .byte   9                       # DW_AT_location
-        .byte   3
-        .quad   b
-.LB_die:
-        .byte   3                       # Abbrev [3] 0x33:0x15 DW_TAG_structure_type
-        .long   .LB                     # DW_AT_name
-        .byte   4                       # Abbrev [4] 0x3b:0xc DW_TAG_member
-        .long   .La                     # DW_AT_name
-        .long   .LA_die-.Lcu_begin0     # DW_AT_type
-        .byte   0                       # End Of Children Mark
-.LA_die:
-        .byte   5                       # Abbrev [5] 0x48:0x8 DW_TAG_structure_type
-                                        # DW_AT_declaration
-        .long   .LA                     # DW_AT_name
-        .byte   0                       # End Of Children Mark
-.Ldebug_info_end0:
diff --git a/llvm/lib/ObjectYAML/DWARFYAML.cpp b/llvm/lib/ObjectYAML/DWARFYAML.cpp
index 892f9b6e8de00a..bedf31dc8179fa 100644
--- a/llvm/lib/ObjectYAML/DWARFYAML.cpp
+++ b/llvm/lib/ObjectYAML/DWARFYAML.cpp
@@ -42,6 +42,10 @@ SetVector<StringRef> DWARFYAML::Data::getUsedSectionNames() const {
     SecNames.insert("debug_pubnames");
   if (PubTypes)
     SecNames.insert("debug_pubtypes");
+  if (GNUPubNames)
+    SecNames.insert("debug_gnu_pubnames");
+  if (GNUPubTypes)
+    SecNames.insert("debug_gnu_pubtypes");
   return SecNames;
 }
 
diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp
index c985bd24a4de62..218e7df8e39a51 100644
--- a/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -947,6 +947,12 @@ Expected<uint64_t> emitDWARF(typename ELFT::Shdr &SHeader, StringRef Name,
     Err = DWARFYAML::emitPubSection(*OS, *DWARF.PubNames, DWARF.IsLittleEndian);
   else if (Name == ".debug_pubtypes")
     Err = DWARFYAML::emitPubSection(*OS, *DWARF.PubTypes, DWARF.IsLittleEndian);
+  else if (Name == ".debug_gnu_pubnames")
+    Err = DWARFYAML::emitPubSection(*OS, *DWARF.GNUPubNames,
+                                    DWARF.IsLittleEndian, /*IsGNUStyle=*/true);
+  else if (Name == ".debug_gnu_pubtypes")
+    Err = DWARFYAML::emitPubSection(*OS, *DWARF.GNUPubTypes,
+                                    DWARF.IsLittleEndian, /*IsGNUStyle=*/true);
   else
     llvm_unreachable("unexpected emitDWARF() call");
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0c3c7c74458f34..aaeb6b45991545 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -838,6 +838,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::UADDSAT, VT, Legal);
       setOperationAction(ISD::SSUBSAT, VT, Legal);
       setOperationAction(ISD::USUBSAT, VT, Legal);
+
+      setOperationAction(ISD::TRUNCATE, VT, Custom);
     }
     for (MVT VT : { MVT::v4f16, MVT::v2f32,
                     MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
@@ -1432,6 +1434,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::FCMLTz)
     MAKE_CASE(AArch64ISD::SADDV)
     MAKE_CASE(AArch64ISD::UADDV)
+    MAKE_CASE(AArch64ISD::SRHADD)
+    MAKE_CASE(AArch64ISD::URHADD)
     MAKE_CASE(AArch64ISD::SMINV)
     MAKE_CASE(AArch64ISD::UMINV)
     MAKE_CASE(AArch64ISD::SMAXV)
@@ -3260,6 +3264,14 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
                        Op.getOperand(3));
   }
+
+  case Intrinsic::aarch64_neon_srhadd:
+  case Intrinsic::aarch64_neon_urhadd: {
+    bool IsSignedAdd = IntNo == Intrinsic::aarch64_neon_srhadd;
+    unsigned Opcode = IsSignedAdd ? AArch64ISD::SRHADD : AArch64ISD::URHADD;
+    return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
+                       Op.getOperand(2));
+  }
   }
 }
 
@@ -3524,6 +3536,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerDYNAMIC_STACKALLOC(Op, DAG);
   case ISD::VSCALE:
     return LowerVSCALE(Op, DAG);
+  case ISD::TRUNCATE:
+    return LowerTRUNCATE(Op, DAG);
   case ISD::LOAD:
     if (useSVEForFixedLengthVectorVT(Op.getValueType()))
       return LowerFixedLengthVectorLoadToSVE(Op, DAG);
@@ -8773,6 +8787,78 @@ static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
   return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
 }
 
+// Attempt to form urhadd(OpA, OpB) from
+// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1)).
+// The original form of this expression is
+// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and before this function
+// is called the srl will have been lowered to AArch64ISD::VLSHR and the
+// ((OpA + OpB + 1) >> 1) expression will have been changed to (OpB - (~OpA)).
+// This pass can also recognize a variant of this pattern that uses sign
+// extension instead of zero extension and form a srhadd(OpA, OpB) from it.
+SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+
+  if (!VT.isVector() || VT.isScalableVector())
+    return Op;
+
+  // Since we are looking for a right shift by a constant value of 1 and we are
+  // operating on types at least 16 bits in length (sign/zero extended OpA and
+  // OpB, which are at least 8 bits), it follows that the truncate will always
+  // discard the shifted-in bit and therefore the right shift will be logical
+  // regardless of the signedness of OpA and OpB.
+  SDValue Shift = Op.getOperand(0);
+  if (Shift.getOpcode() != AArch64ISD::VLSHR)
+    return Op;
+
+  // Is the right shift using an immediate value of 1?
+  uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
+  if (ShiftAmount != 1)
+    return Op;
+
+  SDValue Sub = Shift->getOperand(0);
+  if (Sub.getOpcode() != ISD::SUB)
+    return Op;
+
+  SDValue Xor = Sub.getOperand(1);
+  if (Xor.getOpcode() != ISD::XOR)
+    return Op;
+
+  SDValue ExtendOpA = Xor.getOperand(0);
+  SDValue ExtendOpB = Sub.getOperand(0);
+  unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
+  unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
+  if (!(ExtendOpAOpc == ExtendOpBOpc &&
+        (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
+    return Op;
+
+  // Is the result of the right shift being truncated to the same value type as
+  // the original operands, OpA and OpB?
+  SDValue OpA = ExtendOpA.getOperand(0);
+  SDValue OpB = ExtendOpB.getOperand(0);
+  EVT OpAVT = OpA.getValueType();
+  assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
+  if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
+    return Op;
+
+  // Is the XOR using a constant amount of all ones in the right hand side?
+  uint64_t C;
+  if (!isAllConstantBuildVector(Xor.getOperand(1), C))
+    return Op;
+
+  unsigned ElemSizeInBits = VT.getScalarSizeInBits();
+  APInt CAsAPInt(ElemSizeInBits, C);
+  if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits))
+    return Op;
+
+  SDLoc DL(Op);
+  bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
+  unsigned RHADDOpc = IsSignExtend ? AArch64ISD::SRHADD : AArch64ISD::URHADD;
+  SDValue ResultURHADD = DAG.getNode(RHADDOpc, DL, VT, OpA, OpB);
+
+  return ResultURHADD;
+}
+
 SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
                                                       SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
@@ -10982,6 +11068,7 @@ static SDValue performConcatVectorsCombine(SDNode *N,
   SDLoc dl(N);
   EVT VT = N->getValueType(0);
   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+  unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
 
   // Optimize concat_vectors of truncated vectors, where the intermediate
   // type is illegal, to avoid said illegality,  e.g.,
@@ -10994,9 +11081,8 @@ static SDValue performConcatVectorsCombine(SDNode *N,
   // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
   // on both input and result type, so we might generate worse code.
   // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
-  if (N->getNumOperands() == 2 &&
-      N0->getOpcode() == ISD::TRUNCATE &&
-      N1->getOpcode() == ISD::TRUNCATE) {
+  if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
+      N1Opc == ISD::TRUNCATE) {
     SDValue N00 = N0->getOperand(0);
     SDValue N10 = N1->getOperand(0);
     EVT N00VT = N00.getValueType();
@@ -11021,6 +11107,52 @@ static SDValue performConcatVectorsCombine(SDNode *N,
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
+  // Optimise concat_vectors of two [us]rhadds that use extracted subvectors
+  // from the same original vectors. Combine these into a single [us]rhadd that
+  // operates on the two original vectors. Example:
+  //  (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>),
+  //                                        extract_subvector (v16i8 OpB,
+  //                                        <0>))),
+  //                         (v8i8 (urhadd (extract_subvector (v16i8 OpA, <8>),
+  //                                        extract_subvector (v16i8 OpB,
+  //                                        <8>)))))
+  // ->
+  //  (v16i8(urhadd(v16i8 OpA, v16i8 OpB)))
+  if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
+      (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD)) {
+    SDValue N00 = N0->getOperand(0);
+    SDValue N01 = N0->getOperand(1);
+    SDValue N10 = N1->getOperand(0);
+    SDValue N11 = N1->getOperand(1);
+
+    EVT N00VT = N00.getValueType();
+    EVT N10VT = N10.getValueType();
+
+    if (N00->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+        N01->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+        N10->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+        N11->getOpcode() == ISD::EXTRACT_SUBVECTOR && N00VT == N10VT) {
+      SDValue N00Source = N00->getOperand(0);
+      SDValue N01Source = N01->getOperand(0);
+      SDValue N10Source = N10->getOperand(0);
+      SDValue N11Source = N11->getOperand(0);
+
+      if (N00Source == N10Source && N01Source == N11Source &&
+          N00Source.getValueType() == VT && N01Source.getValueType() == VT) {
+        assert(N0.getValueType() == N1.getValueType());
+
+        uint64_t N00Index = N00.getConstantOperandVal(1);
+        uint64_t N01Index = N01.getConstantOperandVal(1);
+        uint64_t N10Index = N10.getConstantOperandVal(1);
+        uint64_t N11Index = N11.getConstantOperandVal(1);
+
+        if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
+            N10Index == N00VT.getVectorNumElements())
+          return DAG.getNode(N0Opc, dl, VT, N00Source, N01Source);
+      }
+    }
+  }
+
   // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
   // splat. The indexed instructions are going to be expecting a DUPLANE64, so
   // canonicalise to that.
@@ -11039,7 +11171,7 @@ static SDValue performConcatVectorsCombine(SDNode *N,
   // becomes
   //    (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
 
-  if (N1->getOpcode() != ISD::BITCAST)
+  if (N1Opc != ISD::BITCAST)
     return SDValue();
   SDValue RHS = N1->getOperand(0);
   MVT RHSTy = RHS.getValueType().getSimpleVT();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index bc59f0aedc4bf8..210b8c84270193 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -187,6 +187,10 @@ enum NodeType : unsigned {
   SADDV,
   UADDV,
 
+  // Vector rounding halving addition
+  SRHADD,
+  URHADD,
+
   // Vector across-lanes min/max
   // Only the lower result lane is defined.
   SMINV,
@@ -863,6 +867,7 @@ class AArch64TargetLowering : public TargetLowering {
   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 276573f82d660c..f4a5f639e4973d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -554,6 +554,9 @@ def AArch64uminv    : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
 def AArch64smaxv    : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
 def AArch64umaxv    : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
 
+def AArch64srhadd   : SDNode<"AArch64ISD::SRHADD", SDT_AArch64binvec>;
+def AArch64urhadd   : SDNode<"AArch64ISD::URHADD", SDT_AArch64binvec>;
+
 def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
 def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -4073,7 +4076,7 @@ defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrd
 defm SQRSHL   : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
 defm SQSHL    : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
 defm SQSUB    : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
-defm SRHADD   : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_aarch64_neon_srhadd>;
+defm SRHADD   : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", AArch64srhadd>;
 defm SRSHL    : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
 defm SSHL     : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
 defm SUB      : SIMDThreeSameVector<1,0b10000,"sub", sub>;
@@ -4090,7 +4093,7 @@ defm UQADD    : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
 defm UQRSHL   : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
 defm UQSHL    : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
 defm UQSUB    : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
-defm URHADD   : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>;
+defm URHADD   : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", AArch64urhadd>;
 defm URSHL    : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
 defm USHL     : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
 defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 8d9ebe4572315b..7b3c503facf18e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -377,29 +377,31 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
     break;
   }
   case Instruction::Shl: {
-    // If we are truncating the result of this SHL, and if it's a shift of a
-    // constant amount, we can always perform a SHL in a smaller type.
-    const APInt *Amt;
-    if (match(I->getOperand(1), m_APInt(Amt))) {
-      uint32_t BitWidth = Ty->getScalarSizeInBits();
-      if (Amt->getLimitedValue(BitWidth) < BitWidth)
-        return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
-    }
+    // If we are truncating the result of this SHL, and if it's a shift of an
+    // inrange amount, we can always perform a SHL in a smaller type.
+    uint32_t BitWidth = Ty->getScalarSizeInBits();
+    KnownBits AmtKnownBits =
+        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
+    if (AmtKnownBits.getMaxValue().ult(BitWidth))
+      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
     break;
   }
   case Instruction::LShr: {
     // If this is a truncate of a logical shr, we can truncate it to a smaller
     // lshr iff we know that the bits we would otherwise be shifting in are
     // already zeros.
-    const APInt *Amt;
-    if (match(I->getOperand(1), m_APInt(Amt))) {
-      uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
-      uint32_t BitWidth = Ty->getScalarSizeInBits();
-      if (Amt->getLimitedValue(BitWidth) < BitWidth &&
-          IC.MaskedValueIsZero(I->getOperand(0),
-            APInt::getBitsSetFrom(OrigBitWidth, BitWidth), 0, CxtI)) {
-        return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
-      }
+    // TODO: It is enough to check that the bits we would be shifting in are
+    //       zero - use AmtKnownBits.getMaxValue().
+    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+    uint32_t BitWidth = Ty->getScalarSizeInBits();
+    KnownBits AmtKnownBits =
+        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
+    APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
+    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
+        IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
+      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
     }
     break;
   }
@@ -409,15 +411,15 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
     // original type and the sign bit of the truncate type are similar.
     // TODO: It is enough to check that the bits we would be shifting in are
     //       similar to sign bit of the truncate type.
-    const APInt *Amt;
-    if (match(I->getOperand(1), m_APInt(Amt))) {
-      uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
-      uint32_t BitWidth = Ty->getScalarSizeInBits();
-      if (Amt->getLimitedValue(BitWidth) < BitWidth &&
-          OrigBitWidth - BitWidth <
-              IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
-        return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
-    }
+    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+    uint32_t BitWidth = Ty->getScalarSizeInBits();
+    KnownBits AmtKnownBits =
+        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
+    unsigned ShiftedBits = OrigBitWidth - BitWidth;
+    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
+        ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
+      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
     break;
   }
   case Instruction::Trunc:
diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
index cd650e1debf865..a5d223cc8aefbe 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
@@ -1,8 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @shadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: shadd8b:
-;CHECK: shadd.8b
+; CHECK-LABEL: shadd8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    shadd.8b v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -10,8 +15,12 @@ define <8 x i8> @shadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <16 x i8> @shadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: shadd16b:
-;CHECK: shadd.16b
+; CHECK-LABEL: shadd16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    shadd.16b v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -19,8 +28,12 @@ define <16 x i8> @shadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @shadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: shadd4h:
-;CHECK: shadd.4h
+; CHECK-LABEL: shadd4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    shadd.4h v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -28,8 +41,12 @@ define <4 x i16> @shadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <8 x i16> @shadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: shadd8h:
-;CHECK: shadd.8h
+; CHECK-LABEL: shadd8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    shadd.8h v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -37,8 +54,12 @@ define <8 x i16> @shadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @shadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: shadd2s:
-;CHECK: shadd.2s
+; CHECK-LABEL: shadd2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    shadd.2s v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -46,8 +67,12 @@ define <2 x i32> @shadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <4 x i32> @shadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: shadd4s:
-;CHECK: shadd.4s
+; CHECK-LABEL: shadd4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    shadd.4s v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -55,8 +80,12 @@ define <4 x i32> @shadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @uhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: uhadd8b:
-;CHECK: uhadd.8b
+; CHECK-LABEL: uhadd8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    uhadd.8b v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -64,8 +93,12 @@ define <8 x i8> @uhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <16 x i8> @uhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: uhadd16b:
-;CHECK: uhadd.16b
+; CHECK-LABEL: uhadd16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    uhadd.16b v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -73,8 +106,12 @@ define <16 x i8> @uhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @uhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: uhadd4h:
-;CHECK: uhadd.4h
+; CHECK-LABEL: uhadd4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    uhadd.4h v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -82,8 +119,12 @@ define <4 x i16> @uhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <8 x i16> @uhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: uhadd8h:
-;CHECK: uhadd.8h
+; CHECK-LABEL: uhadd8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    uhadd.8h v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -91,8 +132,12 @@ define <8 x i16> @uhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @uhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: uhadd2s:
-;CHECK: uhadd.2s
+; CHECK-LABEL: uhadd2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    uhadd.2s v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -100,8 +145,12 @@ define <2 x i32> @uhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <4 x i32> @uhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: uhadd4s:
-;CHECK: uhadd.4s
+; CHECK-LABEL: uhadd4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    uhadd.4s v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -125,8 +174,12 @@ declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>) nounwind
 declare <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 
 define <8 x i8> @srhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: srhadd8b:
-;CHECK: srhadd.8b
+; CHECK-LABEL: srhadd8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    srhadd.8b v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -134,8 +187,12 @@ define <8 x i8> @srhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <16 x i8> @srhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: srhadd16b:
-;CHECK: srhadd.16b
+; CHECK-LABEL: srhadd16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    srhadd.16b v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -143,8 +200,12 @@ define <16 x i8> @srhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @srhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: srhadd4h:
-;CHECK: srhadd.4h
+; CHECK-LABEL: srhadd4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    srhadd.4h v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -152,8 +213,12 @@ define <4 x i16> @srhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <8 x i16> @srhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: srhadd8h:
-;CHECK: srhadd.8h
+; CHECK-LABEL: srhadd8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    srhadd.8h v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -161,8 +226,12 @@ define <8 x i16> @srhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @srhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: srhadd2s:
-;CHECK: srhadd.2s
+; CHECK-LABEL: srhadd2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    srhadd.2s v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -170,8 +239,12 @@ define <2 x i32> @srhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <4 x i32> @srhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: srhadd4s:
-;CHECK: srhadd.4s
+; CHECK-LABEL: srhadd4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    srhadd.4s v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -179,8 +252,12 @@ define <4 x i32> @srhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @urhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: urhadd8b:
-;CHECK: urhadd.8b
+; CHECK-LABEL: urhadd8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    urhadd.8b v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -188,8 +265,12 @@ define <8 x i8> @urhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <16 x i8> @urhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: urhadd16b:
-;CHECK: urhadd.16b
+; CHECK-LABEL: urhadd16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    urhadd.16b v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -197,8 +278,12 @@ define <16 x i8> @urhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @urhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: urhadd4h:
-;CHECK: urhadd.4h
+; CHECK-LABEL: urhadd4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    urhadd.4h v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -206,8 +291,12 @@ define <4 x i16> @urhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <8 x i16> @urhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: urhadd8h:
-;CHECK: urhadd.8h
+; CHECK-LABEL: urhadd8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    urhadd.8h v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -215,8 +304,12 @@ define <8 x i16> @urhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @urhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: urhadd2s:
-;CHECK: urhadd.2s
+; CHECK-LABEL: urhadd2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    urhadd.2s v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -224,14 +317,210 @@ define <2 x i32> @urhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <4 x i32> @urhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: urhadd4s:
-;CHECK: urhadd.4s
+; CHECK-LABEL: urhadd4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    urhadd.4s v0, v0, v1
+; CHECK-NEXT:    ret
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
 
+define void @testLowerToSRHADD8b(<8 x i8> %src1, <8 x i8> %src2, <8 x i8>* %dest) nounwind {
+; CHECK-LABEL: testLowerToSRHADD8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srhadd.8b v0, v0, v1
+; CHECK-NEXT:    str d0, [x0]
+; CHECK-NEXT:    ret
+  %sextsrc1 = sext <8 x i8> %src1 to <8 x i16>
+  %sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
+  %add1 = add <8 x i16> %sextsrc1, %sextsrc2
+  %add2 = add <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %resulti16 = lshr <8 x i16> %add2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %result = trunc <8 x i16> %resulti16 to <8 x i8>
+  store <8 x i8> %result, <8 x i8>* %dest, align 8
+  ret void
+}
+
+define void @testLowerToSRHADD4h(<4 x i16> %src1, <4 x i16> %src2, <4 x i16>* %dest) nounwind {
+; CHECK-LABEL: testLowerToSRHADD4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srhadd.4h v0, v0, v1
+; CHECK-NEXT:    str d0, [x0]
+; CHECK-NEXT:    ret
+  %sextsrc1 = sext <4 x i16> %src1 to <4 x i32>
+  %sextsrc2 = sext <4 x i16> %src2 to <4 x i32>
+  %add1 = add <4 x i32> %sextsrc1, %sextsrc2
+  %add2 = add <4 x i32> %add1, <i32 1, i32 1, i32 1, i32 1>
+  %resulti16 = lshr <4 x i32> %add2, <i32 1, i32 1, i32 1, i32 1>
+  %result = trunc <4 x i32> %resulti16 to <4 x i16>
+  store <4 x i16> %result, <4 x i16>* %dest, align 8
+  ret void
+}
+
+define void @testLowerToSRHADD2s(<2 x i32> %src1, <2 x i32> %src2, <2 x i32>* %dest) nounwind {
+; CHECK-LABEL: testLowerToSRHADD2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srhadd.2s v0, v0, v1
+; CHECK-NEXT:    str d0, [x0]
+; CHECK-NEXT:    ret
+  %sextsrc1 = sext <2 x i32> %src1 to <2 x i64>
+  %sextsrc2 = sext <2 x i32> %src2 to <2 x i64>
+  %add1 = add <2 x i64> %sextsrc1, %sextsrc2
+  %add2 = add <2 x i64> %add1, <i64 1, i64 1>
+  %resulti16 = lshr <2 x i64> %add2, <i64 1, i64 1>
+  %result = trunc <2 x i64> %resulti16 to <2 x i32>
+  store <2 x i32> %result, <2 x i32>* %dest, align 8
+  ret void
+}
+
+define void @testLowerToSRHADD16b(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
+; CHECK-LABEL: testLowerToSRHADD16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srhadd.16b v0, v0, v1
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+  %sextsrc1 = sext <16 x i8> %src1 to <16 x i16>
+  %sextsrc2 = sext <16 x i8> %src2 to <16 x i16>
+  %add1 = add <16 x i16> %sextsrc1, %sextsrc2
+  %add2 = add <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %resulti16 = lshr <16 x i16> %add2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %result = trunc <16 x i16> %resulti16 to <16 x i8>
+  store <16 x i8> %result, <16 x i8>* %dest, align 16
+  ret void
+}
+
+define void @testLowerToSRHADD8h(<8 x i16> %src1, <8 x i16> %src2, <8 x i16>* %dest) nounwind {
+; CHECK-LABEL: testLowerToSRHADD8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srhadd.8h v0, v0, v1
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+  %sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
+  %sextsrc2 = sext <8 x i16> %src2 to <8 x i32>
+  %add1 = add <8 x i32> %sextsrc1, %sextsrc2
+  %add2 = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %resulti16 = lshr <8 x i32> %add2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %result = trunc <8 x i32> %resulti16 to <8 x i16>
+  store <8 x i16> %result, <8 x i16>* %dest, align 16
+  ret void
+}
+
+define void @testLowerToSRHADD4s(<4 x i32> %src1, <4 x i32> %src2, <4 x i32>* %dest) nounwind {
+; CHECK-LABEL: testLowerToSRHADD4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srhadd.4s v0, v0, v1
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+  %sextsrc1 = sext <4 x i32> %src1 to <4 x i64>
+  %sextsrc2 = sext <4 x i32> %src2 to <4 x i64>
+  %add1 = add <4 x i64> %sextsrc1, %sextsrc2
+  %add2 = add <4 x i64> %add1, <i64 1, i64 1, i64 1, i64 1>
+  %resulti16 = lshr <4 x i64> %add2, <i64 1, i64 1, i64 1, i64 1>
+  %result = trunc <4 x i64> %resulti16 to <4 x i32>
+  store <4 x i32> %result, <4 x i32>* %dest, align 16
+  ret void
+}
+
+define void @testLowerToURHADD8b(<8 x i8> %src1, <8 x i8> %src2, <8 x i8>* %dest) nounwind {
+; CHECK-LABEL: testLowerToURHADD8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urhadd.8b v0, v0, v1
+; CHECK-NEXT:    str d0, [x0]
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <8 x i8> %src1 to <8 x i16>
+  %zextsrc2 = zext <8 x i8> %src2 to <8 x i16>
+  %add1 = add <8 x i16> %zextsrc1, %zextsrc2
+  %add2 = add <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %resulti16 = lshr <8 x i16> %add2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %result = trunc <8 x i16> %resulti16 to <8 x i8>
+  store <8 x i8> %result, <8 x i8>* %dest, align 8
+  ret void
+}
+
+define void @testLowerToURHADD4h(<4 x i16> %src1, <4 x i16> %src2, <4 x i16>* %dest) nounwind {
+; CHECK-LABEL: testLowerToURHADD4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urhadd.4h v0, v0, v1
+; CHECK-NEXT:    str d0, [x0]
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
+  %zextsrc2 = zext <4 x i16> %src2 to <4 x i32>
+  %add1 = add <4 x i32> %zextsrc1, %zextsrc2
+  %add2 = add <4 x i32> %add1, <i32 1, i32 1, i32 1, i32 1>
+  %resulti16 = lshr <4 x i32> %add2, <i32 1, i32 1, i32 1, i32 1>
+  %result = trunc <4 x i32> %resulti16 to <4 x i16>
+  store <4 x i16> %result, <4 x i16>* %dest, align 8
+  ret void
+}
+
+define void @testLowerToURHADD2s(<2 x i32> %src1, <2 x i32> %src2, <2 x i32>* %dest) nounwind {
+; CHECK-LABEL: testLowerToURHADD2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urhadd.2s v0, v0, v1
+; CHECK-NEXT:    str d0, [x0]
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <2 x i32> %src1 to <2 x i64>
+  %zextsrc2 = zext <2 x i32> %src2 to <2 x i64>
+  %add1 = add <2 x i64> %zextsrc1, %zextsrc2
+  %add2 = add <2 x i64> %add1, <i64 1, i64 1>
+  %resulti16 = lshr <2 x i64> %add2, <i64 1, i64 1>
+  %result = trunc <2 x i64> %resulti16 to <2 x i32>
+  store <2 x i32> %result, <2 x i32>* %dest, align 8
+  ret void
+}
+
+define void @testLowerToURHADD16b(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
+; CHECK-LABEL: testLowerToURHADD16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urhadd.16b v0, v0, v1
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <16 x i8> %src1 to <16 x i16>
+  %zextsrc2 = zext <16 x i8> %src2 to <16 x i16>
+  %add1 = add <16 x i16> %zextsrc1, %zextsrc2
+  %add2 = add <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %resulti16 = lshr <16 x i16> %add2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %result = trunc <16 x i16> %resulti16 to <16 x i8>
+  store <16 x i8> %result, <16 x i8>* %dest, align 16
+  ret void
+}
+
+define void @testLowerToURHADD8h(<8 x i16> %src1, <8 x i16> %src2, <8 x i16>* %dest) nounwind {
+; CHECK-LABEL: testLowerToURHADD8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urhadd.8h v0, v0, v1
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
+  %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
+  %add1 = add <8 x i32> %zextsrc1, %zextsrc2
+  %add2 = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %resulti16 = lshr <8 x i32> %add2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %result = trunc <8 x i32> %resulti16 to <8 x i16>
+  store <8 x i16> %result, <8 x i16>* %dest, align 16
+  ret void
+}
+
+define void @testLowerToURHADD4s(<4 x i32> %src1, <4 x i32> %src2, <4 x i32>* %dest) nounwind {
+; CHECK-LABEL: testLowerToURHADD4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urhadd.4s v0, v0, v1
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
+  %zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
+  %add1 = add <4 x i64> %zextsrc1, %zextsrc2
+  %add2 = add <4 x i64> %add1, <i64 1, i64 1, i64 1, i64 1>
+  %resulti16 = lshr <4 x i64> %add2, <i64 1, i64 1, i64 1, i64 1>
+  %result = trunc <4 x i64> %resulti16 to <4 x i32>
+  store <4 x i32> %result, <4 x i32>* %dest, align 16
+  ret void
+}
+
 declare <8 x i8>  @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
 declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
diff --git a/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll b/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
index 999b5d58f43898..89e4a3c1aaed63 100644
--- a/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
+++ b/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
@@ -35,12 +35,10 @@ define <2 x i16> @test1_vec(<2 x i16> %a) {
 
 define <2 x i16> @test1_vec_nonuniform(<2 x i16> %a) {
 ; CHECK-LABEL: @test1_vec_nonuniform(
-; CHECK-NEXT:    [[B:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], <i32 8, i32 9>
-; CHECK-NEXT:    [[D:%.*]] = mul nuw nsw <2 x i32> [[B]], <i32 5, i32 6>
-; CHECK-NEXT:    [[E:%.*]] = or <2 x i32> [[C]], [[D]]
-; CHECK-NEXT:    [[F:%.*]] = trunc <2 x i32> [[E]] to <2 x i16>
-; CHECK-NEXT:    ret <2 x i16> [[F]]
+; CHECK-NEXT:    [[C:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 8, i16 9>
+; CHECK-NEXT:    [[D:%.*]] = mul <2 x i16> [[A]], <i16 5, i16 6>
+; CHECK-NEXT:    [[E:%.*]] = or <2 x i16> [[C]], [[D]]
+; CHECK-NEXT:    ret <2 x i16> [[E]]
 ;
   %b = zext <2 x i16> %a to <2 x i32>
   %c = lshr <2 x i32> %b, <i32 8, i32 9>
diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
index 10d59bfff57f47..18b411103122c0 100644
--- a/llvm/test/Transforms/InstCombine/cast.ll
+++ b/llvm/test/Transforms/InstCombine/cast.ll
@@ -502,12 +502,10 @@ define <2 x i16> @test40vec(<2 x i16> %a) {
 
 define <2 x i16> @test40vec_nonuniform(<2 x i16> %a) {
 ; ALL-LABEL: @test40vec_nonuniform(
-; ALL-NEXT:    [[T:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32>
-; ALL-NEXT:    [[T21:%.*]] = lshr <2 x i32> [[T]], <i32 9, i32 10>
-; ALL-NEXT:    [[T5:%.*]] = shl <2 x i32> [[T]], <i32 8, i32 9>
-; ALL-NEXT:    [[T32:%.*]] = or <2 x i32> [[T21]], [[T5]]
-; ALL-NEXT:    [[R:%.*]] = trunc <2 x i32> [[T32]] to <2 x i16>
-; ALL-NEXT:    ret <2 x i16> [[R]]
+; ALL-NEXT:    [[T21:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 9, i16 10>
+; ALL-NEXT:    [[T5:%.*]] = shl <2 x i16> [[A]], <i16 8, i16 9>
+; ALL-NEXT:    [[T32:%.*]] = or <2 x i16> [[T21]], [[T5]]
+; ALL-NEXT:    ret <2 x i16> [[T32]]
 ;
   %t = zext <2 x i16> %a to <2 x i32>
   %t21 = lshr <2 x i32> %t, <i32 9, i32 10>
diff --git a/llvm/test/Transforms/InstCombine/trunc.ll b/llvm/test/Transforms/InstCombine/trunc.ll
index 4e9f440978a59b..d8a615cc4c9a38 100644
--- a/llvm/test/Transforms/InstCombine/trunc.ll
+++ b/llvm/test/Transforms/InstCombine/trunc.ll
@@ -286,12 +286,11 @@ define <2 x i64> @test8_vec(<2 x i32> %A, <2 x i32> %B) {
 
 define <2 x i64> @test8_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test8_vec_nonuniform(
-; CHECK-NEXT:    [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128>
-; CHECK-NEXT:    [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i128>
-; CHECK-NEXT:    [[E:%.*]] = shl <2 x i128> [[D]], <i128 32, i128 48>
-; CHECK-NEXT:    [[F:%.*]] = or <2 x i128> [[E]], [[C]]
-; CHECK-NEXT:    [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64>
-; CHECK-NEXT:    ret <2 x i64> [[G]]
+; CHECK-NEXT:    [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[E:%.*]] = shl <2 x i64> [[D]], <i64 32, i64 48>
+; CHECK-NEXT:    [[F:%.*]] = or <2 x i64> [[E]], [[C]]
+; CHECK-NEXT:    ret <2 x i64> [[F]]
 ;
   %C = zext <2 x i32> %A to <2 x i128>
   %D = zext <2 x i32> %B to <2 x i128>
@@ -343,12 +342,11 @@ define i8 @test10(i32 %X) {
 
 define i64 @test11(i32 %A, i32 %B) {
 ; CHECK-LABEL: @test11(
-; CHECK-NEXT:    [[C:%.*]] = zext i32 [[A:%.*]] to i128
+; CHECK-NEXT:    [[C:%.*]] = zext i32 [[A:%.*]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[B:%.*]], 31
-; CHECK-NEXT:    [[E:%.*]] = zext i32 [[TMP1]] to i128
-; CHECK-NEXT:    [[F:%.*]] = shl i128 [[C]], [[E]]
-; CHECK-NEXT:    [[G:%.*]] = trunc i128 [[F]] to i64
-; CHECK-NEXT:    ret i64 [[G]]
+; CHECK-NEXT:    [[E:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[F:%.*]] = shl i64 [[C]], [[E]]
+; CHECK-NEXT:    ret i64 [[F]]
 ;
   %C = zext i32 %A to i128
   %D = zext i32 %B to i128
@@ -360,12 +358,11 @@ define i64 @test11(i32 %A, i32 %B) {
 
 define <2 x i64> @test11_vec(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test11_vec(
-; CHECK-NEXT:    [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128>
+; CHECK-NEXT:    [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 31>
-; CHECK-NEXT:    [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128>
-; CHECK-NEXT:    [[F:%.*]] = shl <2 x i128> [[C]], [[E]]
-; CHECK-NEXT:    [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64>
-; CHECK-NEXT:    ret <2 x i64> [[G]]
+; CHECK-NEXT:    [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    [[F:%.*]] = shl <2 x i64> [[C]], [[E]]
+; CHECK-NEXT:    ret <2 x i64> [[F]]
 ;
   %C = zext <2 x i32> %A to <2 x i128>
   %D = zext <2 x i32> %B to <2 x i128>
@@ -377,12 +374,11 @@ define <2 x i64> @test11_vec(<2 x i32> %A, <2 x i32> %B) {
 
 define <2 x i64> @test11_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test11_vec_nonuniform(
-; CHECK-NEXT:    [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128>
+; CHECK-NEXT:    [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 15>
-; CHECK-NEXT:    [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128>
-; CHECK-NEXT:    [[F:%.*]] = shl <2 x i128> [[C]], [[E]]
-; CHECK-NEXT:    [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64>
-; CHECK-NEXT:    ret <2 x i64> [[G]]
+; CHECK-NEXT:    [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    [[F:%.*]] = shl <2 x i64> [[C]], [[E]]
+; CHECK-NEXT:    ret <2 x i64> [[F]]
 ;
   %C = zext <2 x i32> %A to <2 x i128>
   %D = zext <2 x i32> %B to <2 x i128>
@@ -411,12 +407,11 @@ define <2 x i64> @test11_vec_undef(<2 x i32> %A, <2 x i32> %B) {
 
 define i64 @test12(i32 %A, i32 %B) {
 ; CHECK-LABEL: @test12(
-; CHECK-NEXT:    [[C:%.*]] = zext i32 [[A:%.*]] to i128
+; CHECK-NEXT:    [[C:%.*]] = zext i32 [[A:%.*]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[B:%.*]], 31
-; CHECK-NEXT:    [[E:%.*]] = zext i32 [[TMP1]] to i128
-; CHECK-NEXT:    [[F:%.*]] = lshr i128 [[C]], [[E]]
-; CHECK-NEXT:    [[G:%.*]] = trunc i128 [[F]] to i64
-; CHECK-NEXT:    ret i64 [[G]]
+; CHECK-NEXT:    [[E:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[F:%.*]] = lshr i64 [[C]], [[E]]
+; CHECK-NEXT:    ret i64 [[F]]
 ;
   %C = zext i32 %A to i128
   %D = zext i32 %B to i128
@@ -428,12 +423,11 @@ define i64 @test12(i32 %A, i32 %B) {
 
 define <2 x i64> @test12_vec(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test12_vec(
-; CHECK-NEXT:    [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128>
+; CHECK-NEXT:    [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 31>
-; CHECK-NEXT:    [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128>
-; CHECK-NEXT:    [[F:%.*]] = lshr <2 x i128> [[C]], [[E]]
-; CHECK-NEXT:    [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64>
-; CHECK-NEXT:    ret <2 x i64> [[G]]
+; CHECK-NEXT:    [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    [[F:%.*]] = lshr <2 x i64> [[C]], [[E]]
+; CHECK-NEXT:    ret <2 x i64> [[F]]
 ;
   %C = zext <2 x i32> %A to <2 x i128>
   %D = zext <2 x i32> %B to <2 x i128>
@@ -445,12 +439,11 @@ define <2 x i64> @test12_vec(<2 x i32> %A, <2 x i32> %B) {
 
 define <2 x i64> @test12_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test12_vec_nonuniform(
-; CHECK-NEXT:    [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128>
+; CHECK-NEXT:    [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 15>
-; CHECK-NEXT:    [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128>
-; CHECK-NEXT:    [[F:%.*]] = lshr <2 x i128> [[C]], [[E]]
-; CHECK-NEXT:    [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64>
-; CHECK-NEXT:    ret <2 x i64> [[G]]
+; CHECK-NEXT:    [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    [[F:%.*]] = lshr <2 x i64> [[C]], [[E]]
+; CHECK-NEXT:    ret <2 x i64> [[F]]
 ;
   %C = zext <2 x i32> %A to <2 x i128>
   %D = zext <2 x i32> %B to <2 x i128>
@@ -479,12 +472,11 @@ define <2 x i64> @test12_vec_undef(<2 x i32> %A, <2 x i32> %B) {
 
 define i64 @test13(i32 %A, i32 %B) {
 ; CHECK-LABEL: @test13(
-; CHECK-NEXT:    [[C:%.*]] = sext i32 [[A:%.*]] to i128
+; CHECK-NEXT:    [[C:%.*]] = sext i32 [[A:%.*]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[B:%.*]], 31
-; CHECK-NEXT:    [[E:%.*]] = zext i32 [[TMP1]] to i128
-; CHECK-NEXT:    [[F:%.*]] = ashr i128 [[C]], [[E]]
-; CHECK-NEXT:    [[G:%.*]] = trunc i128 [[F]] to i64
-; CHECK-NEXT:    ret i64 [[G]]
+; CHECK-NEXT:    [[E:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[F:%.*]] = ashr i64 [[C]], [[E]]
+; CHECK-NEXT:    ret i64 [[F]]
 ;
   %C = sext i32 %A to i128
   %D = zext i32 %B to i128
@@ -496,12 +488,11 @@ define i64 @test13(i32 %A, i32 %B) {
 
 define <2 x i64> @test13_vec(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test13_vec(
-; CHECK-NEXT:    [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i128>
+; CHECK-NEXT:    [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 31>
-; CHECK-NEXT:    [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128>
-; CHECK-NEXT:    [[F:%.*]] = ashr <2 x i128> [[C]], [[E]]
-; CHECK-NEXT:    [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64>
-; CHECK-NEXT:    ret <2 x i64> [[G]]
+; CHECK-NEXT:    [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    [[F:%.*]] = ashr <2 x i64> [[C]], [[E]]
+; CHECK-NEXT:    ret <2 x i64> [[F]]
 ;
   %C = sext <2 x i32> %A to <2 x i128>
   %D = zext <2 x i32> %B to <2 x i128>
@@ -513,12 +504,11 @@ define <2 x i64> @test13_vec(<2 x i32> %A, <2 x i32> %B) {
 
 define <2 x i64> @test13_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test13_vec_nonuniform(
-; CHECK-NEXT:    [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i128>
+; CHECK-NEXT:    [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 15>
-; CHECK-NEXT:    [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i128>
-; CHECK-NEXT:    [[F:%.*]] = ashr <2 x i128> [[C]], [[E]]
-; CHECK-NEXT:    [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64>
-; CHECK-NEXT:    ret <2 x i64> [[G]]
+; CHECK-NEXT:    [[E:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    [[F:%.*]] = ashr <2 x i64> [[C]], [[E]]
+; CHECK-NEXT:    ret <2 x i64> [[F]]
 ;
   %C = sext <2 x i32> %A to <2 x i128>
   %D = zext <2 x i32> %B to <2 x i128>
diff --git a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-gnu-pubnames.yaml b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-gnu-pubnames.yaml
new file mode 100644
index 00000000000000..71c7981117554e
--- /dev/null
+++ b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-gnu-pubnames.yaml
@@ -0,0 +1,261 @@
+## Test that yaml2obj emits .debug_gnu_pubnames section.
+
+## a) Generate the '.debug_gnu_pubnames' section from the 'DWARF' entry.
+
+## Generate and verify a 32-bit little endian .debug_gnu_pubnames section.
+
+# RUN: yaml2obj --docnum=1 -DENDIAN=ELFDATA2LSB %s -o %t1.le.o
+# RUN: llvm-readobj --sections --section-data %t1.le.o | \
+# RUN:   FileCheck -DSIZE=32 -DADDRALIGN=1 %s --check-prefixes=SHDR,DWARF32-LE
+
+#            SHDR: Index: 1
+#       SHDR-NEXT: Name: .debug_gnu_pubnames (1)
+#       SHDR-NEXT: Type: SHT_PROGBITS (0x1)
+#       SHDR-NEXT: Flags [ (0x0)
+#       SHDR-NEXT: ]
+#       SHDR-NEXT: Address: 0x0
+#       SHDR-NEXT: Offset: 0x40
+#       SHDR-NEXT: Size: [[SIZE]]
+#       SHDR-NEXT: Link: 0
+#       SHDR-NEXT: Info: 0
+#       SHDR-NEXT: AddressAlignment: [[ADDRALIGN]]
+#       SHDR-NEXT: EntrySize: 0
+# DWARF32-LE-NEXT: SectionData (
+# DWARF32-LE-NEXT:   0000: 34120000 02003412 00002143 00007856 |4.....4...!C..xV|
+##                         ^-------                            unit_length (4-byte)
+##                                  ^---                       version (2-byte)
+##                                      ^--------              debug_info_offset (4-byte)
+##                                               ^--------     debug_info_length (4-byte)
+##                                                        ^--- offset (4-byte)
+# DWARF32-LE-NEXT:   0010: 34123061 62630021 43658730 64656600 |4.0abc.!Ce.0def.|
+##                         ----
+##                             ^-                              descriptor (1-byte)
+##                               ^--------                     name "abc\0"
+##                                        ^--------            offset (4-byte)
+##                                                 ^-          descriptor (1-byte)
+##                                                    ^------- name "def\0"
+# DWARF32-LE-NEXT: )
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    [[ENDIAN]]
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+DWARF:
+  debug_gnu_pubnames:
+    Length:
+      TotalLength: 0x1234
+    Version:    2
+    UnitOffset: 0x1234
+    UnitSize:   0x4321
+    Entries:
+      - DieOffset:  0x12345678
+        Descriptor: 0x30
+        Name:       abc
+      - DieOffset:  0x87654321
+        Descriptor: 0x30
+        Name:       def
+
+## Generate and verify a 32-bit big endian .debug_gnu_pubnames section.
+
+# RUN: yaml2obj --docnum=1 -DENDIAN=ELFDATA2MSB %s -o %t1.be.o
+# RUN: llvm-readobj --sections --section-data %t1.be.o | \
+# RUN:   FileCheck -DSIZE=32 -DADDRALIGN=1 %s --check-prefixes=SHDR,DWARF32-BE
+
+# DWARF32-BE-NEXT: SectionData (
+# DWARF32-BE-NEXT:   0000: 00001234 00020000 12340000 43211234 |...4.....4..C!.4|
+##                         ^-------                            unit_length (4-byte)
+##                                  ^---                       version (2-byte)
+##                                      ^--------              debug_info_offset (4-byte)
+##                                               ^--------     debug_info_length (4-byte)
+##                                                        ^--- offset (4-byte)
+# DWARF32-BE-NEXT:   0010: 56783061 62630087 65432130 64656600 |Vx0abc..eC!0def.|
+##                         ----
+##                             ^-                              descriptor (1-byte)
+##                               ^--------                     name "abc\0"
+##                                        ^--------            offset (4-byte)
+##                                                 ^-          descriptor (1-byte)
+##                                                    ^------- name "def\0"
+# DWARF32-BE-NEXT: )
+
+## b) Generate the .debug_gnu_pubnames section from raw section content.
+
+# RUN: yaml2obj --docnum=2 %s -o %t2.o
+# RUN: llvm-readobj --sections --section-data %t2.o | \
+# RUN:   FileCheck %s -DADDRALIGN=0 -DSIZE=3 --check-prefixes=SHDR,ARBITRARY-CONTENT
+
+#      ARBITRARY-CONTENT: SectionData (
+# ARBITRARY-CONTENT-NEXT:   0000: 112233
+# ARBITRARY-CONTENT-NEXT: )
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name:    .debug_gnu_pubnames
+    Type:    SHT_PROGBITS
+    Content: "112233"
+
+## c) Generate the .debug_gnu_pubnames section when the "Size" is specified.
+
+# RUN: yaml2obj --docnum=3 %s -o %t3.o
+# RUN: llvm-readobj --sections --section-data %t3.o | \
+# RUN:   FileCheck -DSIZE=16 -DADDRALIGN=0 %s --check-prefixes=SHDR,SIZE
+
+#      SIZE: SectionData (
+# SIZE-NEXT:   0000: 00000000 00000000 00000000 00000000  |................|
+# SIZE-NEXT: )
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name: .debug_gnu_pubnames
+    Type: SHT_PROGBITS
+    Size: 0x10
+
+## d) Test that yaml2obj emits an error message when both the "Size" and the
+## "debug_gnu_pubnames" entry are specified at the same time.
+
+# RUN: not yaml2obj --docnum=4 %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+# ERROR: yaml2obj: error: cannot specify section '.debug_gnu_pubnames' contents in the 'DWARF' entry and the 'Content' or 'Size' in the 'Sections' entry at the same time
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name: .debug_gnu_pubnames
+    Type: SHT_PROGBITS
+    Size: 0x10
+DWARF:
+  debug_gnu_pubnames:
+    Length:
+      TotalLength: 0x1234
+    Version:    2
+    UnitOffset: 0x1234
+    UnitSize:   0x4321
+    Entries:    []
+
+## e) Test that yaml2obj emits an error message when both the "Content" and the
+## "debug_gnu_pubnames" entry are specified at the same time.
+
+# RUN: not yaml2obj --docnum=5 %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name:    .debug_gnu_pubnames
+    Type:    SHT_PROGBITS
+    Content: "00"
+DWARF:
+  debug_gnu_pubnames:
+    Length:
+      TotalLength: 0x1234
+    Version:    2
+    UnitOffset: 0x1234
+    UnitSize:   0x4321
+    Entries:    []
+
+## f) Test that all the properties can be overridden by the section header when
+## the "debug_gnu_pubnames" entry doesn't exist.
+
+# RUN: yaml2obj --docnum=6 %s -o %t6.o
+# RUN: llvm-readelf --sections %t6.o | FileCheck %s --check-prefix=OVERRIDDEN
+
+#      OVERRIDDEN: [Nr] Name                Type   Address          Off    Size   ES Flg Lk Inf Al
+#      OVERRIDDEN: [ 1] .debug_gnu_pubnames STRTAB 0000000000002020 000050 00000e 01   A  2   1  2
+# OVERRIDDEN-NEXT: [ 2] .sec                STRTAB 0000000000000000 00005e 000000 00      0   0  0
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name:         .debug_gnu_pubnames
+    Type:         SHT_STRTAB  ## SHT_PROGBITS by default.
+    Flags:        [SHF_ALLOC] ## 0 by default.
+    Link:         .sec        ## 0 by default.
+    EntSize:      1           ## 0 by default.
+    Info:         1           ## 0 by default.
+    AddressAlign: 2           ## 0 by default.
+    Address:      0x2020      ## 0x00 by default.
+    Offset:       0x50        ## 0x40 for the first section.
+    Size:         0x0e        ## Set the "Size" so that we can reuse the check tag "OVERRIDDEN".
+  - Name:         .sec        ## Linked by .debug_gnu_pubnames.
+    Type:         SHT_STRTAB
+
+## g) Test that all the properties can be overridden by the section header when
+## the "debug_gnu_pubnames" entry exists.
+
+# RUN: yaml2obj --docnum=7 %s -o %t7.o
+# RUN: llvm-readelf --sections %t7.o | FileCheck %s --check-prefix=OVERRIDDEN
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name:         .debug_gnu_pubnames
+    Type:         SHT_STRTAB  ## SHT_PROGBITS by default.
+    Flags:        [SHF_ALLOC] ## 0 by default.
+    Link:         .sec        ## 0 by default.
+    EntSize:      1           ## 0 by default.
+    Info:         1           ## 0 by default.
+    AddressAlign: 2           ## 1 by default.
+    Address:      0x2020      ## 0x00 by default.
+    Offset:       0x50        ## 0x40 for the first section.
+  - Name:         .sec        ## Linked by .debug_gnu_pubnames.
+    Type:         SHT_STRTAB
+DWARF:
+  debug_gnu_pubnames:
+    Length:
+      TotalLength: 0x1234
+    Version:    2
+    UnitOffset: 0x1234
+    UnitSize:   0x4321
+    Entries:    []
+
+## h) Test that yaml2obj emits an error if 'Descriptor' is missing.
+
+# RUN: not yaml2obj --docnum=8 %s -o %t8.o 2>&1 | FileCheck %s --check-prefix=MISSING-KEY --ignore-case
+
+#      MISSING-KEY: YAML:260:9: error: missing required key 'Descriptor'
+# MISSING-KEY-NEXT:       - DieOffset: 0x12345678
+# MISSING-KEY-NEXT:         ^
+# MISSING-KEY-NEXT: yaml2obj: error: failed to parse YAML input: Invalid argument
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+DWARF:
+  debug_gnu_pubnames:
+    Length:
+      TotalLength: 0x1234
+    Version:    2
+    UnitOffset: 0x1234
+    UnitSize:   0x4321
+    Entries:
+      - DieOffset: 0x12345678
+        Name:      abc
diff --git a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-gnu-pubtypes.yaml b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-gnu-pubtypes.yaml
new file mode 100644
index 00000000000000..b88d21b5fc7af5
--- /dev/null
+++ b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-gnu-pubtypes.yaml
@@ -0,0 +1,261 @@
+## Test that yaml2obj emits .debug_gnu_pubtypes section.
+
+## a) Generate the '.debug_gnu_pubtypes' section from the 'DWARF' entry.
+
+## Generate and verify a 32-bit little endian .debug_gnu_pubtypes section.
+
+# RUN: yaml2obj --docnum=1 -DENDIAN=ELFDATA2LSB %s -o %t1.le.o
+# RUN: llvm-readobj --sections --section-data %t1.le.o | \
+# RUN:   FileCheck -DSIZE=32 -DADDRALIGN=1 %s --check-prefixes=SHDR,DWARF32-LE
+
+#            SHDR: Index: 1
+#       SHDR-NEXT: Name: .debug_gnu_pubtypes (1)
+#       SHDR-NEXT: Type: SHT_PROGBITS (0x1)
+#       SHDR-NEXT: Flags [ (0x0)
+#       SHDR-NEXT: ]
+#       SHDR-NEXT: Address: 0x0
+#       SHDR-NEXT: Offset: 0x40
+#       SHDR-NEXT: Size: [[SIZE]]
+#       SHDR-NEXT: Link: 0
+#       SHDR-NEXT: Info: 0
+#       SHDR-NEXT: AddressAlignment: [[ADDRALIGN]]
+#       SHDR-NEXT: EntrySize: 0
+# DWARF32-LE-NEXT: SectionData (
+# DWARF32-LE-NEXT:   0000: 34120000 02003412 00002143 00007856 |4.....4...!C..xV|
+##                         ^-------                            unit_length (4-byte)
+##                                  ^---                       version (2-byte)
+##                                      ^--------              debug_info_offset (4-byte)
+##                                               ^--------     debug_info_length (4-byte)
+##                                                        ^--- offset (4-byte)
+# DWARF32-LE-NEXT:   0010: 34121261 62630021 43658734 64656600 |4..abc.!Ce.4def.|
+##                         ----
+##                             ^-                              descriptor (1-byte)
+##                               ^--------                     name "abc\0"
+##                                        ^--------            offset (4-byte)
+##                                                 ^-          descriptor (1-byte)
+##                                                    ^------- name "def\0"
+# DWARF32-LE-NEXT: )
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    [[ENDIAN]]
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+DWARF:
+  debug_gnu_pubtypes:
+    Length:
+      TotalLength: 0x1234
+    Version:    2
+    UnitOffset: 0x1234
+    UnitSize:   0x4321
+    Entries:
+      - DieOffset:  0x12345678
+        Descriptor: 0x12
+        Name:       abc
+      - DieOffset:  0x87654321
+        Descriptor: 0x34
+        Name:       def
+
+## Generate and verify a 32-bit big endian .debug_gnu_pubtypes section.
+
+# RUN: yaml2obj --docnum=1 -DENDIAN=ELFDATA2MSB %s -o %t1.be.o
+# RUN: llvm-readobj --sections --section-data %t1.be.o | \
+# RUN:   FileCheck -DSIZE=32 -DADDRALIGN=1 %s --check-prefixes=SHDR,DWARF32-BE
+
+# DWARF32-BE-NEXT: SectionData (
+# DWARF32-BE-NEXT:   0000: 00001234 00020000 12340000 43211234 |...4.....4..C!.4|
+##                         ^-------                            unit_length (4-byte)
+##                                  ^---                       version (2-byte)
+##                                      ^--------              debug_info_offset (4-byte)
+##                                               ^--------     debug_info_length (4-byte)
+##                                                        ^--- offset (4-byte)
+# DWARF32-BE-NEXT:   0010: 56781261 62630087 65432134 64656600 |Vx.abc..eC!4def.|
+##                         ----
+##                             ^-                              descriptor (1-byte)
+##                               ^--------                     name "abc\0"
+##                                        ^--------            offset (4-byte)
+##                                                 ^-          descriptor (1-byte)
+##                                                    ^------- name "def\0"
+# DWARF32-BE-NEXT: )
+
+## b) Generate the .debug_gnu_pubtypes section from raw section content.
+
+# RUN: yaml2obj --docnum=2 %s -o %t2.o
+# RUN: llvm-readobj --sections --section-data %t2.o | \
+# RUN:   FileCheck %s -DADDRALIGN=0 -DSIZE=3 --check-prefixes=SHDR,ARBITRARY-CONTENT
+
+#      ARBITRARY-CONTENT: SectionData (
+# ARBITRARY-CONTENT-NEXT:   0000: 112233
+# ARBITRARY-CONTENT-NEXT: )
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name:    .debug_gnu_pubtypes
+    Type:    SHT_PROGBITS
+    Content: "112233"
+
+## c) Generate the .debug_gnu_pubtypes section when the "Size" is specified.
+
+# RUN: yaml2obj --docnum=3 %s -o %t3.o
+# RUN: llvm-readobj --sections --section-data %t3.o | \
+# RUN:   FileCheck -DSIZE=16 -DADDRALIGN=0 %s --check-prefixes=SHDR,SIZE
+
+#      SIZE: SectionData (
+# SIZE-NEXT:   0000: 00000000 00000000 00000000 00000000  |................|
+# SIZE-NEXT: )
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name: .debug_gnu_pubtypes
+    Type: SHT_PROGBITS
+    Size: 0x10
+
+## d) Test that yaml2obj emits an error message when both the "Size" and the
+## "debug_gnu_pubtypes" entry are specified at the same time.
+
+# RUN: not yaml2obj --docnum=4 %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+# ERROR: yaml2obj: error: cannot specify section '.debug_gnu_pubtypes' contents in the 'DWARF' entry and the 'Content' or 'Size' in the 'Sections' entry at the same time
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name: .debug_gnu_pubtypes
+    Type: SHT_PROGBITS
+    Size: 0x10
+DWARF:
+  debug_gnu_pubtypes:
+    Length:
+      TotalLength: 0x1234
+    Version:    2
+    UnitOffset: 0x1234
+    UnitSize:   0x4321
+    Entries:    []
+
+## e) Test that yaml2obj emits an error message when both the "Content" and the
+## "debug_gnu_pubtypes" entry are specified at the same time.
+
+# RUN: not yaml2obj --docnum=5 %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name:    .debug_gnu_pubtypes
+    Type:    SHT_PROGBITS
+    Content: "00"
+DWARF:
+  debug_gnu_pubtypes:
+    Length:
+      TotalLength: 0x1234
+    Version:    2
+    UnitOffset: 0x1234
+    UnitSize:   0x4321
+    Entries:    []
+
+## f) Test that all the properties can be overridden by the section header when
+## the "debug_gnu_pubtypes" entry doesn't exist.
+
+# RUN: yaml2obj --docnum=6 %s -o %t6.o
+# RUN: llvm-readelf --sections %t6.o | FileCheck %s --check-prefix=OVERRIDDEN
+
+#      OVERRIDDEN: [Nr] Name                Type   Address          Off    Size   ES Flg Lk Inf Al
+#      OVERRIDDEN: [ 1] .debug_gnu_pubtypes STRTAB 0000000000002020 000050 00000e 01   A  2   1  2
+# OVERRIDDEN-NEXT: [ 2] .sec                STRTAB 0000000000000000 00005e 000000 00      0   0  0
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name:         .debug_gnu_pubtypes
+    Type:         SHT_STRTAB  ## SHT_PROGBITS by default.
+    Flags:        [SHF_ALLOC] ## 0 by default.
+    Link:         .sec        ## 0 by default.
+    EntSize:      1           ## 0 by default.
+    Info:         1           ## 0 by default.
+    AddressAlign: 2           ## 0 by default.
+    Address:      0x2020      ## 0x00 by default.
+    Offset:       0x50        ## 0x40 for the first section.
+    Size:         0x0e        ## Set the "Size" so that we can reuse the check tag "OVERRIDDEN".
+  - Name:         .sec        ## Linked by .debug_gnu_pubtypes.
+    Type:         SHT_STRTAB
+
+## g) Test that all the properties can be overridden by the section header when
+## the "debug_gnu_pubtypes" entry exists.
+
+# RUN: yaml2obj --docnum=7 %s -o %t7.o
+# RUN: llvm-readelf --sections %t7.o | FileCheck %s --check-prefix=OVERRIDDEN
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name:         .debug_gnu_pubtypes
+    Type:         SHT_STRTAB  ## SHT_PROGBITS by default.
+    Flags:        [SHF_ALLOC] ## 0 by default.
+    Link:         .sec        ## 0 by default.
+    EntSize:      1           ## 0 by default.
+    Info:         1           ## 0 by default.
+    AddressAlign: 2           ## 1 by default.
+    Address:      0x2020      ## 0x00 by default.
+    Offset:       0x50        ## 0x40 for the first section.
+  - Name:         .sec        ## Linked by .debug_gnu_pubtypes.
+    Type:         SHT_STRTAB
+DWARF:
+  debug_gnu_pubtypes:
+    Length:
+      TotalLength: 0x1234
+    Version:    2
+    UnitOffset: 0x1234
+    UnitSize:   0x4321
+    Entries:    []
+
+## h) Test that yaml2obj emits an error if 'Descriptor' is missing.
+
+# RUN: not yaml2obj --docnum=8 %s -o %t8.o 2>&1 | FileCheck %s --check-prefix=MISSING-KEY --ignore-case
+
+#      MISSING-KEY: YAML:260:9: error: missing required key 'Descriptor'
+# MISSING-KEY-NEXT:       - DieOffset: 0x12345678
+# MISSING-KEY-NEXT:         ^
+# MISSING-KEY-NEXT: yaml2obj: error: failed to parse YAML input: Invalid argument
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+DWARF:
+  debug_gnu_pubtypes:
+    Length:
+      TotalLength: 0x1234
+    Version:    2
+    UnitOffset: 0x1234
+    UnitSize:   0x4321
+    Entries:
+      - DieOffset: 0x12345678
+        Name:      abc
diff --git a/llvm/unittests/ObjectYAML/DWARFYAMLTest.cpp b/llvm/unittests/ObjectYAML/DWARFYAMLTest.cpp
index 138c0999c1cc1b..e93773999ad30e 100644
--- a/llvm/unittests/ObjectYAML/DWARFYAMLTest.cpp
+++ b/llvm/unittests/ObjectYAML/DWARFYAMLTest.cpp
@@ -45,8 +45,8 @@ TEST(DebugAddrSection, TestParseDebugAddrYAML) {
     Length:  0x1234
     Version: 5
 )";
-  auto SectionsOrErr = DWARFYAML::emitDebugSections(Yaml);
-  EXPECT_THAT_EXPECTED(SectionsOrErr, Succeeded());
+  auto DWARFOrErr = parseDWARFYAML(Yaml);
+  EXPECT_THAT_EXPECTED(DWARFOrErr, Succeeded());
 }
 
 TEST(DebugAddrSection, TestMissingVersion) {
@@ -55,8 +55,8 @@ TEST(DebugAddrSection, TestMissingVersion) {
   - Format: DWARF64
     Length: 0x1234
 )";
-  auto SectionsOrErr = DWARFYAML::emitDebugSections(Yaml);
-  EXPECT_THAT_ERROR(SectionsOrErr.takeError(),
+  auto DWARFOrErr = parseDWARFYAML(Yaml);
+  EXPECT_THAT_ERROR(DWARFOrErr.takeError(),
                     FailedWithMessage("missing required key 'Version'"));
 }
 
@@ -68,8 +68,8 @@ TEST(DebugAddrSection, TestUnexpectedKey) {
     Version: 5
     Blah:    unexpected
 )";
-  auto SectionsOrErr = DWARFYAML::emitDebugSections(Yaml);
-  EXPECT_THAT_ERROR(SectionsOrErr.takeError(),
+  auto DWARFOrErr = parseDWARFYAML(Yaml);
+  EXPECT_THAT_ERROR(DWARFOrErr.takeError(),
                     FailedWithMessage("unknown key 'Blah'"));
 }
 
diff --git a/mlir/lib/Transforms/BufferPlacement.cpp b/mlir/lib/Transforms/BufferPlacement.cpp
index 577d52188351e7..71d397b47ea249 100644
--- a/mlir/lib/Transforms/BufferPlacement.cpp
+++ b/mlir/lib/Transforms/BufferPlacement.cpp
@@ -142,6 +142,12 @@ class BufferPlacementAliasAnalysis {
         this->aliases[std::get<0>(entry)].insert(std::get<1>(entry));
     };
 
+    // Add additional aliases created by view changes to the alias list.
+    op->walk([&](ViewLikeOpInterface viewInterface) {
+      aliases[viewInterface.getViewSource()].insert(
+          viewInterface.getOperation()->getResult(0));
+    });
+
     // Query all branch interfaces to link block argument aliases.
     op->walk([&](BranchOpInterface branchInterface) {
       Block *parentBlock = branchInterface.getOperation()->getBlock();
diff --git a/mlir/test/Transforms/buffer-placement.mlir b/mlir/test/Transforms/buffer-placement.mlir
index 225a186caeb0af..c3bce4ea545839 100644
--- a/mlir/test/Transforms/buffer-placement.mlir
+++ b/mlir/test/Transforms/buffer-placement.mlir
@@ -914,3 +914,22 @@ func @inner_region_control_flow_div(
 // CHECK-NEXT: test.region_if_yield %[[ALLOC8]]
 //      CHECK: dealloc %[[ALLOC0]]
 // CHECK-NEXT: return %[[ALLOC1]]
+
+// -----
+
+// CHECK-LABEL: func @subview
+func @subview(%arg0 : index, %arg1 : index, %arg2 : memref<?x?xf32>) {
+  %0 = alloc() : memref<64x4xf32, offset: 0, strides: [4, 1]>
+  %1 = subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] :
+    memref<64x4xf32, offset: 0, strides: [4, 1]>
+  to memref<?x?xf32, offset: ?, strides: [?, ?]>
+  "linalg.copy"(%1, %arg2) :
+    (memref<?x?xf32, offset: ?, strides: [?, ?]>, memref<?x?xf32>) -> ()
+  return
+}
+
+// CHECK-NEXT: %[[ALLOC:.*]] = alloc()
+// CHECK-NEXT: subview
+// CHECK-NEXT: linalg.copy
+// CHECK-NEXT: dealloc %[[ALLOC]]
+// CHECK-NEXT: return