diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index eeb7609ff6b5f1..6bb76d1b917db3 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -1183,11 +1183,16 @@ class MCPlusBuilder { bool clearOffset(MCInst &Inst) const; /// Return the label of \p Inst, if available. - MCSymbol *getLabel(const MCInst &Inst) const; + MCSymbol *getInstLabel(const MCInst &Inst) const; + + /// Set the label of \p Inst or return the existing label for the instruction. + /// This label will be emitted right before \p Inst is emitted to MCStreamer. + MCSymbol *getOrCreateInstLabel(MCInst &Inst, const Twine &Name, + MCContext *Ctx) const; /// Set the label of \p Inst. This label will be emitted right before \p Inst /// is emitted to MCStreamer. - bool setLabel(MCInst &Inst, MCSymbol *Label) const; + void setInstLabel(MCInst &Inst, MCSymbol *Label) const; /// Get instruction size specified via annotation. std::optional getSize(const MCInst &Inst) const; diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index d544ece13a832f..b29ebbbfa18c4b 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1967,7 +1967,7 @@ void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, OS << " # Offset: " << *Offset; if (std::optional Size = MIB->getSize(Instruction)) OS << " # Size: " << *Size; - if (MCSymbol *Label = MIB->getLabel(Instruction)) + if (MCSymbol *Label = MIB->getInstLabel(Instruction)) OS << " # Label: " << *Label; MIB->printAnnotations(Instruction, OS); diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index d4b668c1d7e7bd..97d19b75200f51 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -489,7 +489,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF, if (!EmitCodeOnly) { // A symbol to be emitted before the instruction to mark its location. - MCSymbol *InstrLabel = BC.MIB->getLabel(Instr); + MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr); if (opts::UpdateDebugSections && BF.getDWARFUnit()) { LastLocSeen = emitLineInfo(BF, Instr.getLoc(), LastLocSeen, diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 54f2f9d972a461..00df42c11e2239 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1424,7 +1424,7 @@ Error BinaryFunction::disassemble() { InstrMapType::iterator II = Instructions.find(Offset); assert(II != Instructions.end() && "reference to non-existing instruction"); - BC.MIB->setLabel(II->second, Label); + BC.MIB->setInstLabel(II->second, Label); } // Reset symbolizer for the disassembler. diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp index 54618aeb95cccb..82bddf76d5b8c9 100644 --- a/bolt/lib/Core/Exceptions.cpp +++ b/bolt/lib/Core/Exceptions.cpp @@ -408,12 +408,11 @@ void BinaryFunction::updateEHRanges() { // Same symbol is used for the beginning and the end of the range. MCSymbol *EHSymbol; - if (MCSymbol *InstrLabel = BC.MIB->getLabel(Instr)) { + if (MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr)) { EHSymbol = InstrLabel; } else { std::unique_lock Lock(BC.CtxMutex); - EHSymbol = BC.Ctx->createNamedTempSymbol("EH"); - BC.MIB->setLabel(Instr, EHSymbol); + EHSymbol = BC.MIB->getOrCreateInstLabel(Instr, "EH", BC.Ctx.get()); } // At this point we could be in one of the following states: diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp index 44e5f88d8950fc..bd9bd0c45922a5 100644 --- a/bolt/lib/Core/MCPlusBuilder.cpp +++ b/bolt/lib/Core/MCPlusBuilder.cpp @@ -12,6 +12,7 @@ #include "bolt/Core/MCPlusBuilder.h" #include "bolt/Core/MCPlus.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrDesc.h" @@ -266,17 +267,29 @@ bool MCPlusBuilder::clearOffset(MCInst &Inst) const { return true; } -MCSymbol *MCPlusBuilder::getLabel(const MCInst &Inst) const { +MCSymbol *MCPlusBuilder::getInstLabel(const MCInst &Inst) const { if (std::optional Label = getAnnotationOpValue(Inst, MCAnnotation::kLabel)) return reinterpret_cast(*Label); return nullptr; } -bool MCPlusBuilder::setLabel(MCInst &Inst, MCSymbol *Label) const { +MCSymbol *MCPlusBuilder::getOrCreateInstLabel(MCInst &Inst, const Twine &Name, + MCContext *Ctx) const { + MCSymbol *Label = getInstLabel(Inst); + if (Label) + return Label; + + Label = Ctx->createNamedTempSymbol(Name); + setAnnotationOpValue(Inst, MCAnnotation::kLabel, + reinterpret_cast(Label)); + return Label; +} + +void MCPlusBuilder::setInstLabel(MCInst &Inst, MCSymbol *Label) const { + assert(!getInstLabel(Inst) && "Instruction already has assigned label."); setAnnotationOpValue(Inst, MCAnnotation::kLabel, reinterpret_cast(Label)); - return true; } std::optional MCPlusBuilder::getSize(const MCInst &Inst) const { diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp index 6377c1197253c8..0d7dc1070ce75e 100644 --- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp +++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp @@ -770,11 +770,8 @@ Error LinuxKernelRewriter::rewriteORCTables() { continue; // Issue label for the instruction. - MCSymbol *Label = BC.MIB->getLabel(Inst); - if (!Label) { - Label = BC.Ctx->createTempSymbol("__ORC_"); - BC.MIB->setLabel(Inst, Label); - } + MCSymbol *Label = + BC.MIB->getOrCreateInstLabel(Inst, "__ORC_", BC.Ctx.get()); if (Error E = emitORCEntry(0, *ErrorOrState, Label)) return E; @@ -908,11 +905,8 @@ Error LinuxKernelRewriter::readStaticCalls() { BC.MIB->addAnnotation(*Inst, "StaticCall", EntryID); - MCSymbol *Label = BC.MIB->getLabel(*Inst); - if (!Label) { - Label = BC.Ctx->createTempSymbol("__SC_"); - BC.MIB->setLabel(*Inst, Label); - } + MCSymbol *Label = + BC.MIB->getOrCreateInstLabel(*Inst, "__SC_", BC.Ctx.get()); StaticCallEntries.push_back({EntryID, BF, Label}); } diff --git a/bolt/test/runtime/instrument-wrong-target.s b/bolt/test/runtime/X86/instrument-wrong-target.s similarity index 88% rename from bolt/test/runtime/instrument-wrong-target.s rename to bolt/test/runtime/X86/instrument-wrong-target.s index b25c924ffbcc0f..343d93a89ed130 100644 --- a/bolt/test/runtime/instrument-wrong-target.s +++ b/bolt/test/runtime/X86/instrument-wrong-target.s @@ -1,8 +1,8 @@ # Test that BOLT errs when trying to instrument a binary with a different # architecture than the one BOLT is built for. -# REQUIRES: x86_64-linux,bolt-runtime -# REQUIRES: target-x86_64 && aarch64-registered-target +# REQUIRES: system-linux,bolt-runtime +# REQUIRES: aarch64-registered-target # RUN: llvm-mc -triple aarch64 -filetype=obj %s -o %t.o # RUN: ld.lld -q -pie -o %t.exe %t.o diff --git a/clang/docs/LibASTMatchersReference.html b/clang/docs/LibASTMatchersReference.html index c40d679e383bb2..8a06084955aa6b 100644 --- a/clang/docs/LibASTMatchersReference.html +++ b/clang/docs/LibASTMatchersReference.html @@ -7049,7 +7049,7 @@

AST Traversal Matchers

-Matcher<CXXFoldExpr>hasFoldInitast_matchers::Matcher<Expr> InnerMacher +Matcher<CXXFoldExpr>hasFoldInitMatcher<Expr> InnerMacher
Matches the operand that does not contain the parameter pack.
 
 Example matches `(0 + ... + args)` and `(args * ... * 1)`
@@ -7089,7 +7089,7 @@ 

AST Traversal Matchers

-Matcher<CXXFoldExpr>hasPatternast_matchers::Matcher<Expr> InnerMacher +Matcher<CXXFoldExpr>hasPatternMatcher<Expr> InnerMacher
Matches the operand that contains the parameter pack.
 
 Example matches `(0 + ... + args)`
@@ -7859,7 +7859,7 @@ 

AST Traversal Matchers

-Matcher<ClassTemplateSpecializationDecl>forEachTemplateArgumentclang::ast_matchers::Matcher<TemplateArgument> InnerMatcher +Matcher<ClassTemplateSpecializationDecl>forEachTemplateArgumentMatcher<TemplateArgument> InnerMatcher
Matches classTemplateSpecialization, templateSpecializationType and
 functionDecl nodes where the template argument matches the inner matcher.
 This matcher may produce multiple matches.
@@ -8454,7 +8454,7 @@ 

AST Traversal Matchers

-Matcher<Expr>ignoringElidableConstructorCallast_matchers::Matcher<Expr> InnerMatcher +Matcher<Expr>ignoringElidableConstructorCallMatcher<Expr> InnerMatcher
Matches expressions that match InnerMatcher that are possibly wrapped in an
 elidable constructor and other corresponding bookkeeping nodes.
 
@@ -8691,7 +8691,7 @@ 

AST Traversal Matchers

-Matcher<FunctionDecl>forEachTemplateArgumentclang::ast_matchers::Matcher<TemplateArgument> InnerMatcher +Matcher<FunctionDecl>forEachTemplateArgumentMatcher<TemplateArgument> InnerMatcher
Matches classTemplateSpecialization, templateSpecializationType and
 functionDecl nodes where the template argument matches the inner matcher.
 This matcher may produce multiple matches.
@@ -8959,7 +8959,7 @@ 

AST Traversal Matchers

-Matcher<InitListExpr>hasInitunsigned N, ast_matchers::Matcher<Expr> InnerMatcher +Matcher<InitListExpr>hasInitunsigned N, Matcher<Expr> InnerMatcher
Matches the n'th item of an initializer list expression.
 
 Example matches y.
@@ -10026,7 +10026,7 @@ 

AST Traversal Matchers

-Matcher<TemplateSpecializationType>forEachTemplateArgumentclang::ast_matchers::Matcher<TemplateArgument> InnerMatcher +Matcher<TemplateSpecializationType>forEachTemplateArgumentMatcher<TemplateArgument> InnerMatcher
Matches classTemplateSpecialization, templateSpecializationType and
 functionDecl nodes where the template argument matches the inner matcher.
 This matcher may produce multiple matches.
diff --git a/clang/docs/tools/dump_ast_matchers.py b/clang/docs/tools/dump_ast_matchers.py
index cc7024d1627b97..705ff0d4d40985 100755
--- a/clang/docs/tools/dump_ast_matchers.py
+++ b/clang/docs/tools/dump_ast_matchers.py
@@ -116,6 +116,8 @@ def strip_doxygen(comment):
 
 def unify_arguments(args):
     """Gets rid of anything the user doesn't care about in the argument list."""
+    args = re.sub(r"clang::ast_matchers::internal::", r"", args)
+    args = re.sub(r"ast_matchers::internal::", r"", args)
     args = re.sub(r"internal::", r"", args)
     args = re.sub(r"extern const\s+(.*)&", r"\1 ", args)
     args = re.sub(r"&", r" ", args)
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index dc1f49525a004a..ced89ff127ab3e 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -4580,8 +4580,7 @@ AST_POLYMORPHIC_MATCHER_P2(hasArgument,
 ///       return (args * ... * 1);
 ///   }
 /// \endcode
-AST_MATCHER_P(CXXFoldExpr, hasFoldInit, ast_matchers::internal::Matcher,
-              InnerMacher) {
+AST_MATCHER_P(CXXFoldExpr, hasFoldInit, internal::Matcher, InnerMacher) {
   const auto *const Init = Node.getInit();
   return Init && InnerMacher.matches(*Init, Finder, Builder);
 }
@@ -4603,8 +4602,7 @@ AST_MATCHER_P(CXXFoldExpr, hasFoldInit, ast_matchers::internal::Matcher,
 ///       return (args * ... * 1);
 ///   }
 /// \endcode
-AST_MATCHER_P(CXXFoldExpr, hasPattern, ast_matchers::internal::Matcher,
-              InnerMacher) {
+AST_MATCHER_P(CXXFoldExpr, hasPattern, internal::Matcher, InnerMacher) {
   const Expr *const Pattern = Node.getPattern();
   return Pattern && InnerMacher.matches(*Pattern, Finder, Builder);
 }
@@ -4685,8 +4683,8 @@ AST_MATCHER(CXXFoldExpr, isBinaryFold) { return Node.getInit() != nullptr; }
 /// \code
 ///   int x{y}.
 /// \endcode
-AST_MATCHER_P2(InitListExpr, hasInit, unsigned, N,
-               ast_matchers::internal::Matcher, InnerMatcher) {
+AST_MATCHER_P2(InitListExpr, hasInit, unsigned, N, internal::Matcher,
+               InnerMatcher) {
   return N < Node.getNumInits() &&
           InnerMatcher.matches(*Node.getInit(N), Finder, Builder);
 }
@@ -5309,7 +5307,7 @@ AST_POLYMORPHIC_MATCHER_P(
     forEachTemplateArgument,
     AST_POLYMORPHIC_SUPPORTED_TYPES(ClassTemplateSpecializationDecl,
                                     TemplateSpecializationType, FunctionDecl),
-    clang::ast_matchers::internal::Matcher, InnerMatcher) {
+    internal::Matcher, InnerMatcher) {
   ArrayRef TemplateArgs =
       clang::ast_matchers::internal::getTemplateSpecializationArgs(Node);
   clang::ast_matchers::internal::BoundNodesTreeBuilder Result;
@@ -8525,8 +8523,8 @@ AST_MATCHER(FunctionDecl, hasTrailingReturn) {
 ///
 /// ``varDecl(hasInitializer(ignoringElidableConstructorCall(callExpr())))``
 /// matches ``H D = G()`` in C++11 through C++17 (and beyond).
-AST_MATCHER_P(Expr, ignoringElidableConstructorCall,
-              ast_matchers::internal::Matcher, InnerMatcher) {
+AST_MATCHER_P(Expr, ignoringElidableConstructorCall, internal::Matcher,
+              InnerMatcher) {
   // E tracks the node that we are examining.
   const Expr *E = &Node;
   // If present, remove an outer `ExprWithCleanups` corresponding to the
diff --git a/clang/include/clang/InstallAPI/Context.h b/clang/include/clang/InstallAPI/Context.h
index 7d105920734fde..3e2046642c7fe8 100644
--- a/clang/include/clang/InstallAPI/Context.h
+++ b/clang/include/clang/InstallAPI/Context.h
@@ -9,6 +9,9 @@
 #ifndef LLVM_CLANG_INSTALLAPI_CONTEXT_H
 #define LLVM_CLANG_INSTALLAPI_CONTEXT_H
 
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/InstallAPI/HeaderFile.h"
 #include "llvm/TextAPI/InterfaceFile.h"
 #include "llvm/TextAPI/RecordVisitor.h"
 #include "llvm/TextAPI/RecordsSlice.h"
@@ -24,8 +27,23 @@ struct InstallAPIContext {
   /// Library attributes that are typically passed as linker inputs.
   llvm::MachO::RecordsSlice::BinaryAttrs BA;
 
-  /// Active target triple to parse.
-  llvm::Triple TargetTriple{};
+  /// All headers that represent a library.
+  HeaderSeq InputHeaders;
+
+  /// Active language mode to parse in.
+  Language LangMode = Language::ObjC;
+
+  /// Active header access type.
+  HeaderType Type = HeaderType::Unknown;
+
+  /// Active TargetSlice for symbol record collection.
+  std::shared_ptr Slice;
+
+  /// FileManager for all I/O operations.
+  FileManager *FM = nullptr;
+
+  /// DiagnosticsEngine for all error reporting.
+  DiagnosticsEngine *Diags = nullptr;
 
   /// File Path of output location.
   llvm::StringRef OutputLoc{};
diff --git a/clang/include/clang/InstallAPI/Frontend.h b/clang/include/clang/InstallAPI/Frontend.h
new file mode 100644
index 00000000000000..7ee87ae028d079
--- /dev/null
+++ b/clang/include/clang/InstallAPI/Frontend.h
@@ -0,0 +1,48 @@
+//===- InstallAPI/Frontend.h -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Top level wrappers for InstallAPI frontend operations.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_INSTALLAPI_FRONTEND_H
+#define LLVM_CLANG_INSTALLAPI_FRONTEND_H
+
+#include "clang/AST/ASTConsumer.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/InstallAPI/Context.h"
+#include "clang/InstallAPI/Visitor.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace clang {
+namespace installapi {
+
+/// Create a buffer that contains all headers to scan
+/// for global symbols with.
+std::unique_ptr
+createInputBuffer(const InstallAPIContext &Ctx);
+
+class InstallAPIAction : public ASTFrontendAction {
+public:
+  explicit InstallAPIAction(llvm::MachO::RecordsSlice &Records)
+      : Records(Records) {}
+
+  std::unique_ptr CreateASTConsumer(CompilerInstance &CI,
+                                                 StringRef InFile) override {
+    return std::make_unique(CI.getASTContext(), Records);
+  }
+
+private:
+  llvm::MachO::RecordsSlice &Records;
+};
+} // namespace installapi
+} // namespace clang
+
+#endif // LLVM_CLANG_INSTALLAPI_FRONTEND_H
diff --git a/clang/include/clang/InstallAPI/HeaderFile.h b/clang/include/clang/InstallAPI/HeaderFile.h
index fc64a43b3def5c..70e83bbb3e76f6 100644
--- a/clang/include/clang/InstallAPI/HeaderFile.h
+++ b/clang/include/clang/InstallAPI/HeaderFile.h
@@ -15,6 +15,7 @@
 
 #include "clang/Basic/LangStandard.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Regex.h"
 #include 
 #include 
@@ -32,6 +33,20 @@ enum class HeaderType {
   Project,
 };
 
+inline StringRef getName(const HeaderType T) {
+  switch (T) {
+  case HeaderType::Public:
+    return "Public";
+  case HeaderType::Private:
+    return "Private";
+  case HeaderType::Project:
+    return "Project";
+  case HeaderType::Unknown:
+    return "Unknown";
+  }
+  llvm_unreachable("unexpected header type");
+}
+
 class HeaderFile {
   /// Full input path to header.
   std::string FullPath;
@@ -52,6 +67,14 @@ class HeaderFile {
 
   static llvm::Regex getFrameworkIncludeRule();
 
+  HeaderType getType() const { return Type; }
+  StringRef getIncludeName() const { return IncludeName; }
+  StringRef getPath() const { return FullPath; }
+
+  bool useIncludeName() const {
+    return Type != HeaderType::Project && !IncludeName.empty();
+  }
+
   bool operator==(const HeaderFile &Other) const {
     return std::tie(Type, FullPath, IncludeName, Language) ==
            std::tie(Other.Type, Other.FullPath, Other.IncludeName,
diff --git a/clang/include/clang/InstallAPI/Visitor.h b/clang/include/clang/InstallAPI/Visitor.h
new file mode 100644
index 00000000000000..95d669688e4f9c
--- /dev/null
+++ b/clang/include/clang/InstallAPI/Visitor.h
@@ -0,0 +1,51 @@
+//===- InstallAPI/Visitor.h -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// ASTVisitor Interface for InstallAPI frontend operations.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_INSTALLAPI_VISITOR_H
+#define LLVM_CLANG_INSTALLAPI_VISITOR_H
+
+#include "clang/AST/Mangle.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/TextAPI/RecordsSlice.h"
+
+namespace clang {
+namespace installapi {
+
+/// ASTVisitor for collecting declarations that represent global symbols.
+class InstallAPIVisitor final : public ASTConsumer,
+                                public RecursiveASTVisitor {
+public:
+  InstallAPIVisitor(ASTContext &ASTCtx, llvm::MachO::RecordsSlice &Slice)
+      : Slice(Slice),
+        MC(ItaniumMangleContext::create(ASTCtx, ASTCtx.getDiagnostics())),
+        Layout(ASTCtx.getTargetInfo().getDataLayoutString()) {}
+  void HandleTranslationUnit(ASTContext &ASTCtx) override;
+
+  /// Collect global variables.
+  bool VisitVarDecl(const VarDecl *D);
+
+private:
+  std::string getMangledName(const NamedDecl *D) const;
+  std::string getBackendMangledName(llvm::Twine Name) const;
+
+  llvm::MachO::RecordsSlice &Slice;
+  std::unique_ptr MC;
+  StringRef Layout;
+};
+
+} // namespace installapi
+} // namespace clang
+
+#endif // LLVM_CLANG_INSTALLAPI_VISITOR_H
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index f193f959d3a6c0..b151f8d0d7a79c 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -2587,7 +2587,10 @@ bool ByteCodeExprGen::visitExpr(const Expr *E) {
 
     if (!this->emitFinishInit(E))
       return false;
-    return this->emitRetValue(E);
+    // We are destroying the locals AFTER the Ret op.
+    // The Ret op needs to copy the (alive) values, but the
+    // destructors may still turn the entire expression invalid.
+    return this->emitRetValue(E) && RootScope.destroyLocals();
   }
 
   return false;
@@ -3414,14 +3417,15 @@ bool ByteCodeExprGen::emitRecordDestruction(const Record *R) {
   // Now emit the destructor and recurse into base classes.
   if (const CXXDestructorDecl *Dtor = R->getDestructor();
       Dtor && !Dtor->isTrivial()) {
-    if (const Function *DtorFunc = getFunction(Dtor)) {
-      assert(DtorFunc->hasThisPointer());
-      assert(DtorFunc->getNumParams() == 1);
-      if (!this->emitDupPtr(SourceInfo{}))
-        return false;
-      if (!this->emitCall(DtorFunc, 0, SourceInfo{}))
-        return false;
-    }
+    const Function *DtorFunc = getFunction(Dtor);
+    if (!DtorFunc)
+      return false;
+    assert(DtorFunc->hasThisPointer());
+    assert(DtorFunc->getNumParams() == 1);
+    if (!this->emitDupPtr(SourceInfo{}))
+      return false;
+    if (!this->emitCall(DtorFunc, 0, SourceInfo{}))
+      return false;
   }
 
   for (const Record::Base &Base : llvm::reverse(R->bases())) {
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h
index 5b3b533dba3877..acbbcc3dc9619a 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.h
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.h
@@ -332,7 +332,7 @@ template  class VariableScope {
   }
 
   virtual void emitDestruction() {}
-  virtual void emitDestructors() {}
+  virtual bool emitDestructors() { return true; }
   VariableScope *getParent() const { return Parent; }
 
 protected:
@@ -356,13 +356,18 @@ template  class LocalScope : public VariableScope {
   }
 
   /// Overriden to support explicit destruction.
-  void emitDestruction() override {
+  void emitDestruction() override { destroyLocals(); }
+
+  /// Explicit destruction of local variables.
+  bool destroyLocals() {
     if (!Idx)
-      return;
-    this->emitDestructors();
+      return true;
+
+    bool Success = this->emitDestructors();
     this->Ctx->emitDestroy(*Idx, SourceInfo{});
     removeStoredOpaqueValues();
     this->Idx = std::nullopt;
+    return Success;
   }
 
   void addLocal(const Scope::Local &Local) override {
@@ -374,19 +379,25 @@ template  class LocalScope : public VariableScope {
     this->Ctx->Descriptors[*Idx].emplace_back(Local);
   }
 
-  void emitDestructors() override {
+  bool emitDestructors() override {
     if (!Idx)
-      return;
+      return true;
     // Emit destructor calls for local variables of record
     // type with a destructor.
     for (Scope::Local &Local : this->Ctx->Descriptors[*Idx]) {
       if (!Local.Desc->isPrimitive() && !Local.Desc->isPrimitiveArray()) {
-        this->Ctx->emitGetPtrLocal(Local.Offset, SourceInfo{});
-        this->Ctx->emitDestruction(Local.Desc);
-        this->Ctx->emitPopPtr(SourceInfo{});
+        if (!this->Ctx->emitGetPtrLocal(Local.Offset, SourceInfo{}))
+          return false;
+
+        if (!this->Ctx->emitDestruction(Local.Desc))
+          return false;
+
+        if (!this->Ctx->emitPopPtr(SourceInfo{}))
+          return false;
         removeIfStoredOpaqueValue(Local);
       }
     }
+    return true;
   }
 
   void removeStoredOpaqueValues() {
diff --git a/clang/lib/AST/Interp/EvalEmitter.cpp b/clang/lib/AST/Interp/EvalEmitter.cpp
index 9cae25f5c4d642..c9c2bf9b145b22 100644
--- a/clang/lib/AST/Interp/EvalEmitter.cpp
+++ b/clang/lib/AST/Interp/EvalEmitter.cpp
@@ -38,8 +38,11 @@ EvaluationResult EvalEmitter::interpretExpr(const Expr *E,
   this->ConvertResultToRValue = ConvertResultToRValue;
   EvalResult.setSource(E);
 
-  if (!this->visitExpr(E) && EvalResult.empty())
+  if (!this->visitExpr(E)) {
+    // EvalResult may already have a result set, but something failed
+    // after that (e.g. evaluating destructors).
     EvalResult.setInvalid();
+  }
 
   return std::move(this->EvalResult);
 }
diff --git a/clang/lib/AST/Interp/EvaluationResult.h b/clang/lib/AST/Interp/EvaluationResult.h
index 28e1ae6ba3e7ab..ecf2250074cc9e 100644
--- a/clang/lib/AST/Interp/EvaluationResult.h
+++ b/clang/lib/AST/Interp/EvaluationResult.h
@@ -72,7 +72,8 @@ class EvaluationResult final {
     Kind = LValue;
   }
   void setInvalid() {
-    assert(empty());
+    // We are NOT asserting empty() here, since setting it to invalid
+    // is allowed even if there is already a result.
     Kind = Invalid;
   }
   void setValid() {
diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index db52f6649c18ba..13e004371f912c 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -1680,7 +1680,7 @@ bool CastFloatingIntegral(InterpState &S, CodePtr OpPC) {
     auto Status = F.convertToInteger(Result);
 
     // Float-to-Integral overflow check.
-    if ((Status & APFloat::opStatus::opInvalidOp) && F.isFinite()) {
+    if ((Status & APFloat::opStatus::opInvalidOp)) {
       const Expr *E = S.Current->getExpr(OpPC);
       QualType Type = E->getType();
 
@@ -1933,7 +1933,7 @@ inline bool ArrayElemPop(InterpState &S, CodePtr OpPC, uint32_t Index) {
 inline bool ArrayDecay(InterpState &S, CodePtr OpPC) {
   const Pointer &Ptr = S.Stk.pop();
 
-  if (Ptr.isDummy()) {
+  if (Ptr.isZero() || Ptr.isDummy()) {
     S.Stk.push(Ptr);
     return true;
   }
diff --git a/clang/lib/AST/Interp/Pointer.h b/clang/lib/AST/Interp/Pointer.h
index fa2e03d71190f5..34ecdb967960d5 100644
--- a/clang/lib/AST/Interp/Pointer.h
+++ b/clang/lib/AST/Interp/Pointer.h
@@ -215,7 +215,6 @@ class Pointer {
       assert(Offset == PastEndMark && "cannot get base of a block");
       return Pointer(Pointee, Base, 0);
     }
-    assert(Offset == Base && "not an inner field");
     unsigned NewBase = Base - getInlineDesc()->Offset;
     return Pointer(Pointee, NewBase, NewBase);
   }
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 701f1ac852c256..e1ff0d92f6b2f8 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -130,42 +130,42 @@ class MatchDescendantVisitor
 
   bool TraverseGenericSelectionExpr(GenericSelectionExpr *Node) {
     // These are unevaluated, except the result expression.
-    if(ignoreUnevaluatedContext)
+    if (ignoreUnevaluatedContext)
       return TraverseStmt(Node->getResultExpr());
     return VisitorBase::TraverseGenericSelectionExpr(Node);
   }
 
   bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node) {
     // Unevaluated context.
-    if(ignoreUnevaluatedContext)
+    if (ignoreUnevaluatedContext)
       return true;
     return VisitorBase::TraverseUnaryExprOrTypeTraitExpr(Node);
   }
 
   bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node) {
     // Unevaluated context.
-    if(ignoreUnevaluatedContext)
+    if (ignoreUnevaluatedContext)
       return true;
     return VisitorBase::TraverseTypeOfExprTypeLoc(Node);
   }
 
   bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node) {
     // Unevaluated context.
-    if(ignoreUnevaluatedContext)
+    if (ignoreUnevaluatedContext)
       return true;
     return VisitorBase::TraverseDecltypeTypeLoc(Node);
   }
 
   bool TraverseCXXNoexceptExpr(CXXNoexceptExpr *Node) {
     // Unevaluated context.
-    if(ignoreUnevaluatedContext)
+    if (ignoreUnevaluatedContext)
       return true;
     return VisitorBase::TraverseCXXNoexceptExpr(Node);
   }
 
   bool TraverseCXXTypeidExpr(CXXTypeidExpr *Node) {
     // Unevaluated context.
-    if(ignoreUnevaluatedContext)
+    if (ignoreUnevaluatedContext)
       return true;
     return VisitorBase::TraverseCXXTypeidExpr(Node);
   }
@@ -213,24 +213,26 @@ class MatchDescendantVisitor
 
 // Because we're dealing with raw pointers, let's define what we mean by that.
 static auto hasPointerType() {
-    return hasType(hasCanonicalType(pointerType()));
+  return hasType(hasCanonicalType(pointerType()));
 }
 
-static auto hasArrayType() {
-    return hasType(hasCanonicalType(arrayType()));
-}
+static auto hasArrayType() { return hasType(hasCanonicalType(arrayType())); }
 
-AST_MATCHER_P(Stmt, forEachDescendantEvaluatedStmt, internal::Matcher, innerMatcher) {
+AST_MATCHER_P(Stmt, forEachDescendantEvaluatedStmt, internal::Matcher,
+              innerMatcher) {
   const DynTypedMatcher &DTM = static_cast(innerMatcher);
 
-  MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, true);
+  MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All,
+                                 true);
   return Visitor.findMatch(DynTypedNode::create(Node));
 }
 
-AST_MATCHER_P(Stmt, forEachDescendantStmt, internal::Matcher, innerMatcher) {
+AST_MATCHER_P(Stmt, forEachDescendantStmt, internal::Matcher,
+              innerMatcher) {
   const DynTypedMatcher &DTM = static_cast(innerMatcher);
 
-  MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, false);
+  MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All,
+                                 false);
   return Visitor.findMatch(DynTypedNode::create(Node));
 }
 
@@ -268,10 +270,9 @@ static auto isInUnspecifiedLvalueContext(internal::Matcher innerMatcher) {
         hasLHS(innerMatcher)
       )
     ));
-// clang-format on
+  // clang-format on
 }
 
-
 // Returns a matcher that matches any expression `e` such that `InnerMatcher`
 // matches `e` and `e` is in an Unspecified Pointer Context (UPC).
 static internal::Matcher
@@ -315,7 +316,7 @@ isInUnspecifiedPointerContext(internal::Matcher InnerMatcher) {
   // clang-format on
 
   return stmt(anyOf(CallArgMatcher, CastOperandMatcher, CompOperandMatcher,
-		    PtrSubtractionMatcher));
+                    PtrSubtractionMatcher));
   // FIXME: any more cases? (UPC excludes the RHS of an assignment.  For now we
   // don't have to check that.)
 }
@@ -481,7 +482,9 @@ class Gadget {
 #ifndef NDEBUG
   StringRef getDebugName() const {
     switch (K) {
-#define GADGET(x) case Kind::x: return #x;
+#define GADGET(x)                                                              \
+  case Kind::x:                                                                \
+    return #x;
 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
     }
     llvm_unreachable("Unhandled Gadget::Kind enum");
@@ -502,7 +505,6 @@ class Gadget {
   Kind K;
 };
 
-
 /// Warning gadgets correspond to unsafe code patterns that warrants
 /// an immediate warning.
 class WarningGadget : public Gadget {
@@ -513,10 +515,10 @@ class WarningGadget : public Gadget {
   bool isWarningGadget() const final { return true; }
 };
 
-/// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be
-/// properly recognized in order to emit fixes. For example, if a raw pointer-type
-/// variable is replaced by a safe C++ container, every use of such variable must be
-/// carefully considered and possibly updated.
+/// Fixable gadgets correspond to code patterns that aren't always unsafe but
+/// need to be properly recognized in order to emit fixes. For example, if a raw
+/// pointer-type variable is replaced by a safe C++ container, every use of such
+/// variable must be carefully considered and possibly updated.
 class FixableGadget : public Gadget {
 public:
   FixableGadget(Kind K) : Gadget(K) {}
@@ -531,20 +533,19 @@ class FixableGadget : public Gadget {
     return std::nullopt;
   }
 
-  /// Returns a list of two elements where the first element is the LHS of a pointer assignment
-  /// statement and the second element is the RHS. This two-element list represents the fact that
-  /// the LHS buffer gets its bounds information from the RHS buffer. This information will be used
-  /// later to group all those variables whose types must be modified together to prevent type
-  /// mismatches.
+  /// Returns a list of two elements where the first element is the LHS of a
+  /// pointer assignment statement and the second element is the RHS. This
+  /// two-element list represents the fact that the LHS buffer gets its bounds
+  /// information from the RHS buffer. This information will be used later to
+  /// group all those variables whose types must be modified together to prevent
+  /// type mismatches.
   virtual std::optional>
   getStrategyImplications() const {
     return std::nullopt;
   }
 };
 
-static auto toSupportedVariable() {
-  return to(varDecl());
-}
+static auto toSupportedVariable() { return to(varDecl()); }
 
 using FixableGadgetList = std::vector>;
 using WarningGadgetList = std::vector>;
@@ -565,10 +566,10 @@ class IncrementGadget : public WarningGadget {
   }
 
   static Matcher matcher() {
-    return stmt(unaryOperator(
-      hasOperatorName("++"),
-      hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
-    ).bind(OpTag));
+    return stmt(
+        unaryOperator(hasOperatorName("++"),
+                      hasUnaryOperand(ignoringParenImpCasts(hasPointerType())))
+            .bind(OpTag));
   }
 
   const UnaryOperator *getBaseStmt() const override { return Op; }
@@ -600,10 +601,10 @@ class DecrementGadget : public WarningGadget {
   }
 
   static Matcher matcher() {
-    return stmt(unaryOperator(
-      hasOperatorName("--"),
-      hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
-    ).bind(OpTag));
+    return stmt(
+        unaryOperator(hasOperatorName("--"),
+                      hasUnaryOperand(ignoringParenImpCasts(hasPointerType())))
+            .bind(OpTag));
   }
 
   const UnaryOperator *getBaseStmt() const override { return Op; }
@@ -754,26 +755,25 @@ class PointerInitGadget : public FixableGadget {
 private:
   static constexpr const char *const PointerInitLHSTag = "ptrInitLHS";
   static constexpr const char *const PointerInitRHSTag = "ptrInitRHS";
-  const VarDecl * PtrInitLHS;         // the LHS pointer expression in `PI`
-  const DeclRefExpr * PtrInitRHS;         // the RHS pointer expression in `PI`
+  const VarDecl *PtrInitLHS;     // the LHS pointer expression in `PI`
+  const DeclRefExpr *PtrInitRHS; // the RHS pointer expression in `PI`
 
 public:
   PointerInitGadget(const MatchFinder::MatchResult &Result)
       : FixableGadget(Kind::PointerInit),
-    PtrInitLHS(Result.Nodes.getNodeAs(PointerInitLHSTag)),
-    PtrInitRHS(Result.Nodes.getNodeAs(PointerInitRHSTag)) {}
+        PtrInitLHS(Result.Nodes.getNodeAs(PointerInitLHSTag)),
+        PtrInitRHS(Result.Nodes.getNodeAs(PointerInitRHSTag)) {}
 
   static bool classof(const Gadget *G) {
     return G->getKind() == Kind::PointerInit;
   }
 
   static Matcher matcher() {
-    auto PtrInitStmt = declStmt(hasSingleDecl(varDecl(
-                                 hasInitializer(ignoringImpCasts(declRefExpr(
-                                                  hasPointerType(),
-                                                    toSupportedVariable()).
-                                                  bind(PointerInitRHSTag)))).
-                                              bind(PointerInitLHSTag)));
+    auto PtrInitStmt = declStmt(hasSingleDecl(
+        varDecl(hasInitializer(ignoringImpCasts(
+                    declRefExpr(hasPointerType(), toSupportedVariable())
+                        .bind(PointerInitRHSTag))))
+            .bind(PointerInitLHSTag)));
 
     return stmt(PtrInitStmt);
   }
@@ -793,8 +793,7 @@ class PointerInitGadget : public FixableGadget {
 
   virtual std::optional>
   getStrategyImplications() const override {
-      return std::make_pair(PtrInitLHS,
-                            cast(PtrInitRHS->getDecl()));
+    return std::make_pair(PtrInitLHS, cast(PtrInitRHS->getDecl()));
   }
 };
 
@@ -807,8 +806,8 @@ class PtrToPtrAssignmentGadget : public FixableGadget {
 private:
   static constexpr const char *const PointerAssignLHSTag = "ptrLHS";
   static constexpr const char *const PointerAssignRHSTag = "ptrRHS";
-  const DeclRefExpr * PtrLHS;         // the LHS pointer expression in `PA`
-  const DeclRefExpr * PtrRHS;         // the RHS pointer expression in `PA`
+  const DeclRefExpr *PtrLHS; // the LHS pointer expression in `PA`
+  const DeclRefExpr *PtrRHS; // the RHS pointer expression in `PA`
 
 public:
   PtrToPtrAssignmentGadget(const MatchFinder::MatchResult &Result)
@@ -821,13 +820,13 @@ class PtrToPtrAssignmentGadget : public FixableGadget {
   }
 
   static Matcher matcher() {
-    auto PtrAssignExpr = binaryOperator(allOf(hasOperatorName("="),
-      hasRHS(ignoringParenImpCasts(declRefExpr(hasPointerType(),
-                                               toSupportedVariable()).
-                                   bind(PointerAssignRHSTag))),
-                                   hasLHS(declRefExpr(hasPointerType(),
-                                                      toSupportedVariable()).
-                                          bind(PointerAssignLHSTag))));
+    auto PtrAssignExpr = binaryOperator(
+        allOf(hasOperatorName("="),
+              hasRHS(ignoringParenImpCasts(
+                  declRefExpr(hasPointerType(), toSupportedVariable())
+                      .bind(PointerAssignRHSTag))),
+              hasLHS(declRefExpr(hasPointerType(), toSupportedVariable())
+                         .bind(PointerAssignLHSTag))));
 
     return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr));
   }
@@ -981,9 +980,8 @@ class ULCArraySubscriptGadget : public FixableGadget {
 
   static Matcher matcher() {
     auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
-    auto BaseIsArrayOrPtrDRE =
-        hasBase(ignoringParenImpCasts(declRefExpr(ArrayOrPtr,
-                                                  toSupportedVariable())));
+    auto BaseIsArrayOrPtrDRE = hasBase(
+        ignoringParenImpCasts(declRefExpr(ArrayOrPtr, toSupportedVariable())));
     auto Target =
         arraySubscriptExpr(BaseIsArrayOrPtrDRE).bind(ULCArraySubscriptTag);
 
@@ -1025,9 +1023,9 @@ class UPCStandalonePointerGadget : public FixableGadget {
 
   static Matcher matcher() {
     auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
-    auto target = expr(
-        ignoringParenImpCasts(declRefExpr(allOf(ArrayOrPtr,
-                              toSupportedVariable())).bind(DeclRefExprTag)));
+    auto target = expr(ignoringParenImpCasts(
+        declRefExpr(allOf(ArrayOrPtr, toSupportedVariable()))
+            .bind(DeclRefExprTag)));
     return stmt(isInUnspecifiedPointerContext(target));
   }
 
@@ -1036,9 +1034,7 @@ class UPCStandalonePointerGadget : public FixableGadget {
 
   virtual const Stmt *getBaseStmt() const override { return Node; }
 
-  virtual DeclUseList getClaimedVarUseSites() const override {
-    return {Node};
-  }
+  virtual DeclUseList getClaimedVarUseSites() const override { return {Node}; }
 };
 
 class PointerDereferenceGadget : public FixableGadget {
@@ -1103,10 +1099,10 @@ class UPCAddressofArraySubscriptGadget : public FixableGadget {
 
   static Matcher matcher() {
     return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
-        unaryOperator(hasOperatorName("&"),
-                      hasUnaryOperand(arraySubscriptExpr(
-                          hasBase(ignoringParenImpCasts(declRefExpr(
-                                                  toSupportedVariable()))))))
+        unaryOperator(
+            hasOperatorName("&"),
+            hasUnaryOperand(arraySubscriptExpr(hasBase(
+                ignoringParenImpCasts(declRefExpr(toSupportedVariable()))))))
             .bind(UPCAddressofArraySubscriptTag)))));
   }
 
@@ -1195,13 +1191,13 @@ class DeclUseTracker {
 class UPCPreIncrementGadget : public FixableGadget {
 private:
   static constexpr const char *const UPCPreIncrementTag =
-    "PointerPreIncrementUnderUPC";
+      "PointerPreIncrementUnderUPC";
   const UnaryOperator *Node; // the `++Ptr` node
 
 public:
   UPCPreIncrementGadget(const MatchFinder::MatchResult &Result)
-    : FixableGadget(Kind::UPCPreIncrement),
-      Node(Result.Nodes.getNodeAs(UPCPreIncrementTag)) {
+      : FixableGadget(Kind::UPCPreIncrement),
+        Node(Result.Nodes.getNodeAs(UPCPreIncrementTag)) {
     assert(Node != nullptr && "Expecting a non-null matching result");
   }
 
@@ -1215,10 +1211,9 @@ class UPCPreIncrementGadget : public FixableGadget {
     // can have the matcher be general, so long as `getClaimedVarUseSites` does
     // things right.
     return stmt(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
-								    unaryOperator(isPreInc(),
-										  hasUnaryOperand(declRefExpr(
-                                                    toSupportedVariable()))
-										  ).bind(UPCPreIncrementTag)))));
+        unaryOperator(isPreInc(),
+                      hasUnaryOperand(declRefExpr(toSupportedVariable())))
+            .bind(UPCPreIncrementTag)))));
   }
 
   virtual std::optional
@@ -1782,9 +1777,9 @@ static SourceRange getSourceRangeToTokenEnd(const Decl *D,
                                             const LangOptions &LangOpts) {
   SourceLocation Begin = D->getBeginLoc();
   SourceLocation
-    End = // `D->getEndLoc` should always return the starting location of the
-    // last token, so we should get the end of the token
-    Lexer::getLocForEndOfToken(D->getEndLoc(), 0, SM, LangOpts);
+      End = // `D->getEndLoc` should always return the starting location of the
+      // last token, so we should get the end of the token
+      Lexer::getLocForEndOfToken(D->getEndLoc(), 0, SM, LangOpts);
 
   return SourceRange(Begin, End);
 }
@@ -1976,7 +1971,7 @@ PointerDereferenceGadget::getFixits(const FixitStrategy &S) const {
     if (auto LocPastOperand =
             getPastLoc(BaseDeclRefExpr, SM, Ctx.getLangOpts())) {
       return FixItList{{FixItHint::CreateRemoval(derefRange),
-			FixItHint::CreateInsertion(*LocPastOperand, "[0]")}};
+                        FixItHint::CreateInsertion(*LocPastOperand, "[0]")}};
     }
     break;
   }
@@ -2162,7 +2157,8 @@ FixVarInitializerWithSpan(const Expr *Init, ASTContext &Ctx,
   // NULL pointer, we use the default constructor to initialize the span
   // object, i.e., a `std:span` variable declaration with no initializer.
   // So the fix-it is just to remove the initializer.
-  if (Init->isNullPointerConstant(Ctx,
+  if (Init->isNullPointerConstant(
+          Ctx,
           // FIXME: Why does this function not ask for `const ASTContext
           // &`? It should. Maybe worth an NFC patch later.
           Expr::NullPointerConstantValueDependence::
@@ -2230,8 +2226,10 @@ FixVarInitializerWithSpan(const Expr *Init, ASTContext &Ctx,
 }
 
 #ifndef NDEBUG
-#define DEBUG_NOTE_DECL_FAIL(D, Msg)  \
-Handler.addDebugNoteForVar((D), (D)->getBeginLoc(), "failed to produce fixit for declaration '" + (D)->getNameAsString() + "'" + (Msg))
+#define DEBUG_NOTE_DECL_FAIL(D, Msg)                                           \
+  Handler.addDebugNoteForVar((D), (D)->getBeginLoc(),                          \
+                             "failed to produce fixit for declaration '" +     \
+                                 (D)->getNameAsString() + "'" + (Msg))
 #else
 #define DEBUG_NOTE_DECL_FAIL(D, Msg)
 #endif
@@ -2239,8 +2237,8 @@ Handler.addDebugNoteForVar((D), (D)->getBeginLoc(), "failed to produce fixit for
 // For the given variable declaration with a pointer-to-T type, returns the text
 // `std::span`.  If it is unable to generate the text, returns
 // `std::nullopt`.
-static std::optional createSpanTypeForVarDecl(const VarDecl *VD,
-                                                           const ASTContext &Ctx) {
+static std::optional
+createSpanTypeForVarDecl(const VarDecl *VD, const ASTContext &Ctx) {
   assert(VD->getType()->isPointerType());
 
   std::optional PteTyQualifiers = std::nullopt;
@@ -2277,8 +2275,8 @@ static std::optional createSpanTypeForVarDecl(const VarDecl *VD,
 //    the non-empty fix-it list, if fix-its are successfuly generated; empty
 //    list otherwise.
 static FixItList fixLocalVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx,
-					 const StringRef UserFillPlaceHolder,
-					 UnsafeBufferUsageHandler &Handler) {
+                                         const StringRef UserFillPlaceHolder,
+                                         UnsafeBufferUsageHandler &Handler) {
   if (hasUnsupportedSpecifiers(D, Ctx.getSourceManager()))
     return {};
 
@@ -2431,9 +2429,9 @@ createOverloadsForFixedParams(const FixitStrategy &S, const FunctionDecl *FD,
         // print parameter name if provided:
         if (IdentifierInfo *II = Parm->getIdentifier())
           SS << ' ' << II->getName().str();
-      } else if (auto ParmTypeText = getRangeText(
-                     getSourceRangeToTokenEnd(Parm, SM, LangOpts),
-                     SM, LangOpts)) {
+      } else if (auto ParmTypeText =
+                     getRangeText(getSourceRangeToTokenEnd(Parm, SM, LangOpts),
+                                  SM, LangOpts)) {
         // print the whole `Parm` without modification:
         SS << ParmTypeText->str();
       } else
@@ -2577,7 +2575,8 @@ static FixItList fixVariableWithSpan(const VarDecl *VD,
                                      UnsafeBufferUsageHandler &Handler) {
   const DeclStmt *DS = Tracker.lookupDecl(VD);
   if (!DS) {
-    DEBUG_NOTE_DECL_FAIL(VD, " : variables declared this way not implemented yet");
+    DEBUG_NOTE_DECL_FAIL(VD,
+                         " : variables declared this way not implemented yet");
     return {};
   }
   if (!DS->isSingleDecl()) {
@@ -2979,8 +2978,8 @@ void clang::checkUnsafeBufferUsage(const Decl *D,
 #endif
 
   assert(D && D->getBody());
-  // We do not want to visit a Lambda expression defined inside a method independently.
-  // Instead, it should be visited along with the outer method.
+  // We do not want to visit a Lambda expression defined inside a method
+  // independently. Instead, it should be visited along with the outer method.
   // FIXME: do we want to do the same thing for `BlockDecl`s?
   if (const auto *fd = dyn_cast(D)) {
     if (fd->getParent()->isLambda() && fd->getParent()->isLocalClass())
@@ -2990,7 +2989,7 @@ void clang::checkUnsafeBufferUsage(const Decl *D,
   // Do not emit fixit suggestions for functions declared in an
   // extern "C" block.
   if (const auto *FD = dyn_cast(D)) {
-      for (FunctionDecl *FReDecl : FD->redecls()) {
+    for (FunctionDecl *FReDecl : FD->redecls()) {
       if (FReDecl->isExternC()) {
         EmitSuggestions = false;
         break;
@@ -3002,7 +3001,7 @@ void clang::checkUnsafeBufferUsage(const Decl *D,
   FixableGadgetSets FixablesForAllVars;
 
   auto [FixableGadgets, WarningGadgets, Tracker] =
-    findGadgets(D, Handler, EmitSuggestions);
+      findGadgets(D, Handler, EmitSuggestions);
 
   if (!EmitSuggestions) {
     // Our job is very easy without suggestions. Just warn about
@@ -3055,36 +3054,36 @@ void clang::checkUnsafeBufferUsage(const Decl *D,
   // Filter out non-local vars and vars with unclaimed DeclRefExpr-s.
   for (auto it = FixablesForAllVars.byVar.cbegin();
        it != FixablesForAllVars.byVar.cend();) {
-      // FIXME: need to deal with global variables later
-      if ((!it->first->isLocalVarDecl() && !isa(it->first))) {
+    // FIXME: need to deal with global variables later
+    if ((!it->first->isLocalVarDecl() && !isa(it->first))) {
 #ifndef NDEBUG
-          Handler.addDebugNoteForVar(
-              it->first, it->first->getBeginLoc(),
-              ("failed to produce fixit for '" + it->first->getNameAsString() +
-               "' : neither local nor a parameter"));
+      Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(),
+                                 ("failed to produce fixit for '" +
+                                  it->first->getNameAsString() +
+                                  "' : neither local nor a parameter"));
 #endif
-        it = FixablesForAllVars.byVar.erase(it);
-      } else if (it->first->getType().getCanonicalType()->isReferenceType()) {
+      it = FixablesForAllVars.byVar.erase(it);
+    } else if (it->first->getType().getCanonicalType()->isReferenceType()) {
 #ifndef NDEBUG
-        Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(),
-                                   ("failed to produce fixit for '" +
-                                    it->first->getNameAsString() +
-                                    "' : has a reference type"));
+      Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(),
+                                 ("failed to produce fixit for '" +
+                                  it->first->getNameAsString() +
+                                  "' : has a reference type"));
 #endif
-        it = FixablesForAllVars.byVar.erase(it);
-      } else if (Tracker.hasUnclaimedUses(it->first)) {
-        it = FixablesForAllVars.byVar.erase(it);
-      } else if (it->first->isInitCapture()) {
+      it = FixablesForAllVars.byVar.erase(it);
+    } else if (Tracker.hasUnclaimedUses(it->first)) {
+      it = FixablesForAllVars.byVar.erase(it);
+    } else if (it->first->isInitCapture()) {
 #ifndef NDEBUG
-        Handler.addDebugNoteForVar(
-            it->first, it->first->getBeginLoc(),
-                                   ("failed to produce fixit for '" + it->first->getNameAsString() +
-                                    "' : init capture"));
+      Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(),
+                                 ("failed to produce fixit for '" +
+                                  it->first->getNameAsString() +
+                                  "' : init capture"));
 #endif
-        it = FixablesForAllVars.byVar.erase(it);
-      } else {
-        ++it;
-      }
+      it = FixablesForAllVars.byVar.erase(it);
+    } else {
+      ++it;
+    }
   }
 
 #ifndef NDEBUG
@@ -3115,7 +3114,7 @@ void clang::checkUnsafeBufferUsage(const Decl *D,
   for (auto it : FixablesForAllVars.byVar) {
     for (const FixableGadget *fixable : it.second) {
       std::optional> ImplPair =
-                                  fixable->getStrategyImplications();
+          fixable->getStrategyImplications();
       if (ImplPair) {
         std::pair Impl = std::move(*ImplPair);
         PtrAssignmentGraph[Impl.first].insert(Impl.second);
@@ -3144,10 +3143,10 @@ void clang::checkUnsafeBufferUsage(const Decl *D,
   for (const auto &[Var, ignore] : UnsafeOps.byVar) {
     if (VisitedVarsDirected.find(Var) == VisitedVarsDirected.end()) {
 
-      std::queue QueueDirected{};
+      std::queue QueueDirected{};
       QueueDirected.push(Var);
-      while(!QueueDirected.empty()) {
-        const VarDecl* CurrentVar = QueueDirected.front();
+      while (!QueueDirected.empty()) {
+        const VarDecl *CurrentVar = QueueDirected.front();
         QueueDirected.pop();
         VisitedVarsDirected.insert(CurrentVar);
         auto AdjacentNodes = PtrAssignmentGraph[CurrentVar];
@@ -3178,11 +3177,11 @@ void clang::checkUnsafeBufferUsage(const Decl *D,
   for (const auto &[Var, ignore] : UnsafeOps.byVar) {
     if (VisitedVars.find(Var) == VisitedVars.end()) {
       VarGrpTy &VarGroup = Groups.emplace_back();
-      std::queue Queue{};
+      std::queue Queue{};
 
       Queue.push(Var);
-      while(!Queue.empty()) {
-        const VarDecl* CurrentVar = Queue.front();
+      while (!Queue.empty()) {
+        const VarDecl *CurrentVar = Queue.front();
         Queue.pop();
         VisitedVars.insert(CurrentVar);
         VarGroup.push_back(CurrentVar);
diff --git a/clang/lib/CodeGen/ABIInfo.cpp b/clang/lib/CodeGen/ABIInfo.cpp
index 1b56cf7c596d06..efcff958ce5452 100644
--- a/clang/lib/CodeGen/ABIInfo.cpp
+++ b/clang/lib/CodeGen/ABIInfo.cpp
@@ -184,6 +184,58 @@ ABIArgInfo ABIInfo::getNaturalAlignIndirectInReg(QualType Ty,
                                       /*ByVal*/ false, Realign);
 }
 
+void ABIInfo::appendAttributeMangling(TargetAttr *Attr,
+                                      raw_ostream &Out) const {
+  if (Attr->isDefaultVersion())
+    return;
+  appendAttributeMangling(Attr->getFeaturesStr(), Out);
+}
+
+void ABIInfo::appendAttributeMangling(TargetVersionAttr *Attr,
+                                      raw_ostream &Out) const {
+  appendAttributeMangling(Attr->getNamesStr(), Out);
+}
+
+void ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
+                                      raw_ostream &Out) const {
+  appendAttributeMangling(Attr->getFeatureStr(Index), Out);
+  Out << '.' << Attr->getMangledIndex(Index);
+}
+
+void ABIInfo::appendAttributeMangling(StringRef AttrStr,
+                                      raw_ostream &Out) const {
+  if (AttrStr == "default") {
+    Out << ".default";
+    return;
+  }
+
+  Out << '.';
+  const TargetInfo &TI = CGT.getTarget();
+  ParsedTargetAttr Info = TI.parseTargetAttr(AttrStr);
+
+  llvm::sort(Info.Features, [&TI](StringRef LHS, StringRef RHS) {
+    // Multiversioning doesn't allow "no-${feature}", so we can
+    // only have "+" prefixes here.
+    assert(LHS.starts_with("+") && RHS.starts_with("+") &&
+           "Features should always have a prefix.");
+    return TI.multiVersionSortPriority(LHS.substr(1)) >
+           TI.multiVersionSortPriority(RHS.substr(1));
+  });
+
+  bool IsFirst = true;
+  if (!Info.CPU.empty()) {
+    IsFirst = false;
+    Out << "arch_" << Info.CPU;
+  }
+
+  for (StringRef Feat : Info.Features) {
+    if (!IsFirst)
+      Out << '_';
+    IsFirst = false;
+    Out << Feat.substr(1);
+  }
+}
+
 // Pin the vtable to this file.
 SwiftABIInfo::~SwiftABIInfo() = default;
 
diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h
index b9a5ef6e436693..ff4ae44a42c332 100644
--- a/clang/lib/CodeGen/ABIInfo.h
+++ b/clang/lib/CodeGen/ABIInfo.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_ABIINFO_H
 #define LLVM_CLANG_LIB_CODEGEN_ABIINFO_H
 
+#include "clang/AST/Attr.h"
 #include "clang/AST/CharUnits.h"
 #include "clang/AST/Type.h"
 #include "llvm/IR/CallingConv.h"
@@ -111,6 +112,15 @@ class ABIInfo {
 
   CodeGen::ABIArgInfo getNaturalAlignIndirectInReg(QualType Ty,
                                                    bool Realign = false) const;
+
+  virtual void appendAttributeMangling(TargetAttr *Attr,
+                                       raw_ostream &Out) const;
+  virtual void appendAttributeMangling(TargetVersionAttr *Attr,
+                                       raw_ostream &Out) const;
+  virtual void appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
+                                       raw_ostream &Out) const;
+  virtual void appendAttributeMangling(StringRef AttrStr,
+                                       raw_ostream &Out) const;
 };
 
 /// Target specific hooks for defining how a type should be passed or returned
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index d05cf1c6e1814e..13f68237b464d6 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -3221,12 +3221,11 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
 
       llvm::StructType *STy =
           dyn_cast(ArgI.getCoerceToType());
-      llvm::TypeSize StructSize;
-      llvm::TypeSize PtrElementSize;
       if (ArgI.isDirect() && !ArgI.getCanBeFlattened() && STy &&
           STy->getNumElements() > 1) {
-        StructSize = CGM.getDataLayout().getTypeAllocSize(STy);
-        PtrElementSize =
+        [[maybe_unused]] llvm::TypeSize StructSize =
+            CGM.getDataLayout().getTypeAllocSize(STy);
+        [[maybe_unused]] llvm::TypeSize PtrElementSize =
             CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(Ty));
         if (STy->containsHomogeneousScalableVectorTypes()) {
           assert(StructSize == PtrElementSize &&
@@ -5310,12 +5309,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
 
       llvm::StructType *STy =
           dyn_cast(ArgInfo.getCoerceToType());
-      llvm::Type *SrcTy = ConvertTypeForMem(I->Ty);
-      llvm::TypeSize SrcTypeSize;
-      llvm::TypeSize DstTypeSize;
       if (STy && ArgInfo.isDirect() && !ArgInfo.getCanBeFlattened()) {
-        SrcTypeSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
-        DstTypeSize = CGM.getDataLayout().getTypeAllocSize(STy);
+        llvm::Type *SrcTy = ConvertTypeForMem(I->Ty);
+        [[maybe_unused]] llvm::TypeSize SrcTypeSize =
+            CGM.getDataLayout().getTypeAllocSize(SrcTy);
+        [[maybe_unused]] llvm::TypeSize DstTypeSize =
+            CGM.getDataLayout().getTypeAllocSize(STy);
         if (STy->containsHomogeneousScalableVectorTypes()) {
           assert(SrcTypeSize == DstTypeSize &&
                  "Only allow non-fractional movement of structure with "
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index d16d12fac8b03c..82a97ecfaa0078 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1727,59 +1727,6 @@ static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM,
     Out << ".resolver";
 }
 
-static void AppendTargetVersionMangling(const CodeGenModule &CGM,
-                                        const TargetVersionAttr *Attr,
-                                        raw_ostream &Out) {
-  if (Attr->isDefaultVersion()) {
-    Out << ".default";
-    return;
-  }
-  Out << "._";
-  const TargetInfo &TI = CGM.getTarget();
-  llvm::SmallVector Feats;
-  Attr->getFeatures(Feats);
-  llvm::stable_sort(Feats, [&TI](const StringRef FeatL, const StringRef FeatR) {
-    return TI.multiVersionSortPriority(FeatL) <
-           TI.multiVersionSortPriority(FeatR);
-  });
-  for (const auto &Feat : Feats) {
-    Out << 'M';
-    Out << Feat;
-  }
-}
-
-static void AppendTargetMangling(const CodeGenModule &CGM,
-                                 const TargetAttr *Attr, raw_ostream &Out) {
-  if (Attr->isDefaultVersion())
-    return;
-
-  Out << '.';
-  const TargetInfo &Target = CGM.getTarget();
-  ParsedTargetAttr Info = Target.parseTargetAttr(Attr->getFeaturesStr());
-  llvm::sort(Info.Features, [&Target](StringRef LHS, StringRef RHS) {
-    // Multiversioning doesn't allow "no-${feature}", so we can
-    // only have "+" prefixes here.
-    assert(LHS.starts_with("+") && RHS.starts_with("+") &&
-           "Features should always have a prefix.");
-    return Target.multiVersionSortPriority(LHS.substr(1)) >
-           Target.multiVersionSortPriority(RHS.substr(1));
-  });
-
-  bool IsFirst = true;
-
-  if (!Info.CPU.empty()) {
-    IsFirst = false;
-    Out << "arch_" << Info.CPU;
-  }
-
-  for (StringRef Feat : Info.Features) {
-    if (!IsFirst)
-      Out << '_';
-    IsFirst = false;
-    Out << Feat.substr(1);
-  }
-}
-
 // Returns true if GD is a function decl with internal linkage and
 // needs a unique suffix after the mangled name.
 static bool isUniqueInternalLinkageDecl(GlobalDecl GD,
@@ -1789,41 +1736,6 @@ static bool isUniqueInternalLinkageDecl(GlobalDecl GD,
          (CGM.getFunctionLinkage(GD) == llvm::GlobalValue::InternalLinkage);
 }
 
-static void AppendTargetClonesMangling(const CodeGenModule &CGM,
-                                       const TargetClonesAttr *Attr,
-                                       unsigned VersionIndex,
-                                       raw_ostream &Out) {
-  const TargetInfo &TI = CGM.getTarget();
-  if (TI.getTriple().isAArch64()) {
-    StringRef FeatureStr = Attr->getFeatureStr(VersionIndex);
-    if (FeatureStr == "default") {
-      Out << ".default";
-      return;
-    }
-    Out << "._";
-    SmallVector Features;
-    FeatureStr.split(Features, "+");
-    llvm::stable_sort(Features,
-                      [&TI](const StringRef FeatL, const StringRef FeatR) {
-                        return TI.multiVersionSortPriority(FeatL) <
-                               TI.multiVersionSortPriority(FeatR);
-                      });
-    for (auto &Feat : Features) {
-      Out << 'M';
-      Out << Feat;
-    }
-  } else {
-    Out << '.';
-    StringRef FeatureStr = Attr->getFeatureStr(VersionIndex);
-    if (FeatureStr.starts_with("arch="))
-      Out << "arch_" << FeatureStr.substr(sizeof("arch=") - 1);
-    else
-      Out << FeatureStr;
-
-    Out << '.' << Attr->getMangledIndex(VersionIndex);
-  }
-}
-
 static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
                                       const NamedDecl *ND,
                                       bool OmitMultiVersionMangling = false) {
@@ -1877,16 +1789,25 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
                                              FD->getAttr(),
                                              GD.getMultiVersionIndex(), Out);
         break;
-      case MultiVersionKind::Target:
-        AppendTargetMangling(CGM, FD->getAttr(), Out);
+      case MultiVersionKind::Target: {
+        auto *Attr = FD->getAttr();
+        const ABIInfo &Info = CGM.getTargetCodeGenInfo().getABIInfo();
+        Info.appendAttributeMangling(Attr, Out);
         break;
-      case MultiVersionKind::TargetVersion:
-        AppendTargetVersionMangling(CGM, FD->getAttr(), Out);
+      }
+      case MultiVersionKind::TargetVersion: {
+        auto *Attr = FD->getAttr();
+        const ABIInfo &Info = CGM.getTargetCodeGenInfo().getABIInfo();
+        Info.appendAttributeMangling(Attr, Out);
         break;
-      case MultiVersionKind::TargetClones:
-        AppendTargetClonesMangling(CGM, FD->getAttr(),
-                                   GD.getMultiVersionIndex(), Out);
+      }
+      case MultiVersionKind::TargetClones: {
+        auto *Attr = FD->getAttr();
+        unsigned Index = GD.getMultiVersionIndex();
+        const ABIInfo &Info = CGM.getTargetCodeGenInfo().getABIInfo();
+        Info.appendAttributeMangling(Attr, Index, Out);
         break;
+      }
       case MultiVersionKind::None:
         llvm_unreachable("None multiversion type isn't valid here");
       }
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 94f8e7be2ee6eb..adfdd516351901 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -9,6 +9,7 @@
 #include "ABIInfoImpl.h"
 #include "TargetInfo.h"
 #include "clang/Basic/DiagnosticFrontend.h"
+#include "llvm/TargetParser/AArch64TargetParser.h"
 
 using namespace clang;
 using namespace clang::CodeGen;
@@ -75,6 +76,12 @@ class AArch64ABIInfo : public ABIInfo {
   bool allowBFloatArgsAndRet() const override {
     return getTarget().hasBFloat16Type();
   }
+
+  using ABIInfo::appendAttributeMangling;
+  void appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
+                               raw_ostream &Out) const override;
+  void appendAttributeMangling(StringRef AttrStr,
+                               raw_ostream &Out) const override;
 };
 
 class AArch64SwiftABIInfo : public SwiftABIInfo {
@@ -857,6 +864,39 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABI(
           << Callee->getDeclName();
 }
 
+void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
+                                             unsigned Index,
+                                             raw_ostream &Out) const {
+  appendAttributeMangling(Attr->getFeatureStr(Index), Out);
+}
+
+void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr,
+                                             raw_ostream &Out) const {
+  if (AttrStr == "default") {
+    Out << ".default";
+    return;
+  }
+
+  Out << "._";
+  SmallVector Features;
+  AttrStr.split(Features, "+");
+  for (auto &Feat : Features)
+    Feat = Feat.trim();
+
+  // FIXME: It was brought up in #79316 that sorting the features which are
+  // used for mangling based on their multiversion priority is not a good
+  // practice. Changing the feature priorities will break the ABI. Perhaps
+  // it would be preferable to perform a lexicographical sort instead.
+  const TargetInfo &TI = CGT.getTarget();
+  llvm::sort(Features, [&TI](const StringRef LHS, const StringRef RHS) {
+    return TI.multiVersionSortPriority(LHS) < TI.multiVersionSortPriority(RHS);
+  });
+
+  for (auto &Feat : Features)
+    if (auto Ext = llvm::AArch64::parseArchExtension(Feat))
+      Out << 'M' << Ext->Name;
+}
+
 std::unique_ptr
 CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
                                         AArch64ABIKind Kind) {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index dbfc729bba24c7..66c3a237c12117 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4479,10 +4479,9 @@ renderDebugOptions(const ToolChain &TC, const Driver &D, const llvm::Triple &T,
                       options::OPT_gpubnames, options::OPT_gno_pubnames);
   if (DwarfFission != DwarfFissionKind::None ||
       (PubnamesArg && checkDebugInfoOption(PubnamesArg, Args, D, TC)))
-    if (DebuggerTuning != llvm::DebuggerKind::LLDB &&
-        (!PubnamesArg ||
-         (!PubnamesArg->getOption().matches(options::OPT_gno_gnu_pubnames) &&
-          !PubnamesArg->getOption().matches(options::OPT_gno_pubnames))))
+    if (!PubnamesArg ||
+        (!PubnamesArg->getOption().matches(options::OPT_gno_gnu_pubnames) &&
+         !PubnamesArg->getOption().matches(options::OPT_gno_pubnames)))
       CmdArgs.push_back(PubnamesArg && PubnamesArg->getOption().matches(
                                            options::OPT_gpubnames)
                             ? "-gpubnames"
@@ -5959,7 +5958,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   if (Arg *A = Args.getLastArg(options::OPT_fbasic_block_address_map,
                                options::OPT_fno_basic_block_address_map)) {
-    if (Triple.isX86() && Triple.isOSBinFormatELF()) {
+    if ((Triple.isX86() || Triple.isAArch64()) && Triple.isOSBinFormatELF()) {
       if (A->getOption().matches(options::OPT_fbasic_block_address_map))
         A->render(Args, CmdArgs);
     } else {
diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h
index 34d1de0a060055..11e1e7d032586f 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -1306,75 +1306,15 @@ float min(float __x, float __y) { return __builtin_fminf(__x, __y); }
 __DEVICE__
 double min(double __x, double __y) { return __builtin_fmin(__x, __y); }
 
-// Define host min/max functions.
-#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) &&                  \
-    !defined(__HIP_NO_HOST_MIN_MAX_IN_GLOBAL_NAMESPACE__)
-
-#pragma push_macro("DEFINE_MIN_MAX_FUNCTIONS")
-#pragma push_macro("DEFINE_MIN_MAX_FUNCTIONS")
-#define DEFINE_MIN_MAX_FUNCTIONS(ret_type, type1, type2)                       \
-  inline ret_type min(const type1 __a, const type2 __b) {                      \
-    return (__a < __b) ? __a : __b;                                            \
-  }                                                                            \
-  inline ret_type max(const type1 __a, const type2 __b) {                      \
-    return (__a > __b) ? __a : __b;                                            \
-  }
-
-// Define min and max functions for same type comparisons
-DEFINE_MIN_MAX_FUNCTIONS(int, int, int)
-DEFINE_MIN_MAX_FUNCTIONS(unsigned int, unsigned int, unsigned int)
-DEFINE_MIN_MAX_FUNCTIONS(long, long, long)
-DEFINE_MIN_MAX_FUNCTIONS(unsigned long, unsigned long, unsigned long)
-DEFINE_MIN_MAX_FUNCTIONS(long long, long long, long long)
-DEFINE_MIN_MAX_FUNCTIONS(unsigned long long, unsigned long long,
-                         unsigned long long)
-
-// The host min/max functions below accept mixed signed/unsigned integer
-// parameters and perform unsigned comparisons, which may produce unexpected
-// results if a signed integer was passed unintentionally. To avoid this
-// happening silently, these overloaded functions are not defined by default.
-// However, for compatibility with CUDA, they will be defined if users define
-// __HIP_DEFINE_MIXED_HOST_MIN_MAX__.
-#ifdef __HIP_DEFINE_MIXED_HOST_MIN_MAX__
-DEFINE_MIN_MAX_FUNCTIONS(unsigned int, int, unsigned int)
-DEFINE_MIN_MAX_FUNCTIONS(unsigned int, unsigned int, int)
-DEFINE_MIN_MAX_FUNCTIONS(unsigned long, long, unsigned long)
-DEFINE_MIN_MAX_FUNCTIONS(unsigned long, unsigned long, long)
-DEFINE_MIN_MAX_FUNCTIONS(unsigned long long, long long, unsigned long long)
-DEFINE_MIN_MAX_FUNCTIONS(unsigned long long, unsigned long long, long long)
-#endif // ifdef __HIP_DEFINE_MIXED_HOST_MIN_MAX__
-
-// Floating-point comparisons using built-in functions
-inline float min(float const __a, float const __b) {
-  return __builtin_fminf(__a, __b);
-}
-inline double min(double const __a, double const __b) {
-  return __builtin_fmin(__a, __b);
-}
-inline double min(float const __a, double const __b) {
-  return __builtin_fmin(__a, __b);
-}
-inline double min(double const __a, float const __b) {
-  return __builtin_fmin(__a, __b);
+#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
+__host__ inline static int min(int __arg1, int __arg2) {
+  return __arg1 < __arg2 ? __arg1 : __arg2;
 }
 
-inline float max(float const __a, float const __b) {
-  return __builtin_fmaxf(__a, __b);
-}
-inline double max(double const __a, double const __b) {
-  return __builtin_fmax(__a, __b);
-}
-inline double max(float const __a, double const __b) {
-  return __builtin_fmax(__a, __b);
+__host__ inline static int max(int __arg1, int __arg2) {
+  return __arg1 > __arg2 ? __arg1 : __arg2;
 }
-inline double max(double const __a, float const __b) {
-  return __builtin_fmax(__a, __b);
-}
-
-#pragma pop_macro("DEFINE_MIN_MAX_FUNCTIONS")
-
-#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) &&
-       // !defined(__HIP_NO_HOST_MIN_MAX_IN_GLOBAL_NAMESPACE__)
+#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
 #endif
 
 #pragma pop_macro("__DEVICE__")
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 96e3ebdecbdf83..1d451b5f5b25de 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2099,9 +2099,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a,
 }
 
 /// Adds, with saturation, the corresponding elements of two 128-bit
-///    signed [16 x i8] vectors, saving each sum in the corresponding element of
-///    a 128-bit result vector of [16 x i8]. Positive sums greater than 0x7F are
-///    saturated to 0x7F. Negative sums less than 0x80 are saturated to 0x80.
+///    signed [16 x i8] vectors, saving each sum in the corresponding element
+///    of a 128-bit result vector of [16 x i8].
+///
+///    Positive sums greater than 0x7F are saturated to 0x7F. Negative sums
+///    less than 0x80 are saturated to 0x80.
 ///
 /// \headerfile 
 ///
@@ -2119,10 +2121,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a,
 }
 
 /// Adds, with saturation, the corresponding elements of two 128-bit
-///    signed [8 x i16] vectors, saving each sum in the corresponding element of
-///    a 128-bit result vector of [8 x i16]. Positive sums greater than 0x7FFF
-///    are saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
-///    0x8000.
+///    signed [8 x i16] vectors, saving each sum in the corresponding element
+///    of a 128-bit result vector of [8 x i16].
+///
+///    Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
+///    less than 0x8000 are saturated to 0x8000.
 ///
 /// \headerfile 
 ///
@@ -2141,8 +2144,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a,
 
 /// Adds, with saturation, the corresponding elements of two 128-bit
 ///    unsigned [16 x i8] vectors, saving each sum in the corresponding element
-///    of a 128-bit result vector of [16 x i8]. Positive sums greater than 0xFF
-///    are saturated to 0xFF. Negative sums are saturated to 0x00.
+///    of a 128-bit result vector of [16 x i8].
+///
+///    Positive sums greater than 0xFF are saturated to 0xFF. Negative sums are
+///    saturated to 0x00.
 ///
 /// \headerfile 
 ///
@@ -2161,8 +2166,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a,
 
 /// Adds, with saturation, the corresponding elements of two 128-bit
 ///    unsigned [8 x i16] vectors, saving each sum in the corresponding element
-///    of a 128-bit result vector of [8 x i16]. Positive sums greater than
-///    0xFFFF are saturated to 0xFFFF. Negative sums are saturated to 0x0000.
+///    of a 128-bit result vector of [8 x i16].
+///
+///    Positive sums greater than 0xFFFF are saturated to 0xFFFF. Negative sums
+///    are saturated to 0x0000.
 ///
 /// \headerfile 
 ///
@@ -2518,10 +2525,12 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a,
   return (__m128i)((__v2du)__a - (__v2du)__b);
 }
 
-/// Subtracts corresponding 8-bit signed integer values in the input and
-///    returns the differences in the corresponding bytes in the destination.
-///    Differences greater than 0x7F are saturated to 0x7F, and differences less
-///    than 0x80 are saturated to 0x80.
+/// Subtracts, with saturation, corresponding 8-bit signed integer values in
+///    the input and returns the differences in the corresponding bytes in the
+///    destination.
+///
+///    Differences greater than 0x7F are saturated to 0x7F, and differences
+///    less than 0x80 are saturated to 0x80.
 ///
 /// \headerfile 
 ///
@@ -2538,8 +2547,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a,
   return (__m128i)__builtin_elementwise_sub_sat((__v16qs)__a, (__v16qs)__b);
 }
 
-/// Subtracts corresponding 16-bit signed integer values in the input and
-///    returns the differences in the corresponding bytes in the destination.
+/// Subtracts, with saturation, corresponding 16-bit signed integer values in
+///    the input and returns the differences in the corresponding bytes in the
+///    destination.
+///
 ///    Differences greater than 0x7FFF are saturated to 0x7FFF, and values less
 ///    than 0x8000 are saturated to 0x8000.
 ///
@@ -2558,9 +2569,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a,
   return (__m128i)__builtin_elementwise_sub_sat((__v8hi)__a, (__v8hi)__b);
 }
 
-/// Subtracts corresponding 8-bit unsigned integer values in the input
-///    and returns the differences in the corresponding bytes in the
-///    destination. Differences less than 0x00 are saturated to 0x00.
+/// Subtracts, with saturation, corresponding 8-bit unsigned integer values in
+///    the input and returns the differences in the corresponding bytes in the
+///    destination.
+///
+///    Differences less than 0x00 are saturated to 0x00.
 ///
 /// \headerfile 
 ///
@@ -2577,9 +2590,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a,
   return (__m128i)__builtin_elementwise_sub_sat((__v16qu)__a, (__v16qu)__b);
 }
 
-/// Subtracts corresponding 16-bit unsigned integer values in the input
-///    and returns the differences in the corresponding bytes in the
-///    destination. Differences less than 0x0000 are saturated to 0x0000.
+/// Subtracts, with saturation, corresponding 16-bit unsigned integer values in
+///    the input and returns the differences in the corresponding bytes in the
+///    destination.
+///
+///    Differences less than 0x0000 are saturated to 0x0000.
 ///
 /// \headerfile 
 ///
@@ -4050,26 +4065,22 @@ void _mm_mfence(void);
 } // extern "C"
 #endif
 
-/// Converts 16-bit signed integers from both 128-bit integer vector
-///    operands into 8-bit signed integers, and packs the results into the
-///    destination. Positive values greater than 0x7F are saturated to 0x7F.
-///    Negative values less than 0x80 are saturated to 0x80.
+/// Converts, with saturation, 16-bit signed integers from both 128-bit integer
+///    vector operands into 8-bit signed integers, and packs the results into
+///    the destination.
+///
+///    Positive values greater than 0x7F are saturated to 0x7F. Negative values
+///    less than 0x80 are saturated to 0x80.
 ///
 /// \headerfile 
 ///
 /// This intrinsic corresponds to the  VPACKSSWB / PACKSSWB  instruction.
 ///
 /// \param __a
-///   A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
-///   a signed integer and is converted to a 8-bit signed integer with
-///   saturation. Values greater than 0x7F are saturated to 0x7F. Values less
-///   than 0x80 are saturated to 0x80. The converted [8 x i8] values are
+///   A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
 ///   written to the lower 64 bits of the result.
 /// \param __b
-///   A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
-///   a signed integer and is converted to a 8-bit signed integer with
-///   saturation. Values greater than 0x7F are saturated to 0x7F. Values less
-///   than 0x80 are saturated to 0x80. The converted [8 x i8] values are
+///   A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
 ///   written to the higher 64 bits of the result.
 /// \returns A 128-bit vector of [16 x i8] containing the converted values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a,
@@ -4077,26 +4088,22 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a,
   return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
 }
 
-/// Converts 32-bit signed integers from both 128-bit integer vector
-///    operands into 16-bit signed integers, and packs the results into the
-///    destination. Positive values greater than 0x7FFF are saturated to 0x7FFF.
-///    Negative values less than 0x8000 are saturated to 0x8000.
+/// Converts, with saturation, 32-bit signed integers from both 128-bit integer
+///    vector operands into 16-bit signed integers, and packs the results into
+///    the destination.
+///
+///    Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
+///    values less than 0x8000 are saturated to 0x8000.
 ///
 /// \headerfile 
 ///
 /// This intrinsic corresponds to the  VPACKSSDW / PACKSSDW  instruction.
 ///
 /// \param __a
-///    A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as
-///    a signed integer and is converted to a 16-bit signed integer with
-///    saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values
-///    less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values
+///    A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values
 ///    are written to the lower 64 bits of the result.
 /// \param __b
-///    A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as
-///    a signed integer and is converted to a 16-bit signed integer with
-///    saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values
-///    less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values
+///    A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values
 ///    are written to the higher 64 bits of the result.
 /// \returns A 128-bit vector of [8 x i16] containing the converted values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a,
@@ -4104,26 +4111,22 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a,
   return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
 }
 
-/// Converts 16-bit signed integers from both 128-bit integer vector
-///    operands into 8-bit unsigned integers, and packs the results into the
-///    destination. Values greater than 0xFF are saturated to 0xFF. Values less
-///    than 0x00 are saturated to 0x00.
+/// Converts, with saturation, 16-bit signed integers from both 128-bit integer
+///    vector operands into 8-bit unsigned integers, and packs the results into
+///    the destination.
+///
+///    Values greater than 0xFF are saturated to 0xFF. Values less than 0x00
+///    are saturated to 0x00.
 ///
 /// \headerfile 
 ///
 /// This intrinsic corresponds to the  VPACKUSWB / PACKUSWB  instruction.
 ///
 /// \param __a
-///    A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
-///    a signed integer and is converted to an 8-bit unsigned integer with
-///    saturation. Values greater than 0xFF are saturated to 0xFF. Values less
-///    than 0x00 are saturated to 0x00. The converted [8 x i8] values are
+///    A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
 ///    written to the lower 64 bits of the result.
 /// \param __b
-///    A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
-///    a signed integer and is converted to an 8-bit unsigned integer with
-///    saturation. Values greater than 0xFF are saturated to 0xFF. Values less
-///    than 0x00 are saturated to 0x00. The converted [8 x i8] values are
+///    A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
 ///    written to the higher 64 bits of the result.
 /// \returns A 128-bit vector of [16 x i8] containing the converted values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a,
diff --git a/clang/lib/Headers/fmaintrin.h b/clang/lib/Headers/fmaintrin.h
index ea832fac4f9922..22d1a780bbfd4e 100644
--- a/clang/lib/Headers/fmaintrin.h
+++ b/clang/lib/Headers/fmaintrin.h
@@ -60,7 +60,8 @@ _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
 
 /// Computes a scalar multiply-add of the single-precision values in the
 ///    low 32 bits of 128-bit vectors of [4 x float].
-/// \code
+///
+/// \code{.operation}
 /// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
 /// result[127:32] = __A[127:32]
 /// \endcode
@@ -88,7 +89,8 @@ _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
 
 /// Computes a scalar multiply-add of the double-precision values in the
 ///    low 64 bits of 128-bit vectors of [2 x double].
-/// \code
+///
+/// \code{.operation}
 /// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
 /// result[127:64] = __A[127:64]
 /// \endcode
@@ -156,7 +158,8 @@ _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
 
 /// Computes a scalar multiply-subtract of the single-precision values in
 ///    the low 32 bits of 128-bit vectors of [4 x float].
-/// \code
+///
+/// \code{.operation}
 /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
 /// result[127:32] = __A[127:32]
 /// \endcode
@@ -184,7 +187,8 @@ _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
 
 /// Computes a scalar multiply-subtract of the double-precision values in
 ///    the low 64 bits of 128-bit vectors of [2 x double].
-/// \code
+///
+/// \code{.operation}
 /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
 /// result[127:64] = __A[127:64]
 /// \endcode
@@ -252,7 +256,8 @@ _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
 
 /// Computes a scalar negated multiply-add of the single-precision values in
 ///    the low 32 bits of 128-bit vectors of [4 x float].
-/// \code
+///
+/// \code{.operation}
 /// result[31:0] = -(__A[31:0] * __B[31:0]) + __C[31:0]
 /// result[127:32] = __A[127:32]
 /// \endcode
@@ -280,7 +285,8 @@ _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
 
 /// Computes a scalar negated multiply-add of the double-precision values
 ///    in the low 64 bits of 128-bit vectors of [2 x double].
-/// \code
+///
+/// \code{.operation}
 /// result[63:0] = -(__A[63:0] * __B[63:0]) + __C[63:0]
 /// result[127:64] = __A[127:64]
 /// \endcode
@@ -348,7 +354,8 @@ _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
 
 /// Computes a scalar negated multiply-subtract of the single-precision
 ///    values in the low 32 bits of 128-bit vectors of [4 x float].
-/// \code
+///
+/// \code{.operation}
 /// result[31:0] = -(__A[31:0] * __B[31:0]) - __C[31:0]
 /// result[127:32] = __A[127:32]
 /// \endcode
@@ -376,7 +383,8 @@ _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
 
 /// Computes a scalar negated multiply-subtract of the double-precision
 ///    values in the low 64 bits of 128-bit vectors of [2 x double].
-/// \code
+///
+/// \code{.operation}
 /// result[63:0] = -(__A[63:0] * __B[63:0]) - __C[63:0]
 /// result[127:64] = __A[127:64]
 /// \endcode
@@ -404,7 +412,8 @@ _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
 
 /// Computes a multiply with alternating add/subtract of 128-bit vectors of
 ///    [4 x float].
-/// \code
+///
+/// \code{.operation}
 /// result[31:0]  = (__A[31:0] * __B[31:0]) - __C[31:0]
 /// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32]
 /// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
@@ -430,7 +439,8 @@ _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
 
 /// Computes a multiply with alternating add/subtract of 128-bit vectors of
 ///    [2 x double].
-/// \code
+///
+/// \code{.operation}
 /// result[63:0]  = (__A[63:0] * __B[63:0]) - __C[63:0]
 /// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64]
 /// \endcode
@@ -454,7 +464,8 @@ _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
 
 /// Computes a multiply with alternating add/subtract of 128-bit vectors of
 ///    [4 x float].
-/// \code
+///
+/// \code{.operation}
 /// result[31:0]  = (__A[31:0] * __B[31:0]) + __C[31:0]
 /// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
 /// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64]
@@ -480,7 +491,8 @@ _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
 
 /// Computes a multiply with alternating add/subtract of 128-bit vectors of
 ///    [2 x double].
-/// \code
+///
+/// \code{.operation}
 /// result[63:0]  = (__A[63:0] * __B[63:0]) + __C[63:0]
 /// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
 /// \endcode
@@ -664,7 +676,8 @@ _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
 
 /// Computes a multiply with alternating add/subtract of 256-bit vectors of
 ///    [8 x float].
-/// \code
+///
+/// \code{.operation}
 /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
 /// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32]
 /// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
@@ -694,7 +707,8 @@ _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
 
 /// Computes a multiply with alternating add/subtract of 256-bit vectors of
 ///    [4 x double].
-/// \code
+///
+/// \code{.operation}
 /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
 /// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64]
 /// result[191:128] = (__A[191:128] * __B[191:128]) - __C[191:128]
@@ -720,7 +734,8 @@ _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
 
 /// Computes a vector multiply with alternating add/subtract of 256-bit
 ///    vectors of [8 x float].
-/// \code
+///
+/// \code{.operation}
 /// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
 /// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
 /// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64]
@@ -750,7 +765,8 @@ _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
 
 /// Computes a vector multiply with alternating add/subtract of 256-bit
 ///    vectors of [4 x double].
-/// \code
+///
+/// \code{.operation}
 /// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
 /// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
 /// result[191:128] = (__A[191:128] * __B[191:128]) + __C[191:128]
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index 08849f01071aea..962d24738e7aa4 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -105,28 +105,23 @@ _mm_cvtm64_si64(__m64 __m)
     return (long long)__m;
 }
 
-/// Converts 16-bit signed integers from both 64-bit integer vector
-///    parameters of [4 x i16] into 8-bit signed integer values, and constructs
-///    a 64-bit integer vector of [8 x i8] as the result. Positive values
-///    greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
-///    are saturated to 0x80.
+/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
+///    vector parameters of [4 x i16] into 8-bit signed integer values, and
+///    constructs a 64-bit integer vector of [8 x i8] as the result.
+///
+///    Positive values greater than 0x7F are saturated to 0x7F. Negative values
+///    less than 0x80 are saturated to 0x80.
 ///
 /// \headerfile 
 ///
 /// This intrinsic corresponds to the  PACKSSWB  instruction.
 ///
 /// \param __m1
-///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
-///    16-bit signed integer and is converted to an 8-bit signed integer with
-///    saturation. Positive values greater than 0x7F are saturated to 0x7F.
-///    Negative values less than 0x80 are saturated to 0x80. The converted
-///    [4 x i8] values are written to the lower 32 bits of the result.
+///    A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
+///    written to the lower 32 bits of the result.
 /// \param __m2
-///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
-///    16-bit signed integer and is converted to an 8-bit signed integer with
-///    saturation. Positive values greater than 0x7F are saturated to 0x7F.
-///    Negative values less than 0x80 are saturated to 0x80. The converted
-///    [4 x i8] values are written to the upper 32 bits of the result.
+///    A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
+///    written to the upper 32 bits of the result.
 /// \returns A 64-bit integer vector of [8 x i8] containing the converted
 ///    values.
 static __inline__ __m64 __DEFAULT_FN_ATTRS
@@ -135,28 +130,23 @@ _mm_packs_pi16(__m64 __m1, __m64 __m2)
     return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
 }
 
-/// Converts 32-bit signed integers from both 64-bit integer vector
-///    parameters of [2 x i32] into 16-bit signed integer values, and constructs
-///    a 64-bit integer vector of [4 x i16] as the result. Positive values
-///    greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
-///    0x8000 are saturated to 0x8000.
+/// Converts, with saturation, 32-bit signed integers from both 64-bit integer
+///    vector parameters of [2 x i32] into 16-bit signed integer values, and
+///    constructs a 64-bit integer vector of [4 x i16] as the result.
+///
+///    Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
+///    values less than 0x8000 are saturated to 0x8000.
 ///
 /// \headerfile 
 ///
 /// This intrinsic corresponds to the  PACKSSDW  instruction.
 ///
 /// \param __m1
-///    A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
-///    32-bit signed integer and is converted to a 16-bit signed integer with
-///    saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
-///    Negative values less than 0x8000 are saturated to 0x8000. The converted
-///    [2 x i16] values are written to the lower 32 bits of the result.
+///    A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
+///    written to the lower 32 bits of the result.
 /// \param __m2
-///    A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
-///    32-bit signed integer and is converted to a 16-bit signed integer with
-///    saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
-///    Negative values less than 0x8000 are saturated to 0x8000. The converted
-///    [2 x i16] values are written to the upper 32 bits of the result.
+///    A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
+///    written to the upper 32 bits of the result.
 /// \returns A 64-bit integer vector of [4 x i16] containing the converted
 ///    values.
 static __inline__ __m64 __DEFAULT_FN_ATTRS
@@ -165,28 +155,23 @@ _mm_packs_pi32(__m64 __m1, __m64 __m2)
     return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
 }
 
-/// Converts 16-bit signed integers from both 64-bit integer vector
-///    parameters of [4 x i16] into 8-bit unsigned integer values, and
-///    constructs a 64-bit integer vector of [8 x i8] as the result. Values
-///    greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
-///    to 0.
+/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
+///    vector parameters of [4 x i16] into 8-bit unsigned integer values, and
+///    constructs a 64-bit integer vector of [8 x i8] as the result.
+///
+///    Values greater than 0xFF are saturated to 0xFF. Values less than 0 are
+///    saturated to 0.
 ///
 /// \headerfile 
 ///
 /// This intrinsic corresponds to the  PACKUSWB  instruction.
 ///
 /// \param __m1
-///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
-///    16-bit signed integer and is converted to an 8-bit unsigned integer with
-///    saturation. Values greater than 0xFF are saturated to 0xFF. Values less
-///    than 0 are saturated to 0. The converted [4 x i8] values are written to
-///    the lower 32 bits of the result.
+///    A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
+///    written to the lower 32 bits of the result.
 /// \param __m2
-///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
-///    16-bit signed integer and is converted to an 8-bit unsigned integer with
-///    saturation. Values greater than 0xFF are saturated to 0xFF. Values less
-///    than 0 are saturated to 0. The converted [4 x i8] values are written to
-///    the upper 32 bits of the result.
+///    A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
+///    written to the upper 32 bits of the result.
 /// \returns A 64-bit integer vector of [8 x i8] containing the converted
 ///    values.
 static __inline__ __m64 __DEFAULT_FN_ATTRS
@@ -400,11 +385,13 @@ _mm_add_pi32(__m64 __m1, __m64 __m2)
     return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
 }
 
-/// Adds each 8-bit signed integer element of the first 64-bit integer
-///    vector of [8 x i8] to the corresponding 8-bit signed integer element of
-///    the second 64-bit integer vector of [8 x i8]. Positive sums greater than
-///    0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
-///    0x80. The results are packed into a 64-bit integer vector of [8 x i8].
+/// Adds, with saturation, each 8-bit signed integer element of the first
+///    64-bit integer vector of [8 x i8] to the corresponding 8-bit signed
+///    integer element of the second 64-bit integer vector of [8 x i8].
+///
+///    Positive sums greater than 0x7F are saturated to 0x7F. Negative sums
+///    less than 0x80 are saturated to 0x80. The results are packed into a
+///    64-bit integer vector of [8 x i8].
 ///
 /// \headerfile 
 ///
@@ -422,12 +409,13 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2)
     return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
 }
 
-/// Adds each 16-bit signed integer element of the first 64-bit integer
-///    vector of [4 x i16] to the corresponding 16-bit signed integer element of
-///    the second 64-bit integer vector of [4 x i16]. Positive sums greater than
-///    0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
-///    saturated to 0x8000. The results are packed into a 64-bit integer vector
-///    of [4 x i16].
+/// Adds, with saturation, each 16-bit signed integer element of the first
+///    64-bit integer vector of [4 x i16] to the corresponding 16-bit signed
+///    integer element of the second 64-bit integer vector of [4 x i16].
+///
+///    Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
+///    less than 0x8000 are saturated to 0x8000. The results are packed into a
+///    64-bit integer vector of [4 x i16].
 ///
 /// \headerfile 
 ///
@@ -445,11 +433,12 @@ _mm_adds_pi16(__m64 __m1, __m64 __m2)
     return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
 }
 
-/// Adds each 8-bit unsigned integer element of the first 64-bit integer
-///    vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
-///    the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
-///    saturated to 0xFF. The results are packed into a 64-bit integer vector of
-///    [8 x i8].
+/// Adds, with saturation, each 8-bit unsigned integer element of the first
+///    64-bit integer vector of [8 x i8] to the corresponding 8-bit unsigned
+///    integer element of the second 64-bit integer vector of [8 x i8].
+///
+///    Sums greater than 0xFF are saturated to 0xFF. The results are packed
+///    into a 64-bit integer vector of [8 x i8].
 ///
 /// \headerfile 
 ///
@@ -467,11 +456,12 @@ _mm_adds_pu8(__m64 __m1, __m64 __m2)
     return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
 }
 
-/// Adds each 16-bit unsigned integer element of the first 64-bit integer
-///    vector of [4 x i16] to the corresponding 16-bit unsigned integer element
-///    of the second 64-bit integer vector of [4 x i16]. Sums greater than
-///    0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
-///    integer vector of [4 x i16].
+/// Adds, with saturation, each 16-bit unsigned integer element of the first
+///    64-bit integer vector of [4 x i16] to the corresponding 16-bit unsigned
+///    integer element of the second 64-bit integer vector of [4 x i16].
+///
+///    Sums greater than 0xFFFF are saturated to 0xFFFF. The results are packed
+///    into a 64-bit integer vector of [4 x i16].
 ///
 /// \headerfile 
 ///
@@ -552,12 +542,13 @@ _mm_sub_pi32(__m64 __m1, __m64 __m2)
     return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
 }
 
-/// Subtracts each 8-bit signed integer element of the second 64-bit
-///    integer vector of [8 x i8] from the corresponding 8-bit signed integer
-///    element of the first 64-bit integer vector of [8 x i8]. Positive results
-///    greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
-///    are saturated to 0x80. The results are packed into a 64-bit integer
-///    vector of [8 x i8].
+/// Subtracts, with saturation, each 8-bit signed integer element of the second
+///    64-bit integer vector of [8 x i8] from the corresponding 8-bit signed
+///    integer element of the first 64-bit integer vector of [8 x i8].
+///
+///    Positive results greater than 0x7F are saturated to 0x7F. Negative
+///    results less than 0x80 are saturated to 0x80. The results are packed
+///    into a 64-bit integer vector of [8 x i8].
 ///
 /// \headerfile 
 ///
@@ -575,12 +566,13 @@ _mm_subs_pi8(__m64 __m1, __m64 __m2)
     return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
 }
 
-/// Subtracts each 16-bit signed integer element of the second 64-bit
-///    integer vector of [4 x i16] from the corresponding 16-bit signed integer
-///    element of the first 64-bit integer vector of [4 x i16]. Positive results
-///    greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
-///    0x8000 are saturated to 0x8000. The results are packed into a 64-bit
-///    integer vector of [4 x i16].
+/// Subtracts, with saturation, each 16-bit signed integer element of the
+///    second 64-bit integer vector of [4 x i16] from the corresponding 16-bit
+///    signed integer element of the first 64-bit integer vector of [4 x i16].
+///
+///    Positive results greater than 0x7FFF are saturated to 0x7FFF. Negative
+///    results less than 0x8000 are saturated to 0x8000. The results are packed
+///    into a 64-bit integer vector of [4 x i16].
 ///
 /// \headerfile 
 ///
diff --git a/clang/lib/Headers/prfchwintrin.h b/clang/lib/Headers/prfchwintrin.h
index d2f91aa0123e0e..8a13784543c5f1 100644
--- a/clang/lib/Headers/prfchwintrin.h
+++ b/clang/lib/Headers/prfchwintrin.h
@@ -15,9 +15,10 @@
 #define __PRFCHWINTRIN_H
 
 /// Loads a memory sequence containing the specified memory address into
-///    all data cache levels. The cache-coherency state is set to exclusive.
-///    Data can be read from and written to the cache line without additional
-///    delay.
+///    all data cache levels.
+///
+///    The cache-coherency state is set to exclusive. Data can be read from
+///    and written to the cache line without additional delay.
 ///
 /// \headerfile 
 ///
@@ -32,10 +33,11 @@ _m_prefetch(void *__P)
 }
 
 /// Loads a memory sequence containing the specified memory address into
-///    the L1 data cache and sets the cache-coherency to modified. This
-///    provides a hint to the processor that the cache line will be modified.
-///    It is intended for use when the cache line will be written to shortly
-///    after the prefetch is performed.
+///    the L1 data cache and sets the cache-coherency state to modified.
+///
+///    This provides a hint to the processor that the cache line will be
+///    modified. It is intended for use when the cache line will be written to
+///    shortly after the prefetch is performed.
 ///
 ///    Note that the effect of this intrinsic is dependent on the processor
 ///    implementation.
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 005d7db9c3c308..c52ffb77e33d5c 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -1431,8 +1431,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) {
 }
 
 /* SSE4 Pack with Unsigned Saturation.  */
-/// Converts 32-bit signed integers from both 128-bit integer vector
-///    operands into 16-bit unsigned integers, and returns the packed result.
+/// Converts, with saturation, 32-bit signed integers from both 128-bit integer
+///    vector operands into 16-bit unsigned integers, and returns the packed
+///    result.
+///
 ///    Values greater than 0xFFFF are saturated to 0xFFFF. Values less than
 ///    0x0000 are saturated to 0x0000.
 ///
@@ -1441,17 +1443,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) {
 /// This intrinsic corresponds to the  VPACKUSDW / PACKUSDW  instruction.
 ///
 /// \param __V1
-///    A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a
-///    signed integer and is converted to a 16-bit unsigned integer with
-///    saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values
-///    less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values
-///    are written to the lower 64 bits of the result.
+///    A 128-bit vector of [4 x i32]. The converted [4 x i16] values are
+///    written to the lower 64 bits of the result.
 /// \param __V2
-///    A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a
-///    signed integer and is converted to a 16-bit unsigned integer with
-///    saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values
-///    less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values
-///    are written to the higher 64 bits of the result.
+///    A 128-bit vector of [4 x i32]. The converted [4 x i16] values are
+///    written to the higher 64 bits of the result.
 /// \returns A 128-bit vector of [8 x i16] containing the converted values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1,
                                                               __m128i __V2) {
diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h
index 7d8dc46c57bfeb..bf8327b692d1cf 100644
--- a/clang/lib/Headers/tmmintrin.h
+++ b/clang/lib/Headers/tmmintrin.h
@@ -271,10 +271,11 @@ _mm_hadd_pi32(__m64 __a, __m64 __b)
     return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
 }
 
-/// Horizontally adds the adjacent pairs of values contained in 2 packed
-///    128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
-///    saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
-///    0x8000.
+/// Horizontally adds, with saturation, the adjacent pairs of values contained
+///    in two packed 128-bit vectors of [8 x i16].
+///
+///    Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
+///    less than 0x8000 are saturated to 0x8000.
 ///
 /// \headerfile 
 ///
@@ -296,10 +297,11 @@ _mm_hadds_epi16(__m128i __a, __m128i __b)
     return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
 }
 
-/// Horizontally adds the adjacent pairs of values contained in 2 packed
-///    64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
-///    saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
-///    0x8000.
+/// Horizontally adds, with saturation, the adjacent pairs of values contained
+///    in two packed 64-bit vectors of [4 x i16].
+///
+///    Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
+///    less than 0x8000 are saturated to 0x8000.
 ///
 /// \headerfile 
 ///
@@ -413,10 +415,11 @@ _mm_hsub_pi32(__m64 __a, __m64 __b)
     return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
 }
 
-/// Horizontally subtracts the adjacent pairs of values contained in 2
-///    packed 128-bit vectors of [8 x i16]. Positive differences greater than
-///    0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
-///    saturated to 0x8000.
+/// Horizontally subtracts, with saturation, the adjacent pairs of values
+///    contained in two packed 128-bit vectors of [8 x i16].
+///
+///    Positive differences greater than 0x7FFF are saturated to 0x7FFF.
+///    Negative differences less than 0x8000 are saturated to 0x8000.
 ///
 /// \headerfile 
 ///
@@ -438,10 +441,11 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b)
     return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
 }
 
-/// Horizontally subtracts the adjacent pairs of values contained in 2
-///    packed 64-bit vectors of [4 x i16]. Positive differences greater than
-///    0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
-///    saturated to 0x8000.
+/// Horizontally subtracts, with saturation, the adjacent pairs of values
+///    contained in two packed 64-bit vectors of [4 x i16].
+///
+///    Positive differences greater than 0x7FFF are saturated to 0x7FFF.
+///    Negative differences less than 0x8000 are saturated to 0x8000.
 ///
 /// \headerfile 
 ///
diff --git a/clang/lib/InstallAPI/CMakeLists.txt b/clang/lib/InstallAPI/CMakeLists.txt
index fdc4f064f29e95..19fc4c3abde53a 100644
--- a/clang/lib/InstallAPI/CMakeLists.txt
+++ b/clang/lib/InstallAPI/CMakeLists.txt
@@ -1,11 +1,14 @@
 set(LLVM_LINK_COMPONENTS
   Support
   TextAPI
+  Core
   )
 
 add_clang_library(clangInstallAPI
   FileList.cpp
+  Frontend.cpp
   HeaderFile.cpp
+  Visitor.cpp
 
   LINK_LIBS
   clangAST
diff --git a/clang/lib/InstallAPI/Frontend.cpp b/clang/lib/InstallAPI/Frontend.cpp
new file mode 100644
index 00000000000000..9f675ef7d1bd22
--- /dev/null
+++ b/clang/lib/InstallAPI/Frontend.cpp
@@ -0,0 +1,58 @@
+//===- Frontend.cpp ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/InstallAPI/Frontend.h"
+#include "clang/AST/Availability.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+
+using namespace llvm;
+using namespace llvm::MachO;
+
+namespace clang::installapi {
+
+static StringRef getFileExtension(clang::Language Lang) {
+  switch (Lang) {
+  default:
+    llvm_unreachable("Unexpected language option.");
+  case clang::Language::C:
+    return ".c";
+  case clang::Language::CXX:
+    return ".cpp";
+  case clang::Language::ObjC:
+    return ".m";
+  case clang::Language::ObjCXX:
+    return ".mm";
+  }
+}
+
+std::unique_ptr createInputBuffer(const InstallAPIContext &Ctx) {
+  assert(Ctx.Type != HeaderType::Unknown &&
+         "unexpected access level for parsing");
+  SmallString<4096> Contents;
+  raw_svector_ostream OS(Contents);
+  for (const HeaderFile &H : Ctx.InputHeaders) {
+    if (H.getType() != Ctx.Type)
+      continue;
+    if (Ctx.LangMode == Language::C || Ctx.LangMode == Language::CXX)
+      OS << "#include ";
+    else
+      OS << "#import ";
+    if (H.useIncludeName())
+      OS << "<" << H.getIncludeName() << ">";
+    else
+      OS << "\"" << H.getPath() << "\"";
+  }
+  if (Contents.empty())
+    return nullptr;
+
+  return llvm::MemoryBuffer::getMemBufferCopy(
+      Contents, "installapi-includes" + getFileExtension(Ctx.LangMode));
+}
+
+} // namespace clang::installapi
diff --git a/clang/lib/InstallAPI/Visitor.cpp b/clang/lib/InstallAPI/Visitor.cpp
new file mode 100644
index 00000000000000..9b333a6142ae8f
--- /dev/null
+++ b/clang/lib/InstallAPI/Visitor.cpp
@@ -0,0 +1,94 @@
+//===- Visitor.cpp ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/InstallAPI/Visitor.h"
+#include "clang/AST/Availability.h"
+#include "clang/Basic/Linkage.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Mangler.h"
+
+using namespace llvm;
+using namespace llvm::MachO;
+
+namespace clang::installapi {
+
+// Exported NamedDecl needs to have externally visibiliy linkage and
+// default visibility from LinkageComputer.
+static bool isExported(const NamedDecl *D) {
+  auto LV = D->getLinkageAndVisibility();
+  return isExternallyVisible(LV.getLinkage()) &&
+         (LV.getVisibility() == DefaultVisibility);
+}
+
+static SymbolFlags getFlags(bool WeakDef, bool ThreadLocal) {
+  SymbolFlags Result = SymbolFlags::None;
+  if (WeakDef)
+    Result |= SymbolFlags::WeakDefined;
+  if (ThreadLocal)
+    Result |= SymbolFlags::ThreadLocalValue;
+
+  return Result;
+}
+
+void InstallAPIVisitor::HandleTranslationUnit(ASTContext &ASTCtx) {
+  if (ASTCtx.getDiagnostics().hasErrorOccurred())
+    return;
+
+  auto *D = ASTCtx.getTranslationUnitDecl();
+  TraverseDecl(D);
+}
+
+std::string InstallAPIVisitor::getMangledName(const NamedDecl *D) const {
+  SmallString<256> Name;
+  if (MC->shouldMangleDeclName(D)) {
+    raw_svector_ostream NStream(Name);
+    MC->mangleName(D, NStream);
+  } else
+    Name += D->getNameAsString();
+
+  return getBackendMangledName(Name);
+}
+
+std::string InstallAPIVisitor::getBackendMangledName(Twine Name) const {
+  SmallString<256> FinalName;
+  Mangler::getNameWithPrefix(FinalName, Name, DataLayout(Layout));
+  return std::string(FinalName);
+}
+
+/// Collect all global variables.
+bool InstallAPIVisitor::VisitVarDecl(const VarDecl *D) {
+  // Skip function parameters.
+  if (isa(D))
+    return true;
+
+  // Skip variables in records. They are handled seperately for C++.
+  if (D->getDeclContext()->isRecord())
+    return true;
+
+  // Skip anything inside functions or methods.
+  if (!D->isDefinedOutsideFunctionOrMethod())
+    return true;
+
+  // If this is a template but not specialization or instantiation, skip.
+  if (D->getASTContext().getTemplateOrSpecializationInfo(D) &&
+      D->getTemplateSpecializationKind() == TSK_Undeclared)
+    return true;
+
+  // TODO: Capture SourceLocation & Availability for Decls.
+  const RecordLinkage Linkage =
+      isExported(D) ? RecordLinkage::Exported : RecordLinkage::Internal;
+  const bool WeakDef = D->hasAttr();
+  const bool ThreadLocal = D->getTLSKind() != VarDecl::TLS_None;
+  Slice.addGlobal(getMangledName(D), Linkage, GlobalRecord::Kind::Variable,
+                  getFlags(WeakDef, ThreadLocal));
+  return true;
+}
+
+} // namespace clang::installapi
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
index defd83ec8e179c..01b191ab0eeaf4 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
@@ -310,8 +310,12 @@ class TrivialFunctionAnalysisVisitor
         return true;
       if (isa(decl))
         return true;
-      if (auto *VD = dyn_cast(decl))
-        return VD->hasConstantInitialization() && VD->getEvaluatedValue();
+      if (auto *VD = dyn_cast(decl)) {
+        if (VD->hasConstantInitialization() && VD->getEvaluatedValue())
+          return true;
+        auto *Init = VD->getInit();
+        return !Init || Visit(Init);
+      }
     }
     return false;
   }
diff --git a/clang/test/AST/Interp/cxx11.cpp b/clang/test/AST/Interp/cxx11.cpp
index 0a1e0f3fd28e9d..993e3618a37848 100644
--- a/clang/test/AST/Interp/cxx11.cpp
+++ b/clang/test/AST/Interp/cxx11.cpp
@@ -22,3 +22,11 @@ int array2[recurse2]; // both-warning {{variable length arrays in C++}} \
                       // both-note {{initializer of 'recurse2' is not a constant expression}} \
                       // expected-error {{variable length array declaration not allowed at file scope}} \
                       // ref-warning {{variable length array folded to constant array as an extension}}
+
+struct S {
+  int m;
+};
+constexpr S s = { 5 };
+constexpr const int *p = &s.m + 1;
+
+constexpr const int *np2 = &(*(int(*)[4])nullptr)[0]; // ok
diff --git a/clang/test/AST/Interp/cxx20.cpp b/clang/test/AST/Interp/cxx20.cpp
index 5c9c6257965108..000ffe39eb94a7 100644
--- a/clang/test/AST/Interp/cxx20.cpp
+++ b/clang/test/AST/Interp/cxx20.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -fcxx-exceptions -fexperimental-new-constant-interpreter -std=c++20 -verify %s
-// RUN: %clang_cc1 -fcxx-exceptions -std=c++20 -verify=ref %s
+// RUN: %clang_cc1 -fcxx-exceptions -fexperimental-new-constant-interpreter -std=c++20 -verify=both,expected -fcxx-exceptions %s
+// RUN: %clang_cc1 -fcxx-exceptions -std=c++20 -verify=both,ref -fcxx-exceptions %s
 
 void test_alignas_operand() {
   alignas(8) char dummy;
@@ -58,13 +58,10 @@ static_assert(pointerAssign2() == 12, "");
 
 constexpr int unInitLocal() {
   int a;
-  return a; // ref-note {{read of uninitialized object}} \
-            // expected-note {{read of uninitialized object}}
+  return a; // both-note {{read of uninitialized object}}
 }
-static_assert(unInitLocal() == 0, ""); // ref-error {{not an integral constant expression}} \
-                                       // ref-note {{in call to 'unInitLocal()'}} \
-                                       // expected-error {{not an integral constant expression}} \
-                                       // expected-note {{in call to 'unInitLocal()'}} \
+static_assert(unInitLocal() == 0, ""); // both-error {{not an integral constant expression}} \
+                                       // both-note {{in call to 'unInitLocal()'}}
 
 constexpr int initializedLocal() {
   int a;
@@ -75,25 +72,19 @@ static_assert(initializedLocal() == 20);
 
 constexpr int initializedLocal2() {
   int a[2];
-  return *a; // expected-note {{read of uninitialized object is not allowed in a constant expression}} \
-             // ref-note {{read of uninitialized object is not allowed in a constant expression}}
+  return *a; // both-note {{read of uninitialized object is not allowed in a constant expression}}
 }
-static_assert(initializedLocal2() == 20); // expected-error {{not an integral constant expression}} \
-                                          // expected-note {{in call to}} \
-                                          // ref-error {{not an integral constant expression}} \
-                                          // ref-note {{in call to}}
+static_assert(initializedLocal2() == 20); // both-error {{not an integral constant expression}} \
+                                          // both-note {{in call to}}
 
 
 struct Int { int a; };
 constexpr int initializedLocal3() {
   Int i;
-  return i.a; // ref-note {{read of uninitialized object is not allowed in a constant expression}} \
-              // expected-note {{read of uninitialized object}}
+  return i.a; // both-note {{read of uninitialized object is not allowed in a constant expression}}
 }
-static_assert(initializedLocal3() == 20); // expected-error {{not an integral constant expression}} \
-                                          // expected-note {{in call to}} \
-                                          // ref-error {{not an integral constant expression}} \
-                                          // ref-note {{in call to}}
+static_assert(initializedLocal3() == 20); // both-error {{not an integral constant expression}} \
+                                          // both-note {{in call to}}
 
 
 
@@ -137,22 +128,16 @@ static_assert(!b4); // ref-error {{not an integral constant expression}} \
 namespace UninitializedFields {
   class A {
   public:
-    int a; // expected-note 4{{subobject declared here}} \
-           // ref-note 4{{subobject declared here}}
+    int a; // both-note 4{{subobject declared here}}
     constexpr A() {}
   };
-  constexpr A a; // expected-error {{must be initialized by a constant expression}} \
-                 // expected-note {{subobject 'a' is not initialized}} \
-                 // ref-error {{must be initialized by a constant expression}} \
-                 // ref-note {{subobject 'a' is not initialized}}
-  constexpr A aarr[2]; // expected-error {{must be initialized by a constant expression}} \
-                       // expected-note {{subobject 'a' is not initialized}} \
-                       // ref-error {{must be initialized by a constant expression}} \
-                       // ref-note {{subobject 'a' is not initialized}}
+  constexpr A a; // both-error {{must be initialized by a constant expression}} \
+                 // both-note {{subobject 'a' is not initialized}}
+  constexpr A aarr[2]; // both-error {{must be initialized by a constant expression}} \
+                       // both-note {{subobject 'a' is not initialized}}
   class F {
     public:
-      int f; // expected-note 3{{subobject declared here}} \
-             // ref-note 3{{subobject declared here}}
+      int f; // both-note 3{{subobject declared here}}
 
       constexpr F() {}
       constexpr F(bool b) {
@@ -161,26 +146,19 @@ namespace UninitializedFields {
       }
   };
 
-  constexpr F foo[2] = {true}; // expected-error {{must be initialized by a constant expression}} \
-                               // expected-note {{subobject 'f' is not initialized}} \
-                               // ref-error {{must be initialized by a constant expression}} \
-                               // ref-note {{subobject 'f' is not initialized}}
-  constexpr F foo2[3] = {true, false, true}; // expected-error {{must be initialized by a constant expression}} \
-                                             // expected-note {{subobject 'f' is not initialized}} \
-                                             // ref-error {{must be initialized by a constant expression}} \
-                                             // ref-note {{subobject 'f' is not initialized}}
-  constexpr F foo3[3] = {true, true, F()}; // expected-error {{must be initialized by a constant expression}} \
-                                           // expected-note {{subobject 'f' is not initialized}} \
-                                           // ref-error {{must be initialized by a constant expression}} \
-                                           // ref-note {{subobject 'f' is not initialized}}
+  constexpr F foo[2] = {true}; // both-error {{must be initialized by a constant expression}} \
+                               // both-note {{subobject 'f' is not initialized}}
+  constexpr F foo2[3] = {true, false, true}; // both-error {{must be initialized by a constant expression}} \
+                                             // both-note {{subobject 'f' is not initialized}}
+  constexpr F foo3[3] = {true, true, F()}; // both-error {{must be initialized by a constant expression}} \
+                                           // both-note {{subobject 'f' is not initialized}}
 
 
 
   class Base {
   public:
     bool b;
-    int a; // expected-note {{subobject declared here}} \
-           // ref-note {{subobject declared here}}
+    int a; // both-note {{subobject declared here}}
     constexpr Base() : b(true) {}
   };
 
@@ -188,56 +166,44 @@ namespace UninitializedFields {
   public:
     constexpr Derived() : Base() {}   };
 
-  constexpr Derived D; // expected-error {{must be initialized by a constant expression}} \
-                       // expected-note {{subobject 'a' is not initialized}} \
-                       // ref-error {{must be initialized by a constant expression}} \
-                       // ref-note {{subobject 'a' is not initialized}}
+  constexpr Derived D; // both-error {{must be initialized by a constant expression}} \
+                       // both-note {{subobject 'a' is not initialized}}
 
   class C2 {
   public:
     A a;
     constexpr C2() {}   };
-  constexpr C2 c2; // expected-error {{must be initialized by a constant expression}} \
-                   // expected-note {{subobject 'a' is not initialized}} \
-                   // ref-error {{must be initialized by a constant expression}} \
-                   // ref-note {{subobject 'a' is not initialized}}
+  constexpr C2 c2; // both-error {{must be initialized by a constant expression}} \
+                   // both-note {{subobject 'a' is not initialized}}
 
   class C3 {
   public:
     A a[2];
     constexpr C3() {}
   };
-  constexpr C3 c3; // expected-error {{must be initialized by a constant expression}} \
-                   // expected-note {{subobject 'a' is not initialized}} \
-                   // ref-error {{must be initialized by a constant expression}} \
-                   // ref-note {{subobject 'a' is not initialized}}
+  constexpr C3 c3; // both-error {{must be initialized by a constant expression}} \
+                   // both-note {{subobject 'a' is not initialized}}
 
   class C4 {
   public:
-    bool B[2][3]; // expected-note {{subobject declared here}} \
-                  // ref-note {{subobject declared here}}
+    bool B[2][3]; // both-note {{subobject declared here}}
     constexpr C4(){}
   };
-  constexpr C4 c4; // expected-error {{must be initialized by a constant expression}} \
-                   // expected-note {{subobject 'B' is not initialized}} \
-                   // ref-error {{must be initialized by a constant expression}} \
-                   // ref-note {{subobject 'B' is not initialized}}
+  constexpr C4 c4; // both-error {{must be initialized by a constant expression}} \
+                   // both-note {{subobject 'B' is not initialized}}
 };
 
 namespace ConstThis {
   class Foo {
-    const int T = 12; // expected-note {{declared const here}} \
-                      // ref-note {{declared const here}}
+    const int T = 12; // both-note {{declared const here}}
     int a;
   public:
     constexpr Foo() {
       this->a = 10;
-      T = 13; // expected-error {{cannot assign to non-static data member 'T' with const-qualified type}} \
-              // ref-error {{cannot assign to non-static data member 'T' with const-qualified type}}
+      T = 13; // both-error {{cannot assign to non-static data member 'T' with const-qualified type}}
     }
   };
-  constexpr Foo F; // expected-error {{must be initialized by a constant expression}} \
-                   // ref-error {{must be initialized by a constant expression}}
+  constexpr Foo F; // both-error {{must be initialized by a constant expression}}
 
 
   class FooDtor {
@@ -264,8 +230,7 @@ namespace ConstThis {
     constexpr ctor_test() {
       if (Good)
         a = 10;
-      int local = 100 / a; // expected-note {{division by zero}} \
-                           // ref-note {{division by zero}}
+      int local = 100 / a; // both-note {{division by zero}}
     }
   };
 
@@ -277,22 +242,17 @@ namespace ConstThis {
     constexpr ~dtor_test() {
       if (Good)
         a = 10;
-      int local = 100 / a; // expected-note {{division by zero}} \
-                           // ref-note {{division by zero}}
+      int local = 100 / a; // both-note {{division by zero}}
     }
   };
 
   constexpr ctor_test good_ctor;
   constexpr dtor_test good_dtor;
 
-  constexpr ctor_test bad_ctor; // expected-error {{must be initialized by a constant expression}} \
-                                       // expected-note {{in call to}} \
-                                       // ref-error {{must be initialized by a constant expression}} \
-                                       // ref-note {{in call to}}
-  constexpr dtor_test bad_dtor; // expected-error {{must have constant destruction}} \
-                                       // expected-note {{in call to}} \
-                                       // ref-error {{must have constant destruction}} \
-                                       // ref-note {{in call to}}
+  constexpr ctor_test bad_ctor; // both-error {{must be initialized by a constant expression}} \
+                                       // both-note {{in call to}}
+  constexpr dtor_test bad_dtor; // both-error {{must have constant destruction}} \
+                                       // both-note {{in call to}}
 };
 
 namespace BaseInit {
@@ -311,10 +271,8 @@ namespace BaseInit {
   };
 
   static_assert(Final{1, 2, 3}.c == 3, ""); // OK
-  static_assert(Final{1, 2, 3}.a == 0, ""); // expected-error {{not an integral constant expression}} \
-                                            // expected-note {{read of uninitialized object}} \
-                                            // ref-error {{not an integral constant expression}} \
-                                            // ref-note {{read of uninitialized object}}
+  static_assert(Final{1, 2, 3}.a == 0, ""); // both-error {{not an integral constant expression}} \
+                                            // both-note {{read of uninitialized object}}
 
 
   struct Mixin  {
@@ -333,10 +291,8 @@ namespace BaseInit {
 
   static_assert(Final2{1, 2, 3}.c == 3, ""); // OK
   static_assert(Final2{1, 2, 3}.b == 2, ""); // OK
-  static_assert(Final2{1, 2, 3}.a == 0, ""); // expected-error {{not an integral constant expression}} \
-                                             // expected-note {{read of uninitialized object}} \
-                                             // ref-error {{not an integral constant expression}} \
-                                             // ref-note {{read of uninitialized object}}
+  static_assert(Final2{1, 2, 3}.a == 0, ""); // both-error {{not an integral constant expression}} \
+                                             // both-note {{read of uninitialized object}}
 
 
   struct Mixin3  {
@@ -352,10 +308,8 @@ namespace BaseInit {
 
   static_assert(Final3{1, 2, 3}.c == 3, ""); // OK
   static_assert(Final3{1, 2, 3}.b == 2, ""); // OK
-  static_assert(Final3{1, 2, 3}.a == 0, ""); // expected-error {{not an integral constant expression}} \
-                                             // expected-note {{read of uninitialized object}} \
-                                             // ref-error {{not an integral constant expression}} \
-                                             // ref-note {{read of uninitialized object}}
+  static_assert(Final3{1, 2, 3}.a == 0, ""); // both-error {{not an integral constant expression}} \
+                                             // both-note {{read of uninitialized object}}
 };
 
 namespace Destructors {
@@ -633,16 +587,13 @@ namespace ImplicitFunction {
 
    /// The operator= call here will fail and the diagnostics should be fine.
    b = a; // ref-note {{subobject 'a' is not initialized}} \
-          // ref-note {{in call to}} \
           // expected-note {{read of uninitialized object}} \
-          // expected-note {{in call to}}
+          // both-note {{in call to}}
 
    return 1;
   }
-  static_assert(callMe() == 1, ""); // ref-error {{not an integral constant expression}} \
-                                    // ref-note {{in call to 'callMe()'}} \
-                                    // expected-error {{not an integral constant expression}} \
-                                    // expected-note {{in call to 'callMe()'}}
+  static_assert(callMe() == 1, ""); // both-error {{not an integral constant expression}} \
+                                    // both-note {{in call to 'callMe()'}}
 }
 
 /// FIXME: Unfortunately, the similar tests in test/SemaCXX/{compare-cxx2a.cpp use member pointers,
@@ -680,8 +631,7 @@ namespace ThreeWayCmp {
   static_assert(1.0 <=> 2.f == -1, "");
   static_assert(1.0 <=> 1.0 == 0, "");
   static_assert(2.0 <=> 1.0 == 1, "");
-  constexpr int k = (1 <=> 1, 0); // expected-warning {{comparison result unused}} \
-                                  // ref-warning {{comparison result unused}}
+  constexpr int k = (1 <=> 1, 0); // both-warning {{comparison result unused}}
   static_assert(k== 0, "");
 
   /// Pointers.
@@ -690,10 +640,8 @@ namespace ThreeWayCmp {
   constexpr const int *pa1 = &a[1];
   constexpr const int *pa2 = &a[2];
   constexpr const int *pb1 = &b[1];
-  static_assert(pa1 <=> pb1 != 0, ""); // expected-error {{not an integral constant expression}} \
-                                       // expected-note {{has unspecified value}} \
-                                       // ref-error {{not an integral constant expression}} \
-                                       // ref-note {{has unspecified value}}
+  static_assert(pa1 <=> pb1 != 0, ""); // both-error {{not an integral constant expression}} \
+                                       // both-note {{has unspecified value}} \
   static_assert(pa1 <=> pa1 == 0, "");
   static_assert(pa1 <=> pa2 == -1, "");
   static_assert(pa2 <=> pa1 == 1, "");
@@ -799,3 +747,30 @@ void f2() {
                        // access info for unnamed bit-field
 }
 }
+
+namespace FailingDestructor {
+  struct D {
+    int n;
+    bool can_destroy;
+
+    constexpr ~D() {
+      if (!can_destroy)
+        throw "oh no";
+    }
+  };
+  template
+  void f() {} // both-note {{invalid explicitly-specified argument}}
+
+  void g() {
+    f(); // both-error {{no matching function}}
+  }
+}
+
+
+void overflowInSwitchCase(int n) {
+  switch (n) {
+  case (int)(float)1e300: // both-error {{constant expression}} \
+                          // both-note {{value +Inf is outside the range of representable values of type 'int'}}
+    break;
+  }
+}
diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp
index ac16a31293f3de..80a9a263dab140 100644
--- a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp
+++ b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp
@@ -199,6 +199,8 @@ class RefCounted {
   bool trivial23() const { return OptionSet::fromRaw(v).contains(Flags::Flag1); }
   int trivial24() const { ASSERT(v); return v; }
   unsigned trivial25() const { return __c11_atomic_load((volatile _Atomic(unsigned) *)&v, __ATOMIC_RELAXED); }
+  bool trivial26() { bool hasValue = v; return !hasValue; }
+  bool trivial27(int v) { bool value; value = v ? 1 : 0; return value; }
 
   static RefCounted& singleton() {
     static RefCounted s_RefCounted;
@@ -262,6 +264,15 @@ class RefCounted {
     return __c11_atomic_load((volatile _Atomic(unsigned) *)another(), __ATOMIC_RELAXED);
   }
 
+  void nonTrivial11() {
+    Number num(0.3);
+  }
+
+  bool nonTrivial12() {
+    bool val = otherFunction();
+    return val;
+  }
+
   unsigned v { 0 };
   Number* number { nullptr };
   Enum enumValue { Enum::Value1 };
@@ -309,6 +320,8 @@ class UnrelatedClass {
     getFieldTrivial().trivial23(); // no-warning
     getFieldTrivial().trivial24(); // no-warning
     getFieldTrivial().trivial25(); // no-warning
+    getFieldTrivial().trivial26(); // no-warning
+    getFieldTrivial().trivial27(5); // no-warning
     RefCounted::singleton().trivial18(); // no-warning
     RefCounted::singleton().someFunction(); // no-warning
 
@@ -334,6 +347,10 @@ class UnrelatedClass {
     // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}}
     getFieldTrivial().nonTrivial10();
     // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}}
+    getFieldTrivial().nonTrivial11();
+    // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}}
+    getFieldTrivial().nonTrivial12();
+    // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}}
   }
 };
 
diff --git a/clang/test/Driver/basic-block-address-map.c b/clang/test/Driver/basic-block-address-map.c
index 022f972b412d6b..12393e8ebfd54e 100644
--- a/clang/test/Driver/basic-block-address-map.c
+++ b/clang/test/Driver/basic-block-address-map.c
@@ -1,8 +1,9 @@
-// RUN: %clang -### -target x86_64 -fbasic-block-address-map %s -S 2>&1 | FileCheck -check-prefix=CHECK-PRESENT %s
+// RUN: %clang -### --target=x86_64 -fbasic-block-address-map %s -S 2>&1 | FileCheck -check-prefix=CHECK-PRESENT %s
+// RUN: %clang -### --target=aarch64 -fbasic-block-address-map %s -S 2>&1 | FileCheck -check-prefix=CHECK-PRESENT %s
 // CHECK-PRESENT: -fbasic-block-address-map
 
-// RUN: %clang -### -target x86_64 -fno-basic-block-address-map %s -S 2>&1 | FileCheck %s --check-prefix=CHECK-ABSENT
+// RUN: %clang -### --target=x86_64 -fno-basic-block-address-map %s -S 2>&1 | FileCheck %s --check-prefix=CHECK-ABSENT
 // CHECK-ABSENT-NOT: -fbasic-block-address-map
 
-// RUN: not %clang -c -target x86_64-apple-darwin10 -fbasic-block-address-map %s -S 2>&1 | FileCheck -check-prefix=CHECK-TRIPLE %s
+// RUN: not %clang -c --target=x86_64-apple-darwin10 -fbasic-block-address-map %s -S 2>&1 | FileCheck -check-prefix=CHECK-TRIPLE %s
 // CHECK-TRIPLE: error: unsupported option '-fbasic-block-address-map' for target
diff --git a/clang/test/Driver/split-debug.c b/clang/test/Driver/split-debug.c
index a2a3dc02354503..968f33b4cc035c 100644
--- a/clang/test/Driver/split-debug.c
+++ b/clang/test/Driver/split-debug.c
@@ -124,8 +124,3 @@
 // G1_NOSPLIT: "-debug-info-kind=line-tables-only"
 // G1_NOSPLIT-NOT: "-split-dwarf-file"
 // G1_NOSPLIT-NOT: "-split-dwarf-output"
-
-/// Do not generate -ggnu-pubnames for -glldb
-// RUN: %clang -### -c -target x86_64 -gsplit-dwarf -g -glldb %s 2>&1 | FileCheck %s --check-prefixes=GLLDBSPLIT
-
-// GLLDBSPLIT-NOT: "-ggnu-pubnames"
diff --git a/clang/test/InstallAPI/basic.test b/clang/test/InstallAPI/basic.test
index 22b04792ca2c30..5b41ccd517b0af 100644
--- a/clang/test/InstallAPI/basic.test
+++ b/clang/test/InstallAPI/basic.test
@@ -16,6 +16,11 @@
 // CHECK-NOT: warning:  
 
 //--- basic_inputs.json
+{
+  "headers": [
+  ],
+  "version": "3"
+}
 
 //--- expected.tbd
 {
diff --git a/clang/test/InstallAPI/variables.test b/clang/test/InstallAPI/variables.test
new file mode 100644
index 00000000000000..6272867911f18b
--- /dev/null
+++ b/clang/test/InstallAPI/variables.test
@@ -0,0 +1,63 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|SRC_DIR|%/t|g" %t/vars_inputs.json.in > %t/vars_inputs.json
+
+/// Check multiple targets are captured.
+// RUN: clang-installapi -target arm64-apple-macos13.1 -target arm64e-apple-macos13.1 \
+// RUN: -fapplication-extension -install_name /usr/lib/vars.dylib \
+// RUN: %t/vars_inputs.json -o %t/vars.tbd 2>&1 | FileCheck %s --allow-empty
+// RUN: llvm-readtapi -compare %t/vars.tbd %t/expected.tbd 2>&1 | FileCheck %s --allow-empty
+
+// CHECK-NOT: error:  
+// CHECK-NOT: warning:  
+
+//--- vars.h
+extern int foo;
+
+//--- vars_inputs.json.in
+{
+  "headers": [ {
+    "path" : "SRC_DIR/vars.h",
+    "type" : "public"
+  }],
+  "version": "3"
+}
+
+//--- expected.tbd
+{
+  "main_library": {
+    "compatibility_versions": [
+      {
+        "version": "0"
+      }],
+    "current_versions": [
+      {
+        "version": "0"
+      }],
+    "install_names": [
+      {
+        "name": "/usr/lib/vars.dylib"
+      }
+    ],
+    "exported_symbols": [
+      {
+        "data": {
+          "global": [
+            "_foo"
+          ]
+        }
+      }
+    ],
+    "target_info": [
+      {
+        "min_deployment": "13.1",
+        "target": "arm64-macos"
+      },
+      {
+        "min_deployment": "13.1",
+        "target": "arm64e-macos"
+      }
+    ]
+  },
+  "tapi_tbd_version": 5
+}
diff --git a/clang/tools/clang-installapi/CMakeLists.txt b/clang/tools/clang-installapi/CMakeLists.txt
index b8384c92c104f6..e05f4eac3ad198 100644
--- a/clang/tools/clang-installapi/CMakeLists.txt
+++ b/clang/tools/clang-installapi/CMakeLists.txt
@@ -14,6 +14,7 @@ add_clang_tool(clang-installapi
 
 clang_target_link_libraries(clang-installapi
   PRIVATE
+  clangAST
   clangInstallAPI
   clangBasic
   clangDriver
diff --git a/clang/tools/clang-installapi/ClangInstallAPI.cpp b/clang/tools/clang-installapi/ClangInstallAPI.cpp
index fc23ffd7ae6b9c..43c9fca0a82eec 100644
--- a/clang/tools/clang-installapi/ClangInstallAPI.cpp
+++ b/clang/tools/clang-installapi/ClangInstallAPI.cpp
@@ -12,12 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "Options.h"
-#include "clang/Basic/DiagnosticIDs.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticFrontend.h"
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
-#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Driver/Tool.h"
 #include "clang/Frontend/TextDiagnosticPrinter.h"
-#include "clang/InstallAPI/Context.h"
+#include "clang/InstallAPI/Frontend.h"
+#include "clang/Tooling/Tooling.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Option/Option.h"
 #include "llvm/Support/CommandLine.h"
@@ -27,7 +29,9 @@
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/TargetParser/Host.h"
+#include "llvm/TextAPI/RecordVisitor.h"
 #include "llvm/TextAPI/TextAPIWriter.h"
+#include 
 
 using namespace clang;
 using namespace clang::installapi;
@@ -35,6 +39,36 @@ using namespace clang::driver::options;
 using namespace llvm::opt;
 using namespace llvm::MachO;
 
+static bool runFrontend(StringRef ProgName, bool Verbose,
+                        const InstallAPIContext &Ctx,
+                        llvm::vfs::InMemoryFileSystem *FS,
+                        const ArrayRef InitialArgs) {
+
+  std::unique_ptr ProcessedInput = createInputBuffer(Ctx);
+  // Skip invoking cc1 when there are no header inputs.
+  if (!ProcessedInput)
+    return true;
+
+  if (Verbose)
+    llvm::errs() << getName(Ctx.Type) << " Headers:\n"
+                 << ProcessedInput->getBuffer() << "\n\n";
+
+  std::string InputFile = ProcessedInput->getBufferIdentifier().str();
+  FS->addFile(InputFile, /*ModTime=*/0, std::move(ProcessedInput));
+  // Reconstruct arguments with unique values like target triple or input
+  // headers.
+  std::vector Args = {ProgName.data(), "-target",
+                                   Ctx.Slice->getTriple().str().c_str()};
+  llvm::copy(InitialArgs, std::back_inserter(Args));
+  Args.push_back(InputFile);
+
+  // Create & run invocation.
+  clang::tooling::ToolInvocation Invocation(
+      std::move(Args), std::make_unique(*Ctx.Slice), Ctx.FM);
+
+  return Invocation.run();
+}
+
 static bool run(ArrayRef Args, const char *ProgName) {
   // Setup Diagnostics engine.
   IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions();
@@ -48,9 +82,15 @@ static bool run(ArrayRef Args, const char *ProgName) {
       new clang::DiagnosticIDs(), DiagOpts.get(),
       new clang::TextDiagnosticPrinter(llvm::errs(), DiagOpts.get()));
 
-  // Create file manager for all file operations.
+  // Create file manager for all file operations and holding in-memory generated
+  // inputs.
+  llvm::IntrusiveRefCntPtr OverlayFileSystem(
+      new llvm::vfs::OverlayFileSystem(llvm::vfs::getRealFileSystem()));
+  llvm::IntrusiveRefCntPtr InMemoryFileSystem(
+      new llvm::vfs::InMemoryFileSystem);
+  OverlayFileSystem->pushOverlay(InMemoryFileSystem);
   IntrusiveRefCntPtr FM(
-      new FileManager(clang::FileSystemOptions()));
+      new FileManager(clang::FileSystemOptions(), OverlayFileSystem));
 
   // Set up driver to parse input arguments.
   auto DriverArgs = llvm::ArrayRef(Args).slice(1);
@@ -71,7 +111,10 @@ static bool run(ArrayRef Args, const char *ProgName) {
   Options Opts(*Diag, FM.get(), ArgList);
   if (Diag->hasErrorOccurred())
     return EXIT_FAILURE;
+
   InstallAPIContext Ctx = Opts.createContext();
+  if (Diag->hasErrorOccurred())
+    return EXIT_FAILURE;
 
   // Set up compilation.
   std::unique_ptr CI(new CompilerInstance());
@@ -80,6 +123,21 @@ static bool run(ArrayRef Args, const char *ProgName) {
   if (!CI->hasDiagnostics())
     return EXIT_FAILURE;
 
+  // Execute and gather AST results.
+  llvm::MachO::Records FrontendResults;
+  for (const auto &[Targ, Trip] : Opts.DriverOpts.Targets) {
+    for (const HeaderType Type :
+         {HeaderType::Public, HeaderType::Private, HeaderType::Project}) {
+      Ctx.Slice = std::make_shared(Trip);
+      Ctx.Type = Type;
+      if (!runFrontend(ProgName, Opts.DriverOpts.Verbose, Ctx,
+                       InMemoryFileSystem.get(), Opts.getClangFrontendArgs()))
+        return EXIT_FAILURE;
+      FrontendResults.emplace_back(std::move(Ctx.Slice));
+    }
+  }
+
+  // After symbols have been collected, prepare to write output.
   auto Out = CI->createOutputFile(Ctx.OutputLoc, /*Binary=*/false,
                                   /*RemoveFileOnSignal=*/false,
                                   /*UseTemporary=*/false,
@@ -88,7 +146,13 @@ static bool run(ArrayRef Args, const char *ProgName) {
     return EXIT_FAILURE;
 
   // Assign attributes for serialization.
-  InterfaceFile IF;
+  auto Symbols = std::make_unique();
+  for (const auto &FR : FrontendResults) {
+    SymbolConverter Converter(Symbols.get(), FR->getTarget());
+    FR->visit(Converter);
+  }
+
+  InterfaceFile IF(std::move(Symbols));
   for (const auto &TargetInfo : Opts.DriverOpts.Targets) {
     IF.addTarget(TargetInfo.first);
     IF.setFromBinaryAttrs(Ctx.BA, TargetInfo.first);
diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp
index 562a643edfcf40..7d45e999448d9f 100644
--- a/clang/tools/clang-installapi/Options.cpp
+++ b/clang/tools/clang-installapi/Options.cpp
@@ -9,6 +9,7 @@
 #include "Options.h"
 #include "clang/Driver/Driver.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
+#include "clang/InstallAPI/FileList.h"
 #include "llvm/Support/Program.h"
 #include "llvm/TargetParser/Host.h"
 
@@ -68,6 +69,8 @@ bool Options::processDriverOptions(InputArgList &Args) {
     }
   }
 
+  DriverOpts.Verbose = Args.hasArgNoClaim(OPT_v);
+
   return true;
 }
 
@@ -104,10 +107,21 @@ Options::Options(DiagnosticsEngine &Diag, FileManager *FM,
 
   if (!processLinkerOptions(ArgList))
     return;
+
+  /// Any remaining arguments should be handled by invoking the clang frontend.
+  for (const Arg *A : ArgList) {
+    if (A->isClaimed())
+      continue;
+    FrontendArgs.emplace_back(A->getAsString(ArgList));
+  }
+  FrontendArgs.push_back("-fsyntax-only");
 }
 
 InstallAPIContext Options::createContext() {
   InstallAPIContext Ctx;
+  Ctx.FM = FM;
+  Ctx.Diags = Diags;
+
   // InstallAPI requires two level namespacing.
   Ctx.BA.TwoLevelNamespace = true;
 
@@ -116,6 +130,21 @@ InstallAPIContext Options::createContext() {
   Ctx.BA.AppExtensionSafe = LinkerOpts.AppExtensionSafe;
   Ctx.FT = DriverOpts.OutFT;
   Ctx.OutputLoc = DriverOpts.OutputPath;
+
+  // Process inputs.
+  for (const std::string &ListPath : DriverOpts.FileLists) {
+    auto Buffer = FM->getBufferForFile(ListPath);
+    if (auto Err = Buffer.getError()) {
+      Diags->Report(diag::err_cannot_open_file) << ListPath;
+      return Ctx;
+    }
+    if (auto Err = FileListReader::loadHeaders(std::move(Buffer.get()),
+                                               Ctx.InputHeaders)) {
+      Diags->Report(diag::err_cannot_open_file) << ListPath;
+      return Ctx;
+    }
+  }
+
   return Ctx;
 }
 
diff --git a/clang/tools/clang-installapi/Options.h b/clang/tools/clang-installapi/Options.h
index 4a84166a6c91b1..f68addf1972880 100644
--- a/clang/tools/clang-installapi/Options.h
+++ b/clang/tools/clang-installapi/Options.h
@@ -43,6 +43,9 @@ struct DriverOptions {
 
   /// \brief File encoding to print.
   llvm::MachO::FileType OutFT = llvm::MachO::FileType::TBD_V5;
+
+  /// \brief Print verbose output.
+  bool Verbose = false;
 };
 
 struct LinkerOptions {
@@ -78,9 +81,14 @@ class Options {
   Options(clang::DiagnosticsEngine &Diag, FileManager *FM,
           llvm::opt::InputArgList &Args);
 
+  /// \brief Get CC1 arguments after extracting out the irrelevant
+  /// ones.
+  std::vector &getClangFrontendArgs() { return FrontendArgs; }
+
 private:
   DiagnosticsEngine *Diags;
   FileManager *FM;
+  std::vector FrontendArgs;
 };
 
 } // namespace installapi
diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index bbb4e8d7c333e4..8a2b138d8d7020 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -771,8 +771,6 @@ mark_as_advanced(COMPILER_RT_ENABLE_INTERNAL_SYMBOLIZER)
 add_subdirectory(lib)
 
 if(COMPILER_RT_INCLUDE_TESTS)
-  add_subdirectory(unittests)
-  add_subdirectory(test)
   # Don't build llvm-lit for runtimes-build, it will clean up map_config.
   if (COMPILER_RT_STANDALONE_BUILD AND NOT LLVM_RUNTIMES_BUILD)
     # If we have a valid source tree, generate llvm-lit into the bin directory.
@@ -782,11 +780,17 @@ if(COMPILER_RT_INCLUDE_TESTS)
       # Needed for lit support in standalone builds.
       include(AddLLVM)
       add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/llvm-lit ${CMAKE_CURRENT_BINARY_DIR}/llvm-lit)
+      # Ensure that the testsuite uses the local lit rather than
+      # LLVM_INSTALL_DIR/bin/llvm-lit (which probably does not exist).
+      get_llvm_lit_path(_base_dir _file_name)
+      set(LLVM_EXTERNAL_LIT "${_base_dir}/${_file_name}" CACHE STRING "Command used to spawn lit" FORCE)
     elseif(NOT EXISTS ${LLVM_EXTERNAL_LIT})
       message(WARNING "Could not find LLVM source directory and LLVM_EXTERNAL_LIT does not"
                        "point to a valid file.  You will not be able to run tests.")
     endif()
   endif()
+  add_subdirectory(unittests)
+  add_subdirectory(test)
 endif()
 
 add_subdirectory(tools)
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 28ded8766f2533..83f7697a4a2b42 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -916,7 +916,7 @@ cmake_dependent_option(COMPILER_RT_BUILD_CRT "Build crtbegin.o/crtend.o" ON "COM
 if (COMPILER_RT_BUILD_CRT)
   add_compiler_rt_component(crt)
 
-  option(COMPILER_RT_CRT_USE_EH_FRAME_REGISTRY "Use eh_frame in crtbegin.o/crtend.o" ON)
+  option(COMPILER_RT_CRT_USE_EH_FRAME_REGISTRY "Use eh_frame in crtbegin.o/crtend.o" OFF)
 
   include(CheckSectionExists)
   check_section_exists(".init_array" COMPILER_RT_HAS_INITFINI_ARRAY
diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c
index 7c5c26f4d113b1..1c451d7ec75637 100644
--- a/compiler-rt/lib/profile/InstrProfilingBuffer.c
+++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c
@@ -61,19 +61,12 @@ uint64_t __llvm_profile_get_size_for_buffer(void) {
       NamesBegin, NamesEnd, VTableBegin, VTableEnd, VNamesBegin, VNamesEnd);
 }
 
+// NOTE: Caller should guarantee that `Begin` and `End` specifies a half-open
+// interval [Begin, End). Namely, `End` is one-byte past the end of the array.
 COMPILER_RT_VISIBILITY
 uint64_t __llvm_profile_get_num_data(const __llvm_profile_data *Begin,
                                      const __llvm_profile_data *End) {
   intptr_t BeginI = (intptr_t)Begin, EndI = (intptr_t)End;
-  // `sizeof(__llvm_profile_data) - 1` is required in the numerator when
-  // [Begin, End] represents an inclusive range.
-  // For ELF, [Begin, End) represents the address of linker-inserted
-  // symbols  `__start__` and `__stop_`.
-  // Thereby, `End` is one byte past the inclusive range, and
-  // `sizeof(__llvm_profile_data) - 1` is not necessary in the numerator to get
-  // the correct number of profile data.
-  // FIXME: Consider removing `sizeof(__llvm_profile_data) - 1` if this is true
-  // across platforms.
   return ((EndI + sizeof(__llvm_profile_data) - 1) - BeginI) /
          sizeof(__llvm_profile_data);
 }
diff --git a/compiler-rt/test/builtins/Unit/ctor_dtor.c b/compiler-rt/test/builtins/Unit/ctor_dtor.c
index 47560722a9f750..3d5f895a0a1cd1 100644
--- a/compiler-rt/test/builtins/Unit/ctor_dtor.c
+++ b/compiler-rt/test/builtins/Unit/ctor_dtor.c
@@ -9,23 +9,13 @@
 
 // Ensure the various startup functions are called in the proper order.
 
-// CHECK: __register_frame_info()
 /// ctor() is here if ld.so/libc supports DT_INIT/DT_FINI
 // CHECK:      main()
 /// dtor() is here if ld.so/libc supports DT_INIT/DT_FINI
-// CHECK:      __deregister_frame_info()
 
 struct object;
 static int counter;
 
-void __register_frame_info(const void *fi, struct object *obj) {
-  printf("__register_frame_info()\n");
-}
-
-void __deregister_frame_info(const void *fi) {
-  printf("__deregister_frame_info()\n");
-}
-
 void __attribute__((constructor)) ctor() {
   printf("ctor()\n");
   ++counter;
diff --git a/compiler-rt/test/profile/instrprof-basic.c b/compiler-rt/test/profile/instrprof-basic.c
index de66e1b2746806..702f521ba4ed8a 100644
--- a/compiler-rt/test/profile/instrprof-basic.c
+++ b/compiler-rt/test/profile/instrprof-basic.c
@@ -1,6 +1,7 @@
 // RUN: %clang_profgen -o %t -O3 %s
 // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
 // RUN: llvm-profdata merge -o %t.profdata %t.profraw
+// RUN: llvm-profdata show --all-functions %t.profdata | FileCheck %s --check-prefix=PROFCNT
 // RUN: %clang_profuse=%t.profdata -o - -S -emit-llvm %s | FileCheck %s --check-prefix=COMMON --check-prefix=ORIG
 //
 // RUN: rm -fr %t.dir1
@@ -8,6 +9,7 @@
 // RUN: env LLVM_PROFILE_FILE=%t.dir1/profraw_e_%1m %run %t
 // RUN: env LLVM_PROFILE_FILE=%t.dir1/profraw_e_%1m %run %t
 // RUN: llvm-profdata merge -o %t.em.profdata %t.dir1
+// RUN: llvm-profdata show --all-functions %t.em.profdata | FileCheck %s --check-prefix=PROFCNT
 // RUN: %clang_profuse=%t.em.profdata -o - -S -emit-llvm %s | FileCheck %s --check-prefix=COMMON --check-prefix=MERGE
 //
 // RUN: rm -fr %t.dir2
@@ -16,6 +18,7 @@
 // RUN: %run %t.merge
 // RUN: %run %t.merge
 // RUN: llvm-profdata merge -o %t.m.profdata %t.dir2/
+// RUN: llvm-profdata show --all-functions %t.m.profdata | FileCheck %s --check-prefix=PROFCNT
 // RUN: %clang_profuse=%t.m.profdata -o - -S -emit-llvm %s | FileCheck %s --check-prefix=COMMON --check-prefix=MERGE
 //
 // Test that merging is enabled by default with -fprofile-generate=
@@ -27,6 +30,7 @@
 // RUN: %run %t.merge3
 // RUN: %run %t.merge3
 // RUN: llvm-profdata merge -o %t.m3.profdata %t.dir3/
+// RUN: llvm-profdata show --all-functions %t.m3.profdata | FileCheck %s --check-prefix=PROFCNT
 // RUN: %clang_profuse=%t.m3.profdata -O0 -o - -S -emit-llvm %s | FileCheck %s --check-prefix=COMMON --check-prefix=PGOMERGE
 //
 // Test that merging is enabled by default with -fprofile-generate
@@ -40,6 +44,7 @@
 // RUN: %run %t.dir4/merge4
 // RUN: rm -f %t.dir4/merge4*
 // RUN: llvm-profdata merge -o %t.m4.profdata ./
+// RUN: llvm-profdata show --all-functions %t.m4.profdata | FileCheck %s --check-prefix=PROFCNT
 // RUN: %clang_profuse=%t.m4.profdata -O0 -o - -S -emit-llvm %s | FileCheck %s --check-prefix=COMMON  --check-prefix=PGOMERGE
 
 /// Test that the merge pool size can be larger than 10.
@@ -49,6 +54,13 @@
 // RUN: not ls %t.dir5/e_%20m.profraw
 // RUN: ls %t.dir5/e_*.profraw | count 1
 
+// Test that all three functions have counters in the profile.
+// PROFCNT-DAG: begin
+// PROFCNT-DAG: end
+// PROFCNT-DAG: main
+// PROFCNT: Functions shown: 3
+// PROFCNT: Total functions: 3
+
 int begin(int i) {
   // COMMON: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD1:[0-9]+]]
   if (i)
diff --git a/flang/include/flang/Common/float128.h b/flang/include/flang/Common/float128.h
index 3443aa06437b04..2e76bc0a162e61 100644
--- a/flang/include/flang/Common/float128.h
+++ b/flang/include/flang/Common/float128.h
@@ -20,6 +20,8 @@
 #ifndef FORTRAN_COMMON_FLOAT128_H_
 #define FORTRAN_COMMON_FLOAT128_H_
 
+#include 
+
 #ifdef __cplusplus
 /*
  * libc++ does not fully support __float128 right now, e.g.
@@ -49,4 +51,25 @@
 #endif /* (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)) && \
           !defined(_LIBCPP_VERSION)  && !defined(__CUDA_ARCH__) */
 
+/* Define pure C CFloat128Type and CFloat128ComplexType. */
+#if LDBL_MANT_DIG == 113
+typedef long double CFloat128Type;
+#ifndef __cplusplus
+typedef long double _Complex CFloat128ComplexType;
+#endif
+#elif HAS_FLOAT128
+typedef __float128 CFloat128Type;
+
+#ifndef __cplusplus
+/*
+ * Use mode() attribute supported by GCC and Clang.
+ * Adjust it for other compilers as needed.
+ */
+#if !defined(_ARCH_PPC) || defined(__LONG_DOUBLE_IEEE128__)
+typedef _Complex float __attribute__((mode(TC))) CFloat128ComplexType;
+#else
+typedef _Complex float __attribute__((mode(KC))) CFloat128ComplexType;
+#endif
+#endif // __cplusplus
+#endif
 #endif /* FORTRAN_COMMON_FLOAT128_H_ */
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index e2af59e0aaa196..32e7a5e2b04061 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -53,6 +53,7 @@ class DerivedTypeSpec;
 
 namespace lower {
 class SymMap;
+struct SymbolBox;
 namespace pft {
 struct Variable;
 }
@@ -299,6 +300,11 @@ class AbstractConverter {
     return loweringOptions;
   }
 
+  /// Find the symbol in one level up of symbol map such as for host-association
+  /// in OpenMP code or return null.
+  virtual Fortran::lower::SymbolBox
+  lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0;
+
 private:
   /// Options controlling lowering behavior.
   const Fortran::lower::LoweringOptions &loweringOptions;
diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index 39821f1036c63f..bd9b67b14b966c 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -688,6 +688,9 @@ fir::BoxValue createBoxValue(fir::FirOpBuilder &builder, mlir::Location loc,
 /// Generate Null BoxProc for procedure pointer null initialization.
 mlir::Value createNullBoxProc(fir::FirOpBuilder &builder, mlir::Location loc,
                               mlir::Type boxType);
+
+/// Set internal linkage attribute on a function.
+void setInternalLinkage(mlir::func::FuncOp);
 } // namespace fir::factory
 
 #endif // FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H
diff --git a/flang/include/flang/Runtime/reduction.h b/flang/include/flang/Runtime/reduction.h
index b91fec0cd26b51..5b607765857523 100644
--- a/flang/include/flang/Runtime/reduction.h
+++ b/flang/include/flang/Runtime/reduction.h
@@ -92,9 +92,11 @@ void RTDECL(CppSumComplex8)(std::complex &, const Descriptor &,
 void RTDECL(CppSumComplex10)(std::complex &, const Descriptor &,
     const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTDECL(CppSumComplex16)(std::complex &, const Descriptor &,
-    const char *source, int line, int dim = 0,
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+void RTDECL(CppSumComplex16)(std::complex &,
+    const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
+#endif
 
 void RTDECL(SumDim)(Descriptor &result, const Descriptor &array, int dim,
     const char *source, int line, const Descriptor *mask = nullptr);
@@ -145,12 +147,16 @@ void RTDECL(CppProductComplex4)(std::complex &, const Descriptor &,
 void RTDECL(CppProductComplex8)(std::complex &, const Descriptor &,
     const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
+#if LDBL_MANT_DIG == 64
 void RTDECL(CppProductComplex10)(std::complex &,
     const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTDECL(CppProductComplex16)(std::complex &,
+#endif
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+void RTDECL(CppProductComplex16)(std::complex &,
     const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
+#endif
 
 void RTDECL(ProductDim)(Descriptor &result, const Descriptor &array, int dim,
     const char *source, int line, const Descriptor *mask = nullptr);
@@ -358,9 +364,12 @@ double RTDECL(Norm2_8)(
 #if LDBL_MANT_DIG == 64
 long double RTDECL(Norm2_10)(
     const Descriptor &, const char *source, int line, int dim = 0);
-#elif LDBL_MANT_DIG == 113
+#endif
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
 long double RTDECL(Norm2_16)(
     const Descriptor &, const char *source, int line, int dim = 0);
+void RTDECL(Norm2DimReal16)(
+    Descriptor &, const Descriptor &, int dim, const char *source, int line);
 #endif
 void RTDECL(Norm2Dim)(
     Descriptor &, const Descriptor &, int dim, const char *source, int line);
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index f865b53f74debd..153ce0623ab3ae 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1000,6 +1000,17 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       if (sym.detailsIf())
         return symMap->lookupSymbol(sym);
 
+      // For symbols to be privatized in OMP, the symbol is mapped to an
+      // instance of `SymbolBox::Intrinsic` (i.e. a direct mapping to an MLIR
+      // SSA value). This MLIR SSA value is the block argument to the
+      // `omp.private`'s `alloc` block. If this is the case, we return this
+      // `SymbolBox::Intrinsic` value.
+      if (Fortran::lower::SymbolBox v = symMap->lookupSymbol(sym))
+        return v.match(
+            [&](const Fortran::lower::SymbolBox::Intrinsic &)
+                -> Fortran::lower::SymbolBox { return v; },
+            [](const auto &) -> Fortran::lower::SymbolBox { return {}; });
+
       return {};
     }
     if (Fortran::lower::SymbolBox v = symMap->lookupSymbol(sym))
@@ -1018,7 +1029,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
   /// Find the symbol in one level up of symbol map such as for host-association
   /// in OpenMP code or return null.
   Fortran::lower::SymbolBox
-  lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) {
+  lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) override {
     if (Fortran::lower::SymbolBox v = localSymbols.lookupOneLevelUpSymbol(sym))
       return v;
     return {};
@@ -4477,7 +4488,16 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     assert(builder && "FirOpBuilder did not instantiate");
     builder->setFastMathFlags(bridge.getLoweringOptions().getMathOptions());
     builder->setInsertionPointToStart(&func.front());
-    func.setVisibility(mlir::SymbolTable::Visibility::Public);
+    if (funit.parent.isA()) {
+      // Give internal linkage to internal functions. There are no name clash
+      // risks, but giving global linkage to internal procedure will break the
+      // static link register in shared libraries because of the system calls.
+      // Also, it should be possible to eliminate the procedure code if all the
+      // uses have been inlined.
+      fir::factory::setInternalLinkage(func);
+    } else {
+      func.setVisibility(mlir::SymbolTable::Visibility::Public);
+    }
     assert(blockId == 0 && "invalid blockId");
     assert(activeConstructStack.empty() && "invalid construct stack state");
 
diff --git a/flang/lib/Lower/HostAssociations.cpp b/flang/lib/Lower/HostAssociations.cpp
index a62f7a7e99b6ff..b9e13ccad1c929 100644
--- a/flang/lib/Lower/HostAssociations.cpp
+++ b/flang/lib/Lower/HostAssociations.cpp
@@ -247,9 +247,11 @@ class CapturedCharacterScalars
   }
 };
 
-/// Class defining how polymorphic entities are captured in internal procedures.
-/// Polymorphic entities are always boxed as a fir.class box.
-class CapturedPolymorphic : public CapturedSymbols {
+/// Class defining how polymorphic scalar entities are captured in internal
+/// procedures. Polymorphic entities are always boxed as a fir.class box.
+/// Polymorphic array can be handled in CapturedArrays directly
+class CapturedPolymorphicScalar
+    : public CapturedSymbols {
 public:
   static mlir::Type getType(Fortran::lower::AbstractConverter &converter,
                             const Fortran::semantics::Symbol &sym) {
@@ -257,19 +259,50 @@ class CapturedPolymorphic : public CapturedSymbols {
   }
   static void instantiateHostTuple(const InstantiateHostTuple &args,
                                    Fortran::lower::AbstractConverter &converter,
-                                   const Fortran::semantics::Symbol &) {
+                                   const Fortran::semantics::Symbol &sym) {
     fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+    mlir::Location loc = args.loc;
     mlir::Type typeInTuple = fir::dyn_cast_ptrEleTy(args.addrInTuple.getType());
     assert(typeInTuple && "addrInTuple must be an address");
     mlir::Value castBox = builder.createConvert(args.loc, typeInTuple,
                                                 fir::getBase(args.hostValue));
-    builder.create(args.loc, castBox, args.addrInTuple);
+    if (Fortran::semantics::IsOptional(sym)) {
+      auto isPresent =
+          builder.create(loc, builder.getI1Type(), castBox);
+      builder.genIfThenElse(loc, isPresent)
+          .genThen([&]() {
+            builder.create(loc, castBox, args.addrInTuple);
+          })
+          .genElse([&]() {
+            mlir::Value null = fir::factory::createUnallocatedBox(
+                builder, loc, typeInTuple,
+                /*nonDeferredParams=*/mlir::ValueRange{});
+            builder.create(loc, null, args.addrInTuple);
+          })
+          .end();
+    } else {
+      builder.create(loc, castBox, args.addrInTuple);
+    }
   }
   static void getFromTuple(const GetFromTuple &args,
                            Fortran::lower::AbstractConverter &converter,
                            const Fortran::semantics::Symbol &sym,
                            const Fortran::lower::BoxAnalyzer &ba) {
-    bindCapturedSymbol(sym, args.valueInTuple, converter, args.symMap);
+    fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+    mlir::Location loc = args.loc;
+    mlir::Value box = args.valueInTuple;
+    if (Fortran::semantics::IsOptional(sym)) {
+      auto boxTy = box.getType().cast();
+      auto eleTy = boxTy.getEleTy();
+      if (!fir::isa_ref_type(eleTy))
+        eleTy = builder.getRefType(eleTy);
+      auto addr = builder.create(loc, eleTy, box);
+      mlir::Value isPresent = builder.genIsNotNullAddr(loc, addr);
+      auto absentBox = builder.create(loc, boxTy);
+      box =
+          builder.create(loc, isPresent, box, absentBox);
+    }
+    bindCapturedSymbol(sym, box, converter, args.symMap);
   }
 };
 
@@ -342,7 +375,12 @@ class CapturedArrays : public CapturedSymbols {
   static mlir::Type getType(Fortran::lower::AbstractConverter &converter,
                             const Fortran::semantics::Symbol &sym) {
     mlir::Type type = converter.genType(sym);
-    assert(type.isa() && "must be a sequence type");
+    bool isPolymorphic = Fortran::semantics::IsPolymorphic(sym);
+    assert((type.isa() ||
+            (isPolymorphic && type.isa())) &&
+           "must be a sequence type");
+    if (isPolymorphic)
+      return type;
     return fir::BoxType::get(type);
   }
 
@@ -410,13 +448,13 @@ class CapturedArrays : public CapturedSymbols {
                          fir::factory::readBoxValue(builder, loc, boxValue),
                          converter, args.symMap);
     } else {
-      // Keep variable as a fir.box.
+      // Keep variable as a fir.box/fir.class.
       // If this is an optional that is absent, the fir.box needs to be an
       // AbsentOp result, otherwise it will not work properly with IsPresentOp
       // (absent boxes are null descriptor addresses, not descriptors containing
       // a null base address).
       if (Fortran::semantics::IsOptional(sym)) {
-        auto boxTy = box.getType().cast();
+        auto boxTy = box.getType().cast();
         auto eleTy = boxTy.getEleTy();
         if (!fir::isa_ref_type(eleTy))
           eleTy = builder.getRefType(eleTy);
@@ -470,14 +508,10 @@ walkCaptureCategories(T visitor, Fortran::lower::AbstractConverter &converter,
   ba.analyze(sym);
   if (Fortran::semantics::IsAllocatableOrPointer(sym))
     return CapturedAllocatableAndPointer::visit(visitor, converter, sym, ba);
-  if (Fortran::semantics::IsPolymorphic(sym)) {
-    if (ba.isArray() && !ba.lboundIsAllOnes())
-      TODO(converter.genLocation(sym.name()),
-           "polymorphic array with non default lower bound");
-    return CapturedPolymorphic::visit(visitor, converter, sym, ba);
-  }
   if (ba.isArray())
     return CapturedArrays::visit(visitor, converter, sym, ba);
+  if (Fortran::semantics::IsPolymorphic(sym))
+    return CapturedPolymorphicScalar::visit(visitor, converter, sym, ba);
   if (ba.isChar())
     return CapturedCharacterScalars::visit(visitor, converter, sym, ba);
   assert(ba.isTrivial() && "must be trivial scalar");
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 136bda0b582ee3..717b8cc0276a30 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -14,6 +14,7 @@
 
 #include "Utils.h"
 #include "flang/Lower/PFTBuilder.h"
+#include "flang/Lower/SymbolMap.h"
 #include "flang/Optimizer/Builder/Todo.h"
 #include "flang/Semantics/tools.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
@@ -66,9 +67,10 @@ void DataSharingProcessor::cloneSymbol(const Fortran::semantics::Symbol *sym) {
 }
 
 void DataSharingProcessor::copyFirstPrivateSymbol(
-    const Fortran::semantics::Symbol *sym) {
+    const Fortran::semantics::Symbol *sym,
+    mlir::OpBuilder::InsertPoint *copyAssignIP) {
   if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate))
-    converter.copyHostAssociateVar(*sym);
+    converter.copyHostAssociateVar(*sym, copyAssignIP);
 }
 
 void DataSharingProcessor::copyLastPrivateSymbol(
@@ -307,14 +309,10 @@ void DataSharingProcessor::privatize() {
   for (const Fortran::semantics::Symbol *sym : privatizedSymbols) {
     if (const auto *commonDet =
             sym->detailsIf()) {
-      for (const auto &mem : commonDet->objects()) {
-        cloneSymbol(&*mem);
-        copyFirstPrivateSymbol(&*mem);
-      }
-    } else {
-      cloneSymbol(sym);
-      copyFirstPrivateSymbol(sym);
-    }
+      for (const auto &mem : commonDet->objects())
+        doPrivatize(&*mem);
+    } else
+      doPrivatize(sym);
   }
 }
 
@@ -338,11 +336,95 @@ void DataSharingProcessor::defaultPrivatize() {
         !sym->GetUltimate().has() &&
         !symbolsInNestedRegions.contains(sym) &&
         !symbolsInParentRegions.contains(sym) &&
-        !privatizedSymbols.contains(sym)) {
+        !privatizedSymbols.contains(sym))
+      doPrivatize(sym);
+  }
+}
+
+void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
+  if (!useDelayedPrivatization) {
+    cloneSymbol(sym);
+    copyFirstPrivateSymbol(sym);
+    return;
+  }
+
+  Fortran::lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
+  assert(hsb && "Host symbol box not found");
+
+  mlir::Type symType = hsb.getAddr().getType();
+  mlir::Location symLoc = hsb.getAddr().getLoc();
+  std::string privatizerName = sym->name().ToString() + ".privatizer";
+  bool isFirstPrivate =
+      sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate);
+
+  mlir::omp::PrivateClauseOp privatizerOp = [&]() {
+    auto moduleOp = firOpBuilder.getModule();
+    auto uniquePrivatizerName = fir::getTypeAsString(
+        symType, converter.getKindMap(),
+        converter.mangleName(*sym) +
+            (isFirstPrivate ? "_firstprivate" : "_private"));
+
+    if (auto existingPrivatizer =
+            moduleOp.lookupSymbol(
+                uniquePrivatizerName))
+      return existingPrivatizer;
+
+    auto ip = firOpBuilder.saveInsertionPoint();
+    firOpBuilder.setInsertionPoint(&moduleOp.getBodyRegion().front(),
+                                   moduleOp.getBodyRegion().front().begin());
+    auto result = firOpBuilder.create(
+        symLoc, uniquePrivatizerName, symType,
+        isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate
+                       : mlir::omp::DataSharingClauseType::Private);
+
+    symTable->pushScope();
+
+    // Populate the `alloc` region.
+    {
+      mlir::Region &allocRegion = result.getAllocRegion();
+      mlir::Block *allocEntryBlock = firOpBuilder.createBlock(
+          &allocRegion, /*insertPt=*/{}, symType, symLoc);
+
+      firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
+      symTable->addSymbol(*sym, allocRegion.getArgument(0));
+      symTable->pushScope();
       cloneSymbol(sym);
-      copyFirstPrivateSymbol(sym);
+      firOpBuilder.create(
+          hsb.getAddr().getLoc(),
+          symTable->shallowLookupSymbol(*sym).getAddr());
+      symTable->popScope();
     }
-  }
+
+    // Populate the `copy` region if this is a `firstprivate`.
+    if (isFirstPrivate) {
+      mlir::Region ©Region = result.getCopyRegion();
+      // First block argument corresponding to the original/host value while
+      // second block argument corresponding to the privatized value.
+      mlir::Block *copyEntryBlock = firOpBuilder.createBlock(
+          ©Region, /*insertPt=*/{}, {symType, symType}, {symLoc, symLoc});
+      firOpBuilder.setInsertionPointToEnd(copyEntryBlock);
+      symTable->addSymbol(*sym, copyRegion.getArgument(0),
+                          /*force=*/true);
+      symTable->pushScope();
+      symTable->addSymbol(*sym, copyRegion.getArgument(1));
+      auto ip = firOpBuilder.saveInsertionPoint();
+      copyFirstPrivateSymbol(sym, &ip);
+
+      firOpBuilder.create(
+          hsb.getAddr().getLoc(),
+          symTable->shallowLookupSymbol(*sym).getAddr());
+      symTable->popScope();
+    }
+
+    symTable->popScope();
+    firOpBuilder.restoreInsertionPoint(ip);
+    return result;
+  }();
+
+  delayedPrivatizationInfo.privatizers.push_back(
+      mlir::SymbolRefAttr::get(privatizerOp));
+  delayedPrivatizationInfo.originalAddresses.push_back(hsb.getAddr());
+  delayedPrivatizationInfo.symbols.push_back(sym);
 }
 
 } // namespace omp
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
index 10c0a30c09c391..9f7301df07598f 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
@@ -23,6 +23,24 @@ namespace lower {
 namespace omp {
 
 class DataSharingProcessor {
+public:
+  /// Collects all the information needed for delayed privatization. This can be
+  /// used by ops with data-sharing clauses to properly generate their regions
+  /// (e.g. add region arguments) and map the original SSA values to their
+  /// corresponding OMP region operands.
+  struct DelayedPrivatizationInfo {
+    // The list of symbols referring to delayed privatizer ops (i.e.
+    // `omp.private` ops).
+    llvm::SmallVector privatizers;
+    // SSA values that correspond to "original" values being privatized.
+    // "Original" here means the SSA value outside the OpenMP region from which
+    // a clone is created inside the region.
+    llvm::SmallVector originalAddresses;
+    // Fortran symbols corresponding to the above SSA values.
+    llvm::SmallVector symbols;
+  };
+
+private:
   bool hasLastPrivateOp;
   mlir::OpBuilder::InsertPoint lastPrivIP;
   mlir::OpBuilder::InsertPoint insPt;
@@ -36,6 +54,9 @@ class DataSharingProcessor {
   fir::FirOpBuilder &firOpBuilder;
   const Fortran::parser::OmpClauseList &opClauseList;
   Fortran::lower::pft::Evaluation &eval;
+  bool useDelayedPrivatization;
+  Fortran::lower::SymMap *symTable;
+  DelayedPrivatizationInfo delayedPrivatizationInfo;
 
   bool needBarrier();
   void collectSymbols(Fortran::semantics::Symbol::Flag flag);
@@ -47,10 +68,13 @@ class DataSharingProcessor {
   void collectDefaultSymbols();
   void privatize();
   void defaultPrivatize();
+  void doPrivatize(const Fortran::semantics::Symbol *sym);
   void copyLastPrivatize(mlir::Operation *op);
   void insertLastPrivateCompare(mlir::Operation *op);
   void cloneSymbol(const Fortran::semantics::Symbol *sym);
-  void copyFirstPrivateSymbol(const Fortran::semantics::Symbol *sym);
+  void
+  copyFirstPrivateSymbol(const Fortran::semantics::Symbol *sym,
+                         mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr);
   void copyLastPrivateSymbol(const Fortran::semantics::Symbol *sym,
                              mlir::OpBuilder::InsertPoint *lastPrivIP);
   void insertDeallocs();
@@ -58,10 +82,14 @@ class DataSharingProcessor {
 public:
   DataSharingProcessor(Fortran::lower::AbstractConverter &converter,
                        const Fortran::parser::OmpClauseList &opClauseList,
-                       Fortran::lower::pft::Evaluation &eval)
+                       Fortran::lower::pft::Evaluation &eval,
+                       bool useDelayedPrivatization = false,
+                       Fortran::lower::SymMap *symTable = nullptr)
       : hasLastPrivateOp(false), converter(converter),
         firOpBuilder(converter.getFirOpBuilder()), opClauseList(opClauseList),
-        eval(eval) {}
+        eval(eval), useDelayedPrivatization(useDelayedPrivatization),
+        symTable(symTable) {}
+
   // Privatisation is split into two steps.
   // Step1 performs cloning of all privatisation clauses and copying for
   // firstprivates. Step1 is performed at the place where process/processStep1
@@ -80,6 +108,10 @@ class DataSharingProcessor {
     assert(!loopIV && "Loop iteration variable already set");
     loopIV = iv;
   }
+
+  const DelayedPrivatizationInfo &getDelayedPrivatizationInfo() const {
+    return delayedPrivatizationInfo;
+  }
 };
 
 } // namespace omp
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 7953bf83cba0fe..21ad51c53d8742 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -558,6 +558,7 @@ genOrderedRegionOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::ParallelOp
 genParallelOp(Fortran::lower::AbstractConverter &converter,
+              Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, bool genNested,
               mlir::Location currentLocation,
@@ -590,8 +591,8 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
   auto reductionCallback = [&](mlir::Operation *op) {
     llvm::SmallVector locs(reductionVars.size(),
                                            currentLocation);
-    auto block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {},
-                                                         reductionTypes, locs);
+    auto *block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {},
+                                                          reductionTypes, locs);
     for (auto [arg, prv] :
          llvm::zip_equal(reductionSymbols, block->getArguments())) {
       converter.bindSymbol(*arg, prv);
@@ -599,13 +600,78 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
     return reductionSymbols;
   };
 
-  return genOpWithBody(
+  OpWithBodyGenInfo genInfo =
       OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
           .setGenNested(genNested)
           .setOuterCombined(outerCombined)
           .setClauses(&clauseList)
           .setReductions(&reductionSymbols, &reductionTypes)
-          .setGenRegionEntryCb(reductionCallback),
+          .setGenRegionEntryCb(reductionCallback);
+
+  if (!enableDelayedPrivatization) {
+    return genOpWithBody(
+        genInfo,
+        /*resultTypes=*/mlir::TypeRange(), ifClauseOperand,
+        numThreadsClauseOperand, allocateOperands, allocatorOperands,
+        reductionVars,
+        reductionDeclSymbols.empty()
+            ? nullptr
+            : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
+                                   reductionDeclSymbols),
+        procBindKindAttr, /*private_vars=*/llvm::SmallVector{},
+        /*privatizers=*/nullptr);
+  }
+
+  bool privatize = !outerCombined;
+  DataSharingProcessor dsp(converter, clauseList, eval,
+                           /*useDelayedPrivatization=*/true, &symTable);
+
+  if (privatize)
+    dsp.processStep1();
+
+  const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo();
+
+  auto genRegionEntryCB = [&](mlir::Operation *op) {
+    auto parallelOp = llvm::cast(op);
+
+    llvm::SmallVector reductionLocs(reductionVars.size(),
+                                                    currentLocation);
+
+    mlir::OperandRange privateVars = parallelOp.getPrivateVars();
+    mlir::Region ®ion = parallelOp.getRegion();
+
+    llvm::SmallVector privateVarTypes = reductionTypes;
+    privateVarTypes.reserve(privateVarTypes.size() + privateVars.size());
+    llvm::transform(privateVars, std::back_inserter(privateVarTypes),
+                    [](mlir::Value v) { return v.getType(); });
+
+    llvm::SmallVector privateVarLocs = reductionLocs;
+    privateVarLocs.reserve(privateVarLocs.size() + privateVars.size());
+    llvm::transform(privateVars, std::back_inserter(privateVarLocs),
+                    [](mlir::Value v) { return v.getLoc(); });
+
+    converter.getFirOpBuilder().createBlock(®ion, /*insertPt=*/{},
+                                            privateVarTypes, privateVarLocs);
+
+    llvm::SmallVector allSymbols =
+        reductionSymbols;
+    allSymbols.append(delayedPrivatizationInfo.symbols);
+    for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) {
+      converter.bindSymbol(*arg, prv);
+    }
+
+    return allSymbols;
+  };
+
+  // TODO Merge with the reduction CB.
+  genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp);
+
+  llvm::SmallVector privatizers(
+      delayedPrivatizationInfo.privatizers.begin(),
+      delayedPrivatizationInfo.privatizers.end());
+
+  return genOpWithBody(
+      genInfo,
       /*resultTypes=*/mlir::TypeRange(), ifClauseOperand,
       numThreadsClauseOperand, allocateOperands, allocatorOperands,
       reductionVars,
@@ -613,8 +679,11 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
           ? nullptr
           : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
                                  reductionDeclSymbols),
-      procBindKindAttr, /*private_vars=*/llvm::SmallVector{},
-      /*privatizers=*/nullptr);
+      procBindKindAttr, delayedPrivatizationInfo.originalAddresses,
+      delayedPrivatizationInfo.privatizers.empty()
+          ? nullptr
+          : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
+                                 privatizers));
 }
 
 static mlir::omp::SectionOp
@@ -1621,7 +1690,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
     if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet)
             .test(ompDirective)) {
       validDirective = true;
-      genParallelOp(converter, semaCtx, eval, /*genNested=*/false,
+      genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false,
                     currentLocation, loopOpClauseList,
                     /*outerCombined=*/true);
     }
@@ -1711,8 +1780,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
                        currentLocation);
     break;
   case llvm::omp::Directive::OMPD_parallel:
-    genParallelOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation,
-                  beginClauseList);
+    genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/true,
+                  currentLocation, beginClauseList);
     break;
   case llvm::omp::Directive::OMPD_single:
     genSingleOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation,
@@ -1769,7 +1838,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
           .test(directive.v)) {
     bool outerCombined =
         directive.v != llvm::omp::Directive::OMPD_target_parallel;
-    genParallelOp(converter, semaCtx, eval, /*genNested=*/false,
+    genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false,
                   currentLocation, beginClauseList, outerCombined);
     combinedDirective = true;
   }
@@ -1852,7 +1921,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
 
   // Parallel wrapper of PARALLEL SECTIONS construct
   if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
-    genParallelOp(converter, semaCtx, eval,
+    genParallelOp(converter, symTable, semaCtx, eval,
                   /*genNested=*/false, currentLocation, sectionsClauseList,
                   /*outerCombined=*/true);
   } else {
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 31b15257d18687..49517f62895dfe 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -24,6 +24,12 @@ llvm::cl::opt treatIndexAsSection(
     llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."),
     llvm::cl::init(true));
 
+llvm::cl::opt enableDelayedPrivatization(
+    "openmp-enable-delayed-privatization",
+    llvm::cl::desc(
+        "Emit `[first]private` variables as clauses on the MLIR ops."),
+    llvm::cl::init(false));
+
 namespace Fortran {
 namespace lower {
 namespace omp {
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index c346f891f0797e..f57cd7420ce4d5 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -15,6 +15,7 @@
 #include "llvm/Support/CommandLine.h"
 
 extern llvm::cl::opt treatIndexAsSection;
+extern llvm::cl::opt enableDelayedPrivatization;
 
 namespace fir {
 class FirOpBuilder;
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index 3cce39f5b8c782..788c99e40105a2 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -18,6 +18,7 @@
 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
 #include "flang/Optimizer/Support/FatalError.h"
 #include "flang/Optimizer/Support/InternalNames.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/OpenACC/OpenACC.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -1533,3 +1534,10 @@ mlir::Value fir::factory::createNullBoxProc(fir::FirOpBuilder &builder,
   mlir::Value initVal{builder.create(loc, boxEleTy)};
   return builder.create(loc, boxTy, initVal);
 }
+
+void fir::factory::setInternalLinkage(mlir::func::FuncOp func) {
+  auto internalLinkage = mlir::LLVM::linkage::Linkage::Internal;
+  auto linkage =
+      mlir::LLVM::LinkageAttr::get(func->getContext(), internalLinkage);
+  func->setAttr("llvm.linkage", linkage);
+}
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index c84fb27cb38da5..837ef0576ef696 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -1834,10 +1834,7 @@ mlir::func::FuncOp IntrinsicLibrary::getWrapper(GeneratorType generator,
     // First time this wrapper is needed, build it.
     function = builder.createFunction(loc, wrapperName, funcType);
     function->setAttr("fir.intrinsic", builder.getUnitAttr());
-    auto internalLinkage = mlir::LLVM::linkage::Linkage::Internal;
-    auto linkage =
-        mlir::LLVM::LinkageAttr::get(builder.getContext(), internalLinkage);
-    function->setAttr("llvm.linkage", linkage);
+    fir::factory::setInternalLinkage(function);
     function.addEntryBlock();
 
     // Create local context to emit code into the newly created function
diff --git a/flang/lib/Optimizer/Builder/MutableBox.cpp b/flang/lib/Optimizer/Builder/MutableBox.cpp
index 4d8860b60915c4..d4012e9c3d9d93 100644
--- a/flang/lib/Optimizer/Builder/MutableBox.cpp
+++ b/flang/lib/Optimizer/Builder/MutableBox.cpp
@@ -674,7 +674,7 @@ void fir::factory::disassociateMutableBox(fir::FirOpBuilder &builder,
     // 7.3.2.3 point 7. The dynamic type of a disassociated pointer is the
     // same as its declared type.
     auto boxTy = box.getBoxTy().dyn_cast();
-    auto eleTy = fir::dyn_cast_ptrOrBoxEleTy(boxTy.getEleTy());
+    auto eleTy = fir::unwrapPassByRefType(boxTy.getEleTy());
     mlir::Type derivedType = fir::getDerivedType(eleTy);
     if (auto recTy = derivedType.dyn_cast()) {
       fir::runtime::genNullifyDerivedType(builder, loc, box.getAddr(), recTy,
diff --git a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
index fabbff818b6f0e..66fbaddcbda1aa 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
@@ -149,6 +149,22 @@ struct ForcedNorm2Real16 {
   }
 };
 
+/// Placeholder for real*16 version of Norm2Dim Intrinsic
+struct ForcedNorm2DimReal16 {
+  static constexpr const char *name = ExpandAndQuoteKey(RTNAME(Norm2DimReal16));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      auto boxTy =
+          fir::runtime::getModel()(ctx);
+      auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+      auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+      return mlir::FunctionType::get(
+          ctx, {fir::ReferenceType::get(boxTy), boxTy, intTy, strTy, intTy},
+          mlir::NoneType::get(ctx));
+    };
+  }
+};
+
 /// Placeholder for real*10 version of Product Intrinsic
 struct ForcedProductReal10 {
   static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ProductReal10));
@@ -876,7 +892,14 @@ mlir::Value fir::runtime::genMinval(fir::FirOpBuilder &builder,
 void fir::runtime::genNorm2Dim(fir::FirOpBuilder &builder, mlir::Location loc,
                                mlir::Value resultBox, mlir::Value arrayBox,
                                mlir::Value dim) {
-  auto func = fir::runtime::getRuntimeFunc(loc, builder);
+  mlir::func::FuncOp func;
+  auto ty = arrayBox.getType();
+  auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty);
+  auto eleTy = arrTy.cast().getEleTy();
+  if (eleTy.isF128())
+    func = fir::runtime::getRuntimeFunc(loc, builder);
+  else
+    func = fir::runtime::getRuntimeFunc(loc, builder);
   auto fTy = func.getFunctionType();
   auto sourceFile = fir::factory::locationToFilename(builder, loc);
   auto sourceLine =
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp
index 314e4264c17e83..377cc44392028f 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp
@@ -176,7 +176,14 @@ class HlfirIntrinsicConversion : public mlir::OpRewritePattern {
           rewriter.eraseOp(use);
       }
     }
-    rewriter.replaceAllUsesWith(op->getResults(), {base});
+    // TODO: This entire pass should be a greedy pattern rewrite or a manual
+    // IR traversal. A dialect conversion cannot be used here because
+    // `replaceAllUsesWith` is not supported. Similarly, `replaceOp` is not
+    // suitable because "op->getResult(0)" and "base" can have different types.
+    // In such a case, the dialect conversion will attempt to convert the type,
+    // but no type converter is specified in this pass. Also note that all
+    // patterns in this pass are actually rewrite patterns.
+    op->getResult(0).replaceAllUsesWith(base);
     rewriter.replaceOp(op, base);
   }
 };
diff --git a/flang/runtime/Float128Math/CMakeLists.txt b/flang/runtime/Float128Math/CMakeLists.txt
index 8d276e8f122728..f11678cd70b769 100644
--- a/flang/runtime/Float128Math/CMakeLists.txt
+++ b/flang/runtime/Float128Math/CMakeLists.txt
@@ -69,6 +69,7 @@ set(sources
   log.cpp
   log10.cpp
   lround.cpp
+  norm2.cpp
   pow.cpp
   round.cpp
   sin.cpp
diff --git a/flang/runtime/Float128Math/cabs.cpp b/flang/runtime/Float128Math/cabs.cpp
index 63f2bdf8e177ae..827b197a6a81ae 100644
--- a/flang/runtime/Float128Math/cabs.cpp
+++ b/flang/runtime/Float128Math/cabs.cpp
@@ -10,15 +10,15 @@
 
 namespace Fortran::runtime {
 extern "C" {
-
+#if 0
+// FIXME: temporarily disabled. Need to add pure C entry point
+// using C _Complex ABI.
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-// FIXME: the argument should be CppTypeFor,
-// and it should be translated into the underlying library's
-// corresponding complex128 type.
-CppTypeFor RTDEF(CAbsF128)(ComplexF128 x) {
+// NOTE: Flang calls the runtime APIs using C _Complex ABI
+CppTypeFor RTDEF(CAbsF128)(CFloat128ComplexType x) {
   return CAbs::invoke(x);
 }
 #endif
-
+#endif
 } // extern "C"
 } // namespace Fortran::runtime
diff --git a/flang/runtime/Float128Math/math-entries.h b/flang/runtime/Float128Math/math-entries.h
index fe1525468edcaf..a0d81d0cbb5407 100644
--- a/flang/runtime/Float128Math/math-entries.h
+++ b/flang/runtime/Float128Math/math-entries.h
@@ -54,6 +54,7 @@ namespace Fortran::runtime {
   };
 
 // Define fallback callers.
+DEFINE_FALLBACK(Abs)
 DEFINE_FALLBACK(Acos)
 DEFINE_FALLBACK(Acosh)
 DEFINE_FALLBACK(Asin)
@@ -61,7 +62,6 @@ DEFINE_FALLBACK(Asinh)
 DEFINE_FALLBACK(Atan)
 DEFINE_FALLBACK(Atan2)
 DEFINE_FALLBACK(Atanh)
-DEFINE_FALLBACK(CAbs)
 DEFINE_FALLBACK(Ceil)
 DEFINE_FALLBACK(Cos)
 DEFINE_FALLBACK(Cosh)
@@ -91,15 +91,6 @@ DEFINE_FALLBACK(Y0)
 DEFINE_FALLBACK(Y1)
 DEFINE_FALLBACK(Yn)
 
-// Define ComplexF128 type that is compatible with
-// the type of results/arguments of libquadmath.
-// TODO: this may need more work for other libraries/compilers.
-#if !defined(_ARCH_PPC) || defined(__LONG_DOUBLE_IEEE128__)
-typedef _Complex float __attribute__((mode(TC))) ComplexF128;
-#else
-typedef _Complex float __attribute__((mode(KC))) ComplexF128;
-#endif
-
 #if HAS_LIBM
 // Define wrapper callers for libm.
 #include 
@@ -109,6 +100,7 @@ typedef _Complex float __attribute__((mode(KC))) ComplexF128;
 // Use STD math functions. They provide IEEE-754 128-bit float
 // support either via 'long double' or __float128.
 // The Bessel's functions are not present in STD namespace.
+DEFINE_SIMPLE_ALIAS(Abs, std::abs)
 DEFINE_SIMPLE_ALIAS(Acos, std::acos)
 DEFINE_SIMPLE_ALIAS(Acosh, std::acosh)
 DEFINE_SIMPLE_ALIAS(Asin, std::asin)
@@ -165,6 +157,7 @@ DEFINE_SIMPLE_ALIAS(Yn, ynl)
 #elif HAS_QUADMATHLIB
 // Define wrapper callers for libquadmath.
 #include "quadmath.h"
+DEFINE_SIMPLE_ALIAS(Abs, fabsq)
 DEFINE_SIMPLE_ALIAS(Acos, acosq)
 DEFINE_SIMPLE_ALIAS(Acosh, acoshq)
 DEFINE_SIMPLE_ALIAS(Asin, asinq)
@@ -172,7 +165,6 @@ DEFINE_SIMPLE_ALIAS(Asinh, asinhq)
 DEFINE_SIMPLE_ALIAS(Atan, atanq)
 DEFINE_SIMPLE_ALIAS(Atan2, atan2q)
 DEFINE_SIMPLE_ALIAS(Atanh, atanhq)
-DEFINE_SIMPLE_ALIAS(CAbs, cabsq)
 DEFINE_SIMPLE_ALIAS(Ceil, ceilq)
 DEFINE_SIMPLE_ALIAS(Cos, cosq)
 DEFINE_SIMPLE_ALIAS(Cosh, coshq)
@@ -202,6 +194,19 @@ DEFINE_SIMPLE_ALIAS(Y0, y0q)
 DEFINE_SIMPLE_ALIAS(Y1, y1q)
 DEFINE_SIMPLE_ALIAS(Yn, ynq)
 #endif
+
+extern "C" {
+// Declarations of the entry points that might be referenced
+// within the Float128Math library itself.
+// Note that not all of these entry points are actually
+// defined in this library. Some of them are used just
+// as template parameters to call the corresponding callee directly.
+CppTypeFor RTDECL(AbsF128)(
+    CppTypeFor x);
+CppTypeFor RTDECL(SqrtF128)(
+    CppTypeFor x);
+} // extern "C"
+
 } // namespace Fortran::runtime
 
 #endif // FORTRAN_RUNTIME_FLOAT128MATH_MATH_ENTRIES_H_
diff --git a/flang/runtime/Float128Math/norm2.cpp b/flang/runtime/Float128Math/norm2.cpp
new file mode 100644
index 00000000000000..17453bd2d6cbd7
--- /dev/null
+++ b/flang/runtime/Float128Math/norm2.cpp
@@ -0,0 +1,59 @@
+//===-- runtime/Float128Math/norm2.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "math-entries.h"
+#include "reduction-templates.h"
+#include 
+
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+
+namespace {
+using namespace Fortran::runtime;
+
+using AccumType = Norm2AccumType<16>;
+
+struct ABSTy {
+  static AccumType compute(AccumType x) {
+    return Sqrt::invoke(x);
+  }
+};
+
+struct SQRTTy {
+  static AccumType compute(AccumType x) {
+    return Sqrt::invoke(x);
+  }
+};
+
+using Float128Norm2Accumulator = Norm2Accumulator<16, ABSTy, SQRTTy>;
+} // namespace
+
+namespace Fortran::runtime {
+extern "C" {
+
+CppTypeFor RTDEF(Norm2_16)(
+    const Descriptor &x, const char *source, int line, int dim) {
+  auto accumulator{::Float128Norm2Accumulator(x)};
+  return GetTotalReduction(
+      x, source, line, dim, nullptr, accumulator, "NORM2");
+}
+
+void RTDEF(Norm2DimReal16)(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line) {
+  Terminator terminator{source, line};
+  auto type{x.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator, type);
+  RUNTIME_CHECK(
+      terminator, type->first == TypeCategory::Real && type->second == 16);
+  DoMaxMinNorm2(
+      result, x, dim, nullptr, "NORM2", terminator);
+}
+
+} // extern "C"
+} // namespace Fortran::runtime
+
+#endif
diff --git a/flang/runtime/complex-reduction.c b/flang/runtime/complex-reduction.c
index d77e1c0a550069..72c31ce08b875a 100644
--- a/flang/runtime/complex-reduction.c
+++ b/flang/runtime/complex-reduction.c
@@ -19,6 +19,11 @@ struct CppComplexDouble {
 struct CppComplexLongDouble {
   long double r, i;
 };
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+struct CppComplexFloat128 {
+  CFloat128Type r, i;
+};
+#endif
 
 /* Not all environments define CMPLXF, CMPLX, CMPLXL. */
 
@@ -70,6 +75,29 @@ static long_double_Complex_t CMPLXL(long double r, long double i) {
 #endif
 #endif
 
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+#ifndef CMPLXF128
+/*
+ * GCC 7.4.0 (currently minimum GCC version for llvm builds)
+ * supports __builtin_complex. For Clang, require >=12.0.
+ * Otherwise, rely on the memory layout compatibility.
+ */
+#if (defined(__clang_major__) && (__clang_major__ >= 12)) || defined(__GNUC__)
+#define CMPLXF128 __builtin_complex
+#else
+static CFloat128ComplexType CMPLXF128(CFloat128Type r, CFloat128Type i) {
+  union {
+    struct CppComplexFloat128 x;
+    CFloat128ComplexType result;
+  } u;
+  u.x.r = r;
+  u.x.i = i;
+  return u.result;
+}
+#endif
+#endif
+#endif
+
 /* RTNAME(SumComplex4) calls RTNAME(CppSumComplex4) with the same arguments
  * and converts the members of its C++ complex result to C _Complex.
  */
@@ -93,9 +121,10 @@ ADAPT_REDUCTION(SumComplex8, double_Complex_t, CppComplexDouble, CMPLX,
 #if LDBL_MANT_DIG == 64
 ADAPT_REDUCTION(SumComplex10, long_double_Complex_t, CppComplexLongDouble,
     CMPLXL, REDUCTION_ARGS, REDUCTION_ARG_NAMES)
-#elif LDBL_MANT_DIG == 113
-ADAPT_REDUCTION(SumComplex16, long_double_Complex_t, CppComplexLongDouble,
-    CMPLXL, REDUCTION_ARGS, REDUCTION_ARG_NAMES)
+#endif
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+ADAPT_REDUCTION(SumComplex16, CFloat128ComplexType, CppComplexFloat128,
+    CMPLXF128, REDUCTION_ARGS, REDUCTION_ARG_NAMES)
 #endif
 
 /* PRODUCT() */
@@ -106,9 +135,10 @@ ADAPT_REDUCTION(ProductComplex8, double_Complex_t, CppComplexDouble, CMPLX,
 #if LDBL_MANT_DIG == 64
 ADAPT_REDUCTION(ProductComplex10, long_double_Complex_t, CppComplexLongDouble,
     CMPLXL, REDUCTION_ARGS, REDUCTION_ARG_NAMES)
-#elif LDBL_MANT_DIG == 113
-ADAPT_REDUCTION(ProductComplex16, long_double_Complex_t, CppComplexLongDouble,
-    CMPLXL, REDUCTION_ARGS, REDUCTION_ARG_NAMES)
+#endif
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+ADAPT_REDUCTION(ProductComplex16, CFloat128ComplexType, CppComplexFloat128,
+    CMPLXF128, REDUCTION_ARGS, REDUCTION_ARG_NAMES)
 #endif
 
 /* DOT_PRODUCT() */
@@ -119,7 +149,8 @@ ADAPT_REDUCTION(DotProductComplex8, double_Complex_t, CppComplexDouble, CMPLX,
 #if LDBL_MANT_DIG == 64
 ADAPT_REDUCTION(DotProductComplex10, long_double_Complex_t,
     CppComplexLongDouble, CMPLXL, DOT_PRODUCT_ARGS, DOT_PRODUCT_ARG_NAMES)
-#elif LDBL_MANT_DIG == 113
-ADAPT_REDUCTION(DotProductComplex16, long_double_Complex_t,
-    CppComplexLongDouble, CMPLXL, DOT_PRODUCT_ARGS, DOT_PRODUCT_ARG_NAMES)
+#endif
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+ADAPT_REDUCTION(DotProductComplex16, CFloat128ComplexType, CppComplexFloat128,
+    CMPLXF128, DOT_PRODUCT_ARGS, DOT_PRODUCT_ARG_NAMES)
 #endif
diff --git a/flang/runtime/complex-reduction.h b/flang/runtime/complex-reduction.h
index 5c4f1f5126e393..1d37b235d5194b 100644
--- a/flang/runtime/complex-reduction.h
+++ b/flang/runtime/complex-reduction.h
@@ -15,6 +15,7 @@
 #ifndef FORTRAN_RUNTIME_COMPLEX_REDUCTION_H_
 #define FORTRAN_RUNTIME_COMPLEX_REDUCTION_H_
 
+#include "flang/Common/float128.h"
 #include "flang/Runtime/entry-names.h"
 #include 
 
@@ -40,14 +41,18 @@ float_Complex_t RTNAME(SumComplex3)(REDUCTION_ARGS);
 float_Complex_t RTNAME(SumComplex4)(REDUCTION_ARGS);
 double_Complex_t RTNAME(SumComplex8)(REDUCTION_ARGS);
 long_double_Complex_t RTNAME(SumComplex10)(REDUCTION_ARGS);
-long_double_Complex_t RTNAME(SumComplex16)(REDUCTION_ARGS);
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+CFloat128ComplexType RTNAME(SumComplex16)(REDUCTION_ARGS);
+#endif
 
 float_Complex_t RTNAME(ProductComplex2)(REDUCTION_ARGS);
 float_Complex_t RTNAME(ProductComplex3)(REDUCTION_ARGS);
 float_Complex_t RTNAME(ProductComplex4)(REDUCTION_ARGS);
 double_Complex_t RTNAME(ProductComplex8)(REDUCTION_ARGS);
 long_double_Complex_t RTNAME(ProductComplex10)(REDUCTION_ARGS);
-long_double_Complex_t RTNAME(ProductComplex16)(REDUCTION_ARGS);
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+CFloat128ComplexType RTNAME(ProductComplex16)(REDUCTION_ARGS);
+#endif
 
 #define DOT_PRODUCT_ARGS \
   const struct CppDescriptor *x, const struct CppDescriptor *y, \
@@ -60,6 +65,8 @@ float_Complex_t RTNAME(DotProductComplex3)(DOT_PRODUCT_ARGS);
 float_Complex_t RTNAME(DotProductComplex4)(DOT_PRODUCT_ARGS);
 double_Complex_t RTNAME(DotProductComplex8)(DOT_PRODUCT_ARGS);
 long_double_Complex_t RTNAME(DotProductComplex10)(DOT_PRODUCT_ARGS);
-long_double_Complex_t RTNAME(DotProductComplex16)(DOT_PRODUCT_ARGS);
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+CFloat128ComplexType RTNAME(DotProductComplex16)(DOT_PRODUCT_ARGS);
+#endif
 
 #endif // FORTRAN_RUNTIME_COMPLEX_REDUCTION_H_
diff --git a/flang/runtime/extrema.cpp b/flang/runtime/extrema.cpp
index 3fdc8e159866d1..fc2b4e165cb269 100644
--- a/flang/runtime/extrema.cpp
+++ b/flang/runtime/extrema.cpp
@@ -528,35 +528,6 @@ inline RT_API_ATTRS CppTypeFor TotalNumericMaxOrMin(
       NumericExtremumAccumulator{x}, intrinsic);
 }
 
-template 
-static RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x,
-    int dim, const Descriptor *mask, const char *intrinsic,
-    Terminator &terminator) {
-  using Type = CppTypeFor;
-  ACCUMULATOR accumulator{x};
-  if (dim == 0 || x.rank() == 1) {
-    // Total reduction
-
-    // Element size of the destination descriptor is the same
-    // as the element size of the source.
-    result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
-        CFI_attribute_allocatable);
-    if (int stat{result.Allocate()}) {
-      terminator.Crash(
-          "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
-    }
-    DoTotalReduction(x, dim, mask, accumulator, intrinsic, terminator);
-    accumulator.GetResult(result.OffsetElement());
-  } else {
-    // Partial reduction
-
-    // Element size of the destination descriptor is the same
-    // as the element size of the source.
-    PartialReduction(result, x, x.ElementBytes(), dim,
-        mask, terminator, intrinsic, accumulator);
-  }
-}
-
 template  struct MaxOrMinHelper {
   template  struct Functor {
     RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
@@ -802,66 +773,11 @@ RT_EXT_API_GROUP_END
 
 // NORM2
 
-RT_VAR_GROUP_BEGIN
-
-// Use at least double precision for accumulators.
-// Don't use __float128, it doesn't work with abs() or sqrt() yet.
-static constexpr RT_CONST_VAR_ATTRS int largestLDKind {
-#if LDBL_MANT_DIG == 113
-  16
-#elif LDBL_MANT_DIG == 64
-  10
-#else
-  8
-#endif
-};
-
-RT_VAR_GROUP_END
-
-template  class Norm2Accumulator {
-public:
-  using Type = CppTypeFor;
-  using AccumType =
-      CppTypeFor;
-  explicit RT_API_ATTRS Norm2Accumulator(const Descriptor &array)
-      : array_{array} {}
-  RT_API_ATTRS void Reinitialize() { max_ = sum_ = 0; }
-  template 
-  RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
-    // m * sqrt(1 + sum((others(:)/m)**2))
-    *p = static_cast(max_ * std::sqrt(1 + sum_));
-  }
-  RT_API_ATTRS bool Accumulate(Type x) {
-    auto absX{std::abs(static_cast(x))};
-    if (!max_) {
-      max_ = absX;
-    } else if (absX > max_) {
-      auto t{max_ / absX}; // < 1.0
-      auto tsq{t * t};
-      sum_ *= tsq; // scale sum to reflect change to the max
-      sum_ += tsq; // include a term for the previous max
-      max_ = absX;
-    } else { // absX <= max_
-      auto t{absX / max_};
-      sum_ += t * t;
-    }
-    return true;
-  }
-  template 
-  RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
-    return Accumulate(*array_.Element(at));
-  }
-
-private:
-  const Descriptor &array_;
-  AccumType max_{0}; // value (m) with largest magnitude
-  AccumType sum_{0}; // sum((others(:)/m)**2)
-};
-
 template  struct Norm2Helper {
   RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, int dim,
       const Descriptor *mask, Terminator &terminator) const {
-    DoMaxMinNorm2>(
+    DoMaxMinNorm2::Type>(
         result, x, dim, mask, "NORM2", terminator);
   }
 };
@@ -872,26 +788,27 @@ RT_EXT_API_GROUP_BEGIN
 // TODO: REAL(2 & 3)
 CppTypeFor RTDEF(Norm2_4)(
     const Descriptor &x, const char *source, int line, int dim) {
-  return GetTotalReduction(
-      x, source, line, dim, nullptr, Norm2Accumulator<4>{x}, "NORM2");
+  return GetTotalReduction(x, source, line, dim, nullptr,
+      Norm2AccumulatorGetter<4>::create(x), "NORM2");
 }
 CppTypeFor RTDEF(Norm2_8)(
     const Descriptor &x, const char *source, int line, int dim) {
-  return GetTotalReduction(
-      x, source, line, dim, nullptr, Norm2Accumulator<8>{x}, "NORM2");
+  return GetTotalReduction(x, source, line, dim, nullptr,
+      Norm2AccumulatorGetter<8>::create(x), "NORM2");
 }
 #if LDBL_MANT_DIG == 64
 CppTypeFor RTDEF(Norm2_10)(
     const Descriptor &x, const char *source, int line, int dim) {
-  return GetTotalReduction(
-      x, source, line, dim, nullptr, Norm2Accumulator<10>{x}, "NORM2");
+  return GetTotalReduction(x, source, line, dim,
+      nullptr, Norm2AccumulatorGetter<10>::create(x), "NORM2");
 }
 #endif
 #if LDBL_MANT_DIG == 113
+// The __float128 implementation resides in FortranFloat128Math library.
 CppTypeFor RTDEF(Norm2_16)(
     const Descriptor &x, const char *source, int line, int dim) {
-  return GetTotalReduction(
-      x, source, line, dim, nullptr, Norm2Accumulator<16>{x}, "NORM2");
+  return GetTotalReduction(x, source, line, dim,
+      nullptr, Norm2AccumulatorGetter<16>::create(x), "NORM2");
 }
 #endif
 
@@ -901,7 +818,7 @@ void RTDEF(Norm2Dim)(Descriptor &result, const Descriptor &x, int dim,
   auto type{x.type().GetCategoryAndKind()};
   RUNTIME_CHECK(terminator, type);
   if (type->first == TypeCategory::Real) {
-    ApplyFloatingPointKind(
+    ApplyFloatingPointKind(
         type->second, terminator, result, x, dim, nullptr, terminator);
   } else {
     terminator.Crash("NORM2: bad type code %d", x.type().raw());
diff --git a/flang/runtime/product.cpp b/flang/runtime/product.cpp
index a516bc51a959b7..4c3b8c33a12e0f 100644
--- a/flang/runtime/product.cpp
+++ b/flang/runtime/product.cpp
@@ -123,7 +123,8 @@ CppTypeFor RTDEF(ProductReal10)(const Descriptor &x,
       NonComplexProductAccumulator>{x},
       "PRODUCT");
 }
-#elif LDBL_MANT_DIG == 113
+#endif
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
 CppTypeFor RTDEF(ProductReal16)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction(x, source, line, dim, mask,
@@ -154,7 +155,8 @@ void RTDEF(CppProductComplex10)(CppTypeFor &result,
       mask, ComplexProductAccumulator>{x},
       "PRODUCT");
 }
-#elif LDBL_MANT_DIG == 113
+#endif
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
 void RTDEF(CppProductComplex16)(CppTypeFor &result,
     const Descriptor &x, const char *source, int line, int dim,
     const Descriptor *mask) {
diff --git a/flang/runtime/reduction-templates.h b/flang/runtime/reduction-templates.h
index 7d0f82d59a084d..0891bc021ff753 100644
--- a/flang/runtime/reduction-templates.h
+++ b/flang/runtime/reduction-templates.h
@@ -25,6 +25,7 @@
 #include "tools.h"
 #include "flang/Runtime/cpp-type.h"
 #include "flang/Runtime/descriptor.h"
+#include 
 
 namespace Fortran::runtime {
 
@@ -332,5 +333,119 @@ template  struct PartialLocationHelper {
   };
 };
 
+// NORM2 templates
+
+RT_VAR_GROUP_BEGIN
+
+// Use at least double precision for accumulators.
+// Don't use __float128, it doesn't work with abs() or sqrt() yet.
+static constexpr RT_CONST_VAR_ATTRS int Norm2LargestLDKind {
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+  16
+#elif LDBL_MANT_DIG == 64
+  10
+#else
+  8
+#endif
+};
+
+RT_VAR_GROUP_END
+
+template 
+inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x,
+    int dim, const Descriptor *mask, const char *intrinsic,
+    Terminator &terminator) {
+  using Type = CppTypeFor;
+  ACCUMULATOR accumulator{x};
+  if (dim == 0 || x.rank() == 1) {
+    // Total reduction
+
+    // Element size of the destination descriptor is the same
+    // as the element size of the source.
+    result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
+        CFI_attribute_allocatable);
+    if (int stat{result.Allocate()}) {
+      terminator.Crash(
+          "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
+    }
+    DoTotalReduction(x, dim, mask, accumulator, intrinsic, terminator);
+    accumulator.GetResult(result.OffsetElement());
+  } else {
+    // Partial reduction
+
+    // Element size of the destination descriptor is the same
+    // as the element size of the source.
+    PartialReduction(result, x, x.ElementBytes(), dim,
+        mask, terminator, intrinsic, accumulator);
+  }
+}
+
+// The data type used by Norm2Accumulator.
+template 
+using Norm2AccumType =
+    CppTypeFor;
+
+template  class Norm2Accumulator {
+public:
+  using Type = CppTypeFor;
+  using AccumType = Norm2AccumType;
+  explicit RT_API_ATTRS Norm2Accumulator(const Descriptor &array)
+      : array_{array} {}
+  RT_API_ATTRS void Reinitialize() { max_ = sum_ = 0; }
+  template 
+  RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    // m * sqrt(1 + sum((others(:)/m)**2))
+    *p = static_cast(max_ * SQRT::compute(1 + sum_));
+  }
+  RT_API_ATTRS bool Accumulate(Type x) {
+    auto absX{ABS::compute(static_cast(x))};
+    if (!max_) {
+      max_ = absX;
+    } else if (absX > max_) {
+      auto t{max_ / absX}; // < 1.0
+      auto tsq{t * t};
+      sum_ *= tsq; // scale sum to reflect change to the max
+      sum_ += tsq; // include a term for the previous max
+      max_ = absX;
+    } else { // absX <= max_
+      auto t{absX / max_};
+      sum_ += t * t;
+    }
+    return true;
+  }
+  template 
+  RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
+    return Accumulate(*array_.Element(at));
+  }
+
+private:
+  const Descriptor &array_;
+  AccumType max_{0}; // value (m) with largest magnitude
+  AccumType sum_{0}; // sum((others(:)/m)**2)
+};
+
+// Helper class for creating Norm2Accumulator instance
+// based on the given KIND. This helper returns and instance
+// that uses std::abs and std::sqrt for the computations.
+template  class Norm2AccumulatorGetter {
+  using AccumType = Norm2AccumType;
+
+public:
+  struct ABSTy {
+    static constexpr RT_API_ATTRS AccumType compute(AccumType &&x) {
+      return std::abs(std::forward(x));
+    }
+  };
+  struct SQRTTy {
+    static constexpr RT_API_ATTRS AccumType compute(AccumType &&x) {
+      return std::sqrt(std::forward(x));
+    }
+  };
+
+  using Type = Norm2Accumulator;
+
+  static RT_API_ATTRS Type create(const Descriptor &x) { return Type(x); }
+};
+
 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
diff --git a/flang/runtime/sum.cpp b/flang/runtime/sum.cpp
index 048399737c8501..d2495e3e956fe6 100644
--- a/flang/runtime/sum.cpp
+++ b/flang/runtime/sum.cpp
@@ -175,7 +175,8 @@ void RTDEF(CppSumComplex10)(CppTypeFor &result,
   result = GetTotalReduction(
       x, source, line, dim, mask, ComplexSumAccumulator{x}, "SUM");
 }
-#elif LDBL_MANT_DIG == 113
+#endif
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
 void RTDEF(CppSumComplex16)(CppTypeFor &result,
     const Descriptor &x, const char *source, int line, int dim,
     const Descriptor *mask) {
diff --git a/flang/runtime/tools.h b/flang/runtime/tools.h
index 89e5069995748b..c1f89cadca06e7 100644
--- a/flang/runtime/tools.h
+++ b/flang/runtime/tools.h
@@ -266,7 +266,8 @@ inline RT_API_ATTRS RESULT ApplyIntegerKind(
   }
 }
 
-template