From 82f86ae01a54ff8e3a5aaefd24745ef2b7b917ba Mon Sep 17 00:00:00 2001
From: Saleem Abdulrasool <compnerd@compnerd.org>
Date: Mon, 28 Sep 2020 20:26:02 +0000
Subject: [PATCH 01/31] APINotes: add APINotesYAMLCompiler

This adds the skeleton of the YAML Compiler for APINotes.  This change
only adds the YAML IO model for the API Notes along with a new testing
tool `apinotes-test` which can be used to verify that we can round trip the
YAML content properly.  It provides the basis for the future work which
will add a binary serialization and deserialization format to the data
model.

This is based on the code contributed by Apple at
https://github.com/llvm/llvm-project-staging/tree/staging/swift/apinotes.

Differential Revision: https://reviews.llvm.org/D88859
Reviewed By: Gabor Marton
---
 .../clang/APINotes/APINotesYAMLCompiler.h     |  24 +
 clang/include/clang/APINotes/Types.h          |  40 ++
 clang/lib/APINotes/APINotesYAMLCompiler.cpp   | 597 ++++++++++++++++++
 clang/lib/APINotes/CMakeLists.txt             |   6 +
 clang/lib/CMakeLists.txt                      |   1 +
 .../Simple.framework/Headers/Simple.apinotes  |  28 +
 .../Simple.framework/Headers/Simple.h         |  19 +
 .../Headers/SimpleKit.apinotes                |  48 ++
 .../SimpleKit.framework/Headers/SimpleKit.h   |  29 +
 .../Headers/module.modulemap                  |   5 +
 clang/test/APINotes/yaml-roundtrip-2.test     |  11 +
 clang/test/APINotes/yaml-roundtrip.test       |  26 +
 clang/test/CMakeLists.txt                     |   1 +
 clang/test/lit.cfg.py                         |   3 +-
 clang/tools/CMakeLists.txt                    |   1 +
 clang/tools/apinotes-test/APINotesTest.cpp    |  53 ++
 clang/tools/apinotes-test/CMakeLists.txt      |   6 +
 17 files changed, 897 insertions(+), 1 deletion(-)
 create mode 100644 clang/include/clang/APINotes/APINotesYAMLCompiler.h
 create mode 100644 clang/include/clang/APINotes/Types.h
 create mode 100644 clang/lib/APINotes/APINotesYAMLCompiler.cpp
 create mode 100644 clang/lib/APINotes/CMakeLists.txt
 create mode 100644 clang/test/APINotes/Inputs/Frameworks/Simple.framework/Headers/Simple.apinotes
 create mode 100644 clang/test/APINotes/Inputs/Frameworks/Simple.framework/Headers/Simple.h
 create mode 100644 clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Headers/SimpleKit.apinotes
 create mode 100644 clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Headers/SimpleKit.h
 create mode 100644 clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Headers/module.modulemap
 create mode 100644 clang/test/APINotes/yaml-roundtrip-2.test
 create mode 100644 clang/test/APINotes/yaml-roundtrip.test
 create mode 100644 clang/tools/apinotes-test/APINotesTest.cpp
 create mode 100644 clang/tools/apinotes-test/CMakeLists.txt

diff --git a/clang/include/clang/APINotes/APINotesYAMLCompiler.h b/clang/include/clang/APINotes/APINotesYAMLCompiler.h
new file mode 100644
index 00000000000000..6098d0ee36fc47
--- /dev/null
+++ b/clang/include/clang/APINotes/APINotesYAMLCompiler.h
@@ -0,0 +1,24 @@
+//===-- APINotesYAMLCompiler.h - API Notes YAML Format Reader ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_APINOTES_APINOTESYAMLCOMPILER_H
+#define LLVM_CLANG_APINOTES_APINOTESYAMLCOMPILER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang {
+namespace api_notes {
+/// Parses the APINotes YAML content in \p YI and writes the parsed model back
+/// to \p OS as YAML, as a means of testing the YAML processing of the APINotes
+/// format.  Returns true on parse error (\p OS untouched), else false.
+bool parseAndDumpAPINotes(llvm::StringRef YI, llvm::raw_ostream &OS);
+} // namespace api_notes
+} // namespace clang
+
+#endif
diff --git a/clang/include/clang/APINotes/Types.h b/clang/include/clang/APINotes/Types.h
new file mode 100644
index 00000000000000..be2a99ad6fd080
--- /dev/null
+++ b/clang/include/clang/APINotes/Types.h
@@ -0,0 +1,40 @@
+//===-- Types.h - API Notes Data Types --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_APINOTES_TYPES_H
+#define LLVM_CLANG_APINOTES_TYPES_H
+
+namespace clang {
+namespace api_notes {
+enum class RetainCountConventionKind { // "RetainCountConvention" key payload.
+  None,
+  CFReturnsRetained,
+  CFReturnsNotRetained,
+  NSReturnsRetained,
+  NSReturnsNotRetained,
+};
+
+/// The payload for an enum_extensibility attribute. This is a tri-state rather
+/// than just a boolean because the presence of the attribute indicates
+/// auditing.
+enum class EnumExtensibilityKind {
+  None,
+  Open,
+  Closed,
+};
+
+/// The kind of a swift_wrapper/swift_newtype.
+enum class SwiftNewTypeKind {
+  None,
+  Struct,
+  Enum,
+};
+} // namespace api_notes
+} // namespace clang
+
+#endif
diff --git a/clang/lib/APINotes/APINotesYAMLCompiler.cpp b/clang/lib/APINotes/APINotesYAMLCompiler.cpp
new file mode 100644
index 00000000000000..997929a9bd2206
--- /dev/null
+++ b/clang/lib/APINotes/APINotesYAMLCompiler.cpp
@@ -0,0 +1,597 @@
+//===-- APINotesYAMLCompiler.cpp - API Notes YAML Format Reader -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The types defined locally are designed to represent the YAML state, which
+// adds an additional bit of state: e.g. a tri-state boolean attribute (yes, no,
+// not applied) becomes a tri-state boolean + present.  As a result, while these
+// enumerations appear to be redefining constants from the attributes table
+// data, they are distinct.
+//
+
+#include "clang/APINotes/APINotesYAMLCompiler.h"
+#include "clang/APINotes/Types.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/Specifiers.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Support/VersionTuple.h"
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <vector>
+using namespace clang;
+using namespace api_notes;
+
+namespace {
+enum class APIAvailability {
+  Available = 0, // Default when the "Availability" key is absent.
+  OSX,
+  IOS,
+  None,
+  NonSwift,
+};
+} // namespace
+
+namespace llvm {
+namespace yaml {
+template <> struct ScalarEnumerationTraits<APIAvailability> {
+  static void enumeration(IO &IO, APIAvailability &AA) {
+    IO.enumCase(AA, "OSX", APIAvailability::OSX);
+    IO.enumCase(AA, "iOS", APIAvailability::IOS);
+    IO.enumCase(AA, "none", APIAvailability::None);
+    IO.enumCase(AA, "nonswift", APIAvailability::NonSwift);
+    IO.enumCase(AA, "available", APIAvailability::Available);
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
+namespace {
+enum class MethodKind { // Shared by the "MethodKind" and "PropertyKind" keys.
+  Class,
+  Instance,
+};
+} // namespace
+
+namespace llvm {
+namespace yaml {
+template <> struct ScalarEnumerationTraits<MethodKind> {
+  static void enumeration(IO &IO, MethodKind &MK) {
+    IO.enumCase(MK, "Class", MethodKind::Class);
+    IO.enumCase(MK, "Instance", MethodKind::Instance);
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
+namespace {
+struct Param { // One entry of a "Parameters" sequence (Method or Function).
+  unsigned Position; // No default; "Position" is a required key.
+  Optional<bool> NoEscape = false; // NOTE(review): starts engaged - intended?
+  Optional<NullabilityKind> Nullability;
+  Optional<RetainCountConventionKind> RetainCountConvention;
+  StringRef Type;
+};
+
+typedef std::vector<Param> ParamsSeq;
+} // namespace
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(Param)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(NullabilityKind)
+
+namespace llvm {
+namespace yaml {
+template <> struct ScalarEnumerationTraits<NullabilityKind> {
+  static void enumeration(IO &IO, NullabilityKind &NK) {
+    IO.enumCase(NK, "Nonnull", NullabilityKind::NonNull);
+    IO.enumCase(NK, "Optional", NullabilityKind::Nullable);
+    IO.enumCase(NK, "Unspecified", NullabilityKind::Unspecified);
+    // TODO: Mapping this to its own value would allow for better cross
+    // checking. Also the default should be Unknown.
+    IO.enumCase(NK, "Scalar", NullabilityKind::Unspecified);
+
+    // Aliases for compatibility with existing APINotes; not kept on output.
+    IO.enumCase(NK, "N", NullabilityKind::NonNull);
+    IO.enumCase(NK, "O", NullabilityKind::Nullable);
+    IO.enumCase(NK, "U", NullabilityKind::Unspecified);
+    IO.enumCase(NK, "S", NullabilityKind::Unspecified);
+  }
+};
+
+template <> struct ScalarEnumerationTraits<RetainCountConventionKind> {
+  static void enumeration(IO &IO, RetainCountConventionKind &RCCK) {
+    IO.enumCase(RCCK, "none", RetainCountConventionKind::None);
+    IO.enumCase(RCCK, "CFReturnsRetained",
+                RetainCountConventionKind::CFReturnsRetained);
+    IO.enumCase(RCCK, "CFReturnsNotRetained",
+                RetainCountConventionKind::CFReturnsNotRetained);
+    IO.enumCase(RCCK, "NSReturnsRetained",
+                RetainCountConventionKind::NSReturnsRetained);
+    IO.enumCase(RCCK, "NSReturnsNotRetained",
+                RetainCountConventionKind::NSReturnsNotRetained);
+  }
+};
+
+template <> struct MappingTraits<Param> {
+  static void mapping(IO &IO, Param &P) {
+    IO.mapRequired("Position", P.Position);
+    IO.mapOptional("Nullability", P.Nullability, llvm::None);
+    IO.mapOptional("RetainCountConvention", P.RetainCountConvention);
+    IO.mapOptional("NoEscape", P.NoEscape);
+    IO.mapOptional("Type", P.Type, StringRef(""));
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
+namespace {
+typedef std::vector<NullabilityKind> NullabilitySeq;
+
+struct AvailabilityItem { // Mapped inline as "Availability"/"AvailabilityMsg".
+  APIAvailability Mode = APIAvailability::Available;
+  StringRef Msg;
+};
+
+/// Old attribute deprecated in favor of SwiftName.
+enum class FactoryAsInitKind {
+  /// Infer based on name and type (the default).
+  Infer,
+  /// Treat as a class method.
+  AsClassMethod,
+  /// Treat as an initializer.
+  AsInitializer,
+};
+
+struct Method { // One entry of a Class's "Methods" sequence.
+  StringRef Selector;
+  MethodKind Kind; // No default; "MethodKind" is a required key.
+  ParamsSeq Params;
+  NullabilitySeq Nullability;
+  Optional<NullabilityKind> NullabilityOfRet;
+  Optional<RetainCountConventionKind> RetainCountConvention;
+  AvailabilityItem Availability;
+  Optional<bool> SwiftPrivate;
+  StringRef SwiftName;
+  FactoryAsInitKind FactoryAsInit = FactoryAsInitKind::Infer;
+  bool DesignatedInit = false;
+  bool Required = false;
+  StringRef ResultType;
+};
+
+typedef std::vector<Method> MethodsSeq;
+} // namespace
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(Method)
+
+namespace llvm {
+namespace yaml {
+template <> struct ScalarEnumerationTraits<FactoryAsInitKind> {
+  static void enumeration(IO &IO, FactoryAsInitKind &FIK) {
+    IO.enumCase(FIK, "A", FactoryAsInitKind::Infer);
+    IO.enumCase(FIK, "C", FactoryAsInitKind::AsClassMethod);
+    IO.enumCase(FIK, "I", FactoryAsInitKind::AsInitializer);
+  }
+};
+
+template <> struct MappingTraits<Method> {
+  static void mapping(IO &IO, Method &M) {
+    IO.mapRequired("Selector", M.Selector);
+    IO.mapRequired("MethodKind", M.Kind);
+    IO.mapOptional("Parameters", M.Params);
+    IO.mapOptional("Nullability", M.Nullability);
+    IO.mapOptional("NullabilityOfRet", M.NullabilityOfRet, llvm::None);
+    IO.mapOptional("RetainCountConvention", M.RetainCountConvention);
+    IO.mapOptional("Availability", M.Availability.Mode,
+                   APIAvailability::Available);
+    IO.mapOptional("AvailabilityMsg", M.Availability.Msg, StringRef(""));
+    IO.mapOptional("SwiftPrivate", M.SwiftPrivate);
+    IO.mapOptional("SwiftName", M.SwiftName, StringRef(""));
+    IO.mapOptional("FactoryAsInit", M.FactoryAsInit, FactoryAsInitKind::Infer);
+    IO.mapOptional("DesignatedInit", M.DesignatedInit, false);
+    IO.mapOptional("Required", M.Required, false);
+    IO.mapOptional("ResultType", M.ResultType, StringRef(""));
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
+namespace {
+struct Property { // One entry of a Class's "Properties" sequence.
+  StringRef Name;
+  llvm::Optional<MethodKind> Kind; // Mapped from the "PropertyKind" key.
+  llvm::Optional<NullabilityKind> Nullability;
+  AvailabilityItem Availability;
+  Optional<bool> SwiftPrivate;
+  StringRef SwiftName;
+  Optional<bool> SwiftImportAsAccessors;
+  StringRef Type;
+};
+
+typedef std::vector<Property> PropertiesSeq;
+} // namespace
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(Property)
+
+namespace llvm {
+namespace yaml {
+template <> struct MappingTraits<Property> {
+  static void mapping(IO &IO, Property &P) {
+    IO.mapRequired("Name", P.Name);
+    IO.mapOptional("PropertyKind", P.Kind);
+    IO.mapOptional("Nullability", P.Nullability, llvm::None);
+    IO.mapOptional("Availability", P.Availability.Mode,
+                   APIAvailability::Available);
+    IO.mapOptional("AvailabilityMsg", P.Availability.Msg, StringRef(""));
+    IO.mapOptional("SwiftPrivate", P.SwiftPrivate);
+    IO.mapOptional("SwiftName", P.SwiftName, StringRef(""));
+    IO.mapOptional("SwiftImportAsAccessors", P.SwiftImportAsAccessors);
+    IO.mapOptional("Type", P.Type, StringRef(""));
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
+namespace {
+struct Class { // One entry of a "Classes" or "Protocols" sequence.
+  StringRef Name;
+  bool AuditedForNullability = false;
+  AvailabilityItem Availability;
+  Optional<bool> SwiftPrivate;
+  StringRef SwiftName;
+  Optional<StringRef> SwiftBridge;
+  Optional<StringRef> NSErrorDomain;
+  Optional<bool> SwiftImportAsNonGeneric;
+  Optional<bool> SwiftObjCMembers;
+  MethodsSeq Methods;
+  PropertiesSeq Properties;
+};
+
+typedef std::vector<Class> ClassesSeq;
+} // namespace
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(Class)
+
+namespace llvm {
+namespace yaml {
+template <> struct MappingTraits<Class> {
+  static void mapping(IO &IO, Class &C) {
+    IO.mapRequired("Name", C.Name);
+    IO.mapOptional("AuditedForNullability", C.AuditedForNullability, false);
+    IO.mapOptional("Availability", C.Availability.Mode,
+                   APIAvailability::Available);
+    IO.mapOptional("AvailabilityMsg", C.Availability.Msg, StringRef(""));
+    IO.mapOptional("SwiftPrivate", C.SwiftPrivate);
+    IO.mapOptional("SwiftName", C.SwiftName, StringRef(""));
+    IO.mapOptional("SwiftBridge", C.SwiftBridge);
+    IO.mapOptional("NSErrorDomain", C.NSErrorDomain);
+    IO.mapOptional("SwiftImportAsNonGeneric", C.SwiftImportAsNonGeneric);
+    IO.mapOptional("SwiftObjCMembers", C.SwiftObjCMembers);
+    IO.mapOptional("Methods", C.Methods);
+    IO.mapOptional("Properties", C.Properties);
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
+namespace {
+struct Function { // One entry of the "Functions" sequence.
+  StringRef Name;
+  ParamsSeq Params;
+  NullabilitySeq Nullability;
+  Optional<NullabilityKind> NullabilityOfRet;
+  Optional<api_notes::RetainCountConventionKind> RetainCountConvention;
+  AvailabilityItem Availability;
+  Optional<bool> SwiftPrivate;
+  StringRef SwiftName;
+  StringRef Type; // NOTE(review): never mapped by MappingTraits<Function>.
+  StringRef ResultType;
+};
+
+typedef std::vector<Function> FunctionsSeq;
+} // namespace
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(Function)
+
+namespace llvm {
+namespace yaml {
+template <> struct MappingTraits<Function> {
+  static void mapping(IO &IO, Function &F) {
+    IO.mapRequired("Name", F.Name);
+    IO.mapOptional("Parameters", F.Params);
+    IO.mapOptional("Nullability", F.Nullability);
+    IO.mapOptional("NullabilityOfRet", F.NullabilityOfRet, llvm::None);
+    IO.mapOptional("RetainCountConvention", F.RetainCountConvention);
+    IO.mapOptional("Availability", F.Availability.Mode,
+                   APIAvailability::Available);
+    IO.mapOptional("AvailabilityMsg", F.Availability.Msg, StringRef(""));
+    IO.mapOptional("SwiftPrivate", F.SwiftPrivate);
+    IO.mapOptional("SwiftName", F.SwiftName, StringRef(""));
+    IO.mapOptional("ResultType", F.ResultType, StringRef(""));
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
+namespace {
+struct GlobalVariable { // One entry of the "Globals" sequence.
+  StringRef Name;
+  llvm::Optional<NullabilityKind> Nullability;
+  AvailabilityItem Availability;
+  Optional<bool> SwiftPrivate;
+  StringRef SwiftName;
+  StringRef Type;
+};
+
+typedef std::vector<GlobalVariable> GlobalVariablesSeq;
+} // namespace
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(GlobalVariable)
+
+namespace llvm {
+namespace yaml {
+template <> struct MappingTraits<GlobalVariable> {
+  static void mapping(IO &IO, GlobalVariable &GV) {
+    IO.mapRequired("Name", GV.Name);
+    IO.mapOptional("Nullability", GV.Nullability, llvm::None);
+    IO.mapOptional("Availability", GV.Availability.Mode,
+                   APIAvailability::Available);
+    IO.mapOptional("AvailabilityMsg", GV.Availability.Msg, StringRef(""));
+    IO.mapOptional("SwiftPrivate", GV.SwiftPrivate);
+    IO.mapOptional("SwiftName", GV.SwiftName, StringRef(""));
+    IO.mapOptional("Type", GV.Type, StringRef(""));
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
+namespace {
+struct EnumConstant { // One entry of the "Enumerators" sequence.
+  StringRef Name;
+  AvailabilityItem Availability;
+  Optional<bool> SwiftPrivate;
+  StringRef SwiftName;
+};
+
+typedef std::vector<EnumConstant> EnumConstantsSeq;
+} // namespace
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(EnumConstant)
+
+namespace llvm {
+namespace yaml {
+template <> struct MappingTraits<EnumConstant> {
+  static void mapping(IO &IO, EnumConstant &EC) {
+    IO.mapRequired("Name", EC.Name);
+    IO.mapOptional("Availability", EC.Availability.Mode,
+                   APIAvailability::Available);
+    IO.mapOptional("AvailabilityMsg", EC.Availability.Msg, StringRef(""));
+    IO.mapOptional("SwiftPrivate", EC.SwiftPrivate);
+    IO.mapOptional("SwiftName", EC.SwiftName, StringRef(""));
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
+namespace {
+/// Syntactic sugar for EnumExtensibility and FlagEnum
+enum class EnumConvenienceAliasKind {
+  /// EnumExtensibility: none, FlagEnum: false
+  None,
+  /// EnumExtensibility: open, FlagEnum: false
+  CFEnum,
+  /// EnumExtensibility: open, FlagEnum: true
+  CFOptions,
+  /// EnumExtensibility: closed, FlagEnum: false
+  CFClosedEnum
+};
+} // namespace
+
+namespace llvm {
+namespace yaml {
+template <> struct ScalarEnumerationTraits<EnumConvenienceAliasKind> {
+  static void enumeration(IO &IO, EnumConvenienceAliasKind &ECAK) {
+    IO.enumCase(ECAK, "none", EnumConvenienceAliasKind::None);
+    IO.enumCase(ECAK, "CFEnum", EnumConvenienceAliasKind::CFEnum);
+    IO.enumCase(ECAK, "NSEnum", EnumConvenienceAliasKind::CFEnum);
+    IO.enumCase(ECAK, "CFOptions", EnumConvenienceAliasKind::CFOptions);
+    IO.enumCase(ECAK, "NSOptions", EnumConvenienceAliasKind::CFOptions);
+    IO.enumCase(ECAK, "CFClosedEnum", EnumConvenienceAliasKind::CFClosedEnum);
+    IO.enumCase(ECAK, "NSClosedEnum", EnumConvenienceAliasKind::CFClosedEnum);
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
+namespace {
+struct Tag { // One entry of the "Tags" sequence.
+  StringRef Name;
+  AvailabilityItem Availability;
+  StringRef SwiftName;
+  Optional<bool> SwiftPrivate;
+  Optional<StringRef> SwiftBridge;
+  Optional<StringRef> NSErrorDomain;
+  Optional<EnumExtensibilityKind> EnumExtensibility;
+  Optional<bool> FlagEnum;
+  Optional<EnumConvenienceAliasKind> EnumConvenienceKind; // "EnumKind" key.
+};
+
+typedef std::vector<Tag> TagsSeq;
+} // namespace
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(Tag)
+
+namespace llvm {
+namespace yaml {
+template <> struct ScalarEnumerationTraits<EnumExtensibilityKind> {
+  static void enumeration(IO &IO, EnumExtensibilityKind &EEK) {
+    IO.enumCase(EEK, "none", EnumExtensibilityKind::None);
+    IO.enumCase(EEK, "open", EnumExtensibilityKind::Open);
+    IO.enumCase(EEK, "closed", EnumExtensibilityKind::Closed);
+  }
+};
+
+template <> struct MappingTraits<Tag> {
+  static void mapping(IO &IO, Tag &T) {
+    IO.mapRequired("Name", T.Name);
+    IO.mapOptional("Availability", T.Availability.Mode,
+                   APIAvailability::Available);
+    IO.mapOptional("AvailabilityMsg", T.Availability.Msg, StringRef(""));
+    IO.mapOptional("SwiftPrivate", T.SwiftPrivate);
+    IO.mapOptional("SwiftName", T.SwiftName, StringRef(""));
+    IO.mapOptional("SwiftBridge", T.SwiftBridge);
+    IO.mapOptional("NSErrorDomain", T.NSErrorDomain);
+    IO.mapOptional("EnumExtensibility", T.EnumExtensibility);
+    IO.mapOptional("FlagEnum", T.FlagEnum);
+    IO.mapOptional("EnumKind", T.EnumConvenienceKind);
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
+namespace {
+struct Typedef { // One entry of the "Typedefs" sequence.
+  StringRef Name;
+  AvailabilityItem Availability;
+  StringRef SwiftName;
+  Optional<bool> SwiftPrivate;
+  Optional<StringRef> SwiftBridge;
+  Optional<StringRef> NSErrorDomain;
+  Optional<SwiftNewTypeKind> SwiftType; // Mapped from the "SwiftWrapper" key.
+};
+
+typedef std::vector<Typedef> TypedefsSeq;
+} // namespace
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(Typedef)
+
+namespace llvm {
+namespace yaml {
+template <> struct ScalarEnumerationTraits<SwiftNewTypeKind> {
+  static void enumeration(IO &IO, SwiftNewTypeKind &SWK) {
+    IO.enumCase(SWK, "none", SwiftNewTypeKind::None);
+    IO.enumCase(SWK, "struct", SwiftNewTypeKind::Struct);
+    IO.enumCase(SWK, "enum", SwiftNewTypeKind::Enum);
+  }
+};
+
+template <> struct MappingTraits<Typedef> {
+  static void mapping(IO &IO, Typedef &T) {
+    IO.mapRequired("Name", T.Name);
+    IO.mapOptional("Availability", T.Availability.Mode,
+                   APIAvailability::Available);
+    IO.mapOptional("AvailabilityMsg", T.Availability.Msg, StringRef(""));
+    IO.mapOptional("SwiftPrivate", T.SwiftPrivate);
+    IO.mapOptional("SwiftName", T.SwiftName, StringRef(""));
+    IO.mapOptional("SwiftBridge", T.SwiftBridge);
+    IO.mapOptional("NSErrorDomain", T.NSErrorDomain);
+    IO.mapOptional("SwiftWrapper", T.SwiftType);
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
+namespace {
+struct TopLevelItems { // Keys shared by Module and each Versioned entry.
+  ClassesSeq Classes;
+  ClassesSeq Protocols; // Protocols reuse the Class model.
+  FunctionsSeq Functions;
+  GlobalVariablesSeq Globals;
+  EnumConstantsSeq EnumConstants; // Mapped from the "Enumerators" key.
+  TagsSeq Tags;
+  TypedefsSeq Typedefs;
+};
+} // namespace
+
+namespace llvm {
+namespace yaml {
+static void mapTopLevelItems(IO &IO, TopLevelItems &TLI) {
+  IO.mapOptional("Classes", TLI.Classes);
+  IO.mapOptional("Protocols", TLI.Protocols);
+  IO.mapOptional("Functions", TLI.Functions);
+  IO.mapOptional("Globals", TLI.Globals);
+  IO.mapOptional("Enumerators", TLI.EnumConstants);
+  IO.mapOptional("Tags", TLI.Tags);
+  IO.mapOptional("Typedefs", TLI.Typedefs);
+}
+} // namespace yaml
+} // namespace llvm
+
+namespace {
+struct Versioned { // One entry of the "SwiftVersions" sequence.
+  VersionTuple Version; // No default; "Version" is a required key.
+  TopLevelItems Items;
+};
+
+typedef std::vector<Versioned> VersionedSeq;
+} // namespace
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(Versioned)
+
+namespace llvm {
+namespace yaml {
+template <> struct MappingTraits<Versioned> {
+  static void mapping(IO &IO, Versioned &V) {
+    IO.mapRequired("Version", V.Version);
+    mapTopLevelItems(IO, V.Items);
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
+namespace {
+struct Module { // Root object an API notes YAML document is parsed into.
+  StringRef Name;
+  AvailabilityItem Availability;
+  TopLevelItems TopLevel; // Unversioned items, mapped at the document root.
+  VersionedSeq SwiftVersions;
+
+  llvm::Optional<bool> SwiftInferImportAsMember = {llvm::None};
+
+  LLVM_DUMP_METHOD void dump() /*const*/; // YAML to llvm::errs().
+};
+} // namespace
+
+namespace llvm {
+namespace yaml {
+template <> struct MappingTraits<Module> {
+  static void mapping(IO &IO, Module &M) {
+    IO.mapRequired("Name", M.Name);
+    IO.mapOptional("Availability", M.Availability.Mode,
+                   APIAvailability::Available);
+    IO.mapOptional("AvailabilityMsg", M.Availability.Msg, StringRef(""));
+    IO.mapOptional("SwiftInferImportAsMember", M.SwiftInferImportAsMember);
+    mapTopLevelItems(IO, M.TopLevel);
+    IO.mapOptional("SwiftVersions", M.SwiftVersions);
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
+void Module::dump() {
+  llvm::yaml::Output OS(llvm::errs()); // Debug aid: YAML is sent to errs().
+  OS << *this; // Presumably needs a mutable *this, hence dump() is not const.
+}
+
+namespace {
+bool parseAPINotes(StringRef YI, Module &M, llvm::SourceMgr::DiagHandlerTy Diag,
+                   void *DiagContext) {
+  llvm::yaml::Input IS(YI, nullptr, Diag, DiagContext);
+  IS >> M; // NOTE(review): StringRef fields in M presumably alias YI's data.
+  return static_cast<bool>(IS.error()); // True iff the parse reported an error.
+}
+} // namespace
+
+bool clang::api_notes::parseAndDumpAPINotes(StringRef YI,
+                                            llvm::raw_ostream &OS) {
+  Module M;
+  if (parseAPINotes(YI, M, nullptr, nullptr)) // No custom diagnostic handler.
+    return true; // Parse error: nothing is written to OS.
+
+  llvm::yaml::Output YOS(OS);
+  YOS << M; // Re-emit the parsed model as YAML.
+
+  return false;
+}
diff --git a/clang/lib/APINotes/CMakeLists.txt b/clang/lib/APINotes/CMakeLists.txt
new file mode 100644
index 00000000000000..3ce511a0de2232
--- /dev/null
+++ b/clang/lib/APINotes/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(LLVM_LINK_COMPONENTS
+  Support)
+add_clang_library(clangAPINotes
+  APINotesYAMLCompiler.cpp
+  LINK_LIBS
+    clangBasic)
diff --git a/clang/lib/CMakeLists.txt b/clang/lib/CMakeLists.txt
index 1068288100fd67..be09c0c351f347 100644
--- a/clang/lib/CMakeLists.txt
+++ b/clang/lib/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_subdirectory(Headers)
 add_subdirectory(Basic)
+add_subdirectory(APINotes)
 add_subdirectory(Lex)
 add_subdirectory(Parse)
 add_subdirectory(AST)
diff --git a/clang/test/APINotes/Inputs/Frameworks/Simple.framework/Headers/Simple.apinotes b/clang/test/APINotes/Inputs/Frameworks/Simple.framework/Headers/Simple.apinotes
new file mode 100644
index 00000000000000..8c915bd8b5913f
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/Simple.framework/Headers/Simple.apinotes
@@ -0,0 +1,28 @@
+Name:            SimpleKit
+Classes:
+  - Name:            I
+    Properties:
+      - Name:            nonnullProperty
+        PropertyKind:    Class
+        Nullability:     N
+      - Name:            nonnullNewProperty
+        PropertyKind:    Class
+        Nullability:     Nonnull
+      - Name:            optionalProperty
+        PropertyKind:    Class
+        Nullability:     O
+      - Name:            optionalNewProperty
+        PropertyKind:    Class
+        Nullability:     Optional
+      - Name:            unspecifiedProperty
+        PropertyKind:    Instance
+        Nullability:     U
+      - Name:            unspecifiedNewProperty
+        PropertyKind:    Instance
+        Nullability:     Unspecified
+      - Name:            scalarProperty
+        PropertyKind:    Instance
+        Nullability:     S
+      - Name:            scalarNewProperty
+        PropertyKind:    Instance
+        Nullability:     Scalar
diff --git a/clang/test/APINotes/Inputs/Frameworks/Simple.framework/Headers/Simple.h b/clang/test/APINotes/Inputs/Frameworks/Simple.framework/Headers/Simple.h
new file mode 100644
index 00000000000000..d206a7a368878b
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/Simple.framework/Headers/Simple.h
@@ -0,0 +1,19 @@
+#ifndef SIMPLE_H
+#define SIMPLE_H
+
+__attribute__((__objc_root__))
+@interface I
+@property(class, nonatomic, readonly) id nonnullProperty;
+@property(class, nonatomic, readonly) id nonnullNewProperty;
+
+@property(class, nonatomic, readonly) id optionalProperty;
+@property(class, nonatomic, readonly) id optionalNewProperty;
+
+@property(nonatomic, readonly) id unspecifiedProperty;
+@property(nonatomic, readonly) id unspecifiedNewProperty;
+
+@property(nonatomic, readonly) id scalarProperty;
+@property(nonatomic, readonly) id scalarNewProperty;
+@end
+
+#endif
diff --git a/clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Headers/SimpleKit.apinotes b/clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Headers/SimpleKit.apinotes
new file mode 100644
index 00000000000000..ef6e44c51c21c7
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Headers/SimpleKit.apinotes
@@ -0,0 +1,48 @@
+Name:            SimpleKit
+Classes:
+  - Name:            MethodTest
+    Methods:
+      - Selector:        getOwnedToUnowned
+        MethodKind:      Instance
+        RetainCountConvention: NSReturnsNotRetained
+      - Selector:        getUnownedToOwned
+        MethodKind:      Instance
+        RetainCountConvention: NSReturnsRetained
+Functions:
+  - Name:            getCFOwnedToUnowned
+    RetainCountConvention: CFReturnsNotRetained
+  - Name:            getCFUnownedToOwned
+    RetainCountConvention: CFReturnsRetained
+  - Name:            getCFOwnedToNone
+    RetainCountConvention: none
+  - Name:            getObjCOwnedToUnowned
+    RetainCountConvention: NSReturnsNotRetained
+  - Name:            getObjCUnownedToOwned
+    RetainCountConvention: NSReturnsRetained
+  - Name:            indirectGetCFOwnedToUnowned
+    Parameters:
+      - Position:        0
+        RetainCountConvention: CFReturnsNotRetained
+  - Name:            indirectGetCFUnownedToOwned
+    Parameters:
+      - Position:        0
+        RetainCountConvention: CFReturnsRetained
+  - Name:            indirectGetCFOwnedToNone
+    Parameters:
+      - Position:        0
+        RetainCountConvention: none
+  - Name:            indirectGetCFNoneToOwned
+    Parameters:
+      - Position:        0
+        RetainCountConvention: CFReturnsNotRetained
+  - Name:            getCFAuditedToUnowned_DUMP
+    RetainCountConvention: CFReturnsNotRetained
+  - Name:            getCFAuditedToOwned_DUMP
+    RetainCountConvention: CFReturnsRetained
+  - Name:            getCFAuditedToNone_DUMP
+    RetainCountConvention: none
+Tags:
+  - Name:            RenamedAgainInAPINotesA
+    SwiftName:       SuccessfullyRenamedA
+  - Name:            RenamedAgainInAPINotesB
+    SwiftName:       SuccessfullyRenamedB
diff --git a/clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Headers/SimpleKit.h b/clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Headers/SimpleKit.h
new file mode 100644
index 00000000000000..bd73926e9d6af4
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Headers/SimpleKit.h
@@ -0,0 +1,29 @@
+struct RenamedAgainInAPINotesA {
+  int field;
+} __attribute__((__swift_name__("bad")));
+
+struct __attribute__((__swift_name__("bad"))) RenamedAgainInAPINotesB {
+  int field;
+};
+
+void *getCFOwnedToUnowned(void) __attribute__((__cf_returns_retained__));
+void *getCFUnownedToOwned(void) __attribute__((__cf_returns_not_retained__));
+void *getCFOwnedToNone(void) __attribute__((__cf_returns_retained__));
+id getObjCOwnedToUnowned(void) __attribute__((__ns_returns_retained__));
+id getObjCUnownedToOwned(void) __attribute__((__ns_returns_not_retained__));
+
+int indirectGetCFOwnedToUnowned(void **out __attribute__((__cf_returns_retained__)));
+int indirectGetCFUnownedToOwned(void **out __attribute__((__cf_returns_not_retained__)));
+int indirectGetCFOwnedToNone(void **out __attribute__((__cf_returns_retained__)));
+int indirectGetCFNoneToOwned(void **out);
+
+#pragma clang arc_cf_code_audited begin
+void *getCFAuditedToUnowned_DUMP(void);
+void *getCFAuditedToOwned_DUMP(void);
+void *getCFAuditedToNone_DUMP(void);
+#pragma clang arc_cf_code_audited end
+
+@interface MethodTest
+- (id)getOwnedToUnowned __attribute__((__ns_returns_retained__));
+- (id)getUnownedToOwned __attribute__((__ns_returns_not_retained__));
+@end
diff --git a/clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Headers/module.modulemap b/clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Headers/module.modulemap
new file mode 100644
index 00000000000000..2d07e76c0a142a
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Frameworks/SimpleKit.framework/Headers/module.modulemap
@@ -0,0 +1,5 @@
+framework module SimpleKit {
+  umbrella header "SimpleKit.h"
+  export *
+  module * { export * }
+}
diff --git a/clang/test/APINotes/yaml-roundtrip-2.test b/clang/test/APINotes/yaml-roundtrip-2.test
new file mode 100644
index 00000000000000..02455302fec132
--- /dev/null
+++ b/clang/test/APINotes/yaml-roundtrip-2.test
@@ -0,0 +1,11 @@
+RUN: apinotes-test %S/Inputs/Frameworks/SimpleKit.framework/Headers/SimpleKit.apinotes > %t.result
+RUN: not diff --strip-trailing-cr --ed %t.result %S/Inputs/Frameworks/SimpleKit.framework/Headers/SimpleKit.apinotes | FileCheck %s
+
+The `--ed` parameter to `diff` is not implemented in the builtin diff; assume
+that we have a GNU compatible diff when we have a shell.
+REQUIRES: shell
+
+We expect only the document markers to be emitted
+
+CHECK: 50d
+CHECK: 1d
diff --git a/clang/test/APINotes/yaml-roundtrip.test b/clang/test/APINotes/yaml-roundtrip.test
new file mode 100644
index 00000000000000..3379cbf3b6db81
--- /dev/null
+++ b/clang/test/APINotes/yaml-roundtrip.test
@@ -0,0 +1,26 @@
+RUN: apinotes-test %S/Inputs/Frameworks/Simple.framework/Headers/Simple.apinotes > %t.result
+RUN: not diff --strip-trailing-cr %S/Inputs/Frameworks/Simple.framework/Headers/Simple.apinotes %t.result | FileCheck %s
+
+We expect only the nullability to be different as it is canonicalized during the
+roundtrip.
+
+CHECK:      7c8
+CHECK-NEXT: <         Nullability:     N
+CHECK-NEXT: ---
+CHECK-NEXT: >         Nullability:     Nonnull
+CHECK-NEXT: 13c14
+CHECK-NEXT: <         Nullability:     O
+CHECK-NEXT: ---
+CHECK-NEXT: >         Nullability:     Optional
+CHECK-NEXT: 19c20
+CHECK-NEXT: <         Nullability:     U
+CHECK-NEXT: ---
+CHECK-NEXT: >         Nullability:     Unspecified
+CHECK-NEXT: 25c26
+CHECK-NEXT: <         Nullability:     S
+CHECK-NEXT: ---
+CHECK-NEXT: >         Nullability:     Unspecified
+CHECK-NEXT: 28c29,30
+CHECK-NEXT: <         Nullability:     Scalar
+CHECK-NEXT: ---
+CHECK-NEXT: >         Nullability:     Unspecified
diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt
index 334a90498d0da8..2207607f5c6acd 100644
--- a/clang/test/CMakeLists.txt
+++ b/clang/test/CMakeLists.txt
@@ -58,6 +58,7 @@ if(CLANG_TEST_USE_VG)
 endif ()
 
 list(APPEND CLANG_TEST_DEPS
+  apinotes-test
   c-index-test
   clang
   clang-resource-headers
diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py
index 91161b8244f228..1a6e73ed97839b 100644
--- a/clang/test/lit.cfg.py
+++ b/clang/test/lit.cfg.py
@@ -63,7 +63,8 @@
 tool_dirs = [config.clang_tools_dir, config.llvm_tools_dir]
 
 tools = [
-    'c-index-test', 'clang-diff', 'clang-format', 'clang-tblgen', 'opt', 'llvm-ifs',
+    'apinotes-test', 'c-index-test', 'clang-diff', 'clang-format',
+    'clang-tblgen', 'opt', 'llvm-ifs',
     ToolSubst('%clang_extdef_map', command=FindTool(
         'clang-extdef-mapping'), unresolved='ignore'),
 ]
diff --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt
index 84e3fb156f1a44..52fd02529b46fb 100644
--- a/clang/tools/CMakeLists.txt
+++ b/clang/tools/CMakeLists.txt
@@ -2,6 +2,7 @@ create_subdirectory_options(CLANG TOOL)
 
 add_clang_subdirectory(diagtool)
 add_clang_subdirectory(driver)
+add_clang_subdirectory(apinotes-test)
 add_clang_subdirectory(clang-diff)
 add_clang_subdirectory(clang-format)
 add_clang_subdirectory(clang-format-vs)
diff --git a/clang/tools/apinotes-test/APINotesTest.cpp b/clang/tools/apinotes-test/APINotesTest.cpp
new file mode 100644
index 00000000000000..8794546dd28486
--- /dev/null
+++ b/clang/tools/apinotes-test/APINotesTest.cpp
@@ -0,0 +1,53 @@
+//===-- APINotesTest.cpp - API Notes Testing Tool ------------------ C++ --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/APINotes/APINotesYAMLCompiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/WithColor.h"
+
+static llvm::cl::list<std::string> APINotes(llvm::cl::Positional,
+                                            llvm::cl::desc("[<apinotes> ...]"),
+                                            llvm::cl::Required);
+
+static llvm::cl::opt<std::string>
+    OutputFileName("o", llvm::cl::desc("output filename"),
+                   llvm::cl::value_desc("filename"), llvm::cl::init("-"));
+
+int main(int argc, const char **argv) {
+  const bool DisableCrashReporting = true;
+  llvm::sys::PrintStackTraceOnErrorSignal(argv[0], DisableCrashReporting);
+  llvm::cl::ParseCommandLineOptions(argc, argv);
+
+  auto Error = [](const llvm::Twine &Msg) {
+    llvm::WithColor::error(llvm::errs(), "apinotes-test") << Msg << '\n';
+  };
+
+  std::error_code EC;
+  auto Out = std::make_unique<llvm::ToolOutputFile>(OutputFileName, EC,
+                                                    llvm::sys::fs::OF_None);
+  if (EC) {
+    Error("failed to open '" + OutputFileName + "': " + EC.message());
+    return EXIT_FAILURE;
+  }
+
+  for (const std::string &Notes : APINotes) {
+    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> NotesOrError =
+        llvm::MemoryBuffer::getFileOrSTDIN(Notes);
+    if (std::error_code EC = NotesOrError.getError()) {
+      llvm::errs() << EC.message() << '\n';
+      return EXIT_FAILURE;
+    }
+
+    clang::api_notes::parseAndDumpAPINotes((*NotesOrError)->getBuffer(),
+                                           Out->os());
+  }
+
+  return EXIT_SUCCESS;
+}
diff --git a/clang/tools/apinotes-test/CMakeLists.txt b/clang/tools/apinotes-test/CMakeLists.txt
new file mode 100644
index 00000000000000..39e82d90b74fe2
--- /dev/null
+++ b/clang/tools/apinotes-test/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(LLVM_LINK_COMPONENTS
+  Support)
+add_clang_executable(apinotes-test
+  APINotesTest.cpp)
+clang_target_link_libraries(apinotes-test PRIVATE
+  clangAPINotes)

From 98d7e583db9e15cff5ba1b002da0e66dc3ab89fe Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 5 Nov 2020 10:58:42 -0800
Subject: [PATCH 02/31] [LegalizeTypes] Remove unnecessary if around switch in
 ScalarizeVectorOperand and SplitVectorOperand. NFC

The if was checking !Res.getNode() but that's always true since
Res was initialized to SDValue() and not touched before the if.

This appears to be a leftover from a previous implementation of
Custom legalization where Res was updated instead of returning
immediately.
---
 .../SelectionDAG/LegalizeVectorTypes.cpp      | 292 +++++++++---------
 1 file changed, 144 insertions(+), 148 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 606b38d48c52bd..e84d8e90e382d0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -558,76 +558,74 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
              dbgs() << "\n");
   SDValue Res = SDValue();
 
-  if (!Res.getNode()) {
-    switch (N->getOpcode()) {
-    default:
+  switch (N->getOpcode()) {
+  default:
 #ifndef NDEBUG
-      dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
-      N->dump(&DAG);
-      dbgs() << "\n";
+    dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+    N->dump(&DAG);
+    dbgs() << "\n";
 #endif
-      report_fatal_error("Do not know how to scalarize this operator's "
-                         "operand!\n");
-    case ISD::BITCAST:
-      Res = ScalarizeVecOp_BITCAST(N);
-      break;
-    case ISD::ANY_EXTEND:
-    case ISD::ZERO_EXTEND:
-    case ISD::SIGN_EXTEND:
-    case ISD::TRUNCATE:
-    case ISD::FP_TO_SINT:
-    case ISD::FP_TO_UINT:
-    case ISD::SINT_TO_FP:
-    case ISD::UINT_TO_FP:
-      Res = ScalarizeVecOp_UnaryOp(N);
-      break;
-    case ISD::STRICT_SINT_TO_FP:
-    case ISD::STRICT_UINT_TO_FP:
-    case ISD::STRICT_FP_TO_SINT:
-    case ISD::STRICT_FP_TO_UINT:
-      Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
-      break;
-    case ISD::CONCAT_VECTORS:
-      Res = ScalarizeVecOp_CONCAT_VECTORS(N);
-      break;
-    case ISD::EXTRACT_VECTOR_ELT:
-      Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
-      break;
-    case ISD::VSELECT:
-      Res = ScalarizeVecOp_VSELECT(N);
-      break;
-    case ISD::SETCC:
-      Res = ScalarizeVecOp_VSETCC(N);
-      break;
-    case ISD::STORE:
-      Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
-      break;
-    case ISD::STRICT_FP_ROUND:
-      Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo);
-      break;
-    case ISD::FP_ROUND:
-      Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
-      break;
-    case ISD::VECREDUCE_FADD:
-    case ISD::VECREDUCE_FMUL:
-    case ISD::VECREDUCE_ADD:
-    case ISD::VECREDUCE_MUL:
-    case ISD::VECREDUCE_AND:
-    case ISD::VECREDUCE_OR:
-    case ISD::VECREDUCE_XOR:
-    case ISD::VECREDUCE_SMAX:
-    case ISD::VECREDUCE_SMIN:
-    case ISD::VECREDUCE_UMAX:
-    case ISD::VECREDUCE_UMIN:
-    case ISD::VECREDUCE_FMAX:
-    case ISD::VECREDUCE_FMIN:
-      Res = ScalarizeVecOp_VECREDUCE(N);
-      break;
-    case ISD::VECREDUCE_SEQ_FADD:
-    case ISD::VECREDUCE_SEQ_FMUL:
-      Res = ScalarizeVecOp_VECREDUCE_SEQ(N);
-      break;
-    }
+    report_fatal_error("Do not know how to scalarize this operator's "
+                       "operand!\n");
+  case ISD::BITCAST:
+    Res = ScalarizeVecOp_BITCAST(N);
+    break;
+  case ISD::ANY_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::TRUNCATE:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    Res = ScalarizeVecOp_UnaryOp(N);
+    break;
+  case ISD::STRICT_SINT_TO_FP:
+  case ISD::STRICT_UINT_TO_FP:
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
+    Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
+    break;
+  case ISD::CONCAT_VECTORS:
+    Res = ScalarizeVecOp_CONCAT_VECTORS(N);
+    break;
+  case ISD::EXTRACT_VECTOR_ELT:
+    Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
+    break;
+  case ISD::VSELECT:
+    Res = ScalarizeVecOp_VSELECT(N);
+    break;
+  case ISD::SETCC:
+    Res = ScalarizeVecOp_VSETCC(N);
+    break;
+  case ISD::STORE:
+    Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+    break;
+  case ISD::STRICT_FP_ROUND:
+    Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo);
+    break;
+  case ISD::FP_ROUND:
+    Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
+    break;
+  case ISD::VECREDUCE_FADD:
+  case ISD::VECREDUCE_FMUL:
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_MUL:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_UMIN:
+  case ISD::VECREDUCE_FMAX:
+  case ISD::VECREDUCE_FMIN:
+    Res = ScalarizeVecOp_VECREDUCE(N);
+    break;
+  case ISD::VECREDUCE_SEQ_FADD:
+  case ISD::VECREDUCE_SEQ_FMUL:
+    Res = ScalarizeVecOp_VECREDUCE_SEQ(N);
+    break;
   }
 
   // If the result is null, the sub-method took care of registering results etc.
@@ -2009,92 +2007,90 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
   if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
     return false;
 
-  if (!Res.getNode()) {
-    switch (N->getOpcode()) {
-    default:
+  switch (N->getOpcode()) {
+  default:
 #ifndef NDEBUG
-      dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
-      N->dump(&DAG);
-      dbgs() << "\n";
+    dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
+    N->dump(&DAG);
+    dbgs() << "\n";
 #endif
-      report_fatal_error("Do not know how to split this operator's "
-                         "operand!\n");
-
-    case ISD::SETCC:             Res = SplitVecOp_VSETCC(N); break;
-    case ISD::BITCAST:           Res = SplitVecOp_BITCAST(N); break;
-    case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
-    case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
-    case ISD::CONCAT_VECTORS:    Res = SplitVecOp_CONCAT_VECTORS(N); break;
-    case ISD::TRUNCATE:
+    report_fatal_error("Do not know how to split this operator's "
+                       "operand!\n");
+
+  case ISD::SETCC:             Res = SplitVecOp_VSETCC(N); break;
+  case ISD::BITCAST:           Res = SplitVecOp_BITCAST(N); break;
+  case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+  case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+  case ISD::CONCAT_VECTORS:    Res = SplitVecOp_CONCAT_VECTORS(N); break;
+  case ISD::TRUNCATE:
+    Res = SplitVecOp_TruncateHelper(N);
+    break;
+  case ISD::STRICT_FP_ROUND:
+  case ISD::FP_ROUND:          Res = SplitVecOp_FP_ROUND(N); break;
+  case ISD::FCOPYSIGN:         Res = SplitVecOp_FCOPYSIGN(N); break;
+  case ISD::STORE:
+    Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+    break;
+  case ISD::MSTORE:
+    Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
+    break;
+  case ISD::MSCATTER:
+    Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo);
+    break;
+  case ISD::MGATHER:
+    Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo);
+    break;
+  case ISD::VSELECT:
+    Res = SplitVecOp_VSELECT(N, OpNo);
+    break;
+  case ISD::STRICT_SINT_TO_FP:
+  case ISD::STRICT_UINT_TO_FP:
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    if (N->getValueType(0).bitsLT(
+            N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
       Res = SplitVecOp_TruncateHelper(N);
-      break;
-    case ISD::STRICT_FP_ROUND:
-    case ISD::FP_ROUND:          Res = SplitVecOp_FP_ROUND(N); break;
-    case ISD::FCOPYSIGN:         Res = SplitVecOp_FCOPYSIGN(N); break;
-    case ISD::STORE:
-      Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
-      break;
-    case ISD::MSTORE:
-      Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
-      break;
-    case ISD::MSCATTER:
-      Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo);
-      break;
-    case ISD::MGATHER:
-      Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo);
-      break;
-    case ISD::VSELECT:
-      Res = SplitVecOp_VSELECT(N, OpNo);
-      break;
-    case ISD::STRICT_SINT_TO_FP:
-    case ISD::STRICT_UINT_TO_FP:
-    case ISD::SINT_TO_FP:
-    case ISD::UINT_TO_FP:
-      if (N->getValueType(0).bitsLT(
-              N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
-        Res = SplitVecOp_TruncateHelper(N);
-      else
-        Res = SplitVecOp_UnaryOp(N);
-      break;
-    case ISD::FP_TO_SINT:
-    case ISD::FP_TO_UINT:
-    case ISD::STRICT_FP_TO_SINT:
-    case ISD::STRICT_FP_TO_UINT:
-    case ISD::STRICT_FP_EXTEND:
-    case ISD::FP_EXTEND:
-    case ISD::SIGN_EXTEND:
-    case ISD::ZERO_EXTEND:
-    case ISD::ANY_EXTEND:
-    case ISD::FTRUNC:
+    else
       Res = SplitVecOp_UnaryOp(N);
-      break;
+    break;
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
+  case ISD::STRICT_FP_EXTEND:
+  case ISD::FP_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:
+  case ISD::FTRUNC:
+    Res = SplitVecOp_UnaryOp(N);
+    break;
 
-    case ISD::ANY_EXTEND_VECTOR_INREG:
-    case ISD::SIGN_EXTEND_VECTOR_INREG:
-    case ISD::ZERO_EXTEND_VECTOR_INREG:
-      Res = SplitVecOp_ExtVecInRegOp(N);
-      break;
+  case ISD::ANY_EXTEND_VECTOR_INREG:
+  case ISD::SIGN_EXTEND_VECTOR_INREG:
+  case ISD::ZERO_EXTEND_VECTOR_INREG:
+    Res = SplitVecOp_ExtVecInRegOp(N);
+    break;
 
-    case ISD::VECREDUCE_FADD:
-    case ISD::VECREDUCE_FMUL:
-    case ISD::VECREDUCE_ADD:
-    case ISD::VECREDUCE_MUL:
-    case ISD::VECREDUCE_AND:
-    case ISD::VECREDUCE_OR:
-    case ISD::VECREDUCE_XOR:
-    case ISD::VECREDUCE_SMAX:
-    case ISD::VECREDUCE_SMIN:
-    case ISD::VECREDUCE_UMAX:
-    case ISD::VECREDUCE_UMIN:
-    case ISD::VECREDUCE_FMAX:
-    case ISD::VECREDUCE_FMIN:
-      Res = SplitVecOp_VECREDUCE(N, OpNo);
-      break;
-    case ISD::VECREDUCE_SEQ_FADD:
-    case ISD::VECREDUCE_SEQ_FMUL:
-      Res = SplitVecOp_VECREDUCE_SEQ(N);
-      break;
-    }
+  case ISD::VECREDUCE_FADD:
+  case ISD::VECREDUCE_FMUL:
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_MUL:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_UMIN:
+  case ISD::VECREDUCE_FMAX:
+  case ISD::VECREDUCE_FMIN:
+    Res = SplitVecOp_VECREDUCE(N, OpNo);
+    break;
+  case ISD::VECREDUCE_SEQ_FADD:
+  case ISD::VECREDUCE_SEQ_FMUL:
+    Res = SplitVecOp_VECREDUCE_SEQ(N);
+    break;
   }
 
   // If the result is null, the sub-method took care of registering results etc.

From b69af88481aa88e04ff4490dc8d420ec570ec0f1 Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Thu, 5 Nov 2020 14:11:26 -0500
Subject: [PATCH 03/31] [gn build] (manually) port 82f86ae01

---
 clang/tools/apinotes-test/CMakeLists.txt                 | 3 ++-
 llvm/utils/gn/secondary/clang/lib/APINotes/BUILD.gn      | 9 +++++++++
 llvm/utils/gn/secondary/clang/test/BUILD.gn              | 1 +
 .../gn/secondary/clang/tools/apinotes-test/BUILD.gn      | 7 +++++++
 4 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 llvm/utils/gn/secondary/clang/lib/APINotes/BUILD.gn
 create mode 100644 llvm/utils/gn/secondary/clang/tools/apinotes-test/BUILD.gn

diff --git a/clang/tools/apinotes-test/CMakeLists.txt b/clang/tools/apinotes-test/CMakeLists.txt
index 39e82d90b74fe2..82c3b7bcb6483f 100644
--- a/clang/tools/apinotes-test/CMakeLists.txt
+++ b/clang/tools/apinotes-test/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(LLVM_LINK_COMPONENTS
   Support)
 add_clang_executable(apinotes-test
-  APINotesTest.cpp)
+  APINotesTest.cpp
+  )
 clang_target_link_libraries(apinotes-test PRIVATE
   clangAPINotes)
diff --git a/llvm/utils/gn/secondary/clang/lib/APINotes/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/APINotes/BUILD.gn
new file mode 100644
index 00000000000000..e49d3d08dc0706
--- /dev/null
+++ b/llvm/utils/gn/secondary/clang/lib/APINotes/BUILD.gn
@@ -0,0 +1,9 @@
+static_library("APINotes") {
+  output_name = "clangAPINotes"
+  configs += [ "//llvm/utils/gn/build:clang_code" ]
+  deps = [
+    "//clang/lib/Basic",
+    "//llvm/lib/Support",
+  ]
+  sources = [ "APINotesYAMLCompiler.cpp" ]
+}
diff --git a/llvm/utils/gn/secondary/clang/test/BUILD.gn b/llvm/utils/gn/secondary/clang/test/BUILD.gn
index a0680d9848235f..9219d2d7bfadc4 100644
--- a/llvm/utils/gn/secondary/clang/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/test/BUILD.gn
@@ -131,6 +131,7 @@ group("test") {
     ":lit_site_cfg",
     ":lit_unit_site_cfg",
     "//clang/lib/Headers",
+    "//clang/tools/apinotes-test",
     "//clang/tools/c-index-test",
     "//clang/tools/clang-diff",
     "//clang/tools/clang-format",
diff --git a/llvm/utils/gn/secondary/clang/tools/apinotes-test/BUILD.gn b/llvm/utils/gn/secondary/clang/tools/apinotes-test/BUILD.gn
new file mode 100644
index 00000000000000..d9fce415690880
--- /dev/null
+++ b/llvm/utils/gn/secondary/clang/tools/apinotes-test/BUILD.gn
@@ -0,0 +1,7 @@
+executable("apinotes-test") {
+  configs += [ "//llvm/utils/gn/build:clang_code" ]
+  deps = [
+    "//clang/lib/APINotes",
+  ]
+  sources = [ "APINotesTest.cpp" ]
+}

From 439b5bebafec4d4008006fa6a8fb448db7e6c59d Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Thu, 5 Nov 2020 14:12:33 -0500
Subject: [PATCH 04/31] [gn build] (manually) port 82f86ae01 more

---
 llvm/utils/gn/secondary/clang/tools/apinotes-test/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang/tools/apinotes-test/BUILD.gn b/llvm/utils/gn/secondary/clang/tools/apinotes-test/BUILD.gn
index d9fce415690880..1b8dc5232566f2 100644
--- a/llvm/utils/gn/secondary/clang/tools/apinotes-test/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/tools/apinotes-test/BUILD.gn
@@ -2,6 +2,7 @@ executable("apinotes-test") {
   configs += [ "//llvm/utils/gn/build:clang_code" ]
   deps = [
     "//clang/lib/APINotes",
+    "//llvm/lib/Support",
   ]
   sources = [ "APINotesTest.cpp" ]
 }

From 5c801de13cc2b615e2248be9845190bd1f5ef60f Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj@google.com>
Date: Wed, 4 Nov 2020 22:58:25 +0000
Subject: [PATCH 05/31] [libc] Fix WrapperGen seeing no arguments as a void
 argument.

This corrects WrapperGen generating incorrect wrappers for functions
that take no arguments. Previously it would generate a wrapper with a
single argument of type `void`.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D90800
---
 libc/utils/tools/WrapperGen/Main.cpp | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/libc/utils/tools/WrapperGen/Main.cpp b/libc/utils/tools/WrapperGen/Main.cpp
index ae606d1b66fa1c..0b064bcbb81421 100644
--- a/libc/utils/tools/WrapperGen/Main.cpp
+++ b/libc/utils/tools/WrapperGen/Main.cpp
@@ -47,6 +47,20 @@ static bool WrapperGenMain(llvm::raw_ostream &OS, llvm::RecordKeeper &Records) {
   for (size_t i = 0; i < ArgsList.size(); ++i) {
     llvm::Record *ArgType = ArgsList[i]->getValueAsDef("ArgType");
     auto TypeName = Indexer.getTypeAsString(ArgType);
+
+    if (TypeName.compare("void") == 0) {
+      if (ArgsList.size() == 1) {
+        break;
+      } else {
+        // A void argument means the function takes no arguments, so a
+        // specification that lists void alongside other arguments is
+        // contradictory; report it as a fatal error.
+        llvm::PrintFatalError(
+            "The specification for function " + FunctionName +
+            " lists other arguments along with a void argument.");
+      }
+    }
+
     OS << TypeName << " " << ArgPrefix << i;
     CallArgs << ArgPrefix << i;
     if (i < ArgsList.size() - 1) {

From f347d78cca9a6b218e98d562b0e066488279602e Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara@apple.com>
Date: Wed, 4 Nov 2020 21:21:39 -0800
Subject: [PATCH 06/31] [AArch64][GlobalISel] Add AArch64::G_DUPLANE[X] opcodes
 for lane duplicates.

These were previously handled by pattern matching shuffles in the selector, but
adding a new opcode and making it equivalent to the AArch64duplane SDAG node
allows us to select more patterns, like lane indexed FMLAs (patch adding a test
for that will be committed later).

The pattern matching code has been simply moved to postlegalize lowering.

Differential Revision: https://reviews.llvm.org/D90820
---
 llvm/lib/Target/AArch64/AArch64Combine.td     |  11 +-
 llvm/lib/Target/AArch64/AArch64InstrGISel.td  |  27 ++++
 .../GISel/AArch64InstructionSelector.cpp      |  66 ----------
 .../GISel/AArch64PostLegalizerLowering.cpp    |  61 +++++++++
 ...postlegalizer-lowering-shuffle-duplane.mir | 120 ++++++++++++++++++
 .../GlobalISel/select-shuffle-to-duplane.mir  | 103 ---------------
 6 files changed, 218 insertions(+), 170 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-duplane.mir
 delete mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-to-duplane.mir

diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 560e362b074b58..7e7db2f5df0b84 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -83,6 +83,15 @@ def vashr_vlshr_imm : GICombineRule<
   (apply [{ applyVAshrLshrImm(*${root}, MRI, ${matchinfo}); }])
 >;
 
+def form_duplane_matchdata :
+  GIDefMatchData<"std::pair<unsigned, int>">;
+def form_duplane : GICombineRule <
+  (defs root:$root, form_duplane_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+          [{ return matchDupLane(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
 def adjust_icmp_imm_matchdata :
   GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">;
 def adjust_icmp_imm : GICombineRule <
@@ -108,7 +117,7 @@ def extractvecelt_pairwise_add : GICombineRule<
 def AArch64PostLegalizerLoweringHelper
     : GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
                        [shuffle_vector_pseudos, vashr_vlshr_imm,
-                        icmp_lowering]> {
+                        icmp_lowering, form_duplane]> {
   let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 1bd9ce25125d9b..ec7817beaeb847 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -88,6 +88,29 @@ def G_DUP: AArch64GenericInstruction {
   let InOperandList = (ins type1:$lane);
   let hasSideEffects = 0;
 }
+
+// Represents a lane duplicate operation.
+def G_DUPLANE8 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src, type1:$lane);
+  let hasSideEffects = 0;
+}
+def G_DUPLANE16 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src, type1:$lane);
+  let hasSideEffects = 0;
+}
+def G_DUPLANE32 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src, type1:$lane);
+  let hasSideEffects = 0;
+}
+def G_DUPLANE64 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src, type1:$lane);
+  let hasSideEffects = 0;
+}
+
 // Represents a trn1 instruction. Produced post-legalization from
 // G_SHUFFLE_VECTORs with appropriate masks.
 def G_TRN1 : AArch64GenericInstruction {
@@ -131,6 +154,10 @@ def : GINodeEquiv<G_UZP2, AArch64uzp2>;
 def : GINodeEquiv<G_ZIP1, AArch64zip1>;
 def : GINodeEquiv<G_ZIP2, AArch64zip2>;
 def : GINodeEquiv<G_DUP, AArch64dup>;
+def : GINodeEquiv<G_DUPLANE8, AArch64duplane8>;
+def : GINodeEquiv<G_DUPLANE16, AArch64duplane16>;
+def : GINodeEquiv<G_DUPLANE32, AArch64duplane32>;
+def : GINodeEquiv<G_DUPLANE64, AArch64duplane64>;
 def : GINodeEquiv<G_TRN1, AArch64trn1>;
 def : GINodeEquiv<G_TRN2, AArch64trn2>;
 def : GINodeEquiv<G_EXT, AArch64ext>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index f0333d18f3c1e9..1f2d256d72ca61 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -136,8 +136,6 @@ class AArch64InstructionSelector : public InstructionSelector {
   bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
   bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
 
-  bool tryOptShuffleDupLane(MachineInstr &I, LLT DstTy, LLT SrcTy,
-                            ArrayRef<int> Mask, MachineRegisterInfo &MRI) const;
   bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
   bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
   bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
@@ -4319,67 +4317,6 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
   return nullptr;
 }
 
-bool AArch64InstructionSelector::tryOptShuffleDupLane(
-    MachineInstr &I, LLT DstTy, LLT SrcTy, ArrayRef<int> Mask,
-    MachineRegisterInfo &MRI) const {
-  assert(I.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
-
-  // We assume that scalar->vector splats have been been handled in the
-  // post-legalizer combiner to G_DUP. However splats of a source vector's
-  // lane don't fit that pattern, detect it here:
-  //  %res = G_SHUFFLE_VECTOR %src:<n x ty>, undef, <n x i32> splat(lane-idx)
-  //    =>
-  //  %res = DUPv[N][Ty]lane %src, lane-idx
-  // FIXME: this case should be covered by re-implementing the perfect shuffle
-  // codegen mechanism.
-
-  auto LaneIdx = getSplatIndex(I);
-  if (!LaneIdx)
-    return false;
-
-  // The lane idx should be within the first source vector.
-  if (*LaneIdx >= SrcTy.getNumElements())
-    return false;
-
-  if (DstTy != SrcTy)
-    return false;
-
-  LLT ScalarTy = SrcTy.getElementType();
-  unsigned ScalarSize = ScalarTy.getSizeInBits();
-
-  unsigned Opc = 0;
-  switch (SrcTy.getNumElements()) {
-  case 2:
-    if (ScalarSize == 64)
-      Opc = AArch64::DUPv2i64lane;
-    break;
-  case 4:
-    if (ScalarSize == 32)
-      Opc = AArch64::DUPv4i32lane;
-    break;
-  case 8:
-    if (ScalarSize == 16)
-      Opc = AArch64::DUPv8i16lane;
-    break;
-  case 16:
-    if (ScalarSize == 8)
-      Opc = AArch64::DUPv16i8lane;
-    break;
-  default:
-    break;
-  }
-  if (!Opc)
-    return false;
-
-  MachineIRBuilder MIB(I);
-  auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()},
-                            {I.getOperand(1).getReg()})
-                 .addImm(*LaneIdx);
-  constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
-  I.eraseFromParent();
-  return true;
-}
-
 bool AArch64InstructionSelector::selectShuffleVector(
     MachineInstr &I, MachineRegisterInfo &MRI) const {
   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
@@ -4401,9 +4338,6 @@ bool AArch64InstructionSelector::selectShuffleVector(
     return false;
   }
 
-  if (tryOptShuffleDupLane(I, DstTy, Src1Ty, Mask, MRI))
-    return true;
-
   unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
 
   SmallVector<Constant *, 64> CstIdxs;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 9b433e0e90c658..645e85388490e0 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -550,6 +550,67 @@ bool applyAdjustICmpImmAndPred(
   return true;
 }
 
+bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
+                  std::pair<unsigned, int> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  Register Src1Reg = MI.getOperand(1).getReg();
+  const LLT SrcTy = MRI.getType(Src1Reg);
+  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+  auto LaneIdx = getSplatIndex(MI);
+  if (!LaneIdx)
+    return false;
+
+  // The lane idx should be within the first source vector.
+  if (*LaneIdx >= SrcTy.getNumElements())
+    return false;
+
+  if (DstTy != SrcTy)
+    return false;
+
+  LLT ScalarTy = SrcTy.getElementType();
+  unsigned ScalarSize = ScalarTy.getSizeInBits();
+
+  unsigned Opc = 0;
+  switch (SrcTy.getNumElements()) {
+  case 2:
+    if (ScalarSize == 64)
+      Opc = AArch64::G_DUPLANE64;
+    break;
+  case 4:
+    if (ScalarSize == 32)
+      Opc = AArch64::G_DUPLANE32;
+    break;
+  case 8:
+    if (ScalarSize == 16)
+      Opc = AArch64::G_DUPLANE16;
+    break;
+  case 16:
+    if (ScalarSize == 8)
+      Opc = AArch64::G_DUPLANE8;
+    break;
+  default:
+    break;
+  }
+  if (!Opc)
+    return false;
+
+  MatchInfo.first = Opc;
+  MatchInfo.second = *LaneIdx;
+  return true;
+}
+
+bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
+                  MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  B.setInstrAndDebugLoc(MI);
+  auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
+  B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()},
+               {MI.getOperand(1).getReg(), Lane});
+  MI.eraseFromParent();
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGILowering.inc"
 #undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-duplane.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-duplane.mir
new file mode 100644
index 00000000000000..948eb906056528
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-duplane.mir
@@ -0,0 +1,120 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64 -global-isel -start-before=aarch64-postlegalizer-lowering -stop-after=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=SELECTED
+
+---
+name:            duplane64
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: duplane64
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[DUPLANE64_:%[0-9]+]]:_(<2 x s64>) = G_DUPLANE64 [[COPY]], [[C]](s64)
+    ; CHECK: $q0 = COPY [[DUPLANE64_]](<2 x s64>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    ; SELECTED-LABEL: name: duplane64
+    ; SELECTED: liveins: $q0
+    ; SELECTED: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; SELECTED: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[COPY]], 0
+    ; SELECTED: $q0 = COPY [[DUPv2i64lane]]
+    ; SELECTED: RET_ReallyLR implicit $q0
+    %1:_(<2 x s64>) = COPY $q0
+    %2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 0)
+    $q0 = COPY %4(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            duplane32
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: duplane32
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[DUPLANE32_:%[0-9]+]]:_(<4 x s32>) = G_DUPLANE32 [[COPY]], [[C]](s64)
+    ; CHECK: $q0 = COPY [[DUPLANE32_]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    ; SELECTED-LABEL: name: duplane32
+    ; SELECTED: liveins: $q0
+    ; SELECTED: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; SELECTED: [[DUPv4i32lane:%[0-9]+]]:fpr128 = DUPv4i32lane [[COPY]], 0
+    ; SELECTED: $q0 = COPY [[DUPv4i32lane]]
+    ; SELECTED: RET_ReallyLR implicit $q0
+    %1:_(<4 x s32>) = COPY $q0
+    %2:_(<4 x s32>) = G_IMPLICIT_DEF
+    %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, 0, 0, 0)
+    $q0 = COPY %4(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            duplane16
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: duplane16
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[DUPLANE16_:%[0-9]+]]:_(<8 x s16>) = G_DUPLANE16 [[COPY]], [[C]](s64)
+    ; CHECK: $q0 = COPY [[DUPLANE16_]](<8 x s16>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    ; SELECTED-LABEL: name: duplane16
+    ; SELECTED: liveins: $q0
+    ; SELECTED: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; SELECTED: [[DUPv8i16lane:%[0-9]+]]:fpr128 = DUPv8i16lane [[COPY]], 0
+    ; SELECTED: $q0 = COPY [[DUPv8i16lane]]
+    ; SELECTED: RET_ReallyLR implicit $q0
+    %1:_(<8 x s16>) = COPY $q0
+    %2:_(<8 x s16>) = G_IMPLICIT_DEF
+    %4:_(<8 x s16>) = G_SHUFFLE_VECTOR %1(<8 x s16>), %2, shufflemask(0, 0, 0, 0, 0, 0, 0, 0)
+    $q0 = COPY %4(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            duplane8
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: duplane8
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[DUPLANE8_:%[0-9]+]]:_(<16 x s8>) = G_DUPLANE8 [[COPY]], [[C]](s64)
+    ; CHECK: $q0 = COPY [[DUPLANE8_]](<16 x s8>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    ; SELECTED-LABEL: name: duplane8
+    ; SELECTED: liveins: $q0
+    ; SELECTED: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; SELECTED: [[DUPv16i8lane:%[0-9]+]]:fpr128 = DUPv16i8lane [[COPY]], 0
+    ; SELECTED: $q0 = COPY [[DUPv16i8lane]]
+    ; SELECTED: RET_ReallyLR implicit $q0
+    %1:_(<16 x s8>) = COPY $q0
+    %2:_(<16 x s8>) = G_IMPLICIT_DEF
+    %4:_(<16 x s8>) = G_SHUFFLE_VECTOR %1(<16 x s8>), %2, shufflemask(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+    $q0 = COPY %4(<16 x s8>)
+    RET_ReallyLR implicit $q0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-to-duplane.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-to-duplane.mir
deleted file mode 100644
index 5743c489fa2066..00000000000000
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-to-duplane.mir
+++ /dev/null
@@ -1,103 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
-...
----
-name:            duplane_v16i8
-alignment:       4
-legalized:       true
-regBankSelected: true
-tracksRegLiveness: true
-liveins:
-  - { reg: '$q0' }
-body:             |
-  bb.1:
-    liveins: $q0
-
-    ; CHECK-LABEL: name: duplane_v16i8
-    ; CHECK: liveins: $q0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[DUPv16i8lane:%[0-9]+]]:fpr128 = DUPv16i8lane [[COPY]], 0
-    ; CHECK: $q0 = COPY [[DUPv16i8lane]]
-    ; CHECK: RET_ReallyLR implicit $q0
-    %0:fpr(<16 x s8>) = COPY $q0
-    %2:fpr(<16 x s8>) = G_IMPLICIT_DEF
-    %1:fpr(<16 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %2, shufflemask(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
-    $q0 = COPY %1(<16 x s8>)
-    RET_ReallyLR implicit $q0
-
-...
----
-name:            duplane_v8i16
-alignment:       4
-legalized:       true
-regBankSelected: true
-tracksRegLiveness: true
-liveins:
-  - { reg: '$q0' }
-body:             |
-  bb.1:
-    liveins: $q0
-
-    ; CHECK-LABEL: name: duplane_v8i16
-    ; CHECK: liveins: $q0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[DUPv8i16lane:%[0-9]+]]:fpr128 = DUPv8i16lane [[COPY]], 0
-    ; CHECK: $q0 = COPY [[DUPv8i16lane]]
-    ; CHECK: RET_ReallyLR implicit $q0
-    %0:fpr(<8 x s16>) = COPY $q0
-    %2:fpr(<8 x s16>) = G_IMPLICIT_DEF
-    %1:fpr(<8 x s16>) = G_SHUFFLE_VECTOR %0(<8 x s16>), %2, shufflemask(0, 0, 0, 0, 0, 0, 0, 0)
-    $q0 = COPY %1(<8 x s16>)
-    RET_ReallyLR implicit $q0
-
-...
----
-name:            duplane_v4f32
-alignment:       4
-legalized:       true
-regBankSelected: true
-tracksRegLiveness: true
-liveins:
-  - { reg: '$q0' }
-body:             |
-  bb.1:
-    liveins: $q0
-
-    ; CHECK-LABEL: name: duplane_v4f32
-    ; CHECK: liveins: $q0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[DUPv4i32lane:%[0-9]+]]:fpr128 = DUPv4i32lane [[COPY]], 0
-    ; CHECK: $q0 = COPY [[DUPv4i32lane]]
-    ; CHECK: RET_ReallyLR implicit $q0
-    %0:fpr(<4 x s32>) = COPY $q0
-    %2:fpr(<4 x s32>) = G_IMPLICIT_DEF
-    %1:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %2, shufflemask(0, 0, 0, 0)
-    $q0 = COPY %1(<4 x s32>)
-    RET_ReallyLR implicit $q0
-
-...
----
-name:            duplane_v2i64
-alignment:       4
-legalized:       true
-regBankSelected: true
-tracksRegLiveness: true
-liveins:
-  - { reg: '$q0' }
-body:             |
-  bb.1:
-    liveins: $q0
-
-    ; CHECK-LABEL: name: duplane_v2i64
-    ; CHECK: liveins: $q0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[COPY]], 0
-    ; CHECK: $q0 = COPY [[DUPv2i64lane]]
-    ; CHECK: RET_ReallyLR implicit $q0
-    %0:fpr(<2 x s64>) = COPY $q0
-    %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
-    %1:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(0, 0)
-    $q0 = COPY %1(<2 x s64>)
-    RET_ReallyLR implicit $q0
-
-...

From f7bc56826616814a656866fd50e90a35a8e461eb Mon Sep 17 00:00:00 2001
From: Sean Silva <silvasean@google.com>
Date: Wed, 4 Nov 2020 10:57:29 -0800
Subject: [PATCH 07/31] [mlir] Remove AppendToArgumentsList functionality from
 BufferizeTypeConverter.

This functionality is superseded by the BufferResultsToOutParams pass (see
https://reviews.llvm.org/D90071) for users that require buffers to be
out-params. That pass should be run immediately after all tensors are gone from
the program (before buffer optimizations and deallocation insertion), such as
immediately after a "finalizing" bufferize pass.

The -test-finalizing-bufferize pass now defaults to what used to be the
`allowMemrefFunctionResults=true` flag, and the
finalizing-bufferize-allowed-memref-results.mlir file is moved
to test/Transforms/finalizing-bufferize.mlir.

Differential Revision: https://reviews.llvm.org/D90778
---
 mlir/include/mlir/Transforms/Bufferize.h      |  76 +---
 mlir/lib/Transforms/Bufferize.cpp             | 121 +-----
 ...zing-bufferize-allowed-memref-results.mlir | 184 ---------
 .../test/Transforms/finalizing-bufferize.mlir | 389 +++++-------------
 .../Transforms/TestFinalizingBufferize.cpp    |  28 +-
 mlir/tools/mlir-opt/mlir-opt.cpp              |   1 -
 6 files changed, 119 insertions(+), 680 deletions(-)
 delete mode 100644 mlir/test/Transforms/finalizing-bufferize-allowed-memref-results.mlir

diff --git a/mlir/include/mlir/Transforms/Bufferize.h b/mlir/include/mlir/Transforms/Bufferize.h
index 920eb6c3019074..3434be4214a7c1 100644
--- a/mlir/include/mlir/Transforms/Bufferize.h
+++ b/mlir/include/mlir/Transforms/Bufferize.h
@@ -44,12 +44,6 @@ namespace mlir {
 /// except for the ranked-tensor types which is converted to memref types.
 class BufferizeTypeConverter : public TypeConverter {
 public:
-  /// This enum is for showing how buffer placement operation converters should
-  /// conduct with certain result type after type conversion. This value can be
-  /// set/get for each specific type using setResultConversionKind or
-  /// getResultConversionKind.
-  enum ResultConversionKind { AppendToArgumentsList, KeepAsFunctionResult };
-
   BufferizeTypeConverter();
 
   /// This method tries to decompose a value of a certain type using provided
@@ -82,26 +76,6 @@ class BufferizeTypeConverter : public TypeConverter {
     addConversion(std::forward<FnT>(callback));
   }
 
-  /// This method returns ResultConversionKind for the mapping from `origin`
-  /// type to `input` type.
-  ResultConversionKind getResultConversionKind(Type origin, Type input);
-
-  /// This method registers ResultConversionKind for the mapping from type 'T'
-  /// to type 'U'.
-  template <typename T, typename U>
-  void setResultConversionKind(ResultConversionKind kind) {
-    assert((kind != AppendToArgumentsList ||
-            llvm::is_one_of<U, MemRefType, UnrankedMemRefType>::value) &&
-           "Only the memref typed values can be set to be appended to the "
-           "function argument list at the moment");
-    resultTypeConversions.emplace_back(
-        [=](Type origin, Type input) -> Optional<ResultConversionKind> {
-          if (origin.template isa<T>() && input.template isa<U>())
-            return kind;
-          return llvm::None;
-        });
-  }
-
 private:
   using DecomposeValueConversionCallFn = std::function<Optional<LogicalResult>(
       OpBuilder &, Location, Type, Value, SmallVectorImpl<Value> &)>;
@@ -109,9 +83,6 @@ class BufferizeTypeConverter : public TypeConverter {
   using DecomposeTypeConversionCallFn =
       std::function<Optional<LogicalResult>(Type, SmallVectorImpl<Type> &)>;
 
-  using ResultConversionKindFn =
-      std::function<Optional<ResultConversionKind>(Type, Type)>;
-
   /// Generate a wrapper for the given decompose value conversion callback.
   template <typename T, typename FnT>
   DecomposeValueConversionCallFn
@@ -139,7 +110,6 @@ class BufferizeTypeConverter : public TypeConverter {
     };
   }
 
-  SmallVector<ResultConversionKindFn, 2> resultTypeConversions;
   SmallVector<DecomposeValueConversionCallFn, 2> decomposeValueConversions;
   SmallVector<DecomposeTypeConversionCallFn, 2> decomposeTypeConversions;
 };
@@ -221,48 +191,10 @@ class BufferizeReturnOpConverter
   LogicalResult
   matchAndRewrite(ReturnOpSourceTy returnOp, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
-    Location loc = returnOp.getLoc();
-
-    // Split the operands depending on whether they need a copy operation or
-    // they remain as operands of the return operation. If an operand is
-    // decomposable and a decompose callback function has been provided by the
-    // user, it will be unpacked.
-    SmallVector<Value, 2> newOperands, needCopyOperands;
-    OpBuilder builder(returnOp);
-    for (auto operand : llvm::enumerate(operands)) {
-      SmallVector<Value, 2> values;
-      this->converter.tryDecomposeValue(builder, loc, operand.value().getType(),
-                                        operand.value(), values);
-      Type type = returnOp.getOperand(operand.index()).getType();
-      SmallVector<Type, 2> originTypes;
-      this->converter.tryDecomposeType(type, originTypes);
-      for (auto value : llvm::enumerate(values)) {
-        Type origin = originTypes[value.index()];
-        Type converted = value.value().getType();
-        auto kind = this->converter.getResultConversionKind(origin, converted);
-        if (kind == BufferizeTypeConverter::KeepAsFunctionResult)
-          newOperands.push_back(value.value());
-        else
-          // kind = BufferizeTypeConverter::AppendToArgumentsList
-          needCopyOperands.push_back(value.value());
-      }
-    }
-
-    // Insert Copy operations instead for the operands that have been removed
-    // from operand list and appended to the function arguments list.
-    Block &entryBlock = returnOp.getParentRegion()->front();
-    unsigned numFuncArgs = entryBlock.getNumArguments();
-    if (needCopyOperands.size() > numFuncArgs)
-      return returnOp.emitError(
-          "The number of operands that need Copy operations is more "
-          "than the number of target function arguments.");
-    unsigned destArgNum = numFuncArgs - needCopyOperands.size();
-    rewriter.setInsertionPoint(returnOp);
-    for (Value operand : needCopyOperands) {
-      rewriter.create<CopyOpTy>(loc, operand,
-                                entryBlock.getArgument(destArgNum));
-      ++destArgNum;
-    }
+    SmallVector<Value, 2> newOperands;
+    for (auto operand : operands)
+      this->converter.tryDecomposeValue(
+          rewriter, returnOp.getLoc(), operand.getType(), operand, newOperands);
     rewriter.replaceOpWithNewOp<ReturnOpTargetTy>(returnOp, newOperands);
     return success();
   }
diff --git a/mlir/lib/Transforms/Bufferize.cpp b/mlir/lib/Transforms/Bufferize.cpp
index 790b6d0ab9f242..4ca446f06669eb 100644
--- a/mlir/lib/Transforms/Bufferize.cpp
+++ b/mlir/lib/Transforms/Bufferize.cpp
@@ -63,15 +63,6 @@ void BufferizeTypeConverter::tryDecomposeType(Type type,
   types.push_back(type);
 }
 
-/// This method returns ResultConversionKind for the input type.
-BufferizeTypeConverter::ResultConversionKind
-BufferizeTypeConverter::getResultConversionKind(Type origin, Type converted) {
-  for (auto &conversion : resultTypeConversions)
-    if (auto res = conversion(origin, converted))
-      return res.getValue();
-  return KeepAsFunctionResult;
-}
-
 void mlir::populateBufferizeMaterializationLegality(ConversionTarget &target) {
   target.addLegalOp<TensorLoadOp, TensorToMemrefOp>();
 }
@@ -140,16 +131,8 @@ LogicalResult BufferizeFuncOpConverter::matchAndRewrite(
   for (Type resultType : funcType.getResults()) {
     SmallVector<Type, 2> originTypes;
     converter.tryDecomposeType(resultType, originTypes);
-    for (auto origin : originTypes) {
-      Type converted = converter.convertType(origin);
-      auto kind = converter.getResultConversionKind(origin, converted);
-      if (kind == BufferizeTypeConverter::AppendToArgumentsList) {
-        conversion.addInputs(converted);
-      } else {
-        assert(kind == BufferizeTypeConverter::KeepAsFunctionResult);
-        newResultTypes.push_back(converted);
-      }
-    }
+    for (auto origin : originTypes)
+      newResultTypes.push_back(converter.convertType(origin));
   }
 
   if (failed(rewriter.convertRegionTypes(&funcOp.getBody(), converter,
@@ -168,66 +151,12 @@ LogicalResult BufferizeFuncOpConverter::matchAndRewrite(
 // BufferizeCallOpConverter
 //===----------------------------------------------------------------------===//
 
-namespace {
-// This class represents a mapping from a result to a list of values and some
-// results that have not yet constructed. Instead, the indices of these
-// results in the operation that will be constructed are known. They will be
-// replaced with the actual values when they are available. The order of
-// adding to this mapping is important.
-class CallOpResultMapping {
-public:
-  CallOpResultMapping() { order = 0; };
-
-  /// Add an available value to the mapping.
-  void addMapping(Value value) { toValuesMapping.push_back({order++, value}); }
-
-  /// Add the index of unavailble result value to the mapping.
-  void addMapping(unsigned index) {
-    toIndicesMapping.push_back({order++, index});
-  }
-
-  /// This method returns the mapping values list. The unknown result values
-  /// that only their indices are available are replaced with their values.
-  void getMappingValues(ValueRange valuesToReplaceIndices,
-                        SmallVectorImpl<Value> &values) {
-    // Append available values to the list.
-    SmallVector<std::pair<unsigned, Value>, 2> res(toValuesMapping.begin(),
-                                                   toValuesMapping.end());
-    // Replace the indices with the actual values.
-    for (const std::pair<unsigned, unsigned> &entry : toIndicesMapping) {
-      assert(entry.second < valuesToReplaceIndices.size() &&
-             "The value index is out of range.");
-      res.push_back({entry.first, valuesToReplaceIndices[entry.second]});
-    }
-    // Sort the values based on their adding orders.
-    llvm::sort(res, [](const std::pair<unsigned, Value> &v1,
-                       const std::pair<unsigned, Value> &v2) {
-      return v1.first < v2.first;
-    });
-    // Fill the values.
-    for (const std::pair<unsigned, Value> &entry : res)
-      values.push_back(entry.second);
-  }
-
-private:
-  /// Keeping the inserting order of mapping values.
-  int order;
-
-  /// Containing the mapping values with their inserting orders.
-  SmallVector<std::pair<unsigned, Value>, 2> toValuesMapping;
-
-  /// Containing the indices of result values with their inserting orders.
-  SmallVector<std::pair<unsigned, unsigned>, 2> toIndicesMapping;
-};
-} // namespace
-
 /// Performs the actual rewriting step.
 LogicalResult BufferizeCallOpConverter::matchAndRewrite(
     CallOp callOp, ArrayRef<Value> operands,
     ConversionPatternRewriter &rewriter) const {
 
   Location loc = callOp.getLoc();
-  OpBuilder builder(callOp);
   SmallVector<Value, 2> newOperands;
 
   // TODO: if the CallOp references a FuncOp that only has a declaration (e.g.
@@ -237,39 +166,25 @@ LogicalResult BufferizeCallOpConverter::matchAndRewrite(
 
   // Create the operands list of the new `CallOp`. It unpacks the decomposable
   // values if a decompose callback function has been provided by the user.
-  for (auto operand : operands) {
-    SmallVector<Value, 2> values;
-    converter.tryDecomposeValue(builder, loc, operand.getType(), operand,
-                                values);
-    newOperands.append(values.begin(), values.end());
-  }
+  for (auto operand : operands)
+    converter.tryDecomposeValue(rewriter, loc, operand.getType(), operand,
+                                newOperands);
 
-  // Create the new result types for the new `CallOp` and a mapping from the old
-  // result to new value(s).
+  // Create the new result types for the new `CallOp` and track the indices in
+  // the new call op's results that correspond to the old call op's results.
   SmallVector<Type, 2> newResultTypes;
-  SmallVector<CallOpResultMapping, 4> mappings;
-  mappings.resize(callOp.getNumResults());
+  SmallVector<SmallVector<int, 2>, 4> expandedResultIndices;
+  expandedResultIndices.resize(callOp.getNumResults());
   for (auto result : llvm::enumerate(callOp.getResults())) {
     SmallVector<Type, 2> originTypes;
     converter.tryDecomposeType(result.value().getType(), originTypes);
-    auto &resultMapping = mappings[result.index()];
+    auto &resultMapping = expandedResultIndices[result.index()];
     for (Type origin : originTypes) {
       Type converted = converter.convertType(origin);
-      auto kind = converter.getResultConversionKind(origin, converted);
-      if (kind == BufferizeTypeConverter::KeepAsFunctionResult) {
-        newResultTypes.push_back(converted);
-        // The result value is not yet available. Its index is kept and it is
-        // replaced with the actual value of the new `CallOp` later.
-        resultMapping.addMapping(newResultTypes.size() - 1);
-      } else {
-        // kind = BufferizeTypeConverter::AppendToArgumentsList
-        MemRefType memref = converted.dyn_cast<MemRefType>();
-        if (!memref)
-          return callOp.emitError("Cannot allocate for a non-Memref type");
-        Value alloc = rewriter.create<AllocOp>(loc, memref);
-        newOperands.push_back(alloc);
-        resultMapping.addMapping(alloc);
-      }
+      newResultTypes.push_back(converted);
+      // The result value is not yet available. Its index is kept and it is
+      // replaced with the actual value of the new `CallOp` later.
+      resultMapping.push_back(newResultTypes.size() - 1);
     }
   }
 
@@ -278,12 +193,12 @@ LogicalResult BufferizeCallOpConverter::matchAndRewrite(
 
   // Build a replacing value for each result to replace its uses. If a result
   // has multiple mapping values, it needs to be packed to a single value.
-  OpBuilder nextBuilder(callOp.getOperation()->getNextNode());
   SmallVector<Value, 2> replacedValues;
   replacedValues.reserve(callOp.getNumResults());
   for (unsigned i = 0, e = callOp.getNumResults(); i < e; ++i) {
-    SmallVector<Value, 2> valuesToPack;
-    mappings[i].getMappingValues(newCallOp.getResults(), valuesToPack);
+    auto valuesToPack = llvm::to_vector<6>(
+        llvm::map_range(expandedResultIndices[i],
+                        [&](int i) { return newCallOp.getResult(i); }));
     if (valuesToPack.empty()) {
       // No replacement is required.
       replacedValues.push_back(nullptr);
@@ -293,7 +208,7 @@ LogicalResult BufferizeCallOpConverter::matchAndRewrite(
       // Values need to be packed using callback function. The same callback
       // that is used for materializeArgumentConversion is used for packing.
       Value packed = converter.materializeArgumentConversion(
-          nextBuilder, loc, callOp.getType(i), valuesToPack);
+          rewriter, loc, callOp.getType(i), valuesToPack);
       replacedValues.push_back(packed);
     }
   }
diff --git a/mlir/test/Transforms/finalizing-bufferize-allowed-memref-results.mlir b/mlir/test/Transforms/finalizing-bufferize-allowed-memref-results.mlir
deleted file mode 100644
index 220a597506b345..00000000000000
--- a/mlir/test/Transforms/finalizing-bufferize-allowed-memref-results.mlir
+++ /dev/null
@@ -1,184 +0,0 @@
-// RUN: mlir-opt -test-finalizing-bufferize-with-allowed-memref-results -split-input-file %s | FileCheck %s
-
-// Since allowMemrefEscaping is active for Bufferization in this test pass,
-// all tensor typed function results are converted to memref and remain as
-// function results. All memref typed function results will escape from the
-// deallocation phase of Bufferization.
-
-// CHECK-LABEL: func @void_function_signature_conversion
-func @void_function_signature_conversion(%arg0: tensor<4x8xf32>) {
-    return
-}
-// CHECK: ({{.*}}: memref<4x8xf32>)
-
-// -----
-
-// CHECK-LABEL: func @complex_signature_conversion
-func @complex_signature_conversion(
-  %arg0: tensor<5xf32>,
-  %arg1: memref<10xf32>,
-  %arg2: i1, %arg3: f16) -> (
-    i1,
-    tensor<5xf32>,
-    memref<10xf32>,
-    memref<15xf32>,
-    f16) {
-  %0 = alloc() : memref<15xf32>
-  %1 = test.tensor_based in(%arg0 : tensor<5xf32>) -> tensor<5xf32>
-  return %arg2, %1, %arg1, %0, %arg3 :
-   i1, tensor<5xf32>, memref<10xf32>, memref<15xf32>, f16
-}
-//      CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>,
-// CHECK-SAME: %[[ARG2:.*]]: i1, %[[ARG3:.*]]: f16)
-// CHECK-SAME: (i1, memref<5xf32>, memref<10xf32>, memref<15xf32>, f16)
-//      CHECK: %[[FIRST_ALLOC:.*]] = alloc()
-//      CHECK: %[[TENSOR_ALLOC:.*]] = alloc()
-//      CHECK: return %[[ARG2]], %[[TENSOR_ALLOC]], %[[ARG1]], %[[FIRST_ALLOC]],
-// CHECK-SAME: %[[ARG3]]
-
-// -----
-
-// CHECK-LABEL: func @no_signature_conversion_is_needed
-func @no_signature_conversion_is_needed(%arg0: memref<4x8xf32>) {
-  return
-}
-// CHECK: ({{.*}}: memref<4x8xf32>)
-
-// -----
-
-// CHECK-LABEL: func @no_signature_conversion_is_needed
-func @no_signature_conversion_is_needed(%arg0: i1, %arg1: f16) -> (i1, f16){
-  return %arg0, %arg1 : i1, f16
-}
-// CHECK: (%[[ARG0:.*]]: i1, %[[ARG1:.*]]: f16) -> (i1, f16)
-// CHECK: return %[[ARG0]], %[[ARG1]]
-
-// -----
-
-// CHECK-LABEL: func @simple_signature_conversion
-func @simple_signature_conversion(%arg0: tensor<4x8xf32>) -> tensor<4x8xf32> {
-  return %arg0 : tensor<4x8xf32>
-}
-//      CHECK: (%[[ARG0:.*]]: [[TYPE:.*]]<[[RANK:.*]]>) -> [[TYPE]]<[[RANK]]>
-// CHECK-NEXT: return %[[ARG0]]
-
-// -----
-
-// CHECK-LABEL: func @func_with_unranked_arg_and_result
-func @func_with_unranked_arg_and_result(%arg0: tensor<*xf32>) -> tensor<*xf32> {
-  return %arg0 : tensor<*xf32>
-}
-// CHECK-SAME: ([[ARG:%.*]]: memref<*xf32>) -> memref<*xf32>
-// CHECK-NEXT: return [[ARG]] : memref<*xf32>
-
-// -----
-
-// CHECK-LABEL: func @func_and_block_signature_conversion
-func @func_and_block_signature_conversion(%arg0 : tensor<2xf32>, %cond : i1, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32>{
-    cond_br %cond, ^bb1, ^bb2
-  ^bb1:
-    br ^exit(%arg0 : tensor<2xf32>)
-  ^bb2:
-    br ^exit(%arg0 : tensor<2xf32>)
-  ^exit(%arg2: tensor<2xf32>):
-    return %arg1 : tensor<4x4xf32>
-}
-//      CHECK: (%[[ARG0:.*]]: [[ARG0_TYPE:.*]], %[[COND:.*]]: i1, %[[ARG1:.*]]: [[ARG1_TYPE:.*]]) -> [[RESULT_TYPE:.*]]
-//      CHECK: br ^[[EXIT_BLOCK:.*]](%[[ARG0]] : [[ARG0_TYPE]])
-//      CHECK: br ^[[EXIT_BLOCK]](%[[ARG0]] : [[ARG0_TYPE]])
-//      CHECK: ^[[EXIT_BLOCK]](%{{.*}}: [[ARG0_TYPE]])
-// CHECK-NEXT:  return %[[ARG1]]
-
-// -----
-
-// CHECK-LABEL: func @callee
-func @callee(%arg1: tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>) {
-  %buff = alloc() : memref<2xf32>
-  return %arg1, %buff : tensor<5xf32>, memref<2xf32>
-}
-// CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>) -> (memref<5xf32>, memref<2xf32>)
-// CHECK: %[[ALLOC:.*]] = alloc()
-// CHECK: return %[[CALLEE_ARG]], %[[ALLOC]]
-
-// CHECK-LABEL: func @caller
-func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> {
-  %x:2 = call @callee(%arg0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>)
-  %y:2 = call @callee(%x#0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>)
-  return %y#0 : tensor<5xf32>
-}
-// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>) -> memref<5xf32>
-// CHECK: %[[X:.*]]:2 = call @callee(%[[CALLER_ARG]])
-// CHECK: %[[Y:.*]]:2 = call @callee(%[[X]]#0)
-// CHECK: return %[[Y]]#0
-
-// -----
-
-// Test case: Testing BufferizeCallOpConverter to see if it matches with the
-// signature of the new signature of the callee function when there are tuple
-// typed args and results. BufferizeTypeConverter is set to flatten tuple typed
-// arguments. The tuple typed values should be decomposed and composed using
-// get_tuple_element and make_tuple operations of test dialect. Tensor types are
-// converted to Memref. Memref typed function results remain as function
-// results.
-
-// CHECK-LABEL: func @callee
-func @callee(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>){
-  return %arg0 : tuple<tensor<2xf32>,i1, tensor<5xf32>>
-}
-// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>)
-// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>)
-// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32}
-// CHECK-NEXT: return %[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]]
-
-// CHECK-LABEL: func @caller
-func @caller(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> tuple<tensor<2xf32>,i1, tensor<5xf32>>{
-  %x0 = call @callee(%arg0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
-  %y0 = call @callee(%x0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
-  return %y0 : tuple<tensor<2xf32>,i1, tensor<5xf32>>
-}
-// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>)
-// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>)
-// CHECK-NEXT: %[[ARG_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 2 : i32}
-// CHECK-NEXT: %[[CALLEE_RESULTS:.*]]:3 = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]])
-// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -> (memref<2xf32>, i1, memref<5xf32>)
-// CHECK-NEXT: %[[RESULT_TUPLE:.*]] = "test.make_tuple"(%[[CALLEE_RESULTS]]#0, %[[CALLEE_RESULTS]]#1, %[[CALLEE_RESULTS]]#2)
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 2 : i32}
-// CHECK-NEXT: %[[CALLEE_RESULTS:.*]]:3 = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]])
-// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -> (memref<2xf32>, i1, memref<5xf32>)
-// CHECK-NEXT: %[[RETURN_TUPLE:.*]] = "test.make_tuple"(%[[CALLEE_RESULTS]]#0, %[[CALLEE_RESULTS]]#1, %[[CALLEE_RESULTS]]#2)
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 2 : i32}
-// CHECK-NEXT: return %[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]]
-
-// -----
-
-// Test case: Testing BufferizeFuncOpConverter and
-// BufferizeReturnOpConverter to see if the return operation matches with the
-// new function signature when there are tuple typed args and results.
-// BufferizeTypeConverter is set to flatten tuple typed arguments. The tuple
-// typed values should be decomposed and composed using get_tuple_element and
-// make_tuple operations of test dialect. Tensor types are converted to Memref.
-// Memref typed function results remain as function results.
-
-// CHECK-LABEL: func @decompose_tuple_typed_function_args_and_results
-func @decompose_tuple_typed_function_args_and_results(%arg0: tuple<i1,f32>, %arg1: tensor<10xf32>, %arg2: tuple<i1, tensor<5xf32>>) -> (tuple<i1, tensor<5xf32>>, tensor<10xf32>, tuple<i1,f32>){
-  return %arg2, %arg1, %arg0 : tuple<i1, tensor<5xf32>>, tensor<10xf32>, tuple<i1,f32>
-}
-// CHECK-SAME: %[[ARG0:.*]]: i1, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref<10xf32>, %[[ARG3:.*]]: i1, %[[ARG4:.*]]: memref<5xf32>
-// CHECK-SAME: (i1, memref<5xf32>, memref<10xf32>, i1, f32)
-// CHECK-NEXT: %[[FIRST_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]])
-// CHECK-NEXT: %[[SECOND_TUPLE:.*]] = "test.make_tuple"(%[[ARG3]], %[[ARG4]])
-// CHECK-NEXT: %[[SECOND_TUPLE_FIRST_ELEM:.*]]  = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[FIRST_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[FIRST_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[SECOND_TUPLE_SECOND_ELEM]], %[[ARG2]], %[[FIRST_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_SECOND_ELEM]]
diff --git a/mlir/test/Transforms/finalizing-bufferize.mlir b/mlir/test/Transforms/finalizing-bufferize.mlir
index 83436bc569112f..2dc16317869ee9 100644
--- a/mlir/test/Transforms/finalizing-bufferize.mlir
+++ b/mlir/test/Transforms/finalizing-bufferize.mlir
@@ -1,31 +1,36 @@
 // RUN: mlir-opt -test-finalizing-bufferize -split-input-file %s | FileCheck %s
 
-// CHECK-LABEL: func @func_signature_conversion
-func @func_signature_conversion(%arg0: tensor<4x8xf32>) {
+// CHECK-LABEL: func @void_function_signature_conversion
+func @void_function_signature_conversion(%arg0: tensor<4x8xf32>) {
     return
 }
-// CHECK: ({{.*}}: memref<4x8xf32>) {
+// CHECK: ({{.*}}: memref<4x8xf32>)
 
 // -----
 
-// Only tensor typed function result should be converted to memref and move to
-// the function arguments list. The other memref function results remain as
-// function results.
-
-// CHECK-LABEL: func @memref_in_function_results
-func @memref_in_function_results(%arg0: tensor<5xf32>, %arg1: memref<10xf32>)
-                            -> (tensor<5xf32>, memref<10xf32>, memref<15xf32>) {
+// CHECK-LABEL: func @complex_signature_conversion
+func @complex_signature_conversion(
+  %arg0: tensor<5xf32>,
+  %arg1: memref<10xf32>,
+  %arg2: i1,
+  %arg3: f16) -> (
+    i1,
+    tensor<5xf32>,
+    memref<10xf32>,
+    memref<15xf32>,
+    f16) {
   %0 = alloc() : memref<15xf32>
   %1 = test.tensor_based in(%arg0 : tensor<5xf32>) -> tensor<5xf32>
-  return %1, %arg1, %0 : tensor<5xf32>, memref<10xf32>, memref<15xf32>
+  return %arg2, %1, %arg1, %0, %arg3 :
+   i1, tensor<5xf32>, memref<10xf32>, memref<15xf32>, f16
 }
 //      CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>,
-// CHECK-SAME: %[[RESULT:.*]]: memref<5xf32>)
-// CHECK-SAME: (memref<10xf32>, memref<15xf32>)
+// CHECK-SAME: %[[ARG2:.*]]: i1, %[[ARG3:.*]]: f16)
+// CHECK-SAME: (i1, memref<5xf32>, memref<10xf32>, memref<15xf32>, f16)
 //      CHECK: %[[FIRST_ALLOC:.*]] = alloc()
 //      CHECK: %[[TENSOR_ALLOC:.*]] = alloc()
-//      CHECK: test.copy(%[[TENSOR_ALLOC]], %[[RESULT]])
-//      CHECK: return %[[ARG1]], %[[FIRST_ALLOC]]
+//      CHECK: return %[[ARG2]], %[[TENSOR_ALLOC]], %[[ARG1]], %[[FIRST_ALLOC]],
+// CHECK-SAME: %[[ARG3]]
 
 // -----
 
@@ -33,7 +38,7 @@ func @memref_in_function_results(%arg0: tensor<5xf32>, %arg1: memref<10xf32>)
 func @no_signature_conversion_is_needed(%arg0: memref<4x8xf32>) {
   return
 }
-// CHECK: ({{.*}}: memref<4x8xf32>) {
+// CHECK: ({{.*}}: memref<4x8xf32>)
 
 // -----
 
@@ -46,39 +51,26 @@ func @no_signature_conversion_is_needed(%arg0: i1, %arg1: f16) -> (i1, f16){
 
 // -----
 
-// CHECK-LABEL: func @complex_signature_conversion
-func @complex_signature_conversion(%arg0: tensor<4x8xf32>, %arg1: i1,
-                                   %arg2: tensor<5x5xf64>,%arg3: f16) ->
-                                   (i1, tensor<5x5xf64>, f16, tensor<4x8xf32>) {
-    return %arg1, %arg2, %arg3, %arg0 : i1, tensor<5x5xf64>, f16,
-           tensor<4x8xf32>
+// CHECK-LABEL: func @simple_signature_conversion
+func @simple_signature_conversion(%arg0: tensor<4x8xf32>) -> tensor<4x8xf32> {
+  return %arg0 : tensor<4x8xf32>
 }
-//      CHECK: (%[[ARG0:.*]]: memref<4x8xf32>, %[[ARG1:.*]]: i1
-// CHECK-SAME: %[[ARG2:.*]]: memref<5x5xf64>, %[[ARG3:.*]]: f16
-// CHECK-SAME: %[[RESULT1:.*]]: memref<5x5xf64>
-// CHECK-SAME: %[[RESULT2:.*]]: memref<4x8xf32>) -> (i1, f16) {
-// CHECK-NEXT: test.copy(%[[ARG2]], %[[RESULT1]])
-// CHECK-NEXT: test.copy(%[[ARG0]], %[[RESULT2]])
-// CHECK-NEXT: return %[[ARG1]], %[[ARG3]]
+//      CHECK: (%[[ARG0:.*]]: [[TYPE:.*]]<[[RANK:.*]]>) -> [[TYPE]]<[[RANK]]>
+// CHECK-NEXT: return %[[ARG0]]
 
 // -----
 
-// CHECK-LABEL: func @non_void_to_void_return_op_converter
-func @non_void_to_void_return_op_converter(%arg0: tensor<4x8xf32>)
-                                           -> tensor<4x8xf32> {
-  return %arg0 : tensor<4x8xf32>
+// CHECK-LABEL: func @func_with_unranked_arg_and_result
+func @func_with_unranked_arg_and_result(%arg0: tensor<*xf32>) -> tensor<*xf32> {
+  return %arg0 : tensor<*xf32>
 }
-//      CHECK: (%[[ARG0:.*]]: [[TYPE:.*]]<[[RANK:.*]]>,
-// CHECK-SAME: %[[RESULT:.*]]: [[TYPE]]<[[RANK]]>) {
-// CHECK-NEXT: test.copy(%[[ARG0]], %[[RESULT]])
-// CHECK-NEXT: return
+// CHECK-SAME: ([[ARG:%.*]]: memref<*xf32>) -> memref<*xf32>
+// CHECK-NEXT: return [[ARG]] : memref<*xf32>
 
 // -----
 
 // CHECK-LABEL: func @func_and_block_signature_conversion
-func @func_and_block_signature_conversion(%arg0 : tensor<2xf32>, %cond : i1,
-                                          %arg1: tensor<4x4xf32>)
-                                          -> tensor<4x4xf32>{
+func @func_and_block_signature_conversion(%arg0 : tensor<2xf32>, %cond : i1, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32>{
     cond_br %cond, ^bb1, ^bb2
   ^bb1:
     br ^exit(%arg0 : tensor<2xf32>)
@@ -87,293 +79,102 @@ func @func_and_block_signature_conversion(%arg0 : tensor<2xf32>, %cond : i1,
   ^exit(%arg2: tensor<2xf32>):
     return %arg1 : tensor<4x4xf32>
 }
-//      CHECK: (%[[ARG0:.*]]: [[ARG0_TYPE:.*]], %[[COND:.*]]: i1,
-// CHECK-SAME: %[[ARG1:.*]]: [[ARG1_TYPE:.*]],
-// CHECK-SAME: %[[RESULT:.*]]: [[RESULT_TYPE:.*]]) {
+//      CHECK: (%[[ARG0:.*]]: [[ARG0_TYPE:.*]], %[[COND:.*]]: i1, %[[ARG1:.*]]: [[ARG1_TYPE:.*]]) -> [[RESULT_TYPE:.*]] {
 //      CHECK: br ^[[EXIT_BLOCK:.*]](%[[ARG0]] : [[ARG0_TYPE]])
 //      CHECK: br ^[[EXIT_BLOCK]](%[[ARG0]] : [[ARG0_TYPE]])
 //      CHECK: ^[[EXIT_BLOCK]](%{{.*}}: [[ARG0_TYPE]])
-// CHECK-NEXT: test.copy(%[[ARG1]], %[[RESULT]])
-// CHECK-NEXT: return
-
-// -----
-
-// Test Case: Simple case for checking if BufferizePlacer creates AllocOps
-//            right before TensorBasedOp.
-
-// CHECK-LABEL: func @compute_allocs_position_simple
-func @compute_allocs_position_simple(%cond: i1, %arg0: tensor<2xf32>)
-                                     -> tensor<2xf32>{
-    %0 = test.tensor_based in(%arg0 : tensor<2xf32>) -> tensor<2xf32>
-    %1 = test.tensor_based in(%0 : tensor<2xf32>) -> tensor<2xf32>
-    return %1 : tensor<2xf32>
-}
-//      CHECK: (%{{.*}}: {{.*}}, %[[ARG0:.*]]: memref<2xf32>,
-// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[FIRST_ALLOC]]
-//      CHECK: %[[SECOND_ALLOC:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[FIRST_ALLOC]]{{.*}} out(%[[SECOND_ALLOC]]
+// CHECK-NEXT:  return %[[ARG1]] : [[RESULT_TYPE]]
 
 // -----
 
-// Test Case: if-else case for checking if BufferizePlacer creates AllocOps
-//            right before TensorBasedOp.
-
-// CHECK-LABEL: func @compute_allocs_position
-func @compute_allocs_position(%cond: i1, %arg0: tensor<2xf32>) -> tensor<2xf32>{
-    %0 = test.tensor_based in(%arg0 : tensor<2xf32>) -> tensor<2xf32>
-    %1 = test.tensor_based in(%0 : tensor<2xf32>) -> tensor<2xf32>
-    cond_br %cond, ^bb1(%arg0, %0: tensor<2xf32>, tensor<2xf32>),
-                   ^bb2(%0, %arg0: tensor<2xf32>, tensor<2xf32>)
-  ^bb1(%arg1 : tensor<2xf32>, %arg2 : tensor<2xf32>):
-    %2 = test.tensor_based in(%arg0 : tensor<2xf32>) -> tensor<2xf32>
-    %3 = test.tensor_based in(%2 : tensor<2xf32>) -> tensor<2xf32>
-    br ^exit(%arg1, %arg2 : tensor<2xf32>, tensor<2xf32>)
-  ^bb2(%arg3 : tensor<2xf32>, %arg4 : tensor<2xf32>):
-    %4 = test.tensor_based in(%arg0 : tensor<2xf32>) -> tensor<2xf32>
-    %5 = test.tensor_based in(%4 : tensor<2xf32>) -> tensor<2xf32>
-    br ^exit(%arg3, %arg4 : tensor<2xf32>, tensor<2xf32>)
-  ^exit(%arg5 : tensor<2xf32>, %arg6 : tensor<2xf32>):
-    %6 = test.tensor_based in(%arg0 : tensor<2xf32>)  -> tensor<2xf32>
-    %7 = test.tensor_based in(%6 : tensor<2xf32>) -> tensor<2xf32>
-    return %7 : tensor<2xf32>
-}
-//      CHECK: (%{{.*}}: {{.*}}, %[[ARG0:.*]]: memref<2xf32>,
-// CHECK-NEXT: %[[ALLOC0:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[ALLOC0]]
-//      CHECK: %[[ALLOC1:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ALLOC0]]{{.*}} out(%[[ALLOC1]]
-//      CHECK: cond_br %{{.*}}, ^[[BB0:.*]]({{.*}}), ^[[BB1:.*]](
-// CHECK-NEXT: ^[[BB0]]
-// CHECK-NEXT: %[[ALLOC2:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[ALLOC2]]
-//      CHECK: %[[ALLOC3:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ALLOC2]]{{.*}} out(%[[ALLOC3]]
-//      CHECK: br ^[[EXIT:.*]]({{.*}})
-// CHECK-NEXT: ^[[BB1]]
-// CHECK-NEXT: %[[ALLOC4:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[ALLOC4]]
-//      CHECK: %[[ALLOC5:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ALLOC4]]{{.*}} out(%[[ALLOC5]]
-//      CHECK: br ^[[EXIT]]
-// CHECK-NEXT: ^[[EXIT]]
-// CHECK-NEXT: %[[ALLOC6:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[ALLOC6]]
-//      CHECK: %[[ALLOC7:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ALLOC6]]{{.*}} out(%[[ALLOC7]]
-
-// -----
-
-// Test case: Checking BufferizeCallOpConverter and
-// BufferizeFuncOpConverter and BufferizeReturnOpConverter all
-// together. The signature of `callee` after signature conversion would be:
-
-// func @callee(%arg0: memref<5xf32>,%arg1: memref<5xf32>) -> ()
-
-// The operands and results of caller and return operations must be matched
-// respectively.
-
-// CHECK-LABEL: func @callee
-func @callee(%arg1: tensor<5xf32>) -> tensor<5xf32> {
-  %0 = test.tensor_based in(%arg1 : tensor<5xf32>) -> tensor<5xf32>
-  return %0 : tensor<5xf32>
-}
-// CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>,
-// CHECK-SAME: %[[CALLEE_RESULT:.*]]: memref<5xf32>)
-// CHECK: %[[ALLOC:.*]] = alloc()
-// CHECK: test.buffer_based
-// CHECK: test.copy(%[[ALLOC]], %[[CALLEE_RESULT]])
-// CHECK: return
-
-// CHECK-LABEL: func @caller
-func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> {
-  %x = call @callee(%arg0) : (tensor<5xf32>) -> tensor<5xf32>
-  %y = call @callee(%x) : (tensor<5xf32>) -> tensor<5xf32>
-  return %y : tensor<5xf32>
-}
-// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>,
-// CHECK-SAME: %[[CALLER_RESULT:.*]]: memref<5xf32>)
-// CHECK: %[[FIRST_ALLOC:.*]] = alloc()
-// CHECK: call @callee(%[[CALLER_ARG]], %[[FIRST_ALLOC]])
-// CHECK: %[[SECOND_ALLOC:.*]] = alloc()
-// CHECK: call @callee(%[[FIRST_ALLOC]], %[[SECOND_ALLOC]])
-// CHECK: test.copy(%[[SECOND_ALLOC]], %[[CALLER_RESULT]])
-// CHECK: return
-
-// -----
-
-// Test case: Checking BufferizeCallOpConverter and
-// BufferizeFuncOpConverter and BufferizeReturnOpConverter all
-// together on functions that also have memref typed results. The signature of
-// `callee` after signature conversion would be:
-
-// func @callee(%arg0: memref<5xf32>,%arg1: memref<5xf32>)-> memref<2xf32>
-
-// where %arg0 is the input and %arg1 is the output buffer and the original
-// memref type result remain as the function result. Then, the rewriter should
-// match the caller's signature with the callee. Thus, two buffers will be
-// allocated instead of %x0 and %y0 and they are passed to the callers' operands
-// list as the output buffers. %x1 and %y1 remain as callers' results.
-
 // CHECK-LABEL: func @callee
 func @callee(%arg1: tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>) {
   %buff = alloc() : memref<2xf32>
   return %arg1, %buff : tensor<5xf32>, memref<2xf32>
 }
-//      CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>,
-// CHECK-SAME: %[[CALLEE_RESULT:.*]]: memref<5xf32>)
-// CHECK-SAME: memref<2xf32>
-//      CHECK: %[[ALLOC:.*]] = alloc()
-//      CHECK: test.copy(%[[CALLEE_ARG]], %[[CALLEE_RESULT]])
-//      CHECK: return %[[ALLOC]]
+// CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>) -> (memref<5xf32>, memref<2xf32>)
+// CHECK: %[[ALLOC:.*]] = alloc()
+// CHECK: return %[[CALLEE_ARG]], %[[ALLOC]]
 
 // CHECK-LABEL: func @caller
 func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> {
-  %x0, %x1 = call @callee(%arg0) : (tensor<5xf32>)
-                                   -> (tensor<5xf32>, memref<2xf32>)
-  %y0, %y1 = call @callee(%x0) : (tensor<5xf32>)
-                                 -> (tensor<5xf32>, memref<2xf32>)
-  return %y0 : tensor<5xf32>
-}
-// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>,
-// CHECK-SAME: %[[CALLER_RESULT:.*]]: memref<5xf32>)
-// CHECK: %[[X0:.*]] = alloc()
-// CHECK: %[[X1:.*]] = call @callee(%[[CALLER_ARG]], %[[X0]])
-// CHECK: %[[Y0:.*]] = alloc()
-// CHECK: %[[Y1:.*]] = call @callee(%[[X0]], %[[Y0]])
-// CHECK: test.copy(%[[Y0]], %[[CALLER_RESULT]])
-// CHECK: return
-
-// -----
-
-// CHECK-LABEL: func @func_with_unranked_arg
-func @func_with_unranked_arg(%arg0: tensor<*xf32>) {
-  return
+  %x:2 = call @callee(%arg0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>)
+  %y:2 = call @callee(%x#0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>)
+  return %y#0 : tensor<5xf32>
 }
-// CHECK-SAME: ([[ARG:%.*]]: memref<*xf32>)
+// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>) -> memref<5xf32>
+// CHECK: %[[X:.*]]:2 = call @callee(%[[CALLER_ARG]])
+// CHECK: %[[Y:.*]]:2 = call @callee(%[[X]]#0)
+// CHECK: return %[[Y]]#0
 
 // -----
 
 // Test case: Testing BufferizeCallOpConverter to see if it matches with the
 // signature of the new signature of the callee function when there are tuple
-// typed args and results. BufferizeTypeConverter is set to flatten tuple
-// typed arguments. The tuple typed values should be decomposed and composed
-// using get_tuple_element and make_tuple operations of test dialect. Tensor
-// types are converted to Memref. Memref typed function results are appended to
-// the function arguments list.
+// typed args and results. BufferizeTypeConverter is set to flatten tuple typed
+// arguments. The tuple typed values should be decomposed and composed using
+// get_tuple_element and make_tuple operations of test dialect. Tensor types are
+// converted to Memref. Memref typed function results remain as function
+// results.
 
 // CHECK-LABEL: func @callee
-func @callee(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>)
-             -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>){
+func @callee(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>){
   return %arg0 : tuple<tensor<2xf32>,i1, tensor<5xf32>>
 }
-// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1,
-// CHECK-SAME: %[[ARG2:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<2xf32>,
-// CHECK-SAME: %[[RESULT1:.*]]: memref<5xf32>) -> i1
-// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]],
-// CHECK-SAME: %[[ARG2]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 2 : i32}
-// CHECK-NEXT: test.copy(%[[FIRST_ELEM]], %[[RESULT0]])
-// CHECK-NEXT: test.copy(%[[THIRD_ELEM]], %[[RESULT1]])
-// CHECK-NEXT: return %[[SECOND_ELEM]]
+// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>)
+// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>)
+// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
+// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32}
+// CHECK-NEXT: return %[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]]
 
 // CHECK-LABEL: func @caller
-func @caller(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>)
-             -> tuple<tensor<2xf32>,i1, tensor<5xf32>>{
-  %x0 = call @callee(%arg0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
-                              -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
-  %y0 = call @callee(%x0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
-                            -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
+func @caller(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> tuple<tensor<2xf32>,i1, tensor<5xf32>>{
+  %x0 = call @callee(%arg0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
+  %y0 = call @callee(%x0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
   return %y0 : tuple<tensor<2xf32>,i1, tensor<5xf32>>
 }
-// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1,
-// CHECK-SAME: %[[ARG2:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<2xf32>,
-// CHECK-SAME: %[[RESULT1:.*]]: memref<5xf32>) -> i1
-// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]],
-// CHECK-SAME: %[[ARG2]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 2 : i32}
-// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc()
-// CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc()
-// CHECK-NEXT: %[[CALLEE_RESULT:.*]] = call @callee(%[[FIRST_ELEM]],
-// CHECK-SAME: %[[SECOND_ELEM]], %[[THIRD_ELEM]], %[[FIRST_ALLOC]],
-// CHECK-SAME: %[[SECOND_ALLOC]])
-// CHECK-SAME: (memref<2xf32>, i1,
-// CHECK-SAME: memref<5xf32>, memref<2xf32>, memref<5xf32>) -> i1
-// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[FIRST_ALLOC]],
-// CHECK-SAME: %[[CALLEE_RESULT]], %[[SECOND_ALLOC]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 2 : i32}
-// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc()
-// CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc()
-// CHECK-NEXT: %[[CALLEE_RESULT:.*]] = call @callee(%[[FIRST_ELEM]],
-// CHECK-SAME: %[[SECOND_ELEM]], %[[THIRD_ELEM]], %[[FIRST_ALLOC]],
-// CHECK-SAME: %[[SECOND_ALLOC]])
-// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>, memref<2xf32>, memref<5xf32>)
-// CHECK-SAME: i1
-// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[FIRST_ALLOC]],
-// CHECK-SAME: %[[CALLEE_RESULT]], %[[SECOND_ALLOC]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 2 : i32}
-// CHECK-NEXT: test.copy(%[[FIRST_ELEM]], %[[RESULT0]])
-// CHECK-NEXT: test.copy(%[[THIRD_ELEM]], %[[RESULT1]])
-// CHECK-NEXT: return %[[SECOND_ELEM]]
+// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>)
+// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>)
+// CHECK-NEXT: %[[ARG_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
+// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 2 : i32}
+// CHECK-NEXT: %[[CALLEE_RESULTS:.*]]:3 = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]])
+// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -> (memref<2xf32>, i1, memref<5xf32>)
+// CHECK-NEXT: %[[RESULT_TUPLE:.*]] = "test.make_tuple"(%[[CALLEE_RESULTS]]#0, %[[CALLEE_RESULTS]]#1, %[[CALLEE_RESULTS]]#2)
+// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 2 : i32}
+// CHECK-NEXT: %[[CALLEE_RESULTS:.*]]:3 = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]])
+// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -> (memref<2xf32>, i1, memref<5xf32>)
+// CHECK-NEXT: %[[RETURN_TUPLE:.*]] = "test.make_tuple"(%[[CALLEE_RESULTS]]#0, %[[CALLEE_RESULTS]]#1, %[[CALLEE_RESULTS]]#2)
+// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 2 : i32}
+// CHECK-NEXT: return %[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]]
 
 // -----
 
-// Test case: Testing BufferizeFuncOpConverter and BufferizeReturnOpConverter
-// to see if the return operation matches with the new function signature when
-// there are tuple typed args and results. BufferizeTypeConverter is set to
-// flatten tuple typed arguments. The tuple typed values should be decomposed
-// and composed using get_tuple_element and make_tuple operations of test
-// dialect. Tensor types are converted to Memref. Memref typed function results
-// are appended to the function arguments list.
+// Test case: Testing BufferizeFuncOpConverter and
+// BufferizeReturnOpConverter to see if the return operation matches with the
+// new function signature when there are tuple typed args and results.
+// BufferizeTypeConverter is set to flatten tuple typed arguments. The tuple
+// typed values should be decomposed and composed using get_tuple_element and
+// make_tuple operations of test dialect. Tensor types are converted to Memref.
+// Memref typed function results remain as function results.
 
 // CHECK-LABEL: func @decompose_tuple_typed_function_args_and_results
-func @decompose_tuple_typed_function_args_and_results(%arg0: tuple<i1,f32>,
-                                                      %arg1: tensor<10xf32>,
-                                                      %arg2: tuple<i1,
-                                                             tensor<5xf32>>)
-                                                      -> (tuple<i1,
-                                                                tensor<5xf32>>,
-                                                      tensor<10xf32>,
-                                                            tuple<i1,f32>){
-  return %arg2, %arg1, %arg0 : tuple<i1, tensor<5xf32>>, tensor<10xf32>,
-                                    tuple<i1,f32>
+func @decompose_tuple_typed_function_args_and_results(%arg0: tuple<i1,f32>, %arg1: tensor<10xf32>, %arg2: tuple<i1, tensor<5xf32>>) -> (tuple<i1, tensor<5xf32>>, tensor<10xf32>, tuple<i1,f32>){
+  return %arg2, %arg1, %arg0 : tuple<i1, tensor<5xf32>>, tensor<10xf32>, tuple<i1,f32>
 }
-// CHECK-SAME: %[[ARG0:.*]]: i1, %[[ARG1:.*]]: f32,
-// CHECK-SAME: %[[ARG2:.*]]: memref<10xf32>, %[[ARG3:.*]]: i1,
-// CHECK-SAME: %[[ARG4:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<5xf32>,
-// CHECK-SAME: %[[RESULT1:.*]]: memref<10xf32>
-// CHECK-SAME: (i1, i1, f32)
+// CHECK-SAME: %[[ARG0:.*]]: i1, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref<10xf32>, %[[ARG3:.*]]: i1, %[[ARG4:.*]]: memref<5xf32>
+// CHECK-SAME: (i1, memref<5xf32>, memref<10xf32>, i1, f32)
 // CHECK-NEXT: %[[FIRST_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]])
 // CHECK-NEXT: %[[SECOND_TUPLE:.*]] = "test.make_tuple"(%[[ARG3]], %[[ARG4]])
-// CHECK-NEXT: %[[SECOND_TUPLE_FIRST_ELEM:.*]]  = "test.get_tuple_element"
-// CHECK-SAME: (%[[SECOND_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"
-// CHECK-SAME: (%[[SECOND_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[FIRST_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"
-// CHECK-SAME: (%[[FIRST_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[FIRST_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"
-// CHECK-SAME: (%[[FIRST_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: test.copy(%[[SECOND_TUPLE_SECOND_ELEM]], %[[RESULT0]])
-// CHECK-NEXT: test.copy(%[[ARG2]], %[[RESULT1]])
-// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_FIRST_ELEM]],
-// CHECK-SAME: %[[FIRST_TUPLE_SECOND_ELEM]]
+// CHECK-NEXT: %[[SECOND_TUPLE_FIRST_ELEM:.*]]  = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[FIRST_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[FIRST_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[SECOND_TUPLE_SECOND_ELEM]], %[[ARG2]], %[[FIRST_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_SECOND_ELEM]]
diff --git a/mlir/test/lib/Transforms/TestFinalizingBufferize.cpp b/mlir/test/lib/Transforms/TestFinalizingBufferize.cpp
index f74f8aef153ebb..b9001f3d52dd90 100644
--- a/mlir/test/lib/Transforms/TestFinalizingBufferize.cpp
+++ b/mlir/test/lib/Transforms/TestFinalizingBufferize.cpp
@@ -35,17 +35,9 @@ namespace {
 /// otherwise the IR will end up invalid. Thus, finalizing bufferization passes
 /// require an atomic change to the entire program (e.g. the whole module).
 ///
-/// `allowMemrefFunctionResults` informs the buffer finalization policy to allow
-/// functions that have memref typed results. Patterns involved with converting
-/// func/call/return respect the finalization policy to ensure a consistent
-/// atomic conversion of the entire module. `allowMemrefFunctionResults` also
-/// allows memref typed results to escape from the deallocation.
-///
 /// TODO: Split out BufferizeFinalizationPolicy from BufferizeTypeConverter.
-template <bool allowMemrefFunctionResults>
 struct TestFinalizingBufferizePass
-    : mlir::PassWrapper<TestFinalizingBufferizePass<allowMemrefFunctionResults>,
-                        OperationPass<ModuleOp>> {
+    : mlir::PassWrapper<TestFinalizingBufferizePass, OperationPass<ModuleOp>> {
 
   /// Converts tensor based test operations to buffer based ones using
   /// bufferize.
@@ -123,13 +115,6 @@ struct TestFinalizingBufferizePass
              converter.isLegal(&funcOp.getBody());
     });
 
-    auto kind = allowMemrefFunctionResults
-                    ? BufferizeTypeConverter::KeepAsFunctionResult
-                    : BufferizeTypeConverter::AppendToArgumentsList;
-    converter.setResultConversionKind<RankedTensorType, MemRefType>(kind);
-    converter.setResultConversionKind<UnrankedTensorType, UnrankedMemRefType>(
-        kind);
-
     converter.addDecomposeTypeConversion(
         [](TupleType tupleType, SmallVectorImpl<Type> &types) {
           tupleType.getFlattenedTypes(types);
@@ -175,17 +160,8 @@ struct TestFinalizingBufferizePass
 namespace mlir {
 namespace test {
 void registerTestFinalizingBufferizePass() {
-  PassRegistration<
-      TestFinalizingBufferizePass</*allowMemrefFunctionResults=*/false>>(
+  PassRegistration<TestFinalizingBufferizePass>(
       "test-finalizing-bufferize", "Tests finalizing bufferize conversions");
 }
-
-void registerTestPreparationPassWithAllowedMemrefResults() {
-  PassRegistration<
-      TestFinalizingBufferizePass</*allowMemrefFunctionResults=*/true>>(
-      "test-finalizing-bufferize-with-allowed-memref-results",
-      "Tests finalizing buffierize conversions, allowing functions to have "
-      "memref typed results.");
-}
 } // namespace test
 } // namespace mlir
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index d18cd116fcb31a..7517fb6ec267fc 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -146,7 +146,6 @@ void registerTestPasses() {
   test::registerTestMemRefDependenceCheck();
   test::registerTestMemRefStrideCalculation();
   test::registerTestOpaqueLoc();
-  test::registerTestPreparationPassWithAllowedMemrefResults();
   test::registerTestRecursiveTypesPass();
   test::registerTestSCFUtilsPass();
   test::registerTestVectorConversions();

From 5b30d9adc0536eee7fe0f164a550084916899acc Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov@arm.com>
Date: Thu, 5 Nov 2020 19:11:43 +0000
Subject: [PATCH 08/31]  [MachineOutliner] Do not outline debug instructions

The debug location is removed from any outlined instruction. This
causes the MachineVerifier to crash on outlined DBG_VALUE
instructions.

Also, debug instructions are "invisible" to the outliner; that is, two
ranges of instructions from different functions are considered
identical if the only difference is debug instructions. Since a debug
instruction from one function is unlikely to provide sensible debug
information about all the functions sharing an outlined sequence, this
patch just removes debug instructions from the outlined functions.

Differential Revision: https://reviews.llvm.org/D89485
---
 llvm/lib/CodeGen/MachineOutliner.cpp          |  2 +
 .../machine-outliner-remove-debug-instr.mir   | 53 +++++++++++++++++++
 2 files changed, 55 insertions(+)
 create mode 100644 llvm/test/CodeGen/ARM/machine-outliner-remove-debug-instr.mir

diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index 715a2ba4667d23..a94a6e29dab988 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -654,6 +654,8 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
       OriginalMF->getFrameInstructions();
   for (auto I = FirstCand.front(), E = std::next(FirstCand.back()); I != E;
        ++I) {
+    if (I->isDebugInstr())
+      continue;
     MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
     if (I->isCFIInstruction()) {
       unsigned CFIIndex = NewMI->getOperand(0).getCFIIndex();
diff --git a/llvm/test/CodeGen/ARM/machine-outliner-remove-debug-instr.mir b/llvm/test/CodeGen/ARM/machine-outliner-remove-debug-instr.mir
new file mode 100644
index 00000000000000..53aadbc9d3dedf
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/machine-outliner-remove-debug-instr.mir
@@ -0,0 +1,53 @@
+# RUN: llc -verify-machineinstrs -run-pass=machine-outliner -mtriple=thumbv7m-none-eabi %s -o - | FileCheck %s
+
+# Check the outlined function does not contain debug instructions
+# CHECK-LABEL: name: f
+# CHECK:       tBL {{.*}}  @OUTLINED_FUNCTION_0,
+
+# CHECK-LABEL: name: g
+# CHECK:       tBL {{.*}}  @OUTLINED_FUNCTION_0,
+
+# CHECK-LABEL: name: OUTLINED_FUNCTION_0
+# CHECK-NOT:   DBG_VALUE
+# CHECK:       tTAILJMPdND @h
+--- |
+  define void @f() { entry: ret void }
+
+  define void @g() { entry: ret void }
+
+  declare void @h()
+...
+---
+name:            f
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $r0, $r1, $r2, $r3, $r4, $lr
+
+    frame-setup tPUSH 14, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
+
+    $r4 = tMOVr $r1, 14, $noreg
+    DBG_VALUE $r4, $noreg
+    renamable $r0, dead $cpsr = nsw tMUL renamable $r1, killed renamable $r0, 14, $noreg
+    renamable $r0, dead $cpsr = nsw tSUBrr killed renamable $r0, renamable $r1, 14, $noreg
+    tBL 14, $noreg, @h, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit killed $r1, implicit-def $sp, implicit-def $r0
+
+    frame-destroy tPOP_RET 14, $noreg, def $r4, def $pc, implicit killed $r0
+...
+---
+name:            g
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $r0, $r1, $r2, $r3, $r4, $lr
+
+    frame-setup tPUSH 14, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
+
+    $r4 = tMOVr $r1, 14, $noreg
+    DBG_VALUE $r4, $noreg
+    renamable $r0, dead $cpsr = nsw tMUL renamable $r1, killed renamable $r0, 14, $noreg
+    renamable $r0, dead $cpsr = nsw tSUBrr killed renamable $r0, renamable $r1, 14, $noreg
+    tBL 14, $noreg, @h, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit killed $r1, implicit-def $sp, implicit-def $r0
+
+    frame-destroy tPOP_RET 14, $noreg, def $r4, def $pc, implicit killed $r0
+...

From b79e990f4019c297cbfb62a03b9629d56a37086c Mon Sep 17 00:00:00 2001
From: rojamd <rojamd@users.noreply.github.com>
Date: Thu, 5 Nov 2020 14:41:35 -0500
Subject: [PATCH 09/31] [lld][COFF] Add command line options for LTO with new
 pass manager

This is more or less a port of rL329598 (D45275) to the COFF linker.
Since there were already LTO-related settings under -opt:, I added the
new options there instead of introducing new flags.

Differential Revision: https://reviews.llvm.org/D90624
---
 lld/COFF/Config.h                     |  5 +++++
 lld/COFF/Driver.cpp                   | 12 ++++++++++++
 lld/COFF/LTO.cpp                      |  2 ++
 lld/test/COFF/lto-new-pass-manager.ll | 20 ++++++++++++++++++++
 4 files changed, 39 insertions(+)
 create mode 100644 lld/test/COFF/lto-new-pass-manager.ll

diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h
index 77a08c200da29c..65ddc326ba7838 100644
--- a/lld/COFF/Config.h
+++ b/lld/COFF/Config.h
@@ -157,6 +157,11 @@ struct Configuration {
   // Used for /opt:lldltocachepolicy=policy
   llvm::CachePruningPolicy ltoCachePolicy;
 
+  // Used for /opt:[no]ltonewpassmanager
+  bool ltoNewPassManager = false;
+  // Used for /opt:[no]ltodebugpassmanager
+  bool ltoDebugPassManager = false;
+
   // Used for /merge:from=to (e.g. /merge:.rdata=.text)
   std::map<StringRef, StringRef> merge;
 
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 3c9f675be65ed2..d52abd10037562 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -1513,6 +1513,8 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
   unsigned icfLevel =
       args.hasArg(OPT_profile) ? 0 : 1; // 0: off, 1: limited, 2: on
   unsigned tailMerge = 1;
+  bool ltoNewPM = false;
+  bool ltoDebugPM = false;
   for (auto *arg : args.filtered(OPT_opt)) {
     std::string str = StringRef(arg->getValue()).lower();
     SmallVector<StringRef, 1> vec;
@@ -1530,6 +1532,14 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
         tailMerge = 2;
       } else if (s == "nolldtailmerge") {
         tailMerge = 0;
+      } else if (s == "ltonewpassmanager") {
+        ltoNewPM = true;
+      } else if (s == "noltonewpassmanager") {
+        ltoNewPM = false;
+      } else if (s == "ltodebugpassmanager") {
+        ltoDebugPM = true;
+      } else if (s == "noltodebugpassmanager") {
+        ltoDebugPM = false;
       } else if (s.startswith("lldlto=")) {
         StringRef optLevel = s.substr(7);
         if (optLevel.getAsInteger(10, config->ltoo) || config->ltoo > 3)
@@ -1559,6 +1569,8 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
   config->doGC = doGC;
   config->doICF = icfLevel > 0;
   config->tailMerge = (tailMerge == 1 && config->doICF) || tailMerge == 2;
+  config->ltoNewPassManager = ltoNewPM;
+  config->ltoDebugPassManager = ltoDebugPM;
 
   // Handle /lldsavetemps
   if (args.hasArg(OPT_lldsavetemps))
diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp
index 1fa685fb4620ed..2fa3536db873d5 100644
--- a/lld/COFF/LTO.cpp
+++ b/lld/COFF/LTO.cpp
@@ -82,6 +82,8 @@ static lto::Config createConfig() {
   c.MAttrs = getMAttrs();
   c.CGOptLevel = args::getCGOptLevel(config->ltoo);
   c.AlwaysEmitRegularLTOObj = !config->ltoObjPath.empty();
+  c.UseNewPM = config->ltoNewPassManager;
+  c.DebugPassManager = config->ltoDebugPassManager;
 
   if (config->saveTemps)
     checkError(c.addSaveTemps(std::string(config->outputFile) + ".",
diff --git a/lld/test/COFF/lto-new-pass-manager.ll b/lld/test/COFF/lto-new-pass-manager.ll
new file mode 100644
index 00000000000000..b81e00f933b915
--- /dev/null
+++ b/lld/test/COFF/lto-new-pass-manager.ll
@@ -0,0 +1,20 @@
+; REQUIRES: x86
+; RUN: llvm-as %s -o %s.obj
+
+; RUN: lld-link %s.obj -entry:main -opt:ltonewpassmanager -opt:ltodebugpassmanager 2>&1 | FileCheck %s --check-prefix=ENABLED
+; ENABLED: Starting llvm::Module pass manager run.
+; ENABLED: Finished llvm::Module pass manager run.
+
+; Passing -time just to avoid empty FileCheck input
+; RUN: lld-link %s.obj -entry:main -time -opt:ltonewpassmanager -opt:ltodebugpassmanager -opt:noltonewpassmanager 2>&1 | FileCheck %s --check-prefix=DISABLED
+; RUN: lld-link %s.obj -entry:main -time -opt:ltonewpassmanager -opt:ltodebugpassmanager -opt:noltodebugpassmanager 2>&1 | FileCheck %s --check-prefix=DISABLED
+; DISABLED-NOT: Starting llvm::Module pass manager run.
+; DISABLED-NOT: Finished llvm::Module pass manager run.
+
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.11.0"
+
+define dso_local i32 @main(i32 %argc, i8** nocapture readnone %0) local_unnamed_addr {
+entry:
+  ret i32 %argc
+}

From b643deb03fb935d414f74e07b702ebb4e5c33bf3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@moritz.systems>
Date: Thu, 5 Nov 2020 18:56:10 +0100
Subject: [PATCH 10/31] [lldb] [test] Fix TestGdbRemoteThreadName code on
 FreeBSD

Fix TestGdbRemoteThreadName to call ::pthread_setname_np instead
of ::pthread_set_name_np on FreeBSD.  While technically both names
are correct, the former is preferable because of compatibility
with Linux.  Furthermore, the latter requires `#include <pthread_np.h>`,
which was missing, causing the test to fail to compile.

Differential Revision: https://reviews.llvm.org/D90862
---
 lldb/test/API/tools/lldb-server/thread-name/main.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lldb/test/API/tools/lldb-server/thread-name/main.cpp b/lldb/test/API/tools/lldb-server/thread-name/main.cpp
index 898e9a35e9ace6..02eea12ca98a03 100644
--- a/lldb/test/API/tools/lldb-server/thread-name/main.cpp
+++ b/lldb/test/API/tools/lldb-server/thread-name/main.cpp
@@ -4,9 +4,7 @@
 void set_thread_name(const char *name) {
 #if defined(__APPLE__)
   ::pthread_setname_np(name);
-#elif defined(__FreeBSD__)
-  ::pthread_set_name_np(::pthread_self(), name);
-#elif defined(__linux__)
+#elif defined(__FreeBSD__) || defined(__linux__)
   ::pthread_setname_np(::pthread_self(), name);
 #elif defined(__NetBSD__)
   ::pthread_setname_np(::pthread_self(), "%s", const_cast<char *>(name));

From 40140e122f8b6512ebe22efc32dacf14f10117f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@moritz.systems>
Date: Thu, 5 Nov 2020 18:59:28 +0100
Subject: [PATCH 11/31] [lldb] [Process/FreeBSDRemote] Remove thread name
 caching

Remove the thread name caching code.  It does not handle the possibility
of thread name changing between requests, therefore breaking
TestGdbRemoteThreadName.  While technically we could cache the results
and reset the cache on resuming the process, the gain from doing that
does not seem worth the effort.

Differential Revision: https://reviews.llvm.org/D90863
---
 .../FreeBSDRemote/NativeThreadFreeBSD.cpp     | 54 +++++++++----------
 .../FreeBSDRemote/NativeThreadFreeBSD.h       |  1 -
 .../thread-name/TestGdbRemoteThreadName.py    |  1 -
 3 files changed, 24 insertions(+), 32 deletions(-)

diff --git a/lldb/source/Plugins/Process/FreeBSDRemote/NativeThreadFreeBSD.cpp b/lldb/source/Plugins/Process/FreeBSDRemote/NativeThreadFreeBSD.cpp
index 2e62b0a25ed224..3c80f113b19730 100644
--- a/lldb/source/Plugins/Process/FreeBSDRemote/NativeThreadFreeBSD.cpp
+++ b/lldb/source/Plugins/Process/FreeBSDRemote/NativeThreadFreeBSD.cpp
@@ -149,41 +149,35 @@ void NativeThreadFreeBSD::SetStepping() {
 }
 
 std::string NativeThreadFreeBSD::GetName() {
-  if (!m_thread_name) {
-    Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD));
-
-    std::vector<struct kinfo_proc> kp;
-    int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
-                  static_cast<int>(GetProcess().GetID())};
-
-    while (1) {
-      size_t len = kp.size() * sizeof(struct kinfo_proc);
-      void *ptr = len == 0 ? nullptr : kp.data();
-      int error = ::sysctl(mib, 4, ptr, &len, nullptr, 0);
-      if (ptr == nullptr || (error != 0 && errno == ENOMEM)) {
-        kp.resize(len / sizeof(struct kinfo_proc));
-        continue;
-      }
-      if (error != 0) {
-        len = 0;
-        LLDB_LOG(log, "tid = {0} in state {1} failed to get thread name: {2}", GetID(),
-                 m_state, strerror(errno));
-      }
+  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD));
+
+  std::vector<struct kinfo_proc> kp;
+  int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
+                static_cast<int>(GetProcess().GetID())};
+
+  while (1) {
+    size_t len = kp.size() * sizeof(struct kinfo_proc);
+    void *ptr = len == 0 ? nullptr : kp.data();
+    int error = ::sysctl(mib, 4, ptr, &len, nullptr, 0);
+    if ((error == 0 && ptr == nullptr) || (error != 0 && errno == ENOMEM)) {
       kp.resize(len / sizeof(struct kinfo_proc));
-      break;
+      continue; // Retry with the resized buffer.
     }
-
-    // empty == unknown
-    m_thread_name = std::string();
-    for (auto& procinfo : kp) {
-      if (procinfo.ki_tid == (lwpid_t)GetID()) {
-        m_thread_name = procinfo.ki_tdname;
-        break;
-      }
+    if (error != 0) {
+      len = 0;
+      LLDB_LOG(log, "tid = {0} in state {1} failed to get thread name: {2}", GetID(),
+               m_state, strerror(errno));
     }
+    kp.resize(len / sizeof(struct kinfo_proc));
+    break;
+  }
+
+  for (auto& procinfo : kp) {
+    if (procinfo.ki_tid == static_cast<lwpid_t>(GetID()))
+      return procinfo.ki_tdname;
   }
 
-  return m_thread_name.getValue();
+  return "";
 }
 
 lldb::StateType NativeThreadFreeBSD::GetState() { return m_state; }
diff --git a/lldb/source/Plugins/Process/FreeBSDRemote/NativeThreadFreeBSD.h b/lldb/source/Plugins/Process/FreeBSDRemote/NativeThreadFreeBSD.h
index 665e4ea4897107..e4d4941747364f 100644
--- a/lldb/source/Plugins/Process/FreeBSDRemote/NativeThreadFreeBSD.h
+++ b/lldb/source/Plugins/Process/FreeBSDRemote/NativeThreadFreeBSD.h
@@ -74,7 +74,6 @@ class NativeThreadFreeBSD : public NativeThreadProtocol {
   using WatchpointIndexMap = std::map<lldb::addr_t, uint32_t>;
   WatchpointIndexMap m_watchpoint_index_map;
   WatchpointIndexMap m_hw_break_index_map;
-  llvm::Optional<std::string> m_thread_name;
 };
 
 typedef std::shared_ptr<NativeThreadFreeBSD> NativeThreadFreeBSDSP;
diff --git a/lldb/test/API/tools/lldb-server/thread-name/TestGdbRemoteThreadName.py b/lldb/test/API/tools/lldb-server/thread-name/TestGdbRemoteThreadName.py
index c4f08da7099c89..9ec40c11742883 100644
--- a/lldb/test/API/tools/lldb-server/thread-name/TestGdbRemoteThreadName.py
+++ b/lldb/test/API/tools/lldb-server/thread-name/TestGdbRemoteThreadName.py
@@ -29,7 +29,6 @@ def run_and_check_name(self, expected_name):
         self.assertEqual(expected_name, kv_dict.get("name"))
 
     @skipIfWindows # the test is not updated for Windows.
-    @expectedFailureAll(oslist=["freebsd"])
     @llgs_test
     def test(self):
         """ Make sure lldb-server can retrieve inferior thread name"""

From 3b9b90a1914f1e470ba7d333b26bd34787337806 Mon Sep 17 00:00:00 2001
From: Nathan James <n.james93@hotmail.co.uk>
Date: Thu, 5 Nov 2020 19:51:04 +0000
Subject: [PATCH 12/31] [clang-tidy] Extend IdentifierNamingCheck per file
 config

Add IgnoreMainLikeFunctions to the per-file config. The per-file config can easily be extended to cover new options added to the check.

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D90832
---
 .../readability/IdentifierNamingCheck.cpp     | 80 +++++++++++--------
 .../readability/IdentifierNamingCheck.h       | 29 +++++--
 .../global-style-disabled/.clang-tidy         |  5 --
 .../global-style-disabled/header.h            |  3 -
 .../global-style1/.clang-tidy                 |  2 +
 .../global-style1/header.h                    |  2 +
 .../global-style2/.clang-tidy                 |  2 +
 .../global-style2/header.h                    |  2 +
 ...lity-identifier-naming-multiple-styles.cpp | 32 ++++----
 9 files changed, 93 insertions(+), 64 deletions(-)
 delete mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style-disabled/.clang-tidy
 delete mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style-disabled/header.h

diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp
index 4af1b444cf024a..5d63066490a651 100644
--- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp
@@ -122,9 +122,9 @@ static StringRef const StyleNames[] = {
 #undef NAMING_KEYS
 // clang-format on
 
-static std::vector<llvm::Optional<IdentifierNamingCheck::NamingStyle>>
-getNamingStyles(const ClangTidyCheck::OptionsView &Options) {
-  std::vector<llvm::Optional<IdentifierNamingCheck::NamingStyle>> Styles(
+static IdentifierNamingCheck::FileStyle
+getFileStyleFromOptions(const ClangTidyCheck::OptionsView &Options) {
+  SmallVector<llvm::Optional<IdentifierNamingCheck::NamingStyle>, 0> Styles(
       SK_Count);
   SmallString<64> StyleString;
   for (unsigned I = 0; I < SK_Count; ++I) {
@@ -145,23 +145,23 @@ getNamingStyles(const ClangTidyCheck::OptionsView &Options) {
       Styles[I].emplace(std::move(CaseOptional), std::move(Prefix),
                         std::move(Postfix));
   }
-  return Styles;
+  bool IgnoreMainLike = Options.get("IgnoreMainLikeFunctions", false);
+  return {std::move(Styles), IgnoreMainLike};
 }
 
 IdentifierNamingCheck::IdentifierNamingCheck(StringRef Name,
                                              ClangTidyContext *Context)
     : RenamerClangTidyCheck(Name, Context), Context(Context), CheckName(Name),
       GetConfigPerFile(Options.get("GetConfigPerFile", true)),
-      IgnoreFailedSplit(Options.get("IgnoreFailedSplit", false)),
-      IgnoreMainLikeFunctions(Options.get("IgnoreMainLikeFunctions", false)) {
+      IgnoreFailedSplit(Options.get("IgnoreFailedSplit", false)) {
 
   auto IterAndInserted = NamingStylesCache.try_emplace(
       llvm::sys::path::parent_path(Context->getCurrentFile()),
-      getNamingStyles(Options));
+      getFileStyleFromOptions(Options));
   assert(IterAndInserted.second && "Couldn't insert Style");
   // Holding a reference to the data in the vector is safe as it should never
   // move.
-  MainFileStyle = IterAndInserted.first->getValue();
+  MainFileStyle = &IterAndInserted.first->getValue();
 }
 
 IdentifierNamingCheck::~IdentifierNamingCheck() = default;
@@ -169,26 +169,28 @@ IdentifierNamingCheck::~IdentifierNamingCheck() = default;
 void IdentifierNamingCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
   RenamerClangTidyCheck::storeOptions(Opts);
   SmallString<64> StyleString;
+  ArrayRef<llvm::Optional<NamingStyle>> Styles = MainFileStyle->getStyles();
   for (size_t I = 0; I < SK_Count; ++I) {
-    if (!MainFileStyle[I])
+    if (!Styles[I])
       continue;
     StyleString = StyleNames[I];
     size_t StyleSize = StyleString.size();
     StyleString.append("Prefix");
-    Options.store(Opts, StyleString, MainFileStyle[I]->Prefix);
+    Options.store(Opts, StyleString, Styles[I]->Prefix);
     // Fast replacement of [Pre]fix -> [Suf]fix.
     memcpy(&StyleString[StyleSize], "Suf", 3);
-    Options.store(Opts, StyleString, MainFileStyle[I]->Suffix);
-    if (MainFileStyle[I]->Case) {
+    Options.store(Opts, StyleString, Styles[I]->Suffix);
+    if (Styles[I]->Case) {
       memcpy(&StyleString[StyleSize], "Case", 4);
       StyleString.pop_back();
       StyleString.pop_back();
-      Options.store(Opts, StyleString, *MainFileStyle[I]->Case);
+      Options.store(Opts, StyleString, *Styles[I]->Case);
     }
   }
   Options.store(Opts, "GetConfigPerFile", GetConfigPerFile);
   Options.store(Opts, "IgnoreFailedSplit", IgnoreFailedSplit);
-  Options.store(Opts, "IgnoreMainLikeFunctions", IgnoreMainLikeFunctions);
+  Options.store(Opts, "IgnoreMainLikeFunctions",
+                MainFileStyle->isIgnoringMainLikeFunction());
 }
 
 static bool matchesStyle(StringRef Name,
@@ -704,23 +706,27 @@ llvm::Optional<RenamerClangTidyCheck::FailureInfo>
 IdentifierNamingCheck::GetDeclFailureInfo(const NamedDecl *Decl,
                                           const SourceManager &SM) const {
   SourceLocation Loc = Decl->getLocation();
-  ArrayRef<llvm::Optional<NamingStyle>> NamingStyles =
-      getStyleForFile(SM.getFilename(Loc));
-
-  return getFailureInfo(
-      Decl->getName(), Loc, NamingStyles,
-      findStyleKind(Decl, NamingStyles, IgnoreMainLikeFunctions), SM,
-      IgnoreFailedSplit);
+  const FileStyle &FileStyle = getStyleForFile(SM.getFilename(Loc));
+  if (!FileStyle.isActive())
+    return llvm::None;
+
+  return getFailureInfo(Decl->getName(), Loc, FileStyle.getStyles(),
+                        findStyleKind(Decl, FileStyle.getStyles(),
+                                      FileStyle.isIgnoringMainLikeFunction()),
+                        SM, IgnoreFailedSplit);
 }
 
 llvm::Optional<RenamerClangTidyCheck::FailureInfo>
 IdentifierNamingCheck::GetMacroFailureInfo(const Token &MacroNameTok,
                                            const SourceManager &SM) const {
   SourceLocation Loc = MacroNameTok.getLocation();
+  const FileStyle &Style = getStyleForFile(SM.getFilename(Loc));
+  if (!Style.isActive())
+    return llvm::None;
 
   return getFailureInfo(MacroNameTok.getIdentifierInfo()->getName(), Loc,
-                        getStyleForFile(SM.getFilename(Loc)),
-                        SK_MacroDefinition, SM, IgnoreFailedSplit);
+                        Style.getStyles(), SK_MacroDefinition, SM,
+                        IgnoreFailedSplit);
 }
 
 RenamerClangTidyCheck::DiagInfo
@@ -732,19 +738,27 @@ IdentifierNamingCheck::GetDiagInfo(const NamingCheckId &ID,
                   }};
 }
 
-ArrayRef<llvm::Optional<IdentifierNamingCheck::NamingStyle>>
+const IdentifierNamingCheck::FileStyle &
 IdentifierNamingCheck::getStyleForFile(StringRef FileName) const {
   if (!GetConfigPerFile)
-    return MainFileStyle;
-  auto &Styles = NamingStylesCache[llvm::sys::path::parent_path(FileName)];
-  if (Styles.empty()) {
-    ClangTidyOptions Options = Context->getOptionsForFile(FileName);
-    if (Options.Checks && GlobList(*Options.Checks).contains(CheckName))
-      Styles = getNamingStyles({CheckName, Options.CheckOptions});
-    else
-      Styles.resize(SK_Count, None);
+    return *MainFileStyle;
+  StringRef Parent = llvm::sys::path::parent_path(FileName);
+  auto Iter = NamingStylesCache.find(Parent);
+  if (Iter != NamingStylesCache.end())
+    return Iter->getValue();
+
+  ClangTidyOptions Options = Context->getOptionsForFile(FileName);
+  if (Options.Checks && GlobList(*Options.Checks).contains(CheckName)) {
+    auto It = NamingStylesCache.try_emplace(
+        Parent, getFileStyleFromOptions({CheckName, Options.CheckOptions}));
+    assert(It.second);
+    return It.first->getValue();
   }
-  return Styles;
+  // The check is not enabled for this directory: cache a default-constructed
+  // (inactive) FileStyle so that callers can bail out via isActive().
+  auto It = NamingStylesCache.try_emplace(Parent);
+  assert(It.second);
+  return It.first->getValue();
 }
 
 } // namespace readability
diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h
index ad1c582d100bc0..77c03f77d91dbd 100644
--- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h
@@ -60,6 +60,26 @@ class IdentifierNamingCheck final : public RenamerClangTidyCheck {
     std::string Suffix;
   };
 
+  struct FileStyle {
+    FileStyle() : IsActive(false), IgnoreMainLikeFunctions(false) {}
+    FileStyle(SmallVectorImpl<Optional<NamingStyle>> &&Styles,
+              bool IgnoreMainLike)
+        : Styles(std::move(Styles)), IsActive(true),
+          IgnoreMainLikeFunctions(IgnoreMainLike) {}
+
+    ArrayRef<Optional<NamingStyle>> getStyles() const {
+      assert(IsActive);
+      return Styles;
+    }
+    bool isActive() const { return IsActive; }
+    bool isIgnoringMainLikeFunction() const { return IgnoreMainLikeFunctions; }
+
+  private:
+    SmallVector<Optional<NamingStyle>, 0> Styles;
+    bool IsActive;
+    bool IgnoreMainLikeFunctions;
+  };
+
 private:
   llvm::Optional<FailureInfo>
   GetDeclFailureInfo(const NamedDecl *Decl,
@@ -70,19 +90,16 @@ class IdentifierNamingCheck final : public RenamerClangTidyCheck {
   DiagInfo GetDiagInfo(const NamingCheckId &ID,
                        const NamingCheckFailure &Failure) const override;
 
-  ArrayRef<llvm::Optional<NamingStyle>>
-  getStyleForFile(StringRef FileName) const;
+  const FileStyle &getStyleForFile(StringRef FileName) const;
 
   /// Stores the style options as a vector, indexed by the specified \ref
   /// StyleKind, for a given directory.
-  mutable llvm::StringMap<std::vector<llvm::Optional<NamingStyle>>>
-      NamingStylesCache;
-  ArrayRef<llvm::Optional<NamingStyle>> MainFileStyle;
+  mutable llvm::StringMap<FileStyle> NamingStylesCache;
+  FileStyle *MainFileStyle;
   ClangTidyContext *const Context;
   const std::string CheckName;
   const bool GetConfigPerFile;
   const bool IgnoreFailedSplit;
-  const bool IgnoreMainLikeFunctions;
 };
 
 } // namespace readability
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style-disabled/.clang-tidy b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style-disabled/.clang-tidy
deleted file mode 100644
index 6a704df8b7b190..00000000000000
--- a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style-disabled/.clang-tidy
+++ /dev/null
@@ -1,5 +0,0 @@
-Checks: -readability-identifier-naming
-CheckOptions:
-  - key:             readability-identifier-naming.GlobalFunctionCase
-    value:           lower_case
-
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style-disabled/header.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style-disabled/header.h
deleted file mode 100644
index e863f70f7fcb25..00000000000000
--- a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style-disabled/header.h
+++ /dev/null
@@ -1,3 +0,0 @@
-void disabled_style_1();
-void disabledStyle2();
-void DISABLED_STYLE_3();
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style1/.clang-tidy b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style1/.clang-tidy
index 85af9672b61d3e..fc68c6df4a80a9 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style1/.clang-tidy
+++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style1/.clang-tidy
@@ -2,4 +2,6 @@ Checks: readability-identifier-naming
 CheckOptions:
   - key:             readability-identifier-naming.GlobalFunctionCase
     value:           lower_case
+  - key:             readability-identifier-naming.IgnoreMainLikeFunctions
+    value:           true
 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style1/header.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style1/header.h
index b170bed7c3f602..abbf7dfa483952 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style1/header.h
+++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style1/header.h
@@ -3,3 +3,5 @@
 void style_first_good();
 
 void styleFirstBad();
+
+int thisIsMainLikeIgnored(int argc, const char *argv[]) {}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style2/.clang-tidy b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style2/.clang-tidy
index b2e67ea9c87b5f..d77875c97e68e8 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style2/.clang-tidy
+++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style2/.clang-tidy
@@ -2,4 +2,6 @@ Checks: readability-identifier-naming
 CheckOptions:
   - key:             readability-identifier-naming.GlobalFunctionCase
     value:           UPPER_CASE
+  - key:             readability-identifier-naming.IgnoreMainLikeFunctions
+    value:           false
 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style2/header.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style2/header.h
index 6b78ad82a1fddc..9d3e846a080b9c 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style2/header.h
+++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style2/header.h
@@ -3,3 +3,5 @@
 void STYLE_SECOND_GOOD();
 
 void styleSecondBad();
+
+int thisIsMainLikeNotIgnored(int argc, const char *argv[]) {}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-identifier-naming-multiple-styles.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-identifier-naming-multiple-styles.cpp
index 54880d2ca3d0a0..0608305a82250e 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability-identifier-naming-multiple-styles.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability-identifier-naming-multiple-styles.cpp
@@ -13,6 +13,7 @@
 // RUN: readability-identifier-naming %t -- \
 // RUN:  -config='{ InheritParentConfig: true, CheckOptions: [ \
 // RUN:   {key: readability-identifier-naming.FunctionCase, value: camelBack}, \
+// RUN:   {key: readability-identifier-naming.ParameterCase, value: CamelCase}, \
 // RUN:   {key: readability-identifier-naming.GetConfigPerFile, value: true} \
 // RUN:  ]}' -header-filter='.*' -- -I%theaders
 
@@ -21,20 +22,14 @@
 // RUN: cp -R %S/Inputs/readability-identifier-naming/. %theaders
 // RUN: %check_clang_tidy -check-suffixes=DISABLED,SHARED -std=c++11 %s \
 // RUN: readability-identifier-naming %t -- \
-// RUN:  -config='{ InheritParentConfig: true, CheckOptions: [ \
+// RUN:  -config='{ InheritParentConfig: false, CheckOptions: [ \
 // RUN:   {key: readability-identifier-naming.FunctionCase, value: camelBack}, \
+// RUN:   {key: readability-identifier-naming.ParameterCase, value: CamelCase}, \
 // RUN:   {key: readability-identifier-naming.GetConfigPerFile, value: false} \
 // RUN:  ]}' -header-filter='.*' -- -I%theaders
 
-#include "global-style-disabled/header.h"
 #include "global-style1/header.h"
 #include "global-style2/header.h"
-// CHECK-MESSAGES-ENABLED-DAG: global-style1/header.h:5:6: warning: invalid case style for global function 'styleFirstBad'
-// CHECK-MESSAGES-ENABLED-DAG: global-style2/header.h:5:6: warning: invalid case style for global function 'styleSecondBad'
-// CHECK-MESSAGES-DISABLED-DAG: global-style1/header.h:3:6: warning: invalid case style for function 'style_first_good'
-// CHECK-MESSAGES-DISABLED-DAG: global-style2/header.h:3:6: warning: invalid case style for function 'STYLE_SECOND_GOOD'
-// CHECK-MESSAGES-DISABLED-DAG: global-style-disabled/header.h:1:6: warning: invalid case style for function 'disabled_style_1'
-// CHECK-MESSAGES-DISABLED-DAG: global-style-disabled/header.h:3:6: warning: invalid case style for function 'DISABLED_STYLE_3'
 
 void goodStyle() {
   style_first_good();
@@ -42,7 +37,7 @@ void goodStyle() {
   //      CHECK-FIXES-DISABLED: styleFirstGood();
   // CHECK-FIXES-DISABLED-NEXT: styleSecondGood();
 }
-// CHECK-MESSAGES-SHARED-DAG: :[[@LINE+1]]:6: warning: invalid case style for function 'bad_style'
+// CHECK-MESSAGES-SHARED: :[[@LINE+1]]:6: warning: invalid case style for function 'bad_style'
 void bad_style() {
   styleFirstBad();
   styleSecondBad();
@@ -54,11 +49,14 @@ void bad_style() {
 //  CHECK-FIXES-ENABLED-NEXT:   STYLE_SECOND_BAD();
 //   CHECK-FIXES-SHARED-NEXT: }
 
-void expectNoStyle() {
-  disabled_style_1();
-  disabledStyle2();
-  DISABLED_STYLE_3();
-  //      CHECK-FIXES-DISABLED: disabledStyle1();
-  // CHECK-FIXES-DISABLED-NEXT: disabledStyle2();
-  // CHECK-FIXES-DISABLED-NEXT: disabledStyle3();
-}
+// CHECK-MESSAGES-DISABLED: global-style1/header.h:3:6: warning: invalid case style for function 'style_first_good'
+// CHECK-MESSAGES-ENABLED:  global-style1/header.h:5:6: warning: invalid case style for global function 'styleFirstBad'
+// CHECK-MESSAGES-ENABLED:  global-style1/header.h:7:5: warning: invalid case style for global function 'thisIsMainLikeIgnored'
+// CHECK-MESSAGES-DISABLED: global-style1/header.h:7:31: warning: invalid case style for parameter 'argc'
+// CHECK-MESSAGES-DISABLED: global-style1/header.h:7:49: warning: invalid case style for parameter 'argv'
+
+// CHECK-MESSAGES-DISABLED: global-style2/header.h:3:6: warning: invalid case style for function 'STYLE_SECOND_GOOD'
+// CHECK-MESSAGES-ENABLED:  global-style2/header.h:5:6: warning: invalid case style for global function 'styleSecondBad'
+// CHECK-MESSAGES-ENABLED:  global-style2/header.h:7:5: warning: invalid case style for global function 'thisIsMainLikeNotIgnored'
+// CHECK-MESSAGES-SHARED:   global-style2/header.h:7:34: warning: invalid case style for parameter 'argc'
+// CHECK-MESSAGES-SHARED:   global-style2/header.h:7:52: warning: invalid case style for parameter 'argv'

From 264a6df353b7e7ac4269bc10467bd2a991db3173 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Thu, 5 Nov 2020 14:51:14 -0500
Subject: [PATCH 13/31] [ARM] remove cost-kind predicate for cmp/sel costs

This is the cmp/sel sibling to D90692.
Again, the reasoning is: the throughput cost is number of instructions/uops,
so size/blended costs are identical except in special cases (for example,
fdiv or other known-expensive machine instructions or things like MVE that
may require cracking into >1 uops).

We need to check for a valid (non-null) condition type parameter because
SimplifyCFG may pass nullptr for that (and so we will crash multiple
regression tests without that check). I'm not sure if passing nullptr makes
sense, but other code in the cost model does appear to check if that param
is set or not.

Differential Revision: https://reviews.llvm.org/D90781
---
 llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp   | 16 ++++++++--------
 .../CostModel/ARM/intrinsic-cost-kinds.ll        |  4 ++--
 llvm/test/Analysis/CostModel/ARM/select.ll       | 14 +++++++-------
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 04c4e18c60482f..d36e70f82059b5 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -839,12 +839,8 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
     return Cost;
   }
 
-  if (CostKind != TTI::TCK_RecipThroughput)
-    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
-                                     I);
-
   // On NEON a vector select gets lowered to vbsl.
-  if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
+  if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) {
     // Lowering of some vector selects is currently far from perfect.
     static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
       { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
@@ -865,9 +861,13 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
     return LT.first;
   }
 
-  int BaseCost = ST->hasMVEIntegerOps() && ValTy->isVectorTy()
-                     ? ST->getMVEVectorCostFactor()
-                     : 1;
+  // Default to cheap (throughput/size of 1 instruction) but adjust throughput
+  // for "multiple beats" potentially needed by MVE instructions.
+  int BaseCost = 1;
+  if (CostKind != TTI::TCK_CodeSize && ST->hasMVEIntegerOps() &&
+      ValTy->isVectorTy())
+    BaseCost = ST->getMVEVectorCostFactor();
+
   return BaseCost *
          BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
 }
diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
index 40ff13f0d1ce30..4e18acad161750 100644
--- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
@@ -153,7 +153,7 @@ define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x
 ;
 ; SIZE_LATE-LABEL: 'fshl'
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 562 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 564 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
@@ -217,7 +217,7 @@ define void @reduce_fmax(<16 x float> %va) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE_LATE-LABEL: 'reduce_fmax'
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 620 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 628 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
diff --git a/llvm/test/Analysis/CostModel/ARM/select.ll b/llvm/test/Analysis/CostModel/ARM/select.ll
index 2c5f893f272ee8..67a558003923cc 100644
--- a/llvm/test/Analysis/CostModel/ARM/select.ll
+++ b/llvm/test/Analysis/CostModel/ARM/select.ll
@@ -185,21 +185,21 @@ define void @selects() {
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-THUMB1-SIZE-LABEL: 'selects'

From a1229c9518672cedfe407654145bf7e2614769f7 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph@gmail.com>
Date: Thu, 5 Nov 2020 02:29:40 +0000
Subject: [PATCH 14/31] Always link the MLIR python bindings native extension
 to libMLIR.so

The Python bindings now require -DLLVM_BUILD_LLVM_DYLIB=ON to build.
This change is needed to be able to build multiple Python native
extensions without having each of them embedding a copy of MLIR, which
would make them incompatible with each other. Instead they should all
link to the same copy of MLIR.

Differential Revision: https://reviews.llvm.org/D90813
---
 mlir/lib/Bindings/Python/CMakeLists.txt | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/mlir/lib/Bindings/Python/CMakeLists.txt b/mlir/lib/Bindings/Python/CMakeLists.txt
index d4913bb4394735..296b915c58cf56 100644
--- a/mlir/lib/Bindings/Python/CMakeLists.txt
+++ b/mlir/lib/Bindings/Python/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT LLVM_BUILD_LLVM_DYLIB)
+  message(FATAL_ERROR "Building the MLIR Python bindings require -DLLVM_BUILD_LLVM_DYLIB=ON")
+endif()
+
 ################################################################################
 # Copy python source tree.
 ################################################################################
@@ -102,21 +106,10 @@ set_target_properties(
   MLIRBindingsPythonExtension PROPERTIES CXX_VISIBILITY_PRESET "hidden")
 
 set(PYEXT_DEPS)
-if(LLVM_BUILD_LLVM_DYLIB)
-  list(APPEND PYEXT_DEPS
-    # Depend on libMLIR.so first so that deps primarily come from the shared
-    # library.
-    MLIR
-  )
-endif()
-
-# Full static dependencies are also added and will augment what is in the
-# shared lib if needed (or in fully static builds, will result in mondo-built
-# extension).
 list(APPEND PYEXT_DEPS
-  # Depend only on the MLIR C-API.
-  MLIRCAPIIR
-  MLIRCAPIRegistration
+  # Depend on libMLIR.so first so that deps primarily come from the shared
+  # library.
+  MLIR
 )
 
 target_link_libraries(MLIRBindingsPythonExtension

From 24b3b2cd74888b97ead45c25af65417dd09abe78 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph@gmail.com>
Date: Thu, 5 Nov 2020 04:55:55 +0000
Subject: [PATCH 15/31] Refactor MLIR python extension CMake boilerplate in a
 reusable function (NFC)

Differential Revision: https://reviews.llvm.org/D90816
---
 .../modules/AddMLIRPythonExtension.cmake      | 117 ++++++++++++++++++
 mlir/lib/Bindings/Python/CMakeLists.txt       | 114 ++---------------
 2 files changed, 128 insertions(+), 103 deletions(-)
 create mode 100644 mlir/cmake/modules/AddMLIRPythonExtension.cmake

diff --git a/mlir/cmake/modules/AddMLIRPythonExtension.cmake b/mlir/cmake/modules/AddMLIRPythonExtension.cmake
new file mode 100644
index 00000000000000..5741f512ab9f40
--- /dev/null
+++ b/mlir/cmake/modules/AddMLIRPythonExtension.cmake
@@ -0,0 +1,117 @@
+################################################################################
+# Build python extension
+################################################################################
+function(add_mlir_python_extension libname extname)
+  # Declares the pybind11 extension target `libname`, whose output module file
+  # is named `extname` (e.g. "_mlir"). Keyword arguments: INSTALL_DIR <dir>
+  # (optional install destination), SOURCES <files...> (required) and
+  # LINK_LIBS <libs...> (linked after libMLIR).
+  cmake_parse_arguments(ARG "" "INSTALL_DIR" "SOURCES;LINK_LIBS" ${ARGN})
+  if (ARG_UNPARSED_ARGUMENTS)
+    message(FATAL_ERROR " Unhandled arguments to add_mlir_python_extension(${libname}, ... : ${ARG_UNPARSED_ARGUMENTS}")
+  endif()
+  if ("${ARG_SOURCES}" STREQUAL "")
+    message(FATAL_ERROR " Missing SOURCES argument to add_mlir_python_extension(${libname}, ...")
+  endif()
+  if(NOT LLVM_BUILD_LLVM_DYLIB)
+    message(FATAL_ERROR "Building MLIR Python extension require -DLLVM_BUILD_LLVM_DYLIB=ON")
+  endif()
+
+  # Normally on unix-like platforms, extensions are built as "MODULE" libraries
+  # and do not explicitly link to the python shared object. This allows for
+  # some greater deployment flexibility since the extension will bind to
+  # symbols in the python interpreter on load. However, it also keeps the
+  # linker from erroring on undefined symbols, leaving this to (usually obtuse)
+  # runtime errors. Building in "SHARED" mode with an explicit link to the
+  # python libraries allows us to build with the expectation of no undefined
+  # symbols, which is better for development. Note that not all python
+  # configurations provide build-time libraries to link against, in which
+  # case, we fall back to MODULE linking.
+  if(PYTHON_LIBRARIES STREQUAL "" OR NOT MLIR_PYTHON_BINDINGS_VERSION_LOCKED)
+    set(PYEXT_LINK_MODE MODULE)
+    set(PYEXT_LIBADD)
+  else()
+    set(PYEXT_LINK_MODE SHARED)
+    set(PYEXT_LIBADD ${PYTHON_LIBRARIES})
+  endif()
+
+  # The actual extension library produces a shared-object or DLL and has
+  # sources that must be compiled in accordance with pybind11 needs (RTTI and
+  # exceptions).
+  add_library(${libname} ${PYEXT_LINK_MODE}
+    ${ARG_SOURCES}
+  )
+
+  target_include_directories(${libname} PRIVATE
+    "${PYTHON_INCLUDE_DIRS}"
+    "${pybind11_INCLUDE_DIRS}"
+  )
+
+  # The extension itself must be compiled with RTTI and exceptions enabled.
+  # Also, some warning classes triggered by pybind11 are disabled.
+  target_compile_options(${libname} PRIVATE
+    $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
+      # Enable RTTI and exceptions.
+      -frtti -fexceptions
+      # Noisy pybind warnings
+      -Wno-unused-value
+      -Wno-covered-switch-default
+    >
+    $<$<CXX_COMPILER_ID:MSVC>:
+      # Enable RTTI and exceptions.
+      /EHsc /GR>
+  )
+
+  # Configure the output to match python expectations.
+  set_target_properties(${libname} PROPERTIES
+    # Build-time RPath layouts require to be a directory one up from the
+    # binary root.
+    # TODO: Don't reference the LLVM_BINARY_DIR here: the invariant is that
+    # the output directory must be at the same level of the lib directory
+    # where libMLIR.so is installed. This is presently not optimal from a
+    # project separation perspective and a discussion on how to better
+    # segment MLIR libraries needs to happen.
+    LIBRARY_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/python
+    # Name the module file after the caller-supplied extension name.
+    OUTPUT_NAME "${extname}"
+    PREFIX "${PYTHON_MODULE_PREFIX}"
+    SUFFIX "${PYTHON_MODULE_SUFFIX}${PYTHON_MODULE_EXTENSION}"
+  )
+
+  # pybind11 requires binding code to be compiled with -fvisibility=hidden
+  # For static linkage, better code can be generated if the entire project
+  # compiles that way, but that is not enforced here. Instead, include a linker
+  # script that explicitly hides anything but the PyInit_* symbols, allowing gc
+  # to take place.
+  set_target_properties(${libname} PROPERTIES CXX_VISIBILITY_PRESET "hidden")
+
+  target_link_libraries(${libname}
+    PRIVATE
+    MLIR # Always link to libMLIR.so
+    ${ARG_LINK_LIBS}
+    ${PYEXT_LIBADD}
+  )
+
+  llvm_setup_rpath(${libname})
+
+  ################################################################################
+  # Install (only when the INSTALL_DIR keyword argument was given)
+  ################################################################################
+  if (ARG_INSTALL_DIR)
+    install(TARGETS ${libname}
+      COMPONENT ${libname}
+      LIBRARY DESTINATION ${ARG_INSTALL_DIR}
+      ARCHIVE DESTINATION ${ARG_INSTALL_DIR}
+      # NOTE: Even on DLL-platforms, extensions go in the lib directory tree.
+      RUNTIME DESTINATION ${ARG_INSTALL_DIR}
+    )
+  endif()
+
+  if (NOT LLVM_ENABLE_IDE)
+    add_llvm_install_targets(
+      install-${libname}
+      DEPENDS ${libname}
+      COMPONENT ${libname})
+  endif()
+
+endfunction()
diff --git a/mlir/lib/Bindings/Python/CMakeLists.txt b/mlir/lib/Bindings/Python/CMakeLists.txt
index 296b915c58cf56..917a339aeed581 100644
--- a/mlir/lib/Bindings/Python/CMakeLists.txt
+++ b/mlir/lib/Bindings/Python/CMakeLists.txt
@@ -2,6 +2,8 @@ if(NOT LLVM_BUILD_LLVM_DYLIB)
   message(FATAL_ERROR "Building the MLIR Python bindings require -DLLVM_BUILD_LLVM_DYLIB=ON")
 endif()
 
+include(AddMLIRPythonExtension)
+
 ################################################################################
 # Copy python source tree.
 ################################################################################
@@ -29,108 +31,18 @@ foreach(PY_SRC_FILE ${PY_SRC_FILES})
 endforeach()
 
 ################################################################################
-# Build python extension
+# Build core python extension
 ################################################################################
-
-# Normally on unix-like platforms, extensions are built as "MODULE" libraries
-# and do not explicitly link to the python shared object. This allows for
-# some greater deployment flexibility since the extension will bind to
-# symbols in the python interpreter on load. However, it also keeps the
-# linker from erroring on undefined symbols, leaving this to (usually obtuse)
-# runtime errors. Building in "SHARED" mode with an explicit link to the
-# python libraries allows us to build with the expectation of no undefined
-# symbols, which is better for development. Note that not all python
-# configurations provide build-time libraries to link against, in which
-# case, we fall back to MODULE linking.
-if(PYTHON_LIBRARIES STREQUAL "" OR NOT MLIR_PYTHON_BINDINGS_VERSION_LOCKED)
-  set(PYEXT_LINK_MODE MODULE)
-  set(PYEXT_LIBADD)
-else()
-  set(PYEXT_LINK_MODE SHARED)
-  set(PYEXT_LIBADD ${PYTHON_LIBRARIES})
-endif()
-
-# The actual extension library produces a shared-object or DLL and has
-# sources that must be compiled in accordance with pybind11 needs (RTTI and
-# exceptions).
-# TODO: Link the libraries separately once a helper function is available
-# to more generically add a pybind11 compliant library.
-add_library(MLIRBindingsPythonExtension ${PYEXT_LINK_MODE}
-  MainModule.cpp
-  IRModules.cpp
-  PybindUtils.cpp
-)
-
-target_include_directories(MLIRBindingsPythonExtension PRIVATE
-  "${PYTHON_INCLUDE_DIRS}"
-  "${pybind11_INCLUDE_DIRS}")
-
-# The extension itself must be compiled with RTTI and exceptions enabled.
-# Also, some warning classes triggered by pybind11 are disabled.
-target_compile_options(MLIRBindingsPythonExtension PRIVATE
-  $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-    # Enable RTTI and exceptions.
-    -frtti -fexceptions
-    # Noisy pybind warnings
-    -Wno-unused-value
-    -Wno-covered-switch-default
-  >
-  $<$<CXX_COMPILER_ID:MSVC>:
-    # Enable RTTI and exceptions.
-    /EHsc /GR>
-)
-
-# Configure the output to match python expectations.
-set_target_properties(
-  MLIRBindingsPythonExtension PROPERTIES
-    # Build-time RPath layouts require to be a directory one up from the
-    # binary root.
-    # TODO: Don't reference the LLVM_BINARY_DIR here: the invariant is that
-    # the output directory must be at the same level of the lib directory
-    # where libMLIR.so is installed. This is presently not optimal from a
-    # project separation perspective and a discussion on how to better
-    # segment MLIR libraries needs to happen.
-    LIBRARY_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/python
-    OUTPUT_NAME "_mlir"
-    PREFIX "${PYTHON_MODULE_PREFIX}"
-    SUFFIX "${PYTHON_MODULE_SUFFIX}${PYTHON_MODULE_EXTENSION}"
-)
-
-# pybind11 requires binding code to be compiled with -fvisibility=hidden
-# For static linkage, better code can be generated if the entire project
-# compiles that way, but that is not enforced here. Instead, include a linker
-# script that explicitly hides anything but the PyInit_* symbols, allowing gc
-# to take place.
-# TODO: Add a Windows .def file and figure out the right thing to do on MacOS.
-set_target_properties(
-  MLIRBindingsPythonExtension PROPERTIES CXX_VISIBILITY_PRESET "hidden")
-
-set(PYEXT_DEPS)
-list(APPEND PYEXT_DEPS
-  # Depend on libMLIR.so first so that deps primarily come from the shared
-  # library.
-  MLIR
+add_mlir_python_extension(MLIRBindingsPythonExtension _mlir
+  INSTALL_DIR
+    python
+  SOURCES
+    MainModule.cpp
+    IRModules.cpp
+    Pass.cpp
+    PybindUtils.cpp
 )
-
-target_link_libraries(MLIRBindingsPythonExtension
-  PRIVATE
-  ${PYEXT_DEPS}
-  ${PYEXT_LIBADD}
-)
-
 add_dependencies(MLIRBindingsPythonExtension MLIRBindingsPythonSources)
-llvm_setup_rpath(MLIRBindingsPythonExtension)
-
-################################################################################
-# Install
-################################################################################
-
-install(TARGETS MLIRBindingsPythonExtension
-  COMPONENT MLIRBindingsPythonExtension
-  LIBRARY DESTINATION python
-  ARCHIVE DESTINATION python
-  # NOTE: Even on DLL-platforms, extensions go in the lib directory tree.
-  RUNTIME DESTINATION python)
 
 # Note that we copy from the source tree just like for headers because
 # it will not be polluted with py_cache runtime artifacts (from testing and
@@ -143,10 +55,6 @@ install(
 )
 
 if (NOT LLVM_ENABLE_IDE)
-  add_llvm_install_targets(
-    install-MLIRBindingsPythonExtension
-    DEPENDS MLIRBindingsPythonExtension
-    COMPONENT MLIRBindingsPythonExtension)
   add_llvm_install_targets(
     install-MLIRBindingsPythonSources
     DEPENDS MLIRBindingsPythonSources

From 7f977086ebd1eee91e3816ca9d684e53099ad366 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph@gmail.com>
Date: Thu, 5 Nov 2020 20:04:52 +0000
Subject: [PATCH 16/31] Fix MLIR Python bindings build (remove nonexistent
 source from CMake list, NFC)

---
 mlir/lib/Bindings/Python/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mlir/lib/Bindings/Python/CMakeLists.txt b/mlir/lib/Bindings/Python/CMakeLists.txt
index 917a339aeed581..447b819c4aca29 100644
--- a/mlir/lib/Bindings/Python/CMakeLists.txt
+++ b/mlir/lib/Bindings/Python/CMakeLists.txt
@@ -39,7 +39,6 @@ add_mlir_python_extension(MLIRBindingsPythonExtension _mlir
   SOURCES
     MainModule.cpp
     IRModules.cpp
-    Pass.cpp
     PybindUtils.cpp
 )
 add_dependencies(MLIRBindingsPythonExtension MLIRBindingsPythonSources)

From 72dcd902e7bb9c8d9814d75c69fb00a57570b339 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph@gmail.com>
Date: Thu, 5 Nov 2020 20:01:32 +0000
Subject: [PATCH 17/31] Add a custom MLIRBindingsPythonExtension cmake target
 to group all Python bindings (NFC)

This target will depend on each individual extension and represent "all"
Python bindings in the repo. User projects can get finer-grained control by
depending directly on some individual targets as needed.
---
 mlir/lib/Bindings/Python/CMakeLists.txt | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Bindings/Python/CMakeLists.txt b/mlir/lib/Bindings/Python/CMakeLists.txt
index 447b819c4aca29..04762b46a315bd 100644
--- a/mlir/lib/Bindings/Python/CMakeLists.txt
+++ b/mlir/lib/Bindings/Python/CMakeLists.txt
@@ -3,7 +3,7 @@ if(NOT LLVM_BUILD_LLVM_DYLIB)
 endif()
 
 include(AddMLIRPythonExtension)
-
+add_custom_target(MLIRBindingsPythonExtension)
 ################################################################################
 # Copy python source tree.
 ################################################################################
@@ -18,6 +18,7 @@ set(PY_SRC_FILES
 add_custom_target(MLIRBindingsPythonSources ALL
   DEPENDS ${PY_SRC_FILES}
 )
+add_dependencies(MLIRBindingsPythonExtension MLIRBindingsPythonSources)
 
 foreach(PY_SRC_FILE ${PY_SRC_FILES})
   set(PY_DEST_FILE "${PROJECT_BINARY_DIR}/python/${PY_SRC_FILE}")
@@ -33,7 +34,7 @@ endforeach()
 ################################################################################
 # Build core python extension
 ################################################################################
-add_mlir_python_extension(MLIRBindingsPythonExtension _mlir
+add_mlir_python_extension(MLIRCoreBindingsPythonExtension _mlir
   INSTALL_DIR
     python
   SOURCES
@@ -41,7 +42,7 @@ add_mlir_python_extension(MLIRBindingsPythonExtension _mlir
     IRModules.cpp
     PybindUtils.cpp
 )
-add_dependencies(MLIRBindingsPythonExtension MLIRBindingsPythonSources)
+add_dependencies(MLIRBindingsPythonExtension MLIRCoreBindingsPythonExtension)
 
 # Note that we copy from the source tree just like for headers because
 # it will not be polluted with py_cache runtime artifacts (from testing and

From 738d981eb6b4417bbe4ebab9e3ee43c7699771c3 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne@apple.com>
Date: Thu, 5 Nov 2020 15:13:27 -0500
Subject: [PATCH 18/31] [libc++] Update the CI Dockerfile

Remove Phabricator, which isn't needed anymore since we don't report
the job results ourselves. Also, install python3-sphinx instead of
sphinx-doc, since the latter doesn't provide the sphinx-build binary.
---
 libcxx/utils/ci/Dockerfile | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/libcxx/utils/ci/Dockerfile b/libcxx/utils/ci/Dockerfile
index f0de9f64ba9133..9085573086471e 100644
--- a/libcxx/utils/ci/Dockerfile
+++ b/libcxx/utils/ci/Dockerfile
@@ -39,20 +39,16 @@
 
 FROM ubuntu:bionic
 
-RUN apt-get update
-RUN apt-get install -y bash curl
+# Make sure apt-get doesn't try to prompt for stuff like our time zone, etc.
+ENV DEBIAN_FRONTEND=noninteractive
 
-# Install various tools used by the build or the test suite
-RUN apt-get install -y ninja-build python3 sphinx-doc git
+RUN apt-get update && apt-get install -y bash curl
 
-# Install the Phabricator Python module to allow uploading results to Phabricator.
-# This MUST be done before installing a recent GCC, otherwise /usr/bin/gcc is
-# overwritten to an older GCC.
-RUN apt-get install -y python3-pip
-RUN pip3 install phabricator
+# Install various tools used by the build or the test suite
+RUN apt-get update && apt-get install -y ninja-build python3 python3-sphinx git
 
 # Install the most recently released LLVM
-RUN apt-get install -y lsb-release wget software-properties-common
+RUN apt-get update && apt-get install -y lsb-release wget software-properties-common
 RUN bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
 RUN ln -s $(find /usr/bin -regex '.+/clang\+\+-[a-zA-Z0-9.]+') /usr/bin/clang++
 RUN ln -s $(find /usr/bin -regex '.+/clang-[a-zA-Z0-9.]+') /usr/bin/clang
@@ -70,7 +66,7 @@ RUN rm /tmp/install-cmake.sh
 
 # Change the user to a non-root user, since some of the libc++ tests
 # (e.g. filesystem) require running as non-root. Also setup passwordless sudo.
-RUN apt-get install -y sudo
+RUN apt-get update && apt-get install -y sudo
 RUN echo "ALL ALL = (ALL) NOPASSWD: ALL" >> /etc/sudoers
 RUN useradd --create-home libcxx-builder
 USER libcxx-builder

From f7e4f041d65280258027ab8e2b55f497b5d1d56e Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne@apple.com>
Date: Thu, 5 Nov 2020 14:34:29 -0500
Subject: [PATCH 19/31] [libc++] Add a CI job to build the documentation

At the same time, fix an issue that broke the documentation since 2eadbc86142b.
---
 libcxx/docs/index.rst                  |  1 -
 libcxx/utils/ci/buildkite-pipeline.yml | 11 +++++++++++
 libcxx/utils/ci/run-buildbot.sh        |  7 +++++++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst
index 75d5b8226b4c85..f1c0906f4c4dfc 100644
--- a/libcxx/docs/index.rst
+++ b/libcxx/docs/index.rst
@@ -154,7 +154,6 @@ Design Documents
 .. toctree::
    :maxdepth: 1
 
-   DesignDocs/AvailabilityMarkup
    DesignDocs/DebugMode
    DesignDocs/CapturingConfigInfo
    DesignDocs/ABIVersioning
diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml
index 05d670368fe945..faa108156dee40 100644
--- a/libcxx/utils/ci/buildkite-pipeline.yml
+++ b/libcxx/utils/ci/buildkite-pipeline.yml
@@ -202,6 +202,17 @@ steps:
         - exit_status: -1  # Agent was lost
           limit: 2
 
+  - label: "Documentation"
+    command: "libcxx/utils/ci/run-buildbot.sh documentation"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+
   - label: "Legacy standalone build"
     command: "libcxx/utils/ci/run-buildbot.sh legacy-standalone"
     artifact_paths:
diff --git a/libcxx/utils/ci/run-buildbot.sh b/libcxx/utils/ci/run-buildbot.sh
index 1f0b3c7c71b37e..5502cde7c7046e 100755
--- a/libcxx/utils/ci/run-buildbot.sh
+++ b/libcxx/utils/ci/run-buildbot.sh
@@ -169,6 +169,13 @@ benchmarks)
     generate-cmake
     check-cxx-benchmarks
 ;;
+documentation)
+  export CC=clang
+  export CXX=clang++
+  generate-cmake -DLLVM_ENABLE_SPHINX=ON
+  echo "+++ Generating documentation"
+  ninja -C "${BUILD_DIR}" docs-libcxx-html
+;;
 unified-standalone)
     export CC=clang
     export CXX=clang++

From 1af037f643fc5499f83d92e5aec199950871d475 Mon Sep 17 00:00:00 2001
From: Albion Fung <conanap@lep82435v.canlab.ibm.com>
Date: Thu, 5 Nov 2020 14:55:33 -0500
Subject: [PATCH 20/31] [PowerPC] Correct cpsgn's behaviour on PowerPC to match
 that of the ABI

This patch fixes the reversed behaviour exhibited by cpsgn on PPC. It now matches the ABI.

Differential Revision: https://reviews.llvm.org/D84962
---
 clang/lib/Headers/altivec.h           |  4 +--
 clang/test/CodeGen/builtins-ppc-vsx.c | 44 +++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 2df420d640f107..24d600e952014d 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -2996,12 +2996,12 @@ static __inline__ void __ATTRS_o_ai vec_xst_len_r(vector unsigned char __a,
 #ifdef __VSX__
 static __inline__ vector float __ATTRS_o_ai vec_cpsgn(vector float __a,
                                                       vector float __b) {
-  return __builtin_vsx_xvcpsgnsp(__a, __b);
+  return __builtin_vsx_xvcpsgnsp(__b, __a);
 }
 
 static __inline__ vector double __ATTRS_o_ai vec_cpsgn(vector double __a,
                                                        vector double __b) {
-  return __builtin_vsx_xvcpsgndp(__a, __b);
+  return __builtin_vsx_xvcpsgndp(__b, __a);
 }
 #endif
 
diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c
index d99b0c1e8f413f..18aa7d22fa3f30 100644
--- a/clang/test/CodeGen/builtins-ppc-vsx.c
+++ b/clang/test/CodeGen/builtins-ppc-vsx.c
@@ -1850,3 +1850,47 @@ void testVectorInt128Pack(){
 // CHECK-NEXT-LE: %{{[0-9]+}} = extractelement <2 x i64> %[[V1]], i32 0
 
 }
+
+void test_vector_cpsgn_float(vector float a, vector float b) {
+// CHECK-LABEL: test_vector_cpsgn_float
+// CHECK-DAG: load{{.*}}%__a
+// CHECK-DAG: load{{.*}}%__b
+// CHECK-NOT: SEPARATOR
+// CHECK-DAG: [[RA:%[0-9]+]] = load <4 x float>, <4 x float>* %__a.addr
+// CHECK-DAG: [[RB:%[0-9]+]] = load <4 x float>, <4 x float>* %__b.addr
+// CHECK-NEXT: call <4 x float> @llvm.copysign.v4f32(<4 x float> [[RB]], <4 x float> [[RA]])
+  vec_cpsgn(a, b);
+}
+
+void test_vector_cpsgn_double(vector double a, vector double b) {
+// CHECK-LABEL: test_vector_cpsgn_double
+// CHECK-DAG: load{{.*}}%__a
+// CHECK-DAG: load{{.*}}%__b
+// CHECK-NOT: SEPARATOR
+// CHECK-DAG: [[RA:%[0-9]+]] = load <2 x double>, <2 x double>* %__a.addr
+// CHECK-DAG: [[RB:%[0-9]+]] = load <2 x double>, <2 x double>* %__b.addr
+// CHECK-NEXT: call <2 x double> @llvm.copysign.v2f64(<2 x double> [[RB]], <2 x double> [[RA]])
+  vec_cpsgn(a, b);
+}
+
+void test_builtin_xvcpsgnsp(vector float a, vector float b) {
+// CHECK-LABEL: test_builtin_xvcpsgnsp
+// CHECK-DAG: load{{.*}}%a
+// CHECK-DAG: load{{.*}}%b
+// CHECK-NOT: SEPARATOR
+// CHECK-DAG: [[RA:%[0-9]+]] = load <4 x float>, <4 x float>* %a.addr
+// CHECK-DAG: [[RB:%[0-9]+]] = load <4 x float>, <4 x float>* %b.addr
+// CHECK-NEXT: call <4 x float> @llvm.copysign.v4f32(<4 x float> [[RA]], <4 x float> [[RB]])
+  __builtin_vsx_xvcpsgnsp(a, b);
+}
+
+void test_builtin_xvcpsgndp(vector double a, vector double b) {
+// CHECK-LABEL: test_builtin_xvcpsgndp
+// CHECK-DAG: load{{.*}}%a
+// CHECK-DAG: load{{.*}}%b
+// CHECK-NOT: SEPARATOR
+// CHECK-DAG: [[RA:%[0-9]+]] = load <2 x double>, <2 x double>* %a.addr
+// CHECK-DAG: [[RB:%[0-9]+]] = load <2 x double>, <2 x double>* %b.addr
+// CHECK-NEXT: call <2 x double> @llvm.copysign.v2f64(<2 x double> [[RA]], <2 x double> [[RB]])
+  __builtin_vsx_xvcpsgndp(a, b);
+}

From d2e7dca5ca92c655e451d6fcb806df38d7f2d56b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Ole=20H=C3=BCser?= <j.hueser@beckhoff.de>
Date: Thu, 5 Nov 2020 11:01:51 -0800
Subject: [PATCH 21/31] [CodeGen] Fix Bug 47499: __unaligned extension
 inconsistent behaviour with C and C++

For the language C++ the keyword __unaligned (a Microsoft extension) had no effect on pointers.

The reason there was a difference between C and C++ for the keyword __unaligned:
For C, the method getAsCXXRecordDecl() returns nullptr. That guarantees that hasUnaligned() is called.
If the language is C++, it is not guaranteed that hasUnaligned() is called and evaluated.

Here are some links:

The Bug: https://bugs.llvm.org/show_bug.cgi?id=47499
Thread on the cfe-dev mailing list: http://lists.llvm.org/pipermail/cfe-dev/2020-September/066783.html
Diff that introduced the check hasUnaligned() in getNaturalTypeAlignment(): https://reviews.llvm.org/D30166

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D90630
---
 clang/lib/CodeGen/CodeGenModule.cpp        | 13 +++++----
 clang/test/CodeGen/unaligned-struct-copy.c | 32 ++++++++++++++++++++++
 2 files changed, 39 insertions(+), 6 deletions(-)
 create mode 100644 clang/test/CodeGen/unaligned-struct-copy.c

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 24c067539f839f..ccf5d24bb9ebd2 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -6197,16 +6197,17 @@ CharUnits CodeGenModule::getNaturalTypeAlignment(QualType T,
     *BaseInfo = LValueBaseInfo(AlignmentSource::Type);
 
   CharUnits Alignment;
-  // For C++ class pointees, we don't know whether we're pointing at a
-  // base or a complete object, so we generally need to use the
-  // non-virtual alignment.
   const CXXRecordDecl *RD;
-  if (forPointeeType && !AlignForArray && (RD = T->getAsCXXRecordDecl())) {
+  if (T.getQualifiers().hasUnaligned()) {
+    Alignment = CharUnits::One();
+  } else if (forPointeeType && !AlignForArray &&
+             (RD = T->getAsCXXRecordDecl())) {
+    // For C++ class pointees, we don't know whether we're pointing at a
+    // base or a complete object, so we generally need to use the
+    // non-virtual alignment.
     Alignment = getClassPointerAlignment(RD);
   } else {
     Alignment = getContext().getTypeAlignInChars(T);
-    if (T.getQualifiers().hasUnaligned())
-      Alignment = CharUnits::One();
   }
 
   // Cap to the global maximum type alignment unless the alignment
diff --git a/clang/test/CodeGen/unaligned-struct-copy.c b/clang/test/CodeGen/unaligned-struct-copy.c
new file mode 100644
index 00000000000000..45a9670bc23473
--- /dev/null
+++ b/clang/test/CodeGen/unaligned-struct-copy.c
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -xc   -O2 -triple thumbv7a-unknown-windows-eabi -fms-extensions -emit-llvm < %s | FileCheck %s
+// RUN: %clang_cc1 -xc++ -O2 -triple thumbv7a-unknown-windows-eabi -fms-extensions -emit-llvm < %s | FileCheck %s
+// RUN: %clang_cc1 -xc   -O2 -triple x86_64-unknown-linux-gnu -fms-extensions -emit-llvm < %s | FileCheck %s
+// RUN: %clang_cc1 -xc++ -O2 -triple x86_64-unknown-linux-gnu -fms-extensions -emit-llvm < %s | FileCheck %s
+
+struct S1 {
+  unsigned long x;
+};
+
+// CHECK: define
+// CHECK-SAME: void
+// CHECK-SAME: test1
+
+void test1(__unaligned struct S1 *out) {
+  // CHECK: store
+  // CHECK-SAME: align 1
+  out->x = 5;
+  // CHECK: ret void
+}
+
+// CHECK: define
+// CHECK-SAME: void
+// CHECK-SAME: test2
+
+void test2(__unaligned struct S1 *out, __unaligned struct S1 *in) {
+  // CHECK: load
+  // CHECK-SAME: align 1
+  // CHECK: store
+  // CHECK-SAME: align 1
+  *out = *in;
+  // CHECK: ret void
+}

From f55247456e219bb64521c3a73c618267d5bf671c Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Thu, 5 Nov 2020 09:58:02 -0800
Subject: [PATCH 22/31] Fix bugs in EOL marking in command line tokenizers

Add unit tests for this behavior, since the integration test for
clang-cl did not catch these bugs.

Fixes PR47604

Differential Revision: https://reviews.llvm.org/D90866
---
 llvm/lib/Support/CommandLine.cpp           |  12 +--
 llvm/unittests/Support/CommandLineTest.cpp | 100 ++++++++++++++-------
 2 files changed, 74 insertions(+), 38 deletions(-)

diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index e53421a277f1f2..a185863fddb96f 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -832,7 +832,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
     // Consume runs of whitespace.
     if (Token.empty()) {
       while (I != E && isWhitespace(Src[I])) {
-        // Mark the end of lines in response files
+        // Mark the end of lines in response files.
         if (MarkEOLs && Src[I] == '\n')
           NewArgv.push_back(nullptr);
         ++I;
@@ -869,6 +869,9 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
     if (isWhitespace(C)) {
       if (!Token.empty())
         NewArgv.push_back(Saver.save(StringRef(Token)).data());
+      // Mark the end of lines in response files.
+      if (MarkEOLs && C == '\n')
+        NewArgv.push_back(nullptr);
       Token.clear();
       continue;
     }
@@ -880,9 +883,6 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
   // Append the last token after hitting EOF with no whitespace.
   if (!Token.empty())
     NewArgv.push_back(Saver.save(StringRef(Token)).data());
-  // Mark the end of response files
-  if (MarkEOLs)
-    NewArgv.push_back(nullptr);
 }
 
 /// Backslashes are interpreted in a rather complicated way in the Windows-style
@@ -956,11 +956,11 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver,
         ++I;
       StringRef NormalChars = Src.slice(Start, I);
       if (I >= E || isWhitespaceOrNull(Src[I])) {
-        if (I < E && Src[I] == '\n')
-          MarkEOL();
         // No special characters: slice out the substring and start the next
         // token. Copy the string if the caller asks us to.
         AddToken(AlwaysCopy ? Saver.save(NormalChars) : NormalChars);
+        if (I < E && Src[I] == '\n')
+          MarkEOL();
       } else if (Src[I] == '\"') {
         Token += NormalChars;
         State = QUOTED;
diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp
index c02e9e59a5e0fd..a05f3894ef05db 100644
--- a/llvm/unittests/Support/CommandLineTest.cpp
+++ b/llvm/unittests/Support/CommandLineTest.cpp
@@ -199,14 +199,15 @@ typedef void ParserFunction(StringRef Source, StringSaver &Saver,
                             bool MarkEOLs);
 
 void testCommandLineTokenizer(ParserFunction *parse, StringRef Input,
-                              const char *const Output[], size_t OutputSize) {
+                              ArrayRef<const char *> Output,
+                              bool MarkEOLs = false) {
   SmallVector<const char *, 0> Actual;
   BumpPtrAllocator A;
   StringSaver Saver(A);
-  parse(Input, Saver, Actual, /*MarkEOLs=*/false);
-  EXPECT_EQ(OutputSize, Actual.size());
+  parse(Input, Saver, Actual, MarkEOLs);
+  EXPECT_EQ(Output.size(), Actual.size());
   for (unsigned I = 0, E = Actual.size(); I != E; ++I) {
-    if (I < OutputSize) {
+    if (I < Output.size()) {
       EXPECT_STREQ(Output[I], Actual[I]);
     }
   }
@@ -219,8 +220,7 @@ TEST(CommandLineTest, TokenizeGNUCommandLine) {
   const char *const Output[] = {
       "foo bar",     "foo bar",   "foo bar",          "foo\\bar",
       "-DFOO=bar()", "foobarbaz", "C:\\src\\foo.cpp", "C:srcfoo.cpp"};
-  testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeWindowsCommandLine1) {
@@ -228,75 +228,85 @@ TEST(CommandLineTest, TokenizeWindowsCommandLine1) {
       R"(a\b c\\d e\\"f g" h\"i j\\\"k "lmn" o pqr "st \"u" \v)";
   const char *const Output[] = { "a\\b", "c\\\\d", "e\\f g", "h\"i", "j\\\"k",
                                  "lmn", "o", "pqr", "st \"u", "\\v" };
-  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeWindowsCommandLine2) {
   const char Input[] = "clang -c -DFOO=\"\"\"ABC\"\"\" x.cpp";
   const char *const Output[] = { "clang", "-c", "-DFOO=\"ABC\"", "x.cpp"};
-  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeWindowsCommandLineQuotedLastArgument) {
   const char Input1[] = R"(a b c d "")";
   const char *const Output1[] = {"a", "b", "c", "d", ""};
-  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input1, Output1,
-                           array_lengthof(Output1));
+  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input1, Output1);
   const char Input2[] = R"(a b c d ")";
   const char *const Output2[] = {"a", "b", "c", "d"};
-  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input2, Output2,
-                           array_lengthof(Output2));
+  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input2, Output2);
+}
+
+TEST(CommandLineTest, TokenizeAndMarkEOLs) {
+  // Clang uses EOL marking in response files to support options that consume
+  // the rest of the arguments on the current line, but do not consume arguments
+  // from subsequent lines. For example, given this rsp file's contents:
+  // /c /Zi /O2
+  // /Oy- /link /debug /opt:ref
+  // /Zc:ThreadsafeStatics-
+  //
+  // clang-cl needs to treat "/debug /opt:ref" as linker flags, and everything
+  // else as compiler flags. The tokenizer inserts nullptr sentinels into the
+  // output so that clang-cl can find the end of the current line.
+  const char Input[] = "clang -Xclang foo\n\nfoo\"bar\"baz\n x.cpp\n";
+  const char *const Output[] = {"clang", "-Xclang", "foo",
+                                nullptr, nullptr,   "foobarbaz",
+                                nullptr, "x.cpp",   nullptr};
+  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output,
+                           /*MarkEOLs=*/true);
+  testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output,
+                           /*MarkEOLs=*/true);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile1) {
   const char *Input = "\\";
   const char *const Output[] = { "\\" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile2) {
   const char *Input = "\\abc";
   const char *const Output[] = { "abc" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile3) {
   const char *Input = "abc\\";
   const char *const Output[] = { "abc\\" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile4) {
   const char *Input = "abc\\\n123";
   const char *const Output[] = { "abc123" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile5) {
   const char *Input = "abc\\\r\n123";
   const char *const Output[] = { "abc123" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile6) {
   const char *Input = "abc\\\n";
   const char *const Output[] = { "abc" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile7) {
   const char *Input = "abc\\\r\n";
   const char *const Output[] = { "abc" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile8) {
@@ -318,15 +328,13 @@ TEST(CommandLineTest, TokenizeConfigFile9) {
 TEST(CommandLineTest, TokenizeConfigFile10) {
   const char *Input = "\\\nabc";
   const char *const Output[] = { "abc" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile11) {
   const char *Input = "\\\r\nabc";
   const char *const Output[] = { "abc" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, AliasesWithArguments) {
@@ -962,6 +970,34 @@ TEST(CommandLineTest, ResponseFileRelativePath) {
               testing::Pointwise(StringEquality(), {"test/test", "-flag"}));
 }
 
+TEST(CommandLineTest, ResponseFileEOLs) {
+  vfs::InMemoryFileSystem FS;
+#ifdef _WIN32
+  const char *TestRoot = "C:\\";
+#else
+  const char *TestRoot = "//net";
+#endif
+  FS.setCurrentWorkingDirectory(TestRoot);
+  FS.addFile("eols.rsp", 0,
+             MemoryBuffer::getMemBuffer("-Xclang -Wno-whatever\n input.cpp"));
+  SmallVector<const char *, 2> Argv = {"clang", "@eols.rsp"};
+  BumpPtrAllocator A;
+  StringSaver Saver(A);
+  ASSERT_TRUE(cl::ExpandResponseFiles(Saver, cl::TokenizeWindowsCommandLine,
+                                      Argv, true, true, FS,
+                                      /*CurrentDir=*/StringRef(TestRoot)));
+  const char *Expected[] = {"clang", "-Xclang", "-Wno-whatever", nullptr,
+                            "input.cpp"};
+  ASSERT_EQ(array_lengthof(Expected), Argv.size());
+  for (size_t I = 0, E = array_lengthof(Expected); I < E; ++I) {
+    if (Expected[I] == nullptr) {
+      ASSERT_EQ(Argv[I], nullptr);
+    } else {
+      ASSERT_STREQ(Expected[I], Argv[I]);
+    }
+  }
+}
+
 TEST(CommandLineTest, SetDefautValue) {
   cl::ResetCommandLineParser();
 

From f0e585d58580956996c0add0afc62798e7498177 Mon Sep 17 00:00:00 2001
From: "Kazushi (Jam) Marukawa" <marukawa@nec.com>
Date: Tue, 3 Nov 2020 22:42:10 +0900
Subject: [PATCH 23/31] [VE] Add isReMaterializable and isAsCheapAsAMove flags

Add isReMaterializable and isAsCheapAsAMove flags to integer instructions
that are cheap.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D90833
---
 llvm/lib/Target/VE/VEInstrInfo.td           | 32 +++++++++--
 llvm/test/CodeGen/VE/Scalar/br_cc.ll        | 28 ++++-----
 llvm/test/CodeGen/VE/Scalar/call.ll         | 14 ++---
 llvm/test/CodeGen/VE/Scalar/ctlz.ll         | 12 ++--
 llvm/test/CodeGen/VE/Scalar/cttz.ll         | 12 ++--
 llvm/test/CodeGen/VE/Scalar/fp_frem.ll      | 12 ++--
 llvm/test/CodeGen/VE/Scalar/selectccf32c.ll | 13 ++---
 llvm/test/CodeGen/VE/Scalar/selectccf64c.ll | 13 ++---
 llvm/test/CodeGen/VE/Scalar/selectcci32c.ll | 13 ++---
 llvm/test/CodeGen/VE/Scalar/selectcci64c.ll | 13 ++---
 llvm/test/CodeGen/VE/Scalar/va_caller.ll    | 64 +++++++++++----------
 11 files changed, 125 insertions(+), 101 deletions(-)

diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index e0223586bad19d..c4a46928ddabba 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -938,10 +938,11 @@ multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC> {
 }
 
 // Section 8.2.1 - LEA
-let cx = 0, DecoderMethod = "DecodeLoadI64" in
-defm LEA : RMm<"lea", 0x06, I64>;
-let cx = 1, DecoderMethod = "DecodeLoadI64" in
-defm LEASL : RMm<"lea.sl", 0x06, I64>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1,
+    DecoderMethod = "DecodeLoadI64" in {
+  let cx = 0 in defm LEA : RMm<"lea", 0x06, I64>;
+  let cx = 1 in defm LEASL : RMm<"lea.sl", 0x06, I64>;
+}
 
 def : Pat<(iPTR ADDRrri:$addr), (LEArri MEMrri:$addr)>;
 def : Pat<(iPTR ADDRrii:$addr), (LEArii MEMrii:$addr)>;
@@ -1140,6 +1141,8 @@ def SVOB : RR<0x30, (outs), (ins), "svob">;
 // Section 8.4 - Fixed-point Operation Instructions
 //-----------------------------------------------------------------------------
 
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+
 // Section 8.4.1 - ADD (Add)
 defm ADDUL : RRm<"addu.l", 0x48, I64, i64>;
 let cx = 1 in defm ADDUW : RRm<"addu.w", 0x48, I32, i32>;
@@ -1162,6 +1165,8 @@ let cx = 1 in defm SUBSWZX : RRNCm<"subs.w.zx", 0x5A, I32, i32>;
 // Section 8.4.6 - SBX (Subtract)
 defm SUBSL : RRNCm<"subs.l", 0x5B, I64, i64, sub>;
 
+} // isReMaterializable, isAsCheapAsAMove
+
 // Section 8.4.7 - MPY (Multiply)
 defm MULUL : RRm<"mulu.l", 0x49, I64, i64>;
 let cx = 1 in defm MULUW : RRm<"mulu.w", 0x49, I32, i32>;
@@ -1187,6 +1192,8 @@ let cx = 1 in defm DIVSWZX : RRNCm<"divs.w.zx", 0x7B, I32, i32>;
 // Section 8.4.13 - DVX (Divide)
 defm DIVSL : RRNCm<"divs.l", 0x7F, I64, i64, sdiv>;
 
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+
 // Section 8.4.14 - CMP (Compare)
 defm CMPUL : RRNCm<"cmpu.l", 0x55, I64, i64>;
 let cx = 1 in defm CMPUW : RRNCm<"cmpu.w", 0x55, I32, i32>;
@@ -1209,10 +1216,14 @@ let cx = 1, cw = 1 in defm MINSWZX : RRm<"mins.w.zx", 0x78, I32, i32>;
 defm MAXSL : RRm<"maxs.l", 0x68, I64, i64>;
 let cw = 1 in defm MINSL : RRm<"mins.l", 0x68, I64, i64>;
 
+} // isReMaterializable, isAsCheapAsAMove
+
 //-----------------------------------------------------------------------------
 // Section 8.5 - Logical Operation Instructions
 //-----------------------------------------------------------------------------
 
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+
 // Section 8.5.1 - AND (AND)
 defm AND : RRm<"and", 0x44, I64, i64, and>;
 
@@ -1225,9 +1236,12 @@ defm XOR : RRm<"xor", 0x46, I64, i64, xor>;
 // Section 8.5.4 - EQV (Equivalence)
 defm EQV : RRm<"eqv", 0x47, I64, i64>;
 
+} // isReMaterializable, isAsCheapAsAMove
+
 // Section 8.5.5 - NND (Negate AND)
 def and_not : PatFrags<(ops node:$x, node:$y),
                        [(and (not node:$x), node:$y)]>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
 defm NND : RRNCm<"nnd", 0x54, I64, i64, and_not>;
 
 // Section 8.5.6 - MRG (Merge)
@@ -1237,16 +1251,20 @@ defm MRG : RRMRGm<"mrg", 0x56, I64, i64>;
 def ctlz_pat : PatFrags<(ops node:$src),
                         [(ctlz node:$src),
                          (ctlz_zero_undef node:$src)]>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
 defm LDZ : RRI1m<"ldz", 0x67, I64, i64, ctlz_pat>;
 
 // Section 8.5.8 - PCNT (Population Count)
 defm PCNT : RRI1m<"pcnt", 0x38, I64, i64, ctpop>;
 
 // Section 8.5.9 - BRV (Bit Reverse)
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
 defm BRV : RRI1m<"brv", 0x39, I64, i64, bitreverse>;
 
 // Section 8.5.10 - BSWP (Byte Swap)
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
 defm BSWP : RRSWPm<"bswp", 0x2B, I64, i64>;
+
 def : Pat<(i64 (bswap i64:$src)),
           (BSWPri $src, 0)>;
 def : Pat<(i64 (bswap (i64 mimm:$src))),
@@ -1273,17 +1291,21 @@ def : MnemonicAlias<"cmov.s", "cmov.s.at">;
 //-----------------------------------------------------------------------------
 
 // Section 8.6.1 - SLL (Shift Left Logical)
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
 defm SLL : RRIm<"sll", 0x65, I64, i64, shl>;
 
 // Section 8.6.2 - SLD (Shift Left Double)
 defm SLD : RRILDm<"sld", 0x64, I64, i64>;
 
 // Section 8.6.3 - SRL (Shift Right Logical)
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
 defm SRL : RRIm<"srl", 0x75, I64, i64, srl>;
 
 // Section 8.6.4 - SRD (Shift Right Double)
 defm SRD : RRIRDm<"srd", 0x74, I64, i64>;
 
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+
 // Section 8.6.5 - SLA (Shift Left Arithmetic)
 defm SLAWSX : RRIm<"sla.w.sx", 0x66, I32, i32, shl>;
 let cx = 1 in defm SLAWZX : RRIm<"sla.w.zx", 0x66, I32, i32>;
@@ -1298,6 +1320,8 @@ let cx = 1 in defm SRAWZX : RRIm<"sra.w.zx", 0x76, I32, i32>;
 // Section 8.6.8 - SRAX (Shift Right Arithmetic)
 defm SRAL : RRIm<"sra.l", 0x77, I64, i64, sra>;
 
+} // isReMaterializable, isAsCheapAsAMove
+
 def : Pat<(i32 (srl i32:$src, (i32 simm7:$val))),
           (EXTRACT_SUBREG (SRLri (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
             $src, sub_i32), !add(32, 64)), imm:$val), sub_i32)>;
diff --git a/llvm/test/CodeGen/VE/Scalar/br_cc.ll b/llvm/test/CodeGen/VE/Scalar/br_cc.ll
index e85069927abe7a..60c91749525a2c 100644
--- a/llvm/test/CodeGen/VE/Scalar/br_cc.ll
+++ b/llvm/test/CodeGen/VE/Scalar/br_cc.ll
@@ -523,7 +523,7 @@ define void @br_cc_i128_imm(i128 %0) {
 ; CHECK:       .LBB{{[0-9]+}}_4:
 ; CHECK-NEXT:    or %s2, 0, (0)1
 ; CHECK-NEXT:    cmps.l %s1, %s1, %s2
-; CHECK-NEXT:    or %s3, 0, %s2
+; CHECK-NEXT:    or %s3, 0, (0)1
 ; CHECK-NEXT:    cmov.l.gt %s3, (63)0, %s1
 ; CHECK-NEXT:    or %s4, 63, (0)1
 ; CHECK-NEXT:    cmpu.l %s0, %s0, %s4
@@ -553,7 +553,7 @@ define void @br_cc_u128_imm(i128 %0) {
 ; CHECK:       .LBB{{[0-9]+}}_4:
 ; CHECK-NEXT:    or %s2, 0, (0)1
 ; CHECK-NEXT:    cmps.l %s1, %s1, %s2
-; CHECK-NEXT:    or %s3, 0, %s2
+; CHECK-NEXT:    or %s3, 0, (0)1
 ; CHECK-NEXT:    cmov.l.ne %s3, (63)0, %s1
 ; CHECK-NEXT:    or %s4, 63, (0)1
 ; CHECK-NEXT:    cmpu.l %s0, %s0, %s4
@@ -856,15 +856,15 @@ define void @br_cc_imm_i128(i128 %0) {
 ; CHECK-LABEL: br_cc_imm_i128:
 ; CHECK:       .LBB{{[0-9]+}}_4:
 ; CHECK-NEXT:    or %s2, -1, (0)1
-; CHECK-NEXT:    or %s3, 0, (0)1
 ; CHECK-NEXT:    cmps.l %s1, %s1, %s2
-; CHECK-NEXT:    or %s2, 0, %s3
-; CHECK-NEXT:    cmov.l.lt %s2, (63)0, %s1
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.l.lt %s3, (63)0, %s1
 ; CHECK-NEXT:    or %s4, -64, (0)1
 ; CHECK-NEXT:    cmpu.l %s0, %s0, %s4
-; CHECK-NEXT:    cmov.l.lt %s3, (63)0, %s0
-; CHECK-NEXT:    cmov.l.eq %s2, %s3, %s1
-; CHECK-NEXT:    brne.w 0, %s2, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:    cmov.l.lt %s2, (63)0, %s0
+; CHECK-NEXT:    cmov.l.eq %s3, %s2, %s1
+; CHECK-NEXT:    brne.w 0, %s3, .LBB{{[0-9]+}}_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
@@ -887,15 +887,15 @@ define void @br_cc_imm_u128(i128 %0) {
 ; CHECK-LABEL: br_cc_imm_u128:
 ; CHECK:       .LBB{{[0-9]+}}_4:
 ; CHECK-NEXT:    or %s2, -1, (0)1
-; CHECK-NEXT:    or %s3, 0, (0)1
 ; CHECK-NEXT:    cmps.l %s1, %s1, %s2
-; CHECK-NEXT:    or %s2, 0, %s3
-; CHECK-NEXT:    cmov.l.ne %s2, (63)0, %s1
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.l.ne %s3, (63)0, %s1
 ; CHECK-NEXT:    or %s4, -64, (0)1
 ; CHECK-NEXT:    cmpu.l %s0, %s0, %s4
-; CHECK-NEXT:    cmov.l.lt %s3, (63)0, %s0
-; CHECK-NEXT:    cmov.l.eq %s2, %s3, %s1
-; CHECK-NEXT:    brne.w 0, %s2, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:    cmov.l.lt %s2, (63)0, %s0
+; CHECK-NEXT:    cmov.l.eq %s3, %s2, %s1
+; CHECK-NEXT:    brne.w 0, %s3, .LBB{{[0-9]+}}_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
diff --git a/llvm/test/CodeGen/VE/Scalar/call.ll b/llvm/test/CodeGen/VE/Scalar/call.ll
index e0049e11772cce..9d949919dc80a0 100644
--- a/llvm/test/CodeGen/VE/Scalar/call.ll
+++ b/llvm/test/CodeGen/VE/Scalar/call.ll
@@ -50,9 +50,10 @@ define i32 @stack_call_int_szext() {
 ; CHECK-NEXT:    or %s0, -1, (0)1
 ; CHECK-NEXT:    st %s0, 248(, %s11)
 ; CHECK-NEXT:    lea %s34, 65535
-; CHECK-NEXT:    lea %s1, stack_callee_int_szext@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, stack_callee_int_szext@hi(, %s1)
+; CHECK-NEXT:    lea %s0, stack_callee_int_szext@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, stack_callee_int_szext@hi(, %s0)
+; CHECK-NEXT:    or %s0, -1, (0)1
 ; CHECK-NEXT:    lea %s1, 255
 ; CHECK-NEXT:    or %s2, 3, (0)1
 ; CHECK-NEXT:    or %s3, 4, (0)1
@@ -73,6 +74,9 @@ define float @stack_call_float() {
 ; CHECK-NEXT:    lea.sl %s0, 1092616192
 ; CHECK-NEXT:    st %s0, 248(, %s11)
 ; CHECK-NEXT:    lea.sl %s34, 1091567616
+; CHECK-NEXT:    lea %s0, stack_callee_float@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, stack_callee_float@hi(, %s0)
 ; CHECK-NEXT:    lea.sl %s0, 1065353216
 ; CHECK-NEXT:    lea.sl %s1, 1073741824
 ; CHECK-NEXT:    lea.sl %s2, 1077936128
@@ -81,9 +85,6 @@ define float @stack_call_float() {
 ; CHECK-NEXT:    lea.sl %s5, 1086324736
 ; CHECK-NEXT:    lea.sl %s6, 1088421888
 ; CHECK-NEXT:    lea.sl %s7, 1090519040
-; CHECK-NEXT:    lea %s35, stack_callee_float@lo
-; CHECK-NEXT:    and %s35, %s35, (32)0
-; CHECK-NEXT:    lea.sl %s12, stack_callee_float@hi(, %s35)
 ; CHECK-NEXT:    st %s34, 240(, %s11)
 ; CHECK-NEXT:    bsic %s10, (, %s12)
 ; CHECK-NEXT:    or %s11, 0, %s9
@@ -111,4 +112,3 @@ define float @stack_call_float2(float %p0) {
   %r = tail call float @stack_callee_float(float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0)
   ret float %r
 }
-
diff --git a/llvm/test/CodeGen/VE/Scalar/ctlz.ll b/llvm/test/CodeGen/VE/Scalar/ctlz.ll
index 6be8accf1343f1..37e8648048180b 100644
--- a/llvm/test/CodeGen/VE/Scalar/ctlz.ll
+++ b/llvm/test/CodeGen/VE/Scalar/ctlz.ll
@@ -10,12 +10,12 @@ define i128 @func128(i128 %p){
 ; CHECK-LABEL: func128:
 ; CHECK:       .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT:    or %s2, 0, (0)1
-; CHECK-NEXT:    cmps.l %s3, %s1, %s2
+; CHECK-NEXT:    cmps.l %s2, %s1, %s2
 ; CHECK-NEXT:    ldz %s1, %s1
 ; CHECK-NEXT:    ldz %s0, %s0
 ; CHECK-NEXT:    lea %s0, 64(, %s0)
-; CHECK-NEXT:    cmov.l.ne %s0, %s1, %s3
-; CHECK-NEXT:    or %s1, 0, %s2
+; CHECK-NEXT:    cmov.l.ne %s0, %s1, %s2
+; CHECK-NEXT:    or %s1, 0, (0)1
 ; CHECK-NEXT:    or %s11, 0, %s9
   %r = tail call i128 @llvm.ctlz.i128(i128 %p, i1 true)
   ret i128 %r
@@ -180,12 +180,12 @@ define i128 @func128x(i128 %p){
 ; CHECK-LABEL: func128x:
 ; CHECK:       .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT:    or %s2, 0, (0)1
-; CHECK-NEXT:    cmps.l %s3, %s1, %s2
+; CHECK-NEXT:    cmps.l %s2, %s1, %s2
 ; CHECK-NEXT:    ldz %s1, %s1
 ; CHECK-NEXT:    ldz %s0, %s0
 ; CHECK-NEXT:    lea %s0, 64(, %s0)
-; CHECK-NEXT:    cmov.l.ne %s0, %s1, %s3
-; CHECK-NEXT:    or %s1, 0, %s2
+; CHECK-NEXT:    cmov.l.ne %s0, %s1, %s2
+; CHECK-NEXT:    or %s1, 0, (0)1
 ; CHECK-NEXT:    or %s11, 0, %s9
   %r = tail call i128 @llvm.ctlz.i128(i128 %p, i1 false)
   ret i128 %r
diff --git a/llvm/test/CodeGen/VE/Scalar/cttz.ll b/llvm/test/CodeGen/VE/Scalar/cttz.ll
index f99bc3f76bb867..c600e3751b8cab 100644
--- a/llvm/test/CodeGen/VE/Scalar/cttz.ll
+++ b/llvm/test/CodeGen/VE/Scalar/cttz.ll
@@ -10,16 +10,16 @@ define i128 @func128(i128 %p) {
 ; CHECK-LABEL: func128:
 ; CHECK:       .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT:    or %s2, 0, (0)1
-; CHECK-NEXT:    cmps.l %s3, %s0, %s2
-; CHECK-NEXT:    lea %s4, -1(, %s0)
-; CHECK-NEXT:    nnd %s0, %s0, %s4
-; CHECK-NEXT:    pcnt %s4, %s0
+; CHECK-NEXT:    cmps.l %s2, %s0, %s2
+; CHECK-NEXT:    lea %s3, -1(, %s0)
+; CHECK-NEXT:    nnd %s0, %s0, %s3
+; CHECK-NEXT:    pcnt %s3, %s0
 ; CHECK-NEXT:    lea %s0, -1(, %s1)
 ; CHECK-NEXT:    nnd %s0, %s1, %s0
 ; CHECK-NEXT:    pcnt %s0, %s0
 ; CHECK-NEXT:    lea %s0, 64(, %s0)
-; CHECK-NEXT:    cmov.l.ne %s0, %s4, %s3
-; CHECK-NEXT:    or %s1, 0, %s2
+; CHECK-NEXT:    cmov.l.ne %s0, %s3, %s2
+; CHECK-NEXT:    or %s1, 0, (0)1
 ; CHECK-NEXT:    or %s11, 0, %s9
   %r = tail call i128 @llvm.cttz.i128(i128 %p, i1 true)
   ret i128 %r
diff --git a/llvm/test/CodeGen/VE/Scalar/fp_frem.ll b/llvm/test/CodeGen/VE/Scalar/fp_frem.ll
index 3906c67dc333df..2a3c119810049b 100644
--- a/llvm/test/CodeGen/VE/Scalar/fp_frem.ll
+++ b/llvm/test/CodeGen/VE/Scalar/fp_frem.ll
@@ -75,10 +75,10 @@ define float @frem_float_zero(float %0) {
 ; CHECK-LABEL: frem_float_zero:
 ; CHECK:       .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT:    or %s1, 0, %s0
+; CHECK-NEXT:    lea %s0, fmodf@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, fmodf@hi(, %s0)
 ; CHECK-NEXT:    lea.sl %s0, 0
-; CHECK-NEXT:    lea %s2, fmodf@lo
-; CHECK-NEXT:    and %s2, %s2, (32)0
-; CHECK-NEXT:    lea.sl %s12, fmodf@hi(, %s2)
 ; CHECK-NEXT:    bsic %s10, (, %s12)
 ; CHECK-NEXT:    or %s11, 0, %s9
   %2 = frem float 0.000000e+00, %0
@@ -125,10 +125,10 @@ define float @frem_float_cont(float %0) {
 ; CHECK-LABEL: frem_float_cont:
 ; CHECK:       .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT:    or %s1, 0, %s0
+; CHECK-NEXT:    lea %s0, fmodf@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, fmodf@hi(, %s0)
 ; CHECK-NEXT:    lea.sl %s0, -1073741824
-; CHECK-NEXT:    lea %s2, fmodf@lo
-; CHECK-NEXT:    and %s2, %s2, (32)0
-; CHECK-NEXT:    lea.sl %s12, fmodf@hi(, %s2)
 ; CHECK-NEXT:    bsic %s10, (, %s12)
 ; CHECK-NEXT:    or %s11, 0, %s9
   %2 = frem float -2.000000e+00, %0
diff --git a/llvm/test/CodeGen/VE/Scalar/selectccf32c.ll b/llvm/test/CodeGen/VE/Scalar/selectccf32c.ll
index c30eba96bf5f66..2916a67f7cef53 100644
--- a/llvm/test/CodeGen/VE/Scalar/selectccf32c.ll
+++ b/llvm/test/CodeGen/VE/Scalar/selectccf32c.ll
@@ -59,15 +59,15 @@ define float @selectccsgti64(i64, i64, float, float) {
 define float @selectccsgti128(i128, i128, float, float) {
 ; CHECK-LABEL: selectccsgti128:
 ; CHECK:       .LBB{{[0-9]+}}_2:
-; CHECK-NEXT:    or %s6, 0, (0)1
 ; CHECK-NEXT:    cmps.l %s1, %s1, %s3
-; CHECK-NEXT:    or %s3, 0, %s6
-; CHECK-NEXT:    cmov.l.gt %s3, (63)0, %s1
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    or %s6, 0, (0)1
+; CHECK-NEXT:    cmov.l.gt %s6, (63)0, %s1
 ; CHECK-NEXT:    cmpu.l %s0, %s0, %s2
-; CHECK-NEXT:    or %s2, 0, %s6
+; CHECK-NEXT:    or %s2, 0, (0)1
 ; CHECK-NEXT:    cmov.l.gt %s2, (63)0, %s0
-; CHECK-NEXT:    cmov.l.eq %s3, %s2, %s1
-; CHECK-NEXT:    cmps.w.sx %s0, %s3, %s6
+; CHECK-NEXT:    cmov.l.eq %s6, %s2, %s1
+; CHECK-NEXT:    cmps.w.sx %s0, %s6, %s3
 ; CHECK-NEXT:    cmov.w.ne %s5, %s4, %s0
 ; CHECK-NEXT:    or %s0, 0, %s5
 ; CHECK-NEXT:    or %s11, 0, %s9
@@ -99,4 +99,3 @@ define float @selectccogtf64(double, double, float, float) {
   %6 = select i1 %5, float %2, float %3
   ret float %6
 }
-
diff --git a/llvm/test/CodeGen/VE/Scalar/selectccf64c.ll b/llvm/test/CodeGen/VE/Scalar/selectccf64c.ll
index 4f113edbed5f7d..65cb66f2b95c2a 100644
--- a/llvm/test/CodeGen/VE/Scalar/selectccf64c.ll
+++ b/llvm/test/CodeGen/VE/Scalar/selectccf64c.ll
@@ -59,15 +59,15 @@ define double @selectccsgti64(i64, i64, double, double) {
 define double @selectccsgti128(i128, i128, double, double) {
 ; CHECK-LABEL: selectccsgti128:
 ; CHECK:       .LBB{{[0-9]+}}_2:
-; CHECK-NEXT:    or %s6, 0, (0)1
 ; CHECK-NEXT:    cmps.l %s1, %s1, %s3
-; CHECK-NEXT:    or %s3, 0, %s6
-; CHECK-NEXT:    cmov.l.gt %s3, (63)0, %s1
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    or %s6, 0, (0)1
+; CHECK-NEXT:    cmov.l.gt %s6, (63)0, %s1
 ; CHECK-NEXT:    cmpu.l %s0, %s0, %s2
-; CHECK-NEXT:    or %s2, 0, %s6
+; CHECK-NEXT:    or %s2, 0, (0)1
 ; CHECK-NEXT:    cmov.l.gt %s2, (63)0, %s0
-; CHECK-NEXT:    cmov.l.eq %s3, %s2, %s1
-; CHECK-NEXT:    cmps.w.sx %s0, %s3, %s6
+; CHECK-NEXT:    cmov.l.eq %s6, %s2, %s1
+; CHECK-NEXT:    cmps.w.sx %s0, %s6, %s3
 ; CHECK-NEXT:    cmov.w.ne %s5, %s4, %s0
 ; CHECK-NEXT:    or %s0, 0, %s5
 ; CHECK-NEXT:    or %s11, 0, %s9
@@ -99,4 +99,3 @@ define double @selectccogtf64(double, double, double, double) {
   %6 = select i1 %5, double %2, double %3
   ret double %6
 }
-
diff --git a/llvm/test/CodeGen/VE/Scalar/selectcci32c.ll b/llvm/test/CodeGen/VE/Scalar/selectcci32c.ll
index 982259c2bc8929..ecbce09afd91ea 100644
--- a/llvm/test/CodeGen/VE/Scalar/selectcci32c.ll
+++ b/llvm/test/CodeGen/VE/Scalar/selectcci32c.ll
@@ -59,15 +59,15 @@ define i32 @selectccsgti64(i64, i64, i32, i32) {
 define i32 @selectccsgti128(i128, i128, i32, i32) {
 ; CHECK-LABEL: selectccsgti128:
 ; CHECK:       .LBB{{[0-9]+}}_2:
-; CHECK-NEXT:    or %s6, 0, (0)1
 ; CHECK-NEXT:    cmps.l %s1, %s1, %s3
-; CHECK-NEXT:    or %s3, 0, %s6
-; CHECK-NEXT:    cmov.l.gt %s3, (63)0, %s1
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    or %s6, 0, (0)1
+; CHECK-NEXT:    cmov.l.gt %s6, (63)0, %s1
 ; CHECK-NEXT:    cmpu.l %s0, %s0, %s2
-; CHECK-NEXT:    or %s2, 0, %s6
+; CHECK-NEXT:    or %s2, 0, (0)1
 ; CHECK-NEXT:    cmov.l.gt %s2, (63)0, %s0
-; CHECK-NEXT:    cmov.l.eq %s3, %s2, %s1
-; CHECK-NEXT:    cmps.w.sx %s0, %s3, %s6
+; CHECK-NEXT:    cmov.l.eq %s6, %s2, %s1
+; CHECK-NEXT:    cmps.w.sx %s0, %s6, %s3
 ; CHECK-NEXT:    cmov.w.ne %s5, %s4, %s0
 ; CHECK-NEXT:    or %s0, 0, %s5
 ; CHECK-NEXT:    or %s11, 0, %s9
@@ -99,4 +99,3 @@ define i32 @selectccogtf64(double, double, i32, i32) {
   %6 = select i1 %5, i32 %2, i32 %3
   ret i32 %6
 }
-
diff --git a/llvm/test/CodeGen/VE/Scalar/selectcci64c.ll b/llvm/test/CodeGen/VE/Scalar/selectcci64c.ll
index 7a91bfe10f8862..112978969cdbd7 100644
--- a/llvm/test/CodeGen/VE/Scalar/selectcci64c.ll
+++ b/llvm/test/CodeGen/VE/Scalar/selectcci64c.ll
@@ -59,15 +59,15 @@ define i64 @selectccsgti64(i64, i64, i64, i64) {
 define i64 @selectccsgti128(i128, i128, i64, i64) {
 ; CHECK-LABEL: selectccsgti128:
 ; CHECK:       .LBB{{[0-9]+}}_2:
-; CHECK-NEXT:    or %s6, 0, (0)1
 ; CHECK-NEXT:    cmps.l %s1, %s1, %s3
-; CHECK-NEXT:    or %s3, 0, %s6
-; CHECK-NEXT:    cmov.l.gt %s3, (63)0, %s1
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    or %s6, 0, (0)1
+; CHECK-NEXT:    cmov.l.gt %s6, (63)0, %s1
 ; CHECK-NEXT:    cmpu.l %s0, %s0, %s2
-; CHECK-NEXT:    or %s2, 0, %s6
+; CHECK-NEXT:    or %s2, 0, (0)1
 ; CHECK-NEXT:    cmov.l.gt %s2, (63)0, %s0
-; CHECK-NEXT:    cmov.l.eq %s3, %s2, %s1
-; CHECK-NEXT:    cmps.w.sx %s0, %s3, %s6
+; CHECK-NEXT:    cmov.l.eq %s6, %s2, %s1
+; CHECK-NEXT:    cmps.w.sx %s0, %s6, %s3
 ; CHECK-NEXT:    cmov.w.ne %s5, %s4, %s0
 ; CHECK-NEXT:    or %s0, 0, %s5
 ; CHECK-NEXT:    or %s11, 0, %s9
@@ -99,4 +99,3 @@ define i64 @selectccogtf64(double, double, i64, i64) {
   %6 = select i1 %5, i64 %2, i64 %3
   ret i64 %6
 }
-
diff --git a/llvm/test/CodeGen/VE/Scalar/va_caller.ll b/llvm/test/CodeGen/VE/Scalar/va_caller.ll
index 1211fe50c539ea..931b6bdab49350 100644
--- a/llvm/test/CodeGen/VE/Scalar/va_caller.ll
+++ b/llvm/test/CodeGen/VE/Scalar/va_caller.ll
@@ -5,44 +5,48 @@ declare i32 @func(i32, ...)
 define i32 @caller() {
 ; CHECK-LABEL: caller:
 ; CHECK:       .LBB{{[0-9]+}}_2:
-; CHECK-NEXT:    st %s18, 48(, %s9) # 8-byte Folded Spill
-; CHECK-NEXT:    or %s18, 0, (0)1
-; CHECK-NEXT:    st %s18, 264(, %s11)
-; CHECK-NEXT:    or %s0, 10, (0)1
-; CHECK-NEXT:    st %s0, 256(, %s11)
-; CHECK-NEXT:    lea.sl %s0, 1075970048
-; CHECK-NEXT:    st %s0, 248(, %s11)
-; CHECK-NEXT:    or %s0, 8, (0)1
-; CHECK-NEXT:    st %s0, 240(, %s11)
-; CHECK-NEXT:    st %s18, 232(, %s11)
-; CHECK-NEXT:    or %s5, 5, (0)1
-; CHECK-NEXT:    st %s5, 216(, %s11)
-; CHECK-NEXT:    or %s4, 4, (0)1
-; CHECK-NEXT:    st %s4, 208(, %s11)
-; CHECK-NEXT:    or %s3, 3, (0)1
-; CHECK-NEXT:    st %s3, 200(, %s11)
-; CHECK-NEXT:    or %s2, 2, (0)1
-; CHECK-NEXT:    st %s2, 192(, %s11)
+; CHECK-NEXT:    or %s0, 0, (0)1
+; CHECK-NEXT:    st %s0, 264(, %s11)
+; CHECK-NEXT:    or %s1, 10, (0)1
+; CHECK-NEXT:    st %s1, 256(, %s11)
+; CHECK-NEXT:    lea.sl %s1, 1075970048
+; CHECK-NEXT:    st %s1, 248(, %s11)
+; CHECK-NEXT:    or %s1, 8, (0)1
+; CHECK-NEXT:    st %s1, 240(, %s11)
+; CHECK-NEXT:    st %s0, 232(, %s11)
+; CHECK-NEXT:    or %s1, 5, (0)1
+; CHECK-NEXT:    st %s1, 216(, %s11)
+; CHECK-NEXT:    or %s1, 4, (0)1
+; CHECK-NEXT:    st %s1, 208(, %s11)
+; CHECK-NEXT:    or %s1, 3, (0)1
+; CHECK-NEXT:    st %s1, 200(, %s11)
+; CHECK-NEXT:    or %s1, 2, (0)1
+; CHECK-NEXT:    st %s1, 192(, %s11)
 ; CHECK-NEXT:    or %s1, 1, (0)1
 ; CHECK-NEXT:    st %s1, 184(, %s11)
-; CHECK-NEXT:    lea %s0, .LCPI{{[0-9]+}}_0@lo
-; CHECK-NEXT:    and %s0, %s0, (32)0
-; CHECK-NEXT:    lea.sl %s0, .LCPI{{[0-9]+}}_0@hi(, %s0)
-; CHECK-NEXT:    ld %s34, 8(, %s0)
-; CHECK-NEXT:    ld %s35, (, %s0)
-; CHECK-NEXT:    st %s18, 176(, %s11)
-; CHECK-NEXT:    lea.sl %s6, 1086324736
-; CHECK-NEXT:    st %s6, 224(, %s11)
+; CHECK-NEXT:    lea %s1, .LCPI{{[0-9]+}}_0@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, .LCPI{{[0-9]+}}_0@hi(, %s1)
+; CHECK-NEXT:    ld %s34, 8(, %s1)
+; CHECK-NEXT:    ld %s35, (, %s1)
+; CHECK-NEXT:    st %s0, 176(, %s11)
+; CHECK-NEXT:    lea.sl %s0, 1086324736
+; CHECK-NEXT:    st %s0, 224(, %s11)
 ; CHECK-NEXT:    st %s34, 280(, %s11)
 ; CHECK-NEXT:    lea %s0, func@lo
 ; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    lea.sl %s12, func@hi(, %s0)
+; CHECK-NEXT:    or %s0, 0, (0)1
+; CHECK-NEXT:    or %s1, 1, (0)1
+; CHECK-NEXT:    or %s2, 2, (0)1
+; CHECK-NEXT:    or %s3, 3, (0)1
+; CHECK-NEXT:    or %s4, 4, (0)1
+; CHECK-NEXT:    or %s5, 5, (0)1
+; CHECK-NEXT:    lea.sl %s6, 1086324736
+; CHECK-NEXT:    or %s7, 0, (0)1
 ; CHECK-NEXT:    st %s35, 272(, %s11)
-; CHECK-NEXT:    or %s0, 0, %s18
-; CHECK-NEXT:    or %s7, 0, %s18
 ; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    or %s0, 0, %s18
-; CHECK-NEXT:    ld %s18, 48(, %s9) # 8-byte Folded Reload
+; CHECK-NEXT:    or %s0, 0, (0)1
 ; CHECK-NEXT:    or %s11, 0, %s9
   call i32 (i32, ...) @func(i32 0, i16 1, i8 2, i32 3, i16 4, i8 5, float 6.0, i8* null, i64 8, double 9.0, i128 10, fp128 0xLA000000000000000)
   ret i32 0

From 71e108cd86e70b06c5fa3a63689dcb3555c3d13f Mon Sep 17 00:00:00 2001
From: Alexandre Rames <arames@apple.com>
Date: Thu, 5 Nov 2020 12:07:28 -0800
Subject: [PATCH 24/31] Allow searching for prebuilt implicit modules.

The behavior is controlled by the `-fprebuilt-implicit-modules` option, and
allows searching for implicit modules in the prebuilt module cache paths.

The current command-line options for prebuilt modules do not allow to easily
maintain and use multiple versions of modules. Both the producer and users of
prebuilt modules are required to know the relationships between compilation
options and module file paths. Using a particular version of a prebuilt module
requires passing a particular option on the command line (e.g.
`-fmodule-file=[<name>=]<file>` or `-fprebuilt-module-path=<directory>`).

However the compiler already knows how to distinguish and automatically locate
implicit modules. Hence this proposal to introduce the
`-fprebuilt-implicit-modules` option. When set, it enables searching for
implicit modules in the prebuilt module paths (specified via
`-fprebuilt-module-path`). To not modify existing behavior, this search takes
place after the standard search for prebuilt modules. If no matching prebuilt
implicit module is found, the compiler falls back to regular implicit modules.

Here is a workflow illustrating how both the producer and consumer of prebuilt
modules would need to know what versions of prebuilt modules are available and
where they are located.

  clang -cc1 -x c modulemap -fmodules -emit-module -fmodule-name=foo -fmodules-cache-path=prebuilt_modules_v1 <config 1 options>
  clang -cc1 -x c modulemap -fmodules -emit-module -fmodule-name=foo -fmodules-cache-path=prebuilt_modules_v2 <config 2 options>
  clang -cc1 -x c modulemap -fmodules -emit-module -fmodule-name=foo -fmodules-cache-path=prebuilt_modules_v3 <config 3 options>

  clang -cc1 -x c use.c -fmodules -fmodule-map-file=modulemap -fprebuilt-module-path=prebuilt_modules_v1 <config 1 options>
  clang -cc1 -x c use.c -fmodules -fmodule-map-file=modulemap <non-prebuilt config options>

With prebuilt implicit modules, the producer can generate prebuilt modules as
usual, all in the same output directory. The same mechanisms as for implicit
modules take care of incorporating hashes in the path to distinguish between
module versions.

Note that we do not specify the output module filename (`-o`), so implicit modules are generated in the cache path `prebuilt_modules`.

  clang -cc1 -x c modulemap -fmodules -emit-module -fmodule-name=foo -fmodules-cache-path=prebuilt_modules <config 1 options>
  clang -cc1 -x c modulemap -fmodules -emit-module -fmodule-name=foo -fmodules-cache-path=prebuilt_modules <config 2 options>
  clang -cc1 -x c modulemap -fmodules -emit-module -fmodule-name=foo -fmodules-cache-path=prebuilt_modules <config 3 options>

The user can now simply enable prebuilt implicit modules and point to the
prebuilt modules cache. No need to "parse" command-line options to decide
what prebuilt modules (paths) to use.

  clang -cc1 -x c use.c -fmodules -fmodule-map-file=modulemap -fprebuilt-module-path=prebuilt_modules -fprebuilt-implicit-modules <config 1 options>
  clang -cc1 -x c use.c -fmodules -fmodule-map-file=modulemap -fprebuilt-module-path=prebuilt_modules -fprebuilt-implicit-modules <non-prebuilt config options>

This is for example particularly useful in a use-case where compilation is
expensive, and the configurations expected to be used are predictable, but not
controlled by the producer of prebuilt modules. Modules for the set of
predictable configurations can be prebuilt, and using them does not require
"parsing" the configuration (command-line options).

Reviewed By: Bigcheese

Differential Revision: https://reviews.llvm.org/D68997
---
 clang/docs/Modules.rst                        | 122 ++++++++++++++++++
 clang/include/clang/Driver/Options.td         |   3 +
 .../include/clang/Frontend/CompilerInstance.h |   5 +-
 clang/include/clang/Lex/HeaderSearch.h        |  34 +++++
 clang/include/clang/Lex/HeaderSearchOptions.h |   9 +-
 clang/lib/Driver/ToolChains/Clang.cpp         |   2 +
 clang/lib/Frontend/CompilerInstance.cpp       |  18 ++-
 clang/lib/Frontend/CompilerInvocation.cpp     |   2 +
 clang/lib/Lex/HeaderSearch.cpp                |  29 ++++-
 clang/lib/Serialization/ASTReader.cpp         |   1 +
 clang/lib/Serialization/ASTWriter.cpp         |   1 +
 .../Inputs/prebuilt-implicit-module/a.h       |   1 +
 .../prebuilt-implicit-module/module.modulemap |   1 +
 .../test/Modules/prebuilt-implicit-modules.m  |  35 +++++
 14 files changed, 251 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/Modules/Inputs/prebuilt-implicit-module/a.h
 create mode 100644 clang/test/Modules/Inputs/prebuilt-implicit-module/module.modulemap
 create mode 100644 clang/test/Modules/prebuilt-implicit-modules.m

diff --git a/clang/docs/Modules.rst b/clang/docs/Modules.rst
index 63f09f90fe6ccf..703ba86c68a05c 100644
--- a/clang/docs/Modules.rst
+++ b/clang/docs/Modules.rst
@@ -225,6 +225,11 @@ Command-line parameters
 ``-fprebuilt-module-path=<directory>``
   Specify the path to the prebuilt modules. If specified, we will look for modules in this directory for a given top-level module name. We don't need a module map for loading prebuilt modules in this directory and the compiler will not try to rebuild these modules. This can be specified multiple times.
 
+``-fprebuilt-implicit-modules``
+  Enable prebuilt implicit modules. If a prebuilt module is not found in the
+  prebuilt modules paths (specified via ``-fprebuilt-module-path``), we will
+  look for a matching implicit module in the prebuilt modules paths.
+
 -cc1 Options
 ~~~~~~~~~~~~
 
@@ -235,6 +240,123 @@ Command-line parameters
   being built if the command line arguments are not homogeneous across your
   build.
 
+Using Prebuilt Modules
+----------------------
+
+Below are a few examples illustrating uses of prebuilt modules via the different options.
+
+First, let's set up files for our examples.
+
+.. code-block:: c
+
+  /* A.h */
+  #ifdef ENABLE_A
+  void a() {}
+  #endif
+
+.. code-block:: c
+
+  /* B.h */
+  #include "A.h"
+
+.. code-block:: c
+
+  /* use.c */
+  #include "B.h"
+  void use() {
+  #ifdef ENABLE_A
+    a();
+  #endif
+  }
+
+.. code-block:: c
+
+  /* module.modulemap */
+  module A {
+    header "A.h"
+  }
+  module B {
+    header "B.h"
+    export *
+  }
+
+In the examples below, the compilation of ``use.c`` can be done without ``-cc1``, but the commands used to prebuild the modules would need to be updated to take into account the default options passed to ``clang -cc1``. (See ``clang use.c -v``)
+Note also that, since we use ``-cc1``, we specify the ``-fmodule-map-file=`` or ``-fimplicit-module-maps`` options explicitly. When using the clang driver, ``-fimplicit-module-maps`` is implied by ``-fmodules``.
+
+First let us use an explicit mapping from modules to files.
+
+.. code-block:: sh
+
+  rm -rf prebuilt ; mkdir prebuilt
+  clang -cc1 -emit-module -o prebuilt/A.pcm -fmodules module.modulemap -fmodule-name=A
+  clang -cc1 -emit-module -o prebuilt/B.pcm -fmodules module.modulemap -fmodule-name=B -fmodule-file=A=prebuilt/A.pcm
+  clang -cc1 -emit-obj use.c -fmodules -fmodule-map-file=module.modulemap -fmodule-file=A=prebuilt/A.pcm -fmodule-file=B=prebuilt/B.pcm
+
+Instead of specifying the mappings manually, it can be convenient to use the ``-fprebuilt-module-path`` option. Let's also use ``-fimplicit-module-maps`` instead of manually pointing to our module map.
+
+.. code-block:: sh
+
+  rm -rf prebuilt; mkdir prebuilt
+  clang -cc1 -emit-module -o prebuilt/A.pcm -fmodules module.modulemap -fmodule-name=A
+  clang -cc1 -emit-module -o prebuilt/B.pcm -fmodules module.modulemap -fmodule-name=B -fprebuilt-module-path=prebuilt
+  clang -cc1 -emit-obj use.c -fmodules -fimplicit-module-maps -fprebuilt-module-path=prebuilt
+
+A trick to prebuild all modules required for our source file in one command is to generate implicit modules while using the ``-fdisable-module-hash`` option.
+
+.. code-block:: sh
+
+  rm -rf prebuilt ; mkdir prebuilt
+  clang -cc1 -emit-obj use.c -fmodules -fimplicit-module-maps -fmodules-cache-path=prebuilt -fdisable-module-hash
+  ls prebuilt/*.pcm
+  # prebuilt/A.pcm  prebuilt/B.pcm
+
+Note that with explicit or prebuilt modules, we are responsible for, and should be particularly careful about, the compatibility of our modules.
+Using mismatching compilation options and modules may lead to issues.
+
+.. code-block:: sh
+
+  clang -cc1 -emit-obj use.c -fmodules -fimplicit-module-maps -fprebuilt-module-path=prebuilt -DENABLE_A
+  # use.c:4:10: warning: implicit declaration of function 'a' is invalid in C99 [-Wimplicit-function-declaration]
+  #   return a(x);
+  #          ^
+  # 1 warning generated.
+
+So we need to maintain multiple versions of prebuilt modules. We can do so using a manual module mapping, or pointing to a different prebuilt module cache path. For example:
+
+.. code-block:: sh
+
+  rm -rf prebuilt ; mkdir prebuilt ; rm -rf prebuilt_a ; mkdir prebuilt_a
+  clang -cc1 -emit-obj use.c -fmodules -fimplicit-module-maps -fmodules-cache-path=prebuilt -fdisable-module-hash
+  clang -cc1 -emit-obj use.c -fmodules -fimplicit-module-maps -fmodules-cache-path=prebuilt_a -fdisable-module-hash -DENABLE_A
+  clang -cc1 -emit-obj use.c -fmodules -fimplicit-module-maps -fprebuilt-module-path=prebuilt
+  clang -cc1 -emit-obj use.c -fmodules -fimplicit-module-maps -fprebuilt-module-path=prebuilt_a -DENABLE_A
+
+
+Instead of managing the different module versions manually, we can build implicit modules in a given cache path (using ``-fmodules-cache-path``), and reuse them as prebuilt implicit modules by passing ``-fprebuilt-module-path`` and ``-fprebuilt-implicit-modules``.
+
+.. code-block:: sh
+
+  rm -rf prebuilt; mkdir prebuilt
+  clang -cc1 -emit-obj -o use.o use.c -fmodules -fimplicit-module-maps -fmodules-cache-path=prebuilt
+  clang -cc1 -emit-obj -o use.o use.c -fmodules -fimplicit-module-maps -fmodules-cache-path=prebuilt -DENABLE_A
+  find prebuilt -name "*.pcm"
+  # prebuilt/1AYBIGPM8R2GA/A-3L1K4LUA6O31.pcm
+  # prebuilt/1AYBIGPM8R2GA/B-3L1K4LUA6O31.pcm
+  # prebuilt/VH0YZMF1OIRK/A-3L1K4LUA6O31.pcm
+  # prebuilt/VH0YZMF1OIRK/B-3L1K4LUA6O31.pcm
+  clang -cc1 -emit-obj -o use.o use.c -fmodules -fimplicit-module-maps -fprebuilt-module-path=prebuilt -fprebuilt-implicit-modules
+  clang -cc1 -emit-obj -o use.o use.c -fmodules -fimplicit-module-maps -fprebuilt-module-path=prebuilt -fprebuilt-implicit-modules -DENABLE_A
+
+Finally, we want to allow implicit modules for configurations that were not prebuilt. When using the clang driver, a module cache path is implicitly selected. Using ``-cc1``, we simply add the ``-fmodules-cache-path`` option.
+
+.. code-block:: sh
+
+  clang -cc1 -emit-obj -o use.o use.c -fmodules -fimplicit-module-maps -fprebuilt-module-path=prebuilt -fprebuilt-implicit-modules -fmodules-cache-path=cache
+  clang -cc1 -emit-obj -o use.o use.c -fmodules -fimplicit-module-maps -fprebuilt-module-path=prebuilt -fprebuilt-implicit-modules -fmodules-cache-path=cache -DENABLE_A
+  clang -cc1 -emit-obj -o use.o use.c -fmodules -fimplicit-module-maps -fprebuilt-module-path=prebuilt -fprebuilt-implicit-modules -fmodules-cache-path=cache -DENABLE_A -DOTHER_OPTIONS
+
+This way, a single directory containing multiple variants of modules can be prepared and reused. The options configuring the module cache are independent of other options.
+
 Module Semantics
 ================
 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 33cfa72c0888c8..80d0cf1e077878 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1439,6 +1439,9 @@ def fmodules_user_build_path : Separate<["-"], "fmodules-user-build-path">, Grou
 def fprebuilt_module_path : Joined<["-"], "fprebuilt-module-path=">, Group<i_Group>,
   Flags<[NoXarchOption, CC1Option]>, MetaVarName<"<directory>">,
   HelpText<"Specify the prebuilt module path">;
+def fprebuilt_implicit_modules : Flag <["-"], "fprebuilt-implicit-modules">, Group<f_Group>,
+  Flags<[NoXarchOption, CC1Option]>,
+  HelpText<"Look up implicit modules in the prebuilt module path">;
 def fmodules_prune_interval : Joined<["-"], "fmodules-prune-interval=">, Group<i_Group>,
   Flags<[CC1Option]>, MetaVarName<"<seconds>">,
   HelpText<"Specify the interval (in seconds) between attempts to prune the module cache">;
diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index cb935becaef190..4fc002c6f38310 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -649,7 +649,10 @@ class CompilerInstance : public ModuleLoader {
   /// and replace any existing one with it.
   void createPreprocessor(TranslationUnitKind TUKind);
 
-  std::string getSpecificModuleCachePath();
+  std::string getSpecificModuleCachePath(StringRef ModuleHash);
+  std::string getSpecificModuleCachePath() {
+    return getSpecificModuleCachePath(getInvocation().getModuleHash());
+  }
 
   /// Create the AST context.
   void createASTContext();
diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h
index 28c57dbe3b8e71..93d6ea72270aa2 100644
--- a/clang/include/clang/Lex/HeaderSearch.h
+++ b/clang/include/clang/Lex/HeaderSearch.h
@@ -183,6 +183,9 @@ class HeaderSearch {
   /// a system header.
   std::vector<std::pair<std::string, bool>> SystemHeaderPrefixes;
 
+  /// The hash used for module cache paths.
+  std::string ModuleHash;
+
   /// The path to the module cache.
   std::string ModuleCachePath;
 
@@ -319,11 +322,17 @@ class HeaderSearch {
     return {};
   }
 
+  /// Set the hash to use for module cache paths.
+  void setModuleHash(StringRef Hash) { ModuleHash = std::string(Hash); }
+
   /// Set the path to the module cache.
   void setModuleCachePath(StringRef CachePath) {
     ModuleCachePath = std::string(CachePath);
   }
 
+  /// Retrieve the module hash.
+  StringRef getModuleHash() const { return ModuleHash; }
+
   /// Retrieve the path to the module cache.
   StringRef getModuleCachePath() const { return ModuleCachePath; }
 
@@ -512,6 +521,15 @@ class HeaderSearch {
   std::string getPrebuiltModuleFileName(StringRef ModuleName,
                                         bool FileMapOnly = false);
 
+  /// Retrieve the name of the prebuilt module file that should be used
+  /// to load the given module.
+  ///
+  /// \param Module The module whose module file name will be returned.
+  ///
+  /// \returns The name of the module file that corresponds to this module,
+  /// or an empty string if this module does not correspond to any module file.
+  std::string getPrebuiltImplicitModuleFileName(Module *Module);
+
   /// Retrieve the name of the (to-be-)cached module file that should
   /// be used to load a module with the given name.
   ///
@@ -614,6 +632,22 @@ class HeaderSearch {
   Module *lookupModule(StringRef ModuleName, StringRef SearchName,
                        bool AllowExtraModuleMapSearch = false);
 
+  /// Retrieve the name of the (to-be-)cached module file that should
+  /// be used to load a module with the given name.
+  ///
+  /// \param ModuleName The module whose module file name will be returned.
+  ///
+  /// \param ModuleMapPath A path that when combined with \c ModuleName
+  /// uniquely identifies this module. See Module::ModuleMap.
+  ///
+  /// \param CachePath A path to the module cache.
+  ///
+  /// \returns The name of the module file that corresponds to this module,
+  /// or an empty string if this module does not correspond to any module file.
+  std::string getCachedModuleFileNameImpl(StringRef ModuleName,
+                                          StringRef ModuleMapPath,
+                                          StringRef CachePath);
+
   /// Retrieve a module with the given name, which may be part of the
   /// given framework.
   ///
diff --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h
index 3af49e17539561..42f3cff8c57ae7 100644
--- a/clang/include/clang/Lex/HeaderSearchOptions.h
+++ b/clang/include/clang/Lex/HeaderSearchOptions.h
@@ -142,6 +142,10 @@ class HeaderSearchOptions {
   /// file.
   unsigned ModuleMapFileHomeIsCwd : 1;
 
+  /// Also search for prebuilt implicit modules in the prebuilt module cache
+  /// path.
+  unsigned EnablePrebuiltImplicitModules : 1;
+
   /// The interval (in seconds) between pruning operations.
   ///
   /// This operation is expensive, because it requires Clang to walk through
@@ -217,8 +221,9 @@ class HeaderSearchOptions {
   HeaderSearchOptions(StringRef _Sysroot = "/")
       : Sysroot(_Sysroot), ModuleFormat("raw"), DisableModuleHash(false),
         ImplicitModuleMaps(false), ModuleMapFileHomeIsCwd(false),
-        UseBuiltinIncludes(true), UseStandardSystemIncludes(true),
-        UseStandardCXXIncludes(true), UseLibcxx(false), Verbose(false),
+        EnablePrebuiltImplicitModules(false), UseBuiltinIncludes(true),
+        UseStandardSystemIncludes(true), UseStandardCXXIncludes(true),
+        UseLibcxx(false), Verbose(false),
         ModulesValidateOncePerBuildSession(false),
         ModulesValidateSystemHeaders(false),
         ValidateASTInputFilesContent(false), UseDebugInfo(false),
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 011e7dfe21c2ae..e2b396c05ebb75 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3379,6 +3379,8 @@ static void RenderModulesOptions(Compilation &C, const Driver &D,
           std::string("-fprebuilt-module-path=") + A->getValue()));
       A->claim();
     }
+    if (Args.hasFlag(options::OPT_fprebuilt_implicit_modules, false))
+      CmdArgs.push_back("-fprebuilt-implicit-modules");
     if (Args.hasFlag(options::OPT_fmodules_validate_input_files_content,
                      options::OPT_fno_modules_validate_input_files_content,
                      false))
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 030ef42fc11d7d..f4b00df1e48658 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -428,8 +428,12 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) {
 
   PP->setPreprocessedOutput(getPreprocessorOutputOpts().ShowCPP);
 
-  if (PP->getLangOpts().Modules && PP->getLangOpts().ImplicitModules)
-    PP->getHeaderSearchInfo().setModuleCachePath(getSpecificModuleCachePath());
+  if (PP->getLangOpts().Modules && PP->getLangOpts().ImplicitModules) {
+    std::string ModuleHash = getInvocation().getModuleHash();
+    PP->getHeaderSearchInfo().setModuleHash(ModuleHash);
+    PP->getHeaderSearchInfo().setModuleCachePath(
+        getSpecificModuleCachePath(ModuleHash));
+  }
 
   // Handle generating dependencies, if requested.
   const DependencyOutputOptions &DepOpts = getDependencyOutputOpts();
@@ -477,13 +481,11 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) {
   }
 }
 
-std::string CompilerInstance::getSpecificModuleCachePath() {
-  // Set up the module path, including the hash for the
-  // module-creation options.
+std::string CompilerInstance::getSpecificModuleCachePath(StringRef ModuleHash) {
+  // Set up the module path, including the hash for the module-creation options.
   SmallString<256> SpecificModuleCache(getHeaderSearchOpts().ModuleCachePath);
   if (!SpecificModuleCache.empty() && !getHeaderSearchOpts().DisableModuleHash)
-    llvm::sys::path::append(SpecificModuleCache,
-                            getInvocation().getModuleHash());
+    llvm::sys::path::append(SpecificModuleCache, ModuleHash);
   return std::string(SpecificModuleCache.str());
 }
 
@@ -1673,6 +1675,8 @@ static ModuleSource selectModuleSource(
   if (!HSOpts.PrebuiltModuleFiles.empty() ||
       !HSOpts.PrebuiltModulePaths.empty()) {
     ModuleFilename = HS.getPrebuiltModuleFileName(ModuleName);
+    if (HSOpts.EnablePrebuiltImplicitModules && ModuleFilename.empty())
+      ModuleFilename = HS.getPrebuiltImplicitModuleFileName(M);
     if (!ModuleFilename.empty())
       return MS_PrebuiltModulePath;
   }
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 7b8554c0fe49a7..e8c694b2f2393d 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2191,6 +2191,8 @@ static void ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args,
       !Args.hasArg(OPT_fmodules_disable_diagnostic_validation);
   Opts.ImplicitModuleMaps = Args.hasArg(OPT_fimplicit_module_maps);
   Opts.ModuleMapFileHomeIsCwd = Args.hasArg(OPT_fmodule_map_file_home_is_cwd);
+  Opts.EnablePrebuiltImplicitModules =
+      Args.hasArg(OPT_fprebuilt_implicit_modules);
   Opts.ModuleCachePruneInterval =
       getLastArgIntValue(Args, OPT_fmodules_prune_interval, 7 * 24 * 60 * 60);
   Opts.ModuleCachePruneAfter =
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index 50c1fb984206d7..99c92e91aad514 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -164,14 +164,39 @@ std::string HeaderSearch::getPrebuiltModuleFileName(StringRef ModuleName,
   return {};
 }
 
+std::string HeaderSearch::getPrebuiltImplicitModuleFileName(Module *Module) {
+  const FileEntry *ModuleMap =
+      getModuleMap().getModuleMapFileForUniquing(Module);
+  StringRef ModuleName = Module->Name;
+  StringRef ModuleMapPath = ModuleMap->getName();
+  StringRef ModuleCacheHash = HSOpts->DisableModuleHash ? "" : getModuleHash();
+  for (const std::string &Dir : HSOpts->PrebuiltModulePaths) {
+    SmallString<256> CachePath(Dir);
+    llvm::sys::fs::make_absolute(CachePath);
+    llvm::sys::path::append(CachePath, ModuleCacheHash);
+    std::string FileName =
+        getCachedModuleFileNameImpl(ModuleName, ModuleMapPath, CachePath);
+    if (!FileName.empty() && getFileMgr().getFile(FileName))
+      return FileName;
+  }
+  return {};
+}
+
 std::string HeaderSearch::getCachedModuleFileName(StringRef ModuleName,
                                                   StringRef ModuleMapPath) {
+  return getCachedModuleFileNameImpl(ModuleName, ModuleMapPath,
+                                     getModuleCachePath());
+}
+
+std::string HeaderSearch::getCachedModuleFileNameImpl(StringRef ModuleName,
+                                                      StringRef ModuleMapPath,
+                                                      StringRef CachePath) {
   // If we don't have a module cache path or aren't supposed to use one, we
   // can't do anything.
-  if (getModuleCachePath().empty())
+  if (CachePath.empty())
     return {};
 
-  SmallString<256> Result(getModuleCachePath());
+  SmallString<256> Result(CachePath);
   llvm::sys::fs::make_absolute(Result);
 
   if (HSOpts->DisableModuleHash) {
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index fb397246fd8d8b..aee89733d197f7 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -5844,6 +5844,7 @@ bool ASTReader::ParseHeaderSearchOptions(const RecordData &Record,
   HSOpts.DisableModuleHash = Record[Idx++];
   HSOpts.ImplicitModuleMaps = Record[Idx++];
   HSOpts.ModuleMapFileHomeIsCwd = Record[Idx++];
+  HSOpts.EnablePrebuiltImplicitModules = Record[Idx++];
   HSOpts.UseBuiltinIncludes = Record[Idx++];
   HSOpts.UseStandardSystemIncludes = Record[Idx++];
   HSOpts.UseStandardCXXIncludes = Record[Idx++];
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index f0ce843bd3e4cf..4984a4d221a2e8 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1320,6 +1320,7 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, ASTContext &Context,
   Record.push_back(HSOpts.DisableModuleHash);
   Record.push_back(HSOpts.ImplicitModuleMaps);
   Record.push_back(HSOpts.ModuleMapFileHomeIsCwd);
+  Record.push_back(HSOpts.EnablePrebuiltImplicitModules);
   Record.push_back(HSOpts.UseBuiltinIncludes);
   Record.push_back(HSOpts.UseStandardSystemIncludes);
   Record.push_back(HSOpts.UseStandardCXXIncludes);
diff --git a/clang/test/Modules/Inputs/prebuilt-implicit-module/a.h b/clang/test/Modules/Inputs/prebuilt-implicit-module/a.h
new file mode 100644
index 00000000000000..f86587a8396419
--- /dev/null
+++ b/clang/test/Modules/Inputs/prebuilt-implicit-module/a.h
@@ -0,0 +1 @@
+const int a = 1;
diff --git a/clang/test/Modules/Inputs/prebuilt-implicit-module/module.modulemap b/clang/test/Modules/Inputs/prebuilt-implicit-module/module.modulemap
new file mode 100644
index 00000000000000..4ff22de3d241ff
--- /dev/null
+++ b/clang/test/Modules/Inputs/prebuilt-implicit-module/module.modulemap
@@ -0,0 +1 @@
+module module_a { header "a.h" }
diff --git a/clang/test/Modules/prebuilt-implicit-modules.m b/clang/test/Modules/prebuilt-implicit-modules.m
new file mode 100644
index 00000000000000..1773714f56578d
--- /dev/null
+++ b/clang/test/Modules/prebuilt-implicit-modules.m
@@ -0,0 +1,35 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -x objective-c -fmodules %S/Inputs/prebuilt-implicit-module/module.modulemap -emit-module -fmodule-name=module_a -fmodules-cache-path=%t
+// RUN: find %t -name "module_a*.pcm" | grep module_a
+
+// Check we use a prebuilt module when available, and do not build an implicit module.
+// RUN: rm -rf %t1
+// RUN: mkdir -p %t1
+// RUN: %clang_cc1 -x objective-c %s -I%S/Inputs/prebuilt-implicit-module -fmodules -fmodule-map-file=%S/Inputs/prebuilt-implicit-module/module.modulemap -fprebuilt-implicit-modules -fprebuilt-module-path=%t -fmodules-cache-path=%t1
+// RUN: find %t1 -name "module_a*.pcm" | not grep module_a
+
+// Check a module cache path is not required when all modules resolve to
+// prebuilt implicit modules.
+// RUN: rm -rf %t1
+// RUN: mkdir -p %t1
+// RUN: %clang_cc1 -x objective-c %s -I%S/Inputs/prebuilt-implicit-module -fmodules -fmodule-map-file=%S/Inputs/prebuilt-implicit-module/module.modulemap -fprebuilt-implicit-modules -fprebuilt-module-path=%t
+
+// Check that we correctly fall back to implicit modules if the prebuilt implicit module is not found.
+// RUN: rm -rf %t1
+// RUN: mkdir -p %t1
+// RUN: %clang_cc1 -x objective-c %s -I%S/Inputs/prebuilt-implicit-module -fmodules -fmodule-map-file=%S/Inputs/prebuilt-implicit-module/module.modulemap -fprebuilt-implicit-modules -fprebuilt-module-path=%t -fmodules-cache-path=%t1 -fno-signed-char
+// RUN: find %t1 -name "module_a*.pcm" | grep module_a
+
+// Check that non-implicit prebuilt modules are always preferred to prebuilt implicit modules.
+// RUN: rm -rf %t2
+// RUN: mkdir -p %t2
+// RUN: %clang_cc1 -x objective-c -fmodules %S/Inputs/prebuilt-implicit-module/module.modulemap -emit-module -fmodule-name=module_a -fmodules-cache-path=%t
+// RUN: %clang_cc1 -x objective-c -fmodules %S/Inputs/prebuilt-implicit-module/module.modulemap -emit-module -fmodule-name=module_a -o %t/module_a.pcm -fno-signed-char
+// RUN: not %clang_cc1 -x objective-c %s -I%S/Inputs/prebuilt-implicit-module -fmodules -fmodule-map-file=%S/Inputs/prebuilt-implicit-module/module.modulemap -fprebuilt-implicit-modules -fprebuilt-module-path=%t -fmodules-cache-path=%t2
+// RUN: find %t2 -name "module_a*.pcm" | not grep module_a
+
+// expected-no-diagnostics
+@import module_a;
+int test() {
+  return a;
+}

From bd701ab49a2f996e712563acbf7e1a798a036752 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph@gmail.com>
Date: Thu, 5 Nov 2020 21:16:27 +0000
Subject: [PATCH 25/31] Fix MLIR Python bindings build (NFC)

The CMake macro refactoring had a hardcoded value left instead of using
the function argument.
Didn't catch it locally before because it required a clean build to
trigger.
---
 mlir/cmake/modules/AddMLIRPythonExtension.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/cmake/modules/AddMLIRPythonExtension.cmake b/mlir/cmake/modules/AddMLIRPythonExtension.cmake
index 5741f512ab9f40..e5e81d6a87c28d 100644
--- a/mlir/cmake/modules/AddMLIRPythonExtension.cmake
+++ b/mlir/cmake/modules/AddMLIRPythonExtension.cmake
@@ -73,7 +73,7 @@ function(add_mlir_python_extension libname extname)
     # project separation perspective and a discussion on how to better
     # segment MLIR libraries needs to happen.
     LIBRARY_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/python
-    OUTPUT_NAME "_mlirTransforms"
+    OUTPUT_NAME "${extname}"
     PREFIX "${PYTHON_MODULE_PREFIX}"
     SUFFIX "${PYTHON_MODULE_SUFFIX}${PYTHON_MODULE_EXTENSION}"
   )

From 6f288b11dba7f7ef01bdc4a3202e908bdc0fb067 Mon Sep 17 00:00:00 2001
From: "Paul C. Anagnostopoulos" <paul@windfall.com>
Date: Wed, 4 Nov 2020 18:48:53 -0500
Subject: [PATCH 26/31] [TableGen] Clean up documentation toctrees; clarify two
 paragraphs.

Differential Revision: https://reviews.llvm.org/D90804
---
 llvm/docs/TableGen/BackGuide.rst | 11 +++++++----
 llvm/docs/TableGen/ProgRef.rst   |  6 ------
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/llvm/docs/TableGen/BackGuide.rst b/llvm/docs/TableGen/BackGuide.rst
index 0463c96b5794b1..b9b9cf83bae9d7 100644
--- a/llvm/docs/TableGen/BackGuide.rst
+++ b/llvm/docs/TableGen/BackGuide.rst
@@ -27,8 +27,8 @@ header file (``record.h``) and/or the Doxygen documentation.
 
 This document assumes that you have read the :doc:`TableGen Programmer's
 Reference <./ProgRef>`, which provides a detailed reference for coding
-TableGen source files. This document and the data structure comments will be
-improved over time.
+TableGen source files. For a description of the existing backends, see
+:doc:`TableGen BackEnds <./BackEnds>`.
 
 Data Structures
 ===============
@@ -738,7 +738,9 @@ The ``PrintRecords`` Backend
 
 The TableGen command option ``--print-records`` invokes a simple backend
 that prints all the classes and records defined in the source files. This is
-the default backend option. The output looks like this:
+the default backend option. The format of the output is guaranteed to be
+constant over time, so that the output can be compared in tests. The output
+looks like this:
 
 .. code-block:: text
 
@@ -776,7 +778,8 @@ The ``PrintDetailedRecords`` Backend
 
 The TableGen command option ``--print-detailed-records`` invokes a backend
 that prints all the global variables, classes, and records defined in the
-source files. The output looks like this.
+source files. The format of the output is *not* guaranteed to be constant
+over time. The output looks like this:
 
 .. code-block:: text
 
diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst
index 4b836513971553..26c0f0793be8db 100644
--- a/llvm/docs/TableGen/ProgRef.rst
+++ b/llvm/docs/TableGen/ProgRef.rst
@@ -7,12 +7,6 @@ TableGen Programmer's Reference
 .. contents::
    :local:
 
-.. toctree::
-   :hidden:
-
-   BackEnds
-   BackGuide
-
 Introduction
 ============
 

From a8a10acba2a769ae0f77e61380e649e7428f68fb Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval@gmail.com>
Date: Thu, 5 Nov 2020 16:21:15 -0500
Subject: [PATCH 27/31] [openacc][openmp] Allow duplicate between required and
 allowed once/exclusive

The validity checks introduced in D90241 are a bit too restrictive, and this patch proposes to loosen
them a bit. Duplicate clauses are now checked only among the three allowed lists and between the
requiredClauses and allowedClauses lists. This makes it possible to express restrictions where a clause
is required but may also appear only once on the directive. We found this kind of restriction useful
on the set directive in OpenACC, for example.

Reviewed By: kiranchandramohan

Differential Revision: https://reviews.llvm.org/D90770
---
 .../llvm/Frontend/Directive/DirectiveBase.td  |  4 +++
 llvm/test/TableGen/directive3.td              | 17 ++++++++++--
 llvm/utils/TableGen/DirectiveEmitter.cpp      | 26 ++++++++++++++-----
 3 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/Frontend/Directive/DirectiveBase.td b/llvm/include/llvm/Frontend/Directive/DirectiveBase.td
index aa415b3f0abceb..95514f05afbe34 100644
--- a/llvm/include/llvm/Frontend/Directive/DirectiveBase.td
+++ b/llvm/include/llvm/Frontend/Directive/DirectiveBase.td
@@ -131,6 +131,10 @@ class Directive<string d> {
   // function.
   string alternativeName = "";
 
+  // Clauses cannot appear twice in the three allowed lists below. Also, since
+  // required implies allowed, the same clause cannot appear in both the
+  // allowedClauses and requiredClauses lists.
+
   // List of allowed clauses for the directive.
   list<VersionedClause> allowedClauses = [];
 
diff --git a/llvm/test/TableGen/directive3.td b/llvm/test/TableGen/directive3.td
index 6deece1d57dc9a..8af4594f299414 100644
--- a/llvm/test/TableGen/directive3.td
+++ b/llvm/test/TableGen/directive3.td
@@ -15,16 +15,29 @@ def TDLC_ClauseA : Clause<"clausea"> {
 def TDLC_ClauseB : Clause<"clauseb"> {
 }
 
+def TDLC_ClauseC : Clause<"clausec"> {
+}
+
+def TDLC_ClauseD : Clause<"claused"> {
+}
+
 def TDL_DirA : Directive<"dira"> {
   let allowedClauses = [
     VersionedClause<TDLC_ClauseA>,
-    VersionedClause<TDLC_ClauseB>
+    VersionedClause<TDLC_ClauseB>,
+    VersionedClause<TDLC_ClauseD>
   ];
   let allowedOnceClauses = [
-    VersionedClause<TDLC_ClauseA>
+    VersionedClause<TDLC_ClauseA>,
+    VersionedClause<TDLC_ClauseC>
+  ];
+  let requiredClauses = [
+    VersionedClause<TDLC_ClauseC>,
+    VersionedClause<TDLC_ClauseD>
   ];
   let isDefault = 1;
 }
 
 // CHECK: error: Clause TDLC_ClauseA already defined on directive TDL_DirA
+// CHECK: error: Clause TDLC_ClauseD already defined on directive TDL_DirA
 // CHECK: error: One or more clauses are defined multiple times on directive TDL_DirA
diff --git a/llvm/utils/TableGen/DirectiveEmitter.cpp b/llvm/utils/TableGen/DirectiveEmitter.cpp
index 2b77c8f81ad22d..9a4b3bf17e37b5 100644
--- a/llvm/utils/TableGen/DirectiveEmitter.cpp
+++ b/llvm/utils/TableGen/DirectiveEmitter.cpp
@@ -127,28 +127,40 @@ bool HasDuplicateClauses(const std::vector<Record *> &Clauses,
   return hasError;
 }
 
+// Check for duplicate clauses in lists. Clauses cannot appear twice in the
+// three allowed lists. Also, since required implies allowed, clauses cannot
+// appear in both the allowedClauses and requiredClauses lists.
 bool HasDuplicateClausesInDirectives(const std::vector<Record *> &Directives) {
+  bool hasDuplicate = false;
   for (const auto &D : Directives) {
     Directive Dir{D};
     llvm::StringSet<> Clauses;
+    // Check for duplicates in the three allowed lists.
     if (HasDuplicateClauses(Dir.getAllowedClauses(), Dir, Clauses) ||
         HasDuplicateClauses(Dir.getAllowedOnceClauses(), Dir, Clauses) ||
-        HasDuplicateClauses(Dir.getAllowedExclusiveClauses(), Dir, Clauses) ||
+        HasDuplicateClauses(Dir.getAllowedExclusiveClauses(), Dir, Clauses)) {
+      hasDuplicate = true;
+    }
+    // Check for duplicates between allowedClauses and requiredClauses.
+    Clauses.clear();
+    if (HasDuplicateClauses(Dir.getAllowedClauses(), Dir, Clauses) ||
         HasDuplicateClauses(Dir.getRequiredClauses(), Dir, Clauses)) {
-      PrintFatalError(
-          "One or more clauses are defined multiple times on directive " +
-          Dir.getRecordName());
-      return true;
+      hasDuplicate = true;
     }
+    if (hasDuplicate)
+      PrintFatalError("One or more clauses are defined multiple times on"
+                      " directive " +
+                      Dir.getRecordName());
   }
-  return false;
+
+  return hasDuplicate;
 }
 
 // Check consitency of records. Return true if an error has been detected.
 // Return false if the records are valid.
 bool DirectiveLanguage::CheckRecordsValidity() const {
   if (getDirectiveLanguages().size() != 1) {
-    PrintError("A single definition of DirectiveLanguage is needed.");
+    PrintFatalError("A single definition of DirectiveLanguage is needed.");
     return true;
   }
 

From e55157874cf20acef55ca20a87699bf77b7cfd3a Mon Sep 17 00:00:00 2001
From: Saleem Abdulrasool <compnerd@compnerd.org>
Date: Thu, 5 Nov 2020 21:17:15 +0000
Subject: [PATCH 28/31] APINotes: repair the Windows builders

Disable the test on Windows, which should've been obvious as being
needed.  The differences in diff implementations and line-endings make
this test difficult to execute on Windows.
---
 clang/test/APINotes/yaml-roundtrip.test | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/clang/test/APINotes/yaml-roundtrip.test b/clang/test/APINotes/yaml-roundtrip.test
index 3379cbf3b6db81..bd4c89d2cdd9d8 100644
--- a/clang/test/APINotes/yaml-roundtrip.test
+++ b/clang/test/APINotes/yaml-roundtrip.test
@@ -1,6 +1,10 @@
 RUN: apinotes-test %S/Inputs/Frameworks/Simple.framework/Headers/Simple.apinotes > %t.result
 RUN: not diff --strip-trailing-cr %S/Inputs/Frameworks/Simple.framework/Headers/Simple.apinotes %t.result | FileCheck %s
 
+Avoid Windows as the diff output differs due to line-endings and different diff
+implementations.
+UNSUPPORTED: system-windows
+
 We expect only the nullability to be different as it is canonicalized during the
 roudtrip.
 

From ca17571051d4e0a63e702371984dbd3671261f79 Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela@gmail.com>
Date: Thu, 5 Nov 2020 20:53:16 +0000
Subject: [PATCH 29/31] [LLDB-lua] modify Lua's 'print' to respect 'io.stdout'

This patch changes the implementation of Lua's `print()` function to
respect `io.stdout`.

The original implementation uses `lua_writestring()` internally, which is
hardcoded to `stdout`.

Reviewed By: JDevlieghere

Differential Revision: https://reviews.llvm.org/D90787
---
 .../Plugins/ScriptInterpreter/Lua/Lua.cpp     | 28 +++++++++++++++++++
 .../Plugins/ScriptInterpreter/Lua/Lua.h       | 12 ++------
 .../Shell/ScriptInterpreter/Lua/print.test    | 23 +++++++++++++++
 3 files changed, 53 insertions(+), 10 deletions(-)
 create mode 100644 lldb/test/Shell/ScriptInterpreter/Lua/print.test

diff --git a/lldb/source/Plugins/ScriptInterpreter/Lua/Lua.cpp b/lldb/source/Plugins/ScriptInterpreter/Lua/Lua.cpp
index 2db44f2d29d0b2..dc3fd84a3bfba8 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Lua/Lua.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Lua/Lua.cpp
@@ -14,6 +14,34 @@
 using namespace lldb_private;
 using namespace lldb;
 
+static int lldb_print(lua_State *L) {
+  int n = lua_gettop(L);
+  lua_getglobal(L, "io");
+  lua_getfield(L, -1, "stdout");
+  lua_getfield(L, -1, "write");
+  for (int i = 1; i <= n; i++) {
+    lua_pushvalue(L, -1); // write()
+    lua_pushvalue(L, -3); // io.stdout
+    luaL_tolstring(L, i, nullptr);
+    lua_pushstring(L, i != n ? "\t" : "\n");
+    lua_call(L, 3, 0);
+  }
+  return 0;
+}
+
+Lua::Lua() : m_lua_state(luaL_newstate()) {
+  assert(m_lua_state);
+  luaL_openlibs(m_lua_state);
+  luaopen_lldb(m_lua_state);
+  lua_pushcfunction(m_lua_state, lldb_print);
+  lua_setglobal(m_lua_state, "print");
+}
+
+Lua::~Lua() {
+  assert(m_lua_state);
+  lua_close(m_lua_state);
+}
+
 llvm::Error Lua::Run(llvm::StringRef buffer) {
   int error =
       luaL_loadbuffer(m_lua_state, buffer.data(), buffer.size(), "buffer") ||
diff --git a/lldb/source/Plugins/ScriptInterpreter/Lua/Lua.h b/lldb/source/Plugins/ScriptInterpreter/Lua/Lua.h
index ff055d0cbb9e33..83f836d8d78ab6 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Lua/Lua.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Lua/Lua.h
@@ -25,16 +25,8 @@ int luaopen_lldb(lua_State *L);
 
 class Lua {
 public:
-  Lua() : m_lua_state(luaL_newstate()) {
-    assert(m_lua_state);
-    luaL_openlibs(m_lua_state);
-    luaopen_lldb(m_lua_state);
-  }
-
-  ~Lua() {
-    assert(m_lua_state);
-    lua_close(m_lua_state);
-  }
+  Lua();
+  ~Lua();
 
   llvm::Error Run(llvm::StringRef buffer);
   llvm::Error LoadModule(llvm::StringRef filename);
diff --git a/lldb/test/Shell/ScriptInterpreter/Lua/print.test b/lldb/test/Shell/ScriptInterpreter/Lua/print.test
new file mode 100644
index 00000000000000..fd457ecccec1e0
--- /dev/null
+++ b/lldb/test/Shell/ScriptInterpreter/Lua/print.test
@@ -0,0 +1,23 @@
+# REQUIRES: lua
+# UNSUPPORTED: lldb-repro
+#
+# RUN: rm -rf %t.stderr %t.stdout
+# RUN: cat %s | %lldb --script-language lua 2> %t.stderr > %t.stdout
+# RUN: cat %t.stdout | FileCheck %s --check-prefix STDOUT
+# RUN: cat %t.stderr | FileCheck %s --check-prefix STDERR
+script
+file = lldb.SBFile(2, "w", false)
+lldb.debugger:SetOutputFile(file)
+print(95000 + 126, nil, 'a')
+quit
+script
+print({})
+quit
+
+# STDOUT: 95126	nil	a
+# STDOUT-NOT: table: {{0x[[:xdigit:]]+}}
+# STDERR: table: {{0x[[:xdigit:]]+}}
+
+# RUN: rm -rf %t.stderr %t.stdout
+# RUN: %lldb --script-language lua -o 'script print(95000 + 126, nil, "a")' 2> %t.stderr > %t.stdout
+# RUN: cat %t.stdout | FileCheck %s --check-prefix STDOUT

From 4a4f4f78cb96c3e144929a416a9ee4ab7b419607 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 5 Nov 2020 13:32:33 -0800
Subject: [PATCH 30/31] [RISCV] Add test cases to show missed opportunities to
 use fnmadd/fnmsub if the second operand to the fma is negated rather than the
 first. NFC

We need to add more isel patterns to handle this.
---
 llvm/test/CodeGen/RISCV/double-arith.ll | 84 +++++++++++++++++++++++++
 llvm/test/CodeGen/RISCV/float-arith.ll  | 64 +++++++++++++++++++
 2 files changed, 148 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll
index 604911ae49f2c7..1055f2ad4d50fe 100644
--- a/llvm/test/CodeGen/RISCV/double-arith.ll
+++ b/llvm/test/CodeGen/RISCV/double-arith.ll
@@ -527,6 +527,50 @@ define double @fnmadd_d(double %a, double %b, double %c) nounwind {
   ret double %1
 }
 
+define double @fnmadd_d_2(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmadd_d_2:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw a0, 8(sp)
+; RV32IFD-NEXT:    sw a1, 12(sp)
+; RV32IFD-NEXT:    fld ft0, 8(sp)
+; RV32IFD-NEXT:    sw a4, 8(sp)
+; RV32IFD-NEXT:    sw a5, 12(sp)
+; RV32IFD-NEXT:    fld ft1, 8(sp)
+; RV32IFD-NEXT:    sw a2, 8(sp)
+; RV32IFD-NEXT:    sw a3, 12(sp)
+; RV32IFD-NEXT:    fld ft2, 8(sp)
+; RV32IFD-NEXT:    fcvt.d.w ft3, zero
+; RV32IFD-NEXT:    fadd.d ft2, ft2, ft3
+; RV32IFD-NEXT:    fadd.d ft1, ft1, ft3
+; RV32IFD-NEXT:    fneg.d ft2, ft2
+; RV32IFD-NEXT:    fmsub.d ft0, ft0, ft2, ft1
+; RV32IFD-NEXT:    fsd ft0, 8(sp)
+; RV32IFD-NEXT:    lw a0, 8(sp)
+; RV32IFD-NEXT:    lw a1, 12(sp)
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: fnmadd_d_2:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    fmv.d.x ft0, a0
+; RV64IFD-NEXT:    fmv.d.x ft1, a2
+; RV64IFD-NEXT:    fmv.d.x ft2, a1
+; RV64IFD-NEXT:    fmv.d.x ft3, zero
+; RV64IFD-NEXT:    fadd.d ft2, ft2, ft3
+; RV64IFD-NEXT:    fadd.d ft1, ft1, ft3
+; RV64IFD-NEXT:    fneg.d ft2, ft2
+; RV64IFD-NEXT:    fmsub.d ft0, ft0, ft2, ft1
+; RV64IFD-NEXT:    fmv.x.d a0, ft0
+; RV64IFD-NEXT:    ret
+  %b_ = fadd double 0.0, %b
+  %c_ = fadd double 0.0, %c
+  %negb = fsub double -0.0, %b_
+  %negc = fsub double -0.0, %c_
+  %1 = call double @llvm.fma.f64(double %a, double %negb, double %negc)
+  ret double %1
+}
+
 define double @fnmsub_d(double %a, double %b, double %c) nounwind {
 ; RV32IFD-LABEL: fnmsub_d:
 ; RV32IFD:       # %bb.0:
@@ -564,3 +608,43 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind {
   %1 = call double @llvm.fma.f64(double %nega, double %b, double %c)
   ret double %1
 }
+
+define double @fnmsub_d_2(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmsub_d_2:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw a4, 8(sp)
+; RV32IFD-NEXT:    sw a5, 12(sp)
+; RV32IFD-NEXT:    fld ft0, 8(sp)
+; RV32IFD-NEXT:    sw a0, 8(sp)
+; RV32IFD-NEXT:    sw a1, 12(sp)
+; RV32IFD-NEXT:    fld ft1, 8(sp)
+; RV32IFD-NEXT:    sw a2, 8(sp)
+; RV32IFD-NEXT:    sw a3, 12(sp)
+; RV32IFD-NEXT:    fld ft2, 8(sp)
+; RV32IFD-NEXT:    fcvt.d.w ft3, zero
+; RV32IFD-NEXT:    fadd.d ft2, ft2, ft3
+; RV32IFD-NEXT:    fneg.d ft2, ft2
+; RV32IFD-NEXT:    fmadd.d ft0, ft1, ft2, ft0
+; RV32IFD-NEXT:    fsd ft0, 8(sp)
+; RV32IFD-NEXT:    lw a0, 8(sp)
+; RV32IFD-NEXT:    lw a1, 12(sp)
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: fnmsub_d_2:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    fmv.d.x ft0, a2
+; RV64IFD-NEXT:    fmv.d.x ft1, a0
+; RV64IFD-NEXT:    fmv.d.x ft2, a1
+; RV64IFD-NEXT:    fmv.d.x ft3, zero
+; RV64IFD-NEXT:    fadd.d ft2, ft2, ft3
+; RV64IFD-NEXT:    fneg.d ft2, ft2
+; RV64IFD-NEXT:    fmadd.d ft0, ft1, ft2, ft0
+; RV64IFD-NEXT:    fmv.x.d a0, ft0
+; RV64IFD-NEXT:    ret
+  %b_ = fadd double 0.0, %b
+  %negb = fsub double -0.0, %b_
+  %1 = call double @llvm.fma.f64(double %a, double %negb, double %c)
+  ret double %1
+}
diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll
index f22f85d5d79080..9aaa6f48289be0 100644
--- a/llvm/test/CodeGen/RISCV/float-arith.ll
+++ b/llvm/test/CodeGen/RISCV/float-arith.ll
@@ -396,6 +396,40 @@ define float @fnmadd_s(float %a, float %b, float %c) nounwind {
   ret float %1
 }
 
+define float @fnmadd_s_2(float %a, float %b, float %c) nounwind {
+; RV32IF-LABEL: fnmadd_s_2:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    fmv.w.x ft0, a0
+; RV32IF-NEXT:    fmv.w.x ft1, a2
+; RV32IF-NEXT:    fmv.w.x ft2, a1
+; RV32IF-NEXT:    fmv.w.x ft3, zero
+; RV32IF-NEXT:    fadd.s ft2, ft2, ft3
+; RV32IF-NEXT:    fadd.s ft1, ft1, ft3
+; RV32IF-NEXT:    fneg.s ft2, ft2
+; RV32IF-NEXT:    fmsub.s ft0, ft0, ft2, ft1
+; RV32IF-NEXT:    fmv.x.w a0, ft0
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: fnmadd_s_2:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    fmv.w.x ft0, a0
+; RV64IF-NEXT:    fmv.w.x ft1, a2
+; RV64IF-NEXT:    fmv.w.x ft2, a1
+; RV64IF-NEXT:    fmv.w.x ft3, zero
+; RV64IF-NEXT:    fadd.s ft2, ft2, ft3
+; RV64IF-NEXT:    fadd.s ft1, ft1, ft3
+; RV64IF-NEXT:    fneg.s ft2, ft2
+; RV64IF-NEXT:    fmsub.s ft0, ft0, ft2, ft1
+; RV64IF-NEXT:    fmv.x.w a0, ft0
+; RV64IF-NEXT:    ret
+  %b_ = fadd float 0.0, %b
+  %c_ = fadd float 0.0, %c
+  %negb = fsub float -0.0, %b_
+  %negc = fsub float -0.0, %c_
+  %1 = call float @llvm.fma.f32(float %a, float %negb, float %negc)
+  ret float %1
+}
+
 define float @fnmsub_s(float %a, float %b, float %c) nounwind {
 ; RV32IF-LABEL: fnmsub_s:
 ; RV32IF:       # %bb.0:
@@ -423,3 +457,33 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind {
   %1 = call float @llvm.fma.f32(float %nega, float %b, float %c)
   ret float %1
 }
+
+define float @fnmsub_s_2(float %a, float %b, float %c) nounwind {
+; RV32IF-LABEL: fnmsub_s_2:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    fmv.w.x ft0, a2
+; RV32IF-NEXT:    fmv.w.x ft1, a0
+; RV32IF-NEXT:    fmv.w.x ft2, a1
+; RV32IF-NEXT:    fmv.w.x ft3, zero
+; RV32IF-NEXT:    fadd.s ft2, ft2, ft3
+; RV32IF-NEXT:    fneg.s ft2, ft2
+; RV32IF-NEXT:    fmadd.s ft0, ft1, ft2, ft0
+; RV32IF-NEXT:    fmv.x.w a0, ft0
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: fnmsub_s_2:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    fmv.w.x ft0, a2
+; RV64IF-NEXT:    fmv.w.x ft1, a0
+; RV64IF-NEXT:    fmv.w.x ft2, a1
+; RV64IF-NEXT:    fmv.w.x ft3, zero
+; RV64IF-NEXT:    fadd.s ft2, ft2, ft3
+; RV64IF-NEXT:    fneg.s ft2, ft2
+; RV64IF-NEXT:    fmadd.s ft0, ft1, ft2, ft0
+; RV64IF-NEXT:    fmv.x.w a0, ft0
+; RV64IF-NEXT:    ret
+  %b_ = fadd float 0.0, %b
+  %negb = fsub float -0.0, %b_
+  %1 = call float @llvm.fma.f32(float %a, float %negb, float %c)
+  ret float %1
+}

From defe11866a326491ee9767f84bb3f70cfc4f4bcb Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 5 Nov 2020 13:56:52 -0800
Subject: [PATCH 31/31] [RISCV] Add isel patterns for fnmadd/fnmsub with an
 fneg on the second operand instead of the first.

The multiply part of FMA is commutable, but TargetSelectionDAG.td
doesn't have it marked as commutable so tablegen won't automatically
create the additional patterns.

So manually add commuted patterns.
---
 llvm/lib/Target/RISCV/RISCVInstrInfoD.td |  4 ++++
 llvm/lib/Target/RISCV/RISCVInstrInfoF.td |  4 ++++
 llvm/test/CodeGen/RISCV/double-arith.ll  | 12 ++++--------
 llvm/test/CodeGen/RISCV/float-arith.ll   | 12 ++++--------
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 6c36f53cd563d8..ddd86d9a7a9a0b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -276,10 +276,14 @@ def : Pat<(fma FPR64:$rs1, FPR64:$rs2, (fneg FPR64:$rs3)),
 // fnmsub: -rs1 * rs2 + rs3
 def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, FPR64:$rs3),
           (FNMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+def : Pat<(fma FPR64:$rs1, (fneg FPR64:$rs2), FPR64:$rs3),
+          (FNMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
 
 // fnmadd: -rs1 * rs2 - rs3
 def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, (fneg FPR64:$rs3)),
           (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+def : Pat<(fma FPR64:$rs1, (fneg FPR64:$rs2), (fneg FPR64:$rs3)),
+          (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
 
 // The RISC-V 2.2 user-level ISA spec defines fmin and fmax as returning the
 // canonical NaN when giving a signaling NaN. This doesn't match the LLVM
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index ce5c3abb6a0662..c03d08d752a67b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -336,10 +336,14 @@ def : Pat<(fma FPR32:$rs1, FPR32:$rs2, (fneg FPR32:$rs3)),
 // fnmsub: -rs1 * rs2 + rs3
 def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, FPR32:$rs3),
           (FNMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+def : Pat<(fma FPR32:$rs1, (fneg FPR32:$rs2), FPR32:$rs3),
+          (FNMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
 
 // fnmadd: -rs1 * rs2 - rs3
 def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, (fneg FPR32:$rs3)),
           (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+def : Pat<(fma FPR32:$rs1, (fneg FPR32:$rs2), (fneg FPR32:$rs3)),
+          (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
 
 // The RISC-V 2.2 user-level ISA spec defines fmin and fmax as returning the
 // canonical NaN when given a signaling NaN. This doesn't match the LLVM
diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll
index 1055f2ad4d50fe..7c5f4393ef34b2 100644
--- a/llvm/test/CodeGen/RISCV/double-arith.ll
+++ b/llvm/test/CodeGen/RISCV/double-arith.ll
@@ -543,8 +543,7 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind {
 ; RV32IFD-NEXT:    fcvt.d.w ft3, zero
 ; RV32IFD-NEXT:    fadd.d ft2, ft2, ft3
 ; RV32IFD-NEXT:    fadd.d ft1, ft1, ft3
-; RV32IFD-NEXT:    fneg.d ft2, ft2
-; RV32IFD-NEXT:    fmsub.d ft0, ft0, ft2, ft1
+; RV32IFD-NEXT:    fnmadd.d ft0, ft0, ft2, ft1
 ; RV32IFD-NEXT:    fsd ft0, 8(sp)
 ; RV32IFD-NEXT:    lw a0, 8(sp)
 ; RV32IFD-NEXT:    lw a1, 12(sp)
@@ -559,8 +558,7 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind {
 ; RV64IFD-NEXT:    fmv.d.x ft3, zero
 ; RV64IFD-NEXT:    fadd.d ft2, ft2, ft3
 ; RV64IFD-NEXT:    fadd.d ft1, ft1, ft3
-; RV64IFD-NEXT:    fneg.d ft2, ft2
-; RV64IFD-NEXT:    fmsub.d ft0, ft0, ft2, ft1
+; RV64IFD-NEXT:    fnmadd.d ft0, ft0, ft2, ft1
 ; RV64IFD-NEXT:    fmv.x.d a0, ft0
 ; RV64IFD-NEXT:    ret
   %b_ = fadd double 0.0, %b
@@ -624,8 +622,7 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind {
 ; RV32IFD-NEXT:    fld ft2, 8(sp)
 ; RV32IFD-NEXT:    fcvt.d.w ft3, zero
 ; RV32IFD-NEXT:    fadd.d ft2, ft2, ft3
-; RV32IFD-NEXT:    fneg.d ft2, ft2
-; RV32IFD-NEXT:    fmadd.d ft0, ft1, ft2, ft0
+; RV32IFD-NEXT:    fnmsub.d ft0, ft1, ft2, ft0
 ; RV32IFD-NEXT:    fsd ft0, 8(sp)
 ; RV32IFD-NEXT:    lw a0, 8(sp)
 ; RV32IFD-NEXT:    lw a1, 12(sp)
@@ -639,8 +636,7 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind {
 ; RV64IFD-NEXT:    fmv.d.x ft2, a1
 ; RV64IFD-NEXT:    fmv.d.x ft3, zero
 ; RV64IFD-NEXT:    fadd.d ft2, ft2, ft3
-; RV64IFD-NEXT:    fneg.d ft2, ft2
-; RV64IFD-NEXT:    fmadd.d ft0, ft1, ft2, ft0
+; RV64IFD-NEXT:    fnmsub.d ft0, ft1, ft2, ft0
 ; RV64IFD-NEXT:    fmv.x.d a0, ft0
 ; RV64IFD-NEXT:    ret
   %b_ = fadd double 0.0, %b
diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll
index 9aaa6f48289be0..f665975ab4f141 100644
--- a/llvm/test/CodeGen/RISCV/float-arith.ll
+++ b/llvm/test/CodeGen/RISCV/float-arith.ll
@@ -405,8 +405,7 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind {
 ; RV32IF-NEXT:    fmv.w.x ft3, zero
 ; RV32IF-NEXT:    fadd.s ft2, ft2, ft3
 ; RV32IF-NEXT:    fadd.s ft1, ft1, ft3
-; RV32IF-NEXT:    fneg.s ft2, ft2
-; RV32IF-NEXT:    fmsub.s ft0, ft0, ft2, ft1
+; RV32IF-NEXT:    fnmadd.s ft0, ft0, ft2, ft1
 ; RV32IF-NEXT:    fmv.x.w a0, ft0
 ; RV32IF-NEXT:    ret
 ;
@@ -418,8 +417,7 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind {
 ; RV64IF-NEXT:    fmv.w.x ft3, zero
 ; RV64IF-NEXT:    fadd.s ft2, ft2, ft3
 ; RV64IF-NEXT:    fadd.s ft1, ft1, ft3
-; RV64IF-NEXT:    fneg.s ft2, ft2
-; RV64IF-NEXT:    fmsub.s ft0, ft0, ft2, ft1
+; RV64IF-NEXT:    fnmadd.s ft0, ft0, ft2, ft1
 ; RV64IF-NEXT:    fmv.x.w a0, ft0
 ; RV64IF-NEXT:    ret
   %b_ = fadd float 0.0, %b
@@ -466,8 +464,7 @@ define float @fnmsub_s_2(float %a, float %b, float %c) nounwind {
 ; RV32IF-NEXT:    fmv.w.x ft2, a1
 ; RV32IF-NEXT:    fmv.w.x ft3, zero
 ; RV32IF-NEXT:    fadd.s ft2, ft2, ft3
-; RV32IF-NEXT:    fneg.s ft2, ft2
-; RV32IF-NEXT:    fmadd.s ft0, ft1, ft2, ft0
+; RV32IF-NEXT:    fnmsub.s ft0, ft1, ft2, ft0
 ; RV32IF-NEXT:    fmv.x.w a0, ft0
 ; RV32IF-NEXT:    ret
 ;
@@ -478,8 +475,7 @@ define float @fnmsub_s_2(float %a, float %b, float %c) nounwind {
 ; RV64IF-NEXT:    fmv.w.x ft2, a1
 ; RV64IF-NEXT:    fmv.w.x ft3, zero
 ; RV64IF-NEXT:    fadd.s ft2, ft2, ft3
-; RV64IF-NEXT:    fneg.s ft2, ft2
-; RV64IF-NEXT:    fmadd.s ft0, ft1, ft2, ft0
+; RV64IF-NEXT:    fnmsub.s ft0, ft1, ft2, ft0
 ; RV64IF-NEXT:    fmv.x.w a0, ft0
 ; RV64IF-NEXT:    ret
   %b_ = fadd float 0.0, %b