From 0afdbb4d2dead42df14361ca9f5613d56667481c Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski@arm.com>
Date: Thu, 14 Jan 2021 16:39:16 +0000
Subject: [PATCH 01/17] [flang][driver] Use FLANG_VERSION_STRING in f18.cpp (nfc)

Just a minor improvement suggested in a post-commit review here:
https://reviews.llvm.org/D94422
---
 flang/tools/f18/f18.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/flang/tools/f18/f18.cpp b/flang/tools/f18/f18.cpp
index bdf48d0711422..9a10aeda7b249 100644
--- a/flang/tools/f18/f18.cpp
+++ b/flang/tools/f18/f18.cpp
@@ -389,8 +389,7 @@ void Link(std::vector<std::string> &liblist, std::vector<std::string> &objects,
 
 int printVersion() {
   llvm::errs() << "\nf18 compiler (under development), version "
-               << FLANG_VERSION_MAJOR << "." << FLANG_VERSION_MINOR << "."
-               << FLANG_VERSION_PATCHLEVEL << "\n";
+               << FLANG_VERSION_STRING << "\n";
   return exitStatus;
 }
 

From e21bf875c0f709a721d98450203781a605483a1d Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin@google.com>
Date: Wed, 13 Jan 2021 11:02:10 -0800
Subject: [PATCH 02/17] [NFC] Disallow unused prefixes under MC/ARM

Differential Revision: https://reviews.llvm.org/D94620
---
 llvm/test/MC/ARM/ldr-pseudo-cond-darwin.s |  4 ++--
 llvm/test/MC/ARM/ldr-pseudo-cond.s        |  4 ++--
 llvm/test/MC/ARM/lit.local.cfg            |  8 ++++++++
 llvm/test/MC/ARM/lsl-zero-errors.s        |  6 +++---
 llvm/test/MC/ARM/lsl-zero.s               |  6 +++---
 llvm/test/MC/ARM/mve-fp-registers.s       | 22 +++++++++++-----------
 llvm/test/MC/ARM/thumbv8m.s               | 12 ++++++------
 7 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/llvm/test/MC/ARM/ldr-pseudo-cond-darwin.s b/llvm/test/MC/ARM/ldr-pseudo-cond-darwin.s
index 915b883bc7556..34dda32352765 100644
--- a/llvm/test/MC/ARM/ldr-pseudo-cond-darwin.s
+++ b/llvm/test/MC/ARM/ldr-pseudo-cond-darwin.s
@@ -1,5 +1,5 @@
-@RUN: llvm-mc -triple armv7-base-apple-darwin %s | FileCheck --check-prefix=CHECK-ARM --check-prefix=CHECK %s
-@RUN: llvm-mc -triple thumbv7-base-apple-darwin %s | FileCheck --check-prefix=CHECK-THUMB2 --check-prefix=CHECK %s
+@RUN: llvm-mc -triple armv7-base-apple-darwin %s | FileCheck --check-prefix=CHECK %s
+@RUN: llvm-mc -triple thumbv7-base-apple-darwin %s | FileCheck --check-prefix=CHECK %s
 
 @
 @ Check that ldr to constant pool correctly transfers the condition codes
diff --git a/llvm/test/MC/ARM/ldr-pseudo-cond.s b/llvm/test/MC/ARM/ldr-pseudo-cond.s
index fa78311965c59..2785247f2256c 100644
--- a/llvm/test/MC/ARM/ldr-pseudo-cond.s
+++ b/llvm/test/MC/ARM/ldr-pseudo-cond.s
@@ -1,5 +1,5 @@
-@RUN: llvm-mc -triple armv7-unknown-linux-gnueabi %s | FileCheck --check-prefix=CHECK-ARM --check-prefix=CHECK %s
-@RUN: llvm-mc -triple thumbv7-unknown-linux-gnueabi %s | FileCheck --check-prefix=CHECK-THUMB2 --check-prefix=CHECK %s
+@RUN: llvm-mc -triple armv7-unknown-linux-gnueabi %s | FileCheck --check-prefix=CHECK %s
+@RUN: llvm-mc -triple thumbv7-unknown-linux-gnueabi %s | FileCheck --check-prefix=CHECK %s
 
 @
 @ Check that ldr to constant pool correctly transfers the condition codes
diff --git a/llvm/test/MC/ARM/lit.local.cfg b/llvm/test/MC/ARM/lit.local.cfg
index 236e1d3441665..b305cf706e778 100644
--- a/llvm/test/MC/ARM/lit.local.cfg
+++ b/llvm/test/MC/ARM/lit.local.cfg
@@ -1,2 +1,10 @@
+from lit.llvm.subst import ToolSubst
+
 if not 'ARM' in config.root.targets:
     config.unsupported = True
+
+fc = ToolSubst('FileCheck', unresolved='fatal')
+# Insert this first, so that the bare FileCheck command is rewritten first;
+# the default FileCheck substitution then replaces it with its full path.
+config.substitutions.insert(0, (fc.regex,
+    'FileCheck --allow-unused-prefixes=false'))
diff --git a/llvm/test/MC/ARM/lsl-zero-errors.s b/llvm/test/MC/ARM/lsl-zero-errors.s
index 937b50f62da04..ad39470a4a501 100644
--- a/llvm/test/MC/ARM/lsl-zero-errors.s
+++ b/llvm/test/MC/ARM/lsl-zero-errors.s
@@ -1,6 +1,6 @@
-// RUN: not llvm-mc -triple=thumbv7 -show-encoding < %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONARM --check-prefix=CHECK-THUMBV7 %s
-// RUN: not llvm-mc -triple=thumbv8 -show-encoding < %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONARM --check-prefix=CHECK-THUMBV8 %s
-// RUN: llvm-mc -triple=armv7 -show-encoding < %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM %s
+// RUN: not llvm-mc -triple=thumbv7 -show-encoding < %s 2>&1 | FileCheck --check-prefixes=CHECK-NONARM,CHECK-THUMBV7 %s
+// RUN: not llvm-mc -triple=thumbv8 -show-encoding < %s 2>&1 | FileCheck --check-prefixes=CHECK-NONARM,CHECK-THUMBV8 %s
+// RUN: llvm-mc -triple=armv7 -show-encoding < %s 2>&1 | FileCheck --check-prefix=CHECK-ARM %s
 
         // lsl #0 is actually mov, so here we check that it behaves the same as
         // mov with regards to the permitted registers
diff --git a/llvm/test/MC/ARM/lsl-zero.s b/llvm/test/MC/ARM/lsl-zero.s
index 6e64e00123624..81a599d684176 100644
--- a/llvm/test/MC/ARM/lsl-zero.s
+++ b/llvm/test/MC/ARM/lsl-zero.s
@@ -1,6 +1,6 @@
-// RUN: llvm-mc -triple=thumbv7 -show-encoding < %s 2>/dev/null | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONARM --check-prefix=CHECK-THUMBV7 %s
-// RUN: llvm-mc -triple=thumbv8 -show-encoding < %s 2>/dev/null | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONARM --check-prefix=CHECK-THUMBV8 %s
-// RUN: llvm-mc -triple=armv7 -show-encoding < %s 2>/dev/null | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM %s
+// RUN: llvm-mc -triple=thumbv7 -show-encoding < %s 2>/dev/null | FileCheck --check-prefix=CHECK-NONARM %s
+// RUN: llvm-mc -triple=thumbv8 -show-encoding < %s 2>/dev/null | FileCheck --check-prefix=CHECK-NONARM %s
+// RUN: llvm-mc -triple=armv7 -show-encoding < %s 2>/dev/null | FileCheck --check-prefix=CHECK-ARM %s
 
         // lsl #0 is actually mov, so here we check that it behaves the same as
         // mov with regards to the permitted registers and how it behaves in an
diff --git a/llvm/test/MC/ARM/mve-fp-registers.s b/llvm/test/MC/ARM/mve-fp-registers.s
index 745c464183fa6..886de8c4797e7 100644
--- a/llvm/test/MC/ARM/mve-fp-registers.s
+++ b/llvm/test/MC/ARM/mve-fp-registers.s
@@ -10,36 +10,36 @@
 // All of these instructions are rejected if no VFP or MVE features are
 // present.
 // RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding 2>%t < %s
-// RUN: FileCheck %s < %t --check-prefix=NOFP16 --check-prefix=NOFP32 --check-prefix=NOFP64
+// RUN: FileCheck %s < %t --check-prefixes=NOFP16,NOFP32,NOFP64
 
 // VFP and NEON implementations by default have FP32 and FP64, but not FP16.
 // The VFPv3 FP16 extension just added conversion instructions, which we don't
 // care about here.
-// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp2 2>%t < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP32 --check-prefix=FP64
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp2 2>%t < %s | FileCheck %s --check-prefixes=FP32,FP64
 // RUN: FileCheck %s < %t --check-prefix=NOFP16
-// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+fp-armv8,+neon 2>%t < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP32 --check-prefix=FP64
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+fp-armv8,+neon 2>%t < %s | FileCheck %s --check-prefixes=FP32,FP64
 // RUN: FileCheck %s < %t --check-prefix=NOFP16
 
 // The v8.2A FP16 extension added loads, stores and moves for FP16.
-// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+fp-armv8,+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP16 --check-prefix=FP32 --check-prefix=FP64
+// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+fp-armv8,+fullfp16 < %s | FileCheck %s --check-prefixes=FP16,FP32,FP64
 
 // M-profile FPUs (e.g. Cortex-M4/M7/M33) do not have FP16 instructions, and
 // the FP64 instructions are optional. They are also limited to 16 D registers,
 // but we don't test that here.
-// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp4d16sp 2>%t < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP32
-// RUN: FileCheck %s < %t --check-prefix=NOFP16 --check-prefix=NOFP64
-// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp4,-d32 2>%t < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP32 --check-prefix=FP64
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp4d16sp 2>%t < %s | FileCheck %s --check-prefix=FP32
+// RUN: FileCheck %s < %t --check-prefixes=NOFP16,NOFP64
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp4,-d32 2>%t < %s | FileCheck %s --check-prefixes=FP32,FP64
 // RUN: FileCheck %s < %t --check-prefix=NOFP16
 
 // Integer-only MVE, which can be combined with different options for scalar
 // FPU (or lack thereof), and has all of the move and store instructions
 // regardless of the scalar FPU.
-// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+mve 2>%t < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP16 --check-prefix=FP32 --check-prefix=FP64
-// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+mve,+fp-armv8-sp,+fullfp16 2>%t < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP16 --check-prefix=FP32 --check-prefix=FP64
-// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+mve,+fp-armv8,+fullfp16 2>%t < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP16 --check-prefix=FP32 --check-prefix=FP64
+// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+mve 2>%t < %s | FileCheck %s --check-prefixes=FP16,FP32,FP64
+// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+mve,+fp-armv8-sp,+fullfp16 2>%t < %s | FileCheck %s --check-prefixes=FP16,FP32,FP64
+// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+mve,+fp-armv8,+fullfp16 2>%t < %s | FileCheck %s --check-prefixes=FP16,FP32,FP64
 
 // Maximal v8.1M target: MVE with FP, and scalar FP with double-precision.
-// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP16 --check-prefix=FP32 --check-prefix=FP64
+// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=FP16,FP32,FP64
 
 vldmia  r0, {d0}
 # FP32: vldmia  r0, {d0}               @ encoding: [0x90,0xec,0x02,0x0b]
diff --git a/llvm/test/MC/ARM/thumbv8m.s b/llvm/test/MC/ARM/thumbv8m.s
index 88ca22fcdb937..0e9ab4a9b3bf9 100644
--- a/llvm/test/MC/ARM/thumbv8m.s
+++ b/llvm/test/MC/ARM/thumbv8m.s
@@ -1,12 +1,12 @@
 // RUN: not llvm-mc -triple=thumbv8m.base -show-encoding < %s 2>%t \
-// RUN:   | FileCheck --check-prefix=CHECK-BASELINE --check-prefix=CHECK %s
-// RUN:     FileCheck --check-prefix=UNDEF-BASELINE --check-prefix=UNDEF < %t %s
+// RUN:   | FileCheck --check-prefix=CHECK %s
+// RUN:     FileCheck --check-prefixes=UNDEF-BASELINE,UNDEF < %t %s
 // RUN: not llvm-mc -triple=thumbv8m.main -show-encoding < %s 2>%t \
-// RUN:   | FileCheck --check-prefix=CHECK-MAINLINE --check-prefix=CHECK %s
-// RUN:     FileCheck --check-prefix=UNDEF-MAINLINE --check-prefix=UNDEF < %t %s
+// RUN:   | FileCheck --check-prefixes=CHECK-MAINLINE,CHECK %s
+// RUN:     FileCheck --check-prefixes=UNDEF-MAINLINE,UNDEF < %t %s
 // RUN: not llvm-mc -triple=thumbv8m.main -mattr=+dsp -show-encoding < %s 2>%t \
-// RUN:   | FileCheck --check-prefix=CHECK-MAINLINE_DSP --check-prefix=CHECK %s
-// RUN:     FileCheck --check-prefix=UNDEF-MAINLINE_DSP --check-prefix=UNDEF < %t %s
+// RUN:   | FileCheck --check-prefixes=CHECK-MAINLINE_DSP,CHECK %s
+// RUN:     FileCheck --check-prefixes=UNDEF-MAINLINE_DSP,UNDEF < %t %s
 
 // Simple check that baseline is v6M and mainline is v7M
 // UNDEF-BASELINE: error: instruction requires: thumb2

From 2f395b7092bdac0e39bb4e2bb5e6b03e521a45dd Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <usx@google.com>
Date: Sun, 10 Jan 2021 16:23:03 +0100
Subject: [PATCH 03/17] [clangd] Make AST-based signals available to
 runWithPreamble.

Many useful signals can be derived from a valid AST which is regularly updated by
the ASTWorker. `runWithPreamble` does not have access to the ParsedAST
but it can be provided access to some signals derived from a (possibly
stale) AST.

Differential Revision: https://reviews.llvm.org/D94424
---
 clang-tools-extra/clangd/ASTSignals.cpp       | 42 +++++++++++
 clang-tools-extra/clangd/ASTSignals.h         | 39 ++++++++++
 clang-tools-extra/clangd/CMakeLists.txt       |  1 +
 clang-tools-extra/clangd/TUScheduler.cpp      | 73 +++++++++++-------
 clang-tools-extra/clangd/TUScheduler.h        |  3 +
 .../clangd/unittests/ASTSignalsTests.cpp      | 75 +++++++++++++++++++
 .../clangd/unittests/CMakeLists.txt           |  1 +
 .../clangd/unittests/TUSchedulerTests.cpp     | 47 +++++++++++-
 8 files changed, 252 insertions(+), 29 deletions(-)
 create mode 100644 clang-tools-extra/clangd/ASTSignals.cpp
 create mode 100644 clang-tools-extra/clangd/ASTSignals.h
 create mode 100644 clang-tools-extra/clangd/unittests/ASTSignalsTests.cpp

diff --git a/clang-tools-extra/clangd/ASTSignals.cpp b/clang-tools-extra/clangd/ASTSignals.cpp
new file mode 100644
index 0000000000000..da849287bbf69
--- /dev/null
+++ b/clang-tools-extra/clangd/ASTSignals.cpp
@@ -0,0 +1,42 @@
+//===--- ASTSignals.cpp - LSP server -----------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ASTSignals.h"
+#include "AST.h"
+#include "FindTarget.h"
+
+namespace clang {
+namespace clangd {
+ASTSignals ASTSignals::derive(const ParsedAST &AST) {
+  ASTSignals Signals;
+  const SourceManager &SM = AST.getSourceManager();
+  findExplicitReferences(AST.getASTContext(), [&](ReferenceLoc Ref) {
+    for (const NamedDecl *ND : Ref.Targets) {
+      if (!isInsideMainFile(Ref.NameLoc, SM))
+        continue;
+      SymbolID ID = getSymbolID(ND);
+      if (!ID)
+        continue;
+      unsigned &SymbolCount = Signals.ReferencedSymbols[ID];
+      SymbolCount++;
+      // Process namespace only when we see the symbol for the first time.
+      if (SymbolCount != 1)
+        continue;
+      if (const auto *NSD = dyn_cast<NamespaceDecl>(ND->getDeclContext())) {
+        if (NSD->isAnonymousNamespace())
+          continue;
+        std::string NS = printNamespaceScope(*NSD);
+        if (!NS.empty())
+          Signals.RelatedNamespaces[NS]++;
+      }
+    }
+  });
+  return Signals;
+}
+} // namespace clangd
+} // namespace clang
diff --git a/clang-tools-extra/clangd/ASTSignals.h b/clang-tools-extra/clangd/ASTSignals.h
new file mode 100644
index 0000000000000..bc70cd17310a1
--- /dev/null
+++ b/clang-tools-extra/clangd/ASTSignals.h
@@ -0,0 +1,39 @@
+//===--- ASTSignals.h - LSP server -------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_ASTSIGNALS_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_ASTSIGNALS_H
+
+#include "ParsedAST.h"
+#include "index/SymbolID.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+
+namespace clang {
+namespace clangd {
+
+/// Signals derived from a valid AST of a file.
+/// Provides information that can only be extracted from the AST to actions that
+/// can't access an AST. The signals are computed and updated asynchronously by
+/// the ASTWorker and thus they are always stale and also can be absent.
+/// Example usage: Information about the declarations used in a file affects
+/// code-completion ranking in that file.
+struct ASTSignals {
+  /// Number of occurrences of each symbol present in the file.
+  llvm::DenseMap<SymbolID, unsigned> ReferencedSymbols;
+  /// Namespaces whose symbols are used in the file, and the number of such
+  /// distinct symbols.
+  llvm::StringMap<unsigned> RelatedNamespaces;
+
+  static ASTSignals derive(const ParsedAST &AST);
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_ASTSIGNALS_H
diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt
index 9e62e09480274..1d12e7e2355df 100644
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -46,6 +46,7 @@ include_directories(BEFORE "${CMAKE_CURRENT_BINARY_DIR}/../clang-tidy")
 
 add_clang_library(clangDaemon
   AST.cpp
+  ASTSignals.cpp
   ClangdLSPServer.cpp
   ClangdServer.cpp
   CodeComplete.cpp
diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp
index 7a858664faa5a..16c186c34738d 100644
--- a/clang-tools-extra/clangd/TUScheduler.cpp
+++ b/clang-tools-extra/clangd/TUScheduler.cpp
@@ -392,7 +392,8 @@ class ASTWorker {
              TUScheduler::ASTActionInvalidation);
   bool blockUntilIdle(Deadline Timeout) const;
 
-  std::shared_ptr<const PreambleData> getPossiblyStalePreamble() const;
+  std::shared_ptr<const PreambleData> getPossiblyStalePreamble(
+      std::shared_ptr<const ASTSignals> *ASTSignals = nullptr) const;
 
   /// Used to inform ASTWorker about a new preamble build by PreambleThread.
   /// Diagnostics are only published through this callback. This ensures they
@@ -437,6 +438,8 @@ class ASTWorker {
   void generateDiagnostics(std::unique_ptr<CompilerInvocation> Invocation,
                            ParseInputs Inputs, std::vector<Diag> CIDiags);
 
+  void updateASTSignals(ParsedAST &AST);
+
   // Must be called exactly once on processing thread. Will return after
   // stop() is called on a separate thread and all pending requests are
   // processed.
@@ -499,6 +502,7 @@ class ASTWorker {
   /// Signalled whenever a new request has been scheduled or processing of a
   /// request has completed.
   mutable std::condition_variable RequestsCV;
+  std::shared_ptr<const ASTSignals> LatestASTSignals; /* GUARDED_BY(Mutex) */
   /// Latest build preamble for current TU.
   /// None means no builds yet, null means there was an error while building.
   /// Only written by ASTWorker's thread.
@@ -830,6 +834,16 @@ void ASTWorker::updatePreamble(std::unique_ptr<CompilerInvocation> CI,
   RequestsCV.notify_all();
 }
 
+void ASTWorker::updateASTSignals(ParsedAST &AST) {
+  auto Signals = std::make_shared<const ASTSignals>(ASTSignals::derive(AST));
+  // Existing readers of ASTSignals will have their copy preserved until the
+  // read is completed. The last reader deletes the old ASTSignals.
+  {
+    std::lock_guard<std::mutex> Lock(Mutex);
+    std::swap(LatestASTSignals, Signals);
+  }
+}
+
 void ASTWorker::generateDiagnostics(
     std::unique_ptr<CompilerInvocation> Invocation, ParseInputs Inputs,
     std::vector<Diag> CIDiags) {
@@ -908,6 +922,7 @@ void ASTWorker::generateDiagnostics(
   if (*AST) {
     trace::Span Span("Running main AST callback");
     Callbacks.onMainAST(FileName, **AST, RunPublish);
+    updateASTSignals(**AST);
   } else {
     // Failed to build the AST, at least report diagnostics from the
     // command line if there were any.
@@ -925,9 +940,11 @@ void ASTWorker::generateDiagnostics(
   }
 }
 
-std::shared_ptr<const PreambleData>
-ASTWorker::getPossiblyStalePreamble() const {
+std::shared_ptr<const PreambleData> ASTWorker::getPossiblyStalePreamble(
+    std::shared_ptr<const ASTSignals> *ASTSignals) const {
   std::lock_guard<std::mutex> Lock(Mutex);
+  if (ASTSignals)
+    *ASTSignals = LatestASTSignals;
   return LatestPreamble ? *LatestPreamble : nullptr;
 }
 
@@ -1364,38 +1381,40 @@ void TUScheduler::runWithPreamble(llvm::StringRef Name, PathRef File,
   if (!PreambleTasks) {
     trace::Span Tracer(Name);
     SPAN_ATTACH(Tracer, "file", File);
+    std::shared_ptr<const ASTSignals> Signals;
     std::shared_ptr<const PreambleData> Preamble =
-        It->second->Worker->getPossiblyStalePreamble();
+        It->second->Worker->getPossiblyStalePreamble(&Signals);
     WithContext WithProvidedContext(Opts.ContextProvider(File));
     Action(InputsAndPreamble{It->second->Contents,
                              It->second->Worker->getCurrentCompileCommand(),
-                             Preamble.get()});
+                             Preamble.get(), Signals.get()});
     return;
   }
 
   std::shared_ptr<const ASTWorker> Worker = It->second->Worker.lock();
-  auto Task =
-      [Worker, Consistency, Name = Name.str(), File = File.str(),
-       Contents = It->second->Contents,
-       Command = Worker->getCurrentCompileCommand(),
-       Ctx = Context::current().derive(kFileBeingProcessed, std::string(File)),
-       Action = std::move(Action), this]() mutable {
-        std::shared_ptr<const PreambleData> Preamble;
-        if (Consistency == PreambleConsistency::Stale) {
-          // Wait until the preamble is built for the first time, if preamble
-          // is required. This avoids extra work of processing the preamble
-          // headers in parallel multiple times.
-          Worker->waitForFirstPreamble();
-        }
-        Preamble = Worker->getPossiblyStalePreamble();
-
-        std::lock_guard<Semaphore> BarrierLock(Barrier);
-        WithContext Guard(std::move(Ctx));
-        trace::Span Tracer(Name);
-        SPAN_ATTACH(Tracer, "file", File);
-        WithContext WithProvidedContext(Opts.ContextProvider(File));
-        Action(InputsAndPreamble{Contents, Command, Preamble.get()});
-      };
+  auto Task = [Worker, Consistency, Name = Name.str(), File = File.str(),
+               Contents = It->second->Contents,
+               Command = Worker->getCurrentCompileCommand(),
+               Ctx = Context::current().derive(kFileBeingProcessed,
+                                               std::string(File)),
+               Action = std::move(Action), this]() mutable {
+    std::shared_ptr<const PreambleData> Preamble;
+    if (Consistency == PreambleConsistency::Stale) {
+      // Wait until the preamble is built for the first time, if preamble
+      // is required. This avoids extra work of processing the preamble
+      // headers in parallel multiple times.
+      Worker->waitForFirstPreamble();
+    }
+    std::shared_ptr<const ASTSignals> Signals;
+    Preamble = Worker->getPossiblyStalePreamble(&Signals);
+
+    std::lock_guard<Semaphore> BarrierLock(Barrier);
+    WithContext Guard(std::move(Ctx));
+    trace::Span Tracer(Name);
+    SPAN_ATTACH(Tracer, "file", File);
+    WithContext WithProvidedContext(Opts.ContextProvider(File));
+    Action(InputsAndPreamble{Contents, Command, Preamble.get(), Signals.get()});
+  };
 
   PreambleTasks->runAsync("task:" + llvm::sys::path::filename(File),
                           std::move(Task));
diff --git a/clang-tools-extra/clangd/TUScheduler.h b/clang-tools-extra/clangd/TUScheduler.h
index cc38db8071aba..5a8f4d3b817a9 100644
--- a/clang-tools-extra/clangd/TUScheduler.h
+++ b/clang-tools-extra/clangd/TUScheduler.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_TUSCHEDULER_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_TUSCHEDULER_H
 
+#include "ASTSignals.h"
 #include "Compiler.h"
 #include "Diagnostics.h"
 #include "GlobalCompilationDatabase.h"
@@ -43,6 +44,8 @@ struct InputsAndPreamble {
   const tooling::CompileCommand &Command;
   // This can be nullptr if no preamble is available.
   const PreambleData *Preamble;
+  // This can be nullptr if no ASTSignals are available.
+  const ASTSignals *Signals;
 };
 
 /// Determines whether diagnostics should be generated for a file snapshot.
diff --git a/clang-tools-extra/clangd/unittests/ASTSignalsTests.cpp b/clang-tools-extra/clangd/unittests/ASTSignalsTests.cpp
new file mode 100644
index 0000000000000..2d8c1846a8ae8
--- /dev/null
+++ b/clang-tools-extra/clangd/unittests/ASTSignalsTests.cpp
@@ -0,0 +1,75 @@
+//===-- ASTSignalsTests.cpp -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "AST.h"
+
+#include "ParsedAST.h"
+#include "TestIndex.h"
+#include "TestTU.h"
+#include "llvm/ADT/StringRef.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+using ::testing::_;
+using ::testing::Pair;
+using ::testing::UnorderedElementsAre;
+
+TEST(ASTSignals, Derive) {
+  TestTU TU = TestTU::withCode(R"cpp(
+  namespace ns1 {
+  namespace ns2 {
+  namespace {
+  int func() {
+    tar::X a;
+    a.Y = 1;
+    return ADD(tar::kConst, a.Y, tar::foo()) + fooInNS2() + tar::foo();
+  }
+  } // namespace
+  } // namespace ns2
+  } // namespace ns1
+  )cpp");
+
+  TU.HeaderCode = R"cpp(
+  #define ADD(x, y, z) (x + y + z)
+  namespace tar {  // A related namespace.
+  int kConst = 5;
+  int foo();
+  void bar();  // Unused symbols are not recorded.
+  class X {
+    public: int Y;
+  };
+  } // namespace tar
+  namespace ns1::ns2 { int fooInNS2(); }
+  )cpp";
+  ASTSignals Signals = ASTSignals::derive(TU.build());
+  std::vector<std::pair<StringRef, int>> NS;
+  for (const auto &P : Signals.RelatedNamespaces)
+    NS.emplace_back(P.getKey(), P.getValue());
+  EXPECT_THAT(NS, UnorderedElementsAre(Pair("ns1::", 1), Pair("ns1::ns2::", 1),
+                                       Pair("tar::", /*foo, kConst, X*/ 3)));
+
+  std::vector<std::pair<SymbolID, int>> Sym;
+  for (const auto &P : Signals.ReferencedSymbols)
+    Sym.emplace_back(P.getFirst(), P.getSecond());
+  EXPECT_THAT(
+      Sym,
+      UnorderedElementsAre(
+          Pair(ns("tar").ID, 4), Pair(ns("ns1").ID, 1),
+          Pair(ns("ns1::ns2").ID, 1), Pair(_ /*int func();*/, 1),
+          Pair(cls("tar::X").ID, 1), Pair(var("tar::kConst").ID, 1),
+          Pair(func("tar::foo").ID, 2), Pair(func("ns1::ns2::fooInNS2").ID, 1),
+          Pair(sym("Y", index::SymbolKind::Variable, "@N@tar@S@X@FI@\\0").ID,
+               2),
+          Pair(_ /*a*/, 3)));
+}
+} // namespace
+} // namespace clangd
+} // namespace clang
diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt
index 10f10f200471d..adf4ac827cce3 100644
--- a/clang-tools-extra/clangd/unittests/CMakeLists.txt
+++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt
@@ -35,6 +35,7 @@ add_custom_target(ClangdUnitTests)
 add_unittest(ClangdUnitTests ClangdTests
   Annotations.cpp
   ASTTests.cpp
+  ASTSignalsTests.cpp
   BackgroundIndexTests.cpp
   CallHierarchyTests.cpp
   CanonicalIncludesTests.cpp
diff --git a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp
index c87c1be6f8e9a..0c9455f0eaf63 100644
--- a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp
+++ b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp
@@ -14,6 +14,7 @@
 #include "Preamble.h"
 #include "TUScheduler.h"
 #include "TestFS.h"
+#include "TestIndex.h"
 #include "support/Cancellation.h"
 #include "support/Context.h"
 #include "support/Path.h"
@@ -48,6 +49,7 @@ using ::testing::ElementsAre;
 using ::testing::Eq;
 using ::testing::Field;
 using ::testing::IsEmpty;
+using ::testing::Pair;
 using ::testing::Pointee;
 using ::testing::SizeIs;
 using ::testing::UnorderedElementsAre;
@@ -679,12 +681,12 @@ TEST_F(TUSchedulerTests, EmptyPreamble) {
             cantFail(std::move(Preamble)).Preamble->Preamble.getBounds().Size,
             0u);
       });
-  // Wait for the preamble is being built.
+  // Wait while the preamble is being built.
   ASSERT_TRUE(S.blockUntilIdle(timeoutSeconds(10)));
 
   // Update the file which results in an empty preamble.
   S.update(Foo, getInputs(Foo, WithEmptyPreamble), WantDiagnostics::Auto);
-  // Wait for the preamble is being built.
+  // Wait while the preamble is being built.
   ASSERT_TRUE(S.blockUntilIdle(timeoutSeconds(10)));
   S.runWithPreamble(
       "getEmptyPreamble", Foo, TUScheduler::Stale,
@@ -696,6 +698,47 @@ TEST_F(TUSchedulerTests, EmptyPreamble) {
       });
 }
 
+TEST_F(TUSchedulerTests, ASTSignalsSmokeTests) {
+  TUScheduler S(CDB, optsForTest());
+  auto Foo = testPath("foo.cpp");
+  auto Header = testPath("foo.h");
+
+  FS.Files[Header] = "namespace tar { int foo(); }";
+  const char *Contents = R"cpp(
+  #include "foo.h"
+  namespace ns {
+  int func() {
+    return tar::foo();
+  }
+  } // namespace ns
+  )cpp";
+  // Update the file, which triggers a preamble build and an AST build.
+  S.update(Foo, getInputs(Foo, Contents), WantDiagnostics::Yes);
+  // Wait while the preamble is being built.
+  ASSERT_TRUE(S.blockUntilIdle(timeoutSeconds(10)));
+  Notification TaskRun;
+  S.runWithPreamble(
+      "ASTSignals", Foo, TUScheduler::Stale,
+      [&](Expected<InputsAndPreamble> IP) {
+        ASSERT_FALSE(!IP);
+        std::vector<std::pair<StringRef, int>> NS;
+        for (const auto &P : IP->Signals->RelatedNamespaces)
+          NS.emplace_back(P.getKey(), P.getValue());
+        EXPECT_THAT(NS,
+                    UnorderedElementsAre(Pair("ns::", 1), Pair("tar::", 1)));
+
+        std::vector<std::pair<SymbolID, int>> Sym;
+        for (const auto &P : IP->Signals->ReferencedSymbols)
+          Sym.emplace_back(P.getFirst(), P.getSecond());
+        EXPECT_THAT(Sym, UnorderedElementsAre(Pair(ns("tar").ID, 1),
+                                              Pair(ns("ns").ID, 1),
+                                              Pair(func("tar::foo").ID, 1),
+                                              Pair(func("ns::func").ID, 1)));
+        TaskRun.notify();
+      });
+  TaskRun.wait();
+}
+
 TEST_F(TUSchedulerTests, RunWaitsForPreamble) {
   // Testing strategy: we update the file and schedule a few preamble reads at
   // the same time. All reads should get the same non-null preamble.

From 8b09cf7956d8abc722fa736874e4cea667a9d3cb Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <usx@google.com>
Date: Thu, 14 Jan 2021 18:38:42 +0100
Subject: [PATCH 04/17] [clangd] Trivial: Documentation fix in ASTSignals.

---
 clang-tools-extra/clangd/ASTSignals.cpp | 2 +-
 clang-tools-extra/clangd/ASTSignals.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/clangd/ASTSignals.cpp b/clang-tools-extra/clangd/ASTSignals.cpp
index da849287bbf69..b8cc7f05927a7 100644
--- a/clang-tools-extra/clangd/ASTSignals.cpp
+++ b/clang-tools-extra/clangd/ASTSignals.cpp
@@ -1,4 +1,4 @@
-//===--- ASTSignals.cpp - LSP server -----------------------------*- C++-*-===//
+//===--- ASTSignals.cpp ------------------------------------------*- C++-*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/clang-tools-extra/clangd/ASTSignals.h b/clang-tools-extra/clangd/ASTSignals.h
index bc70cd17310a1..fd31be38ce8ba 100644
--- a/clang-tools-extra/clangd/ASTSignals.h
+++ b/clang-tools-extra/clangd/ASTSignals.h
@@ -1,4 +1,4 @@
-//===--- ASTSignals.h - LSP server -------------------------------*- C++-*-===//
+//===--- ASTSignals.h --------------------------------------------*- C++-*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

From b4e083b0ef7ca86851b5b1d043004ae632a63f8d Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Thu, 14 Jan 2021 17:39:58 +0000
Subject: [PATCH 05/17] [gn build] Port 2f395b7092bd

---
 llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn        | 1 +
 .../gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn     | 1 +
 2 files changed, 2 insertions(+)

diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
index c07a61fe61e43..88a5195806180 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
@@ -67,6 +67,7 @@ static_library("clangd") {
   ]
   sources = [
     "AST.cpp",
+    "ASTSignals.cpp",
     "ClangdLSPServer.cpp",
     "ClangdServer.cpp",
     "CodeComplete.cpp",
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn
index 34ba224fa7e03..26cc183b132eb 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn
@@ -42,6 +42,7 @@ unittest("ClangdTests") {
     target_gen_dir,
   ]
   sources = [
+    "ASTSignalsTests.cpp",
     "ASTTests.cpp",
     "Annotations.cpp",
     "BackgroundIndexTests.cpp",

From 4864d9f7e91fdd58a84e4ae576f1ad16f71f9d91 Mon Sep 17 00:00:00 2001
From: peter klausler <pklausler@nvidia.com>
Date: Wed, 13 Jan 2021 14:12:23 -0800
Subject: [PATCH 06/17] [flang] Fix some module file issues exposed by Whizard

Generic type-bound interfaces for user-defined operators need to be formatted
as "OPERATOR(.op.)", not just ".op."

PRIVATE generics need to be marked as such.

Declaration ordering: when a generic interface shadows a
derived type of the same name, it needs to be emitted to the
module file at the point of definition of the derived type;
otherwise, the derived type's definition may appear after its
first use.

The module symbol for a module read from a module file needs
to be marked as coming from a module file before semantic
processing is performed on the contents of the module so that
any special handling for declarations in module files can be
properly activated.

IMPORT statements were sometimes missing for use-associated
symbols in surrounding scopes; fine-tune NeedImport().

Differential Revision: https://reviews.llvm.org/D94636
---
 flang/lib/Semantics/mod-file.cpp   | 79 +++++++++++++++++++++---------
 flang/test/Semantics/modfile35.f90 |  6 +--
 flang/test/Semantics/modfile37.f90 | 32 ++++++++++++
 flang/test/Semantics/modfile38.f90 | 35 +++++++++++++
 4 files changed, 125 insertions(+), 27 deletions(-)
 create mode 100644 flang/test/Semantics/modfile37.f90
 create mode 100644 flang/test/Semantics/modfile38.f90

diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp
index 23733f944d8cd..af3267a1c9a08 100644
--- a/flang/lib/Semantics/mod-file.cpp
+++ b/flang/lib/Semantics/mod-file.cpp
@@ -198,6 +198,15 @@ bool ModFileWriter::PutSymbols(const Scope &scope) {
   }
 }
 
+static llvm::raw_ostream &PutGenericName(
+    llvm::raw_ostream &os, const Symbol &symbol) {
+  if (IsGenericDefinedOp(symbol)) {
+    return os << "operator(" << symbol.name() << ')';
+  } else {
+    return os << symbol.name();
+  }
+}
+
 // Emit a symbol to decls_, except for bindings in a derived type (type-bound
 // procedures, type-bound generics, final procedures) which go to typeBindings.
 void ModFileWriter::PutSymbol(
@@ -210,8 +219,8 @@ void ModFileWriter::PutSymbol(
                    if (symbol.owner().IsDerivedType()) {
                      // generic binding
                      for (const Symbol &proc : x.specificProcs()) {
-                       typeBindings << "generic::" << symbol.name() << "=>"
-                                    << proc.name() << '\n';
+                       PutGenericName(typeBindings << "generic::", symbol)
+                           << "=>" << proc.name() << '\n';
                      }
                    } else {
                      PutGeneric(symbol);
@@ -392,15 +401,6 @@ static bool IsIntrinsicOp(const Symbol &symbol) {
   }
 }
 
-static llvm::raw_ostream &PutGenericName(
-    llvm::raw_ostream &os, const Symbol &symbol) {
-  if (IsGenericDefinedOp(symbol)) {
-    return os << "operator(" << symbol.name() << ')';
-  } else {
-    return os << symbol.name();
-  }
-}
-
 void ModFileWriter::PutGeneric(const Symbol &symbol) {
   const auto &genericOwner{symbol.owner()};
   auto &details{symbol.get<GenericDetails>()};
@@ -427,9 +427,11 @@ void ModFileWriter::PutUse(const Symbol &symbol) {
     PutGenericName(uses_ << "=>", use);
   }
   uses_ << '\n';
-  PutUseExtraAttr(Attr::PRIVATE, symbol, use);
   PutUseExtraAttr(Attr::VOLATILE, symbol, use);
   PutUseExtraAttr(Attr::ASYNCHRONOUS, symbol, use);
+  if (symbol.attrs().test(Attr::PRIVATE)) {
+    PutGenericName(useExtraAttrs_ << "private::", symbol) << '\n';
+  }
 }
 
 // We have "USE local => use" in this module. If attr was added locally
@@ -442,6 +444,31 @@ void ModFileWriter::PutUseExtraAttr(
   }
 }
 
+// When a generic interface has the same name as a derived type
+// in the same scope, the generic shadows the derived type.
+// If the derived type were declared first, emit the generic
+// interface at the position of the derived type's declaration.
+// (ReplaceName() is not used for this purpose because doing so
+// would confusingly position error messages pertaining to the generic
+// interface upon the derived type's declaration.)
+static inline SourceName NameInModuleFile(const Symbol &symbol) {
+  if (const auto *generic{symbol.detailsIf<GenericDetails>()}) {
+    if (const auto *derivedTypeOverload{generic->derivedType()}) {
+      if (derivedTypeOverload->name().begin() < symbol.name().begin()) {
+        return derivedTypeOverload->name();
+      }
+    }
+  } else if (const auto *use{symbol.detailsIf<UseDetails>()}) {
+    if (use->symbol().attrs().test(Attr::PRIVATE)) {
+      // Avoid the use in sorting of names created to access private
+      // specific procedures as a result of generic resolution;
+      // they're not in the cooked source.
+      return use->symbol().name();
+    }
+  }
+  return symbol.name();
+}
+
 // Collect the symbols of this scope sorted by their original order, not name.
 // Namelists are an exception: they are sorted after other symbols.
 void CollectSymbols(
@@ -465,7 +492,7 @@ void CollectSymbols(
   // Sort most symbols by name: use of Symbol::ReplaceName ensures the source
   // location of a symbol's name is the first "real" use.
   std::sort(sorted.begin(), sorted.end(), [](SymbolRef x, SymbolRef y) {
-    return x->name().begin() < y->name().begin();
+    return NameInModuleFile(x).begin() < NameInModuleFile(y).begin();
   });
   sorted.insert(sorted.end(), namelist.begin(), namelist.end());
   for (const auto &pair : scope.commonBlocks()) {
@@ -819,13 +846,15 @@ Scope *ModFileReader::Read(const SourceName &name, Scope *ancestor) {
   } else {
     parentScope = ancestor;
   }
-  ResolveNames(context_, *parseTree);
-  const auto &it{parentScope->find(name)};
-  if (it == parentScope->end()) {
+  auto pair{parentScope->try_emplace(name, UnknownDetails{})};
+  if (!pair.second) {
     return nullptr;
   }
-  auto &modSymbol{*it->second};
+  Symbol &modSymbol{*pair.first->second};
   modSymbol.set(Symbol::Flag::ModFile);
+  ResolveNames(context_, *parseTree);
+  CHECK(modSymbol.has<ModuleDetails>());
+  CHECK(modSymbol.test(Symbol::Flag::ModFile));
   return modSymbol.scope();
 }
 
@@ -974,14 +1003,16 @@ bool SubprogramSymbolCollector::NeedImport(
     const SourceName &name, const Symbol &symbol) {
   if (!isInterface_) {
     return false;
-  } else if (symbol.owner() != scope_.parent()) {
-    // detect import from parent of use-associated symbol
-    // can be null in the case of a use-associated derived type's parent type
-    const auto *found{scope_.FindSymbol(name)};
-    CHECK(found || symbol.has<DerivedTypeDetails>());
-    return found && found->has<UseDetails>() && found->owner() != scope_;
-  } else {
+  } else if (symbol.owner().Contains(scope_)) {
     return true;
+  } else if (const Symbol * found{scope_.FindSymbol(name)}) {
+    // detect import from ancestor of use-associated symbol
+    return found->has<UseDetails>() && found->owner() != scope_;
+  } else {
+    // "found" can be null in the case of a use-associated derived type's parent
+    // type
+    CHECK(symbol.has<DerivedTypeDetails>());
+    return false;
   }
 }
 
diff --git a/flang/test/Semantics/modfile35.f90 b/flang/test/Semantics/modfile35.f90
index 928b6c5472bba..1c50bfed25e3c 100644
--- a/flang/test/Semantics/modfile35.f90
+++ b/flang/test/Semantics/modfile35.f90
@@ -205,13 +205,13 @@ subroutine test4(x, y, a)
 !  contains
 !    procedure,pass(x)::p1=>f1
 !    procedure::p3=>f3
-!    generic::.binary.=>p1
-!    generic::.unary.=>p3
+!    generic::operator(.binary.)=>p1
+!    generic::operator(.unary.)=>p3
 !  end type
 !  type,extends(t1)::t2
 !  contains
 !    procedure,pass(y)::p2=>f2
-!    generic::.binary.=>p2
+!    generic::operator(.binary.)=>p2
 !  end type
 !contains
 !  pure function f1(x,y)
diff --git a/flang/test/Semantics/modfile37.f90 b/flang/test/Semantics/modfile37.f90
new file mode 100644
index 0000000000000..61f64fbb9169c
--- /dev/null
+++ b/flang/test/Semantics/modfile37.f90
@@ -0,0 +1,32 @@
+! RUN: %S/test_modfile.sh %s %t %f18
+
+! Ensure that a dummy procedure's interface's imports
+! appear in the module file.
+
+module m
+  type :: t
+  end type
+ contains
+  subroutine s1(s2)
+    interface
+      subroutine s2(x)
+        import
+        class(t) :: x
+      end subroutine
+    end interface
+  end subroutine
+end module
+!Expect: m.mod
+!module m
+!type::t
+!end type
+!contains
+!subroutine s1(s2)
+!interface
+!subroutine s2(x)
+!import::t
+!class(t)::x
+!end
+!end interface
+!end
+!end
diff --git a/flang/test/Semantics/modfile38.f90 b/flang/test/Semantics/modfile38.f90
new file mode 100644
index 0000000000000..c234568444b3d
--- /dev/null
+++ b/flang/test/Semantics/modfile38.f90
@@ -0,0 +1,35 @@
+! RUN: %S/test_modfile.sh %s %t %f18
+
+! Ensure that an interface with the same name as a derived type
+! does not cause that shadowed name to be emitted later than its
+! uses in the module file.
+
+module m
+  type :: t
+  end type
+  type :: t2
+    type(t) :: c
+  end type
+  interface t
+    module procedure f
+  end interface
+ contains
+  type(t) function f
+  end function
+end module
+
+!Expect: m.mod
+!module m
+!interface t
+!procedure::f
+!end interface
+!type::t
+!end type
+!type::t2
+!type(t)::c
+!end type
+!contains
+!function f()
+!type(t)::f
+!end
+!end

From 35c8a6cbf5ff0b525e2c01e5d746067bdda1dde7 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin@google.com>
Date: Wed, 13 Jan 2021 10:39:25 -0800
Subject: [PATCH 07/17] [NFC] Disallow unused prefixes under MC/AArch64

Differential Revision: https://reviews.llvm.org/D94616
---
 llvm/test/MC/AArch64/armv8.7a-ls64.s | 4 ++--
 llvm/test/MC/AArch64/armv8.7a-xs.s   | 6 +++---
 llvm/test/MC/AArch64/lit.local.cfg   | 8 ++++++++
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/llvm/test/MC/AArch64/armv8.7a-ls64.s b/llvm/test/MC/AArch64/armv8.7a-ls64.s
index c647ecce53f09..d4684e38cbea2 100644
--- a/llvm/test/MC/AArch64/armv8.7a-ls64.s
+++ b/llvm/test/MC/AArch64/armv8.7a-ls64.s
@@ -1,7 +1,7 @@
 // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+ls64 < %s 2>%t | FileCheck %s
-// RUN: FileCheck --check-prefix=CHECK-ERR --check-prefix=CHECK-LS64-ERR %s < %t
+// RUN: FileCheck --check-prefix=CHECK-ERR %s < %t
 // RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t
-// RUN: FileCheck --check-prefix=CHECK-ERR --check-prefix=CHECK-NO-LS64-ERR %s < %t
+// RUN: FileCheck --check-prefixes=CHECK-ERR,CHECK-NO-LS64-ERR %s < %t
 
   ld64b x0, [x13]
   st64b x14, [x13]
diff --git a/llvm/test/MC/AArch64/armv8.7a-xs.s b/llvm/test/MC/AArch64/armv8.7a-xs.s
index 6193c1f15f534..e3a1e12aae9a5 100644
--- a/llvm/test/MC/AArch64/armv8.7a-xs.s
+++ b/llvm/test/MC/AArch64/armv8.7a-xs.s
@@ -1,9 +1,9 @@
 // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a,+xs < %s 2>%t | FileCheck %s
-// RUN: FileCheck --check-prefix=CHECK-ERR --check-prefix=CHECK-XS-ERR %s < %t
+// RUN: FileCheck --check-prefix=CHECK-ERR %s < %t
 // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.7a < %s 2>%t | FileCheck %s
-// RUN: FileCheck --check-prefix=CHECK-ERR --check-prefix=CHECK-XS-ERR %s < %t
+// RUN: FileCheck --check-prefix=CHECK-ERR %s < %t
 // RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.4a < %s 2> %t
-// RUN: FileCheck --check-prefix=CHECK-ERR --check-prefix=CHECK-NO-XS-ERR %s < %t
+// RUN: FileCheck --check-prefixes=CHECK-ERR,CHECK-NO-XS-ERR %s < %t
 
   dsb #16
   dsb #20
diff --git a/llvm/test/MC/AArch64/lit.local.cfg b/llvm/test/MC/AArch64/lit.local.cfg
index 5822b72266874..ab829130e2696 100644
--- a/llvm/test/MC/AArch64/lit.local.cfg
+++ b/llvm/test/MC/AArch64/lit.local.cfg
@@ -1,2 +1,10 @@
+from lit.llvm.subst import ToolSubst
+
 if 'AArch64' not in config.root.targets:
     config.unsupported = True
+
+fc = ToolSubst('FileCheck', unresolved='fatal')
+# Insert this first. Then, we'll first update the blank FileCheck command; then,
+# the default substitution of FileCheck will replace it to its full path.
+config.substitutions.insert(0, (fc.regex,
+    'FileCheck --allow-unused-prefixes=false'))

From a03ffa98503bb6d5a990e61df060ed480c3e3f3b Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Wed, 13 Jan 2021 21:46:25 -0800
Subject: [PATCH 08/17] [NewPM] Fix placement of LoopFlatten

https://reviews.llvm.org/D90402 was inconsistent with where it put
LoopFlatten between the two pass managers. It also missed adding it to
the non-O1 function simplification pipeline.

PR48738

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D94650
---
 llvm/lib/Passes/PassBuilder.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index d5c0c47bd9a64..7f3f132ab82b1 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -562,8 +562,6 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
   LPM1.addPass(SimpleLoopUnswitchPass());
 
-  if (EnableLoopFlatten)
-    FPM.addPass(LoopFlattenPass());
   LPM2.addPass(LoopIdiomRecognizePass());
   LPM2.addPass(IndVarSimplifyPass());
 
@@ -594,6 +592,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
       DebugLogging));
   FPM.addPass(SimplifyCFGPass());
   FPM.addPass(InstCombinePass());
+  if (EnableLoopFlatten)
+    FPM.addPass(LoopFlattenPass());
   // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
   // *All* loop passes must preserve it, in order to be able to use it.
   FPM.addPass(createFunctionToLoopPassAdaptor(
@@ -756,6 +756,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
       DebugLogging));
   FPM.addPass(SimplifyCFGPass());
   FPM.addPass(InstCombinePass());
+  if (EnableLoopFlatten)
+    FPM.addPass(LoopFlattenPass());
   // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
   // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
   // *All* loop passes must preserve it, in order to be able to use it.

From b99782cf7850a481fa36fd95ae04923739e0da6d Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 14 Jan 2021 17:51:06 +0000
Subject: [PATCH 09/17] [X86][AVX] Adjust unsigned saturation downconvert
 negative test

D87145 was showing that this test (added in D45315) could always be constant folded (with suitable value tracking).

What we actually needed was smax(smin()) negative test coverage, the inverse of negative_test2_smax_usat_trunc_wb_256_mem, so I've tweaked the test to provide that instead.
---
 llvm/test/CodeGen/X86/avx512-trunc.ll | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/llvm/test/CodeGen/X86/avx512-trunc.ll b/llvm/test/CodeGen/X86/avx512-trunc.ll
index 0b2a47c2772cc..d61ada4e5d055 100644
--- a/llvm/test/CodeGen/X86/avx512-trunc.ll
+++ b/llvm/test/CodeGen/X86/avx512-trunc.ll
@@ -1007,10 +1007,8 @@ define <16 x i16> @smax_usat_trunc_dw_512(<16 x i32> %i) {
 define void @negative_test1_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
 ; KNL-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
-; KNL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; KNL-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
+; KNL-NEXT:    vpminsw {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT:    vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
 ; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
 ; KNL-NEXT:    vpmovdb %zmm0, (%rdi)
 ; KNL-NEXT:    vzeroupper
@@ -1018,17 +1016,15 @@ define void @negative_test1_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>*
 ;
 ; SKX-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
 ; SKX:       ## %bb.0:
-; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; SKX-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
-; SKX-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; SKX-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
+; SKX-NEXT:    vpminsw {{.*}}(%rip), %ymm0, %ymm0
+; SKX-NEXT:    vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
 ; SKX-NEXT:    vpmovwb %ymm0, (%rdi)
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
-  %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
-  %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
-  %x3 = icmp slt <16 x i16> %x2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
-  %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %x1 = icmp slt <16 x i16> %i, <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
+  %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
+  %x3 = icmp sgt <16 x i16> %x2, <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
+  %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
   %x6 = trunc <16 x i16> %x5 to <16 x i8>
   store <16 x i8> %x6, <16 x i8>* %res, align 1
   ret void

From 5d165f0b893d4fc5fb5caeb2b05c566dd26e4d89 Mon Sep 17 00:00:00 2001
From: Jon Chesterfield <jonathanchesterfield@gmail.com>
Date: Thu, 14 Jan 2021 18:13:22 +0000
Subject: [PATCH 10/17] [libomptarget][amdgpu] Fix kernel launch tracing to
 match previous behavior

Restore control of kernel launch tracing to be >= 1 as it was before

export LIBOMPTARGET_KERNEL_TRACE=1

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D94695
---
 openmp/libomptarget/plugins/amdgpu/src/rtl.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index bd450f9898faf..9453171e13780 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -1762,7 +1762,7 @@ int32_t __tgt_rtl_run_target_team_region_locked(
                 loop_tripcount, // From run_region arg
                 KernelInfo->device_id);
 
-  if (print_kernel_trace == 4)
+  if (print_kernel_trace >= 1)
     // enum modes are SPMD, GENERIC, NONE 0,1,2
     fprintf(stderr,
             "DEVID:%2d SGN:%1d ConstWGSize:%-4d args:%2d teamsXthrds:(%4dX%4d) "

From 868da2ea939baf8c71a6dcb878cf6094ede9486e Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Thu, 3 Sep 2020 19:22:33 +0100
Subject: [PATCH 11/17] [SelectionDAG] Remove an early-out from
 computeKnownBits for smin/smax

Even if we know nothing about LHS, it can still be useful to know that
smax(LHS, RHS) >= RHS and smin(LHS, RHS) <= RHS.

Differential Revision: https://reviews.llvm.org/D87145
---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  1 -
 llvm/test/CodeGen/X86/known-bits-vector.ll     | 12 ++----------
 2 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7084ab68524b5..82da553954d2f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3416,7 +3416,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     }
 
     Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-    if (Known.isUnknown()) break; // Early-out
     Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
     if (IsMax)
       Known = KnownBits::smax(Known, Known2);
diff --git a/llvm/test/CodeGen/X86/known-bits-vector.ll b/llvm/test/CodeGen/X86/known-bits-vector.ll
index 3b6912a9d9461..05bf984101abc 100644
--- a/llvm/test/CodeGen/X86/known-bits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-bits-vector.ll
@@ -435,11 +435,7 @@ define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) {
 ; X32-NEXT:    vpminsd {{\.LCPI.*}}, %xmm0, %xmm0
 ; X32-NEXT:    vpmaxsd {{\.LCPI.*}}, %xmm0, %xmm0
 ; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
-; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
-; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; X32-NEXT:    vsubps {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
+; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: knownbits_smax_smin_shuffle_uitofp:
@@ -447,11 +443,7 @@ define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) {
 ; X64-NEXT:    vpminsd {{.*}}(%rip), %xmm0, %xmm0
 ; X64-NEXT:    vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
 ; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
-; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
-; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; X64-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
+; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> <i32 0, i32 -65535, i32 -65535, i32 0>)
   %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 131071>)

From 85dfcaadc5f0920dc8ecbece6c786701b8f45ab4 Mon Sep 17 00:00:00 2001
From: Joseph Tremoulet <jotrem@microsoft.com>
Date: Thu, 14 Jan 2021 13:17:02 -0500
Subject: [PATCH 12/17] [LLDB] MinidumpParser: Prefer executable module even at
 higher address

When a program maps one of its own modules for reading, and then
crashes, breakpad can emit two entries for that module in the
ModuleList.  We have logic to identify this case by checking permissions
on mapped memory regions and report just the module with an executable
region.  As currently written, though, the check is asymmetric -- the
entry with the executable region must be the second one encountered for
the preference to kick in.

This change makes the logic symmetric, so that the first-encountered
module will similarly be preferred if it has an executable region but
the second-encountered module does not.  This happens for example when
the module in question is the executable itself, which breakpad likes to
report first -- we need to ignore the other entry for that module when
we see it later, even though it may be mapped at a lower virtual
address.

Reviewed By: clayborg

Differential Revision: https://reviews.llvm.org/D94629
---
 .../Process/minidump/MinidumpParser.cpp       | 26 +++++++-----
 .../Process/minidump/MinidumpParserTest.cpp   | 41 +++++++++++++++++++
 2 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp b/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp
index e16f86cca1c21..61106ebcc4303 100644
--- a/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp
+++ b/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp
@@ -391,19 +391,23 @@ std::vector<const minidump::Module *> MinidumpParser::GetFilteredModuleList() {
       filtered_modules.push_back(&module);
     } else {
       // We have a duplicate module entry. Check the linux regions to see if
-      // the module we already have is not really a mapped executable. If it
-      // isn't check to see if the current duplicate module entry is a real
-      // mapped executable, and if so, replace it. This can happen when a
-      // process mmap's in the file for an executable in order to read bytes
-      // from the executable file. A memory region mapping will exist for the
-      // mmap'ed version and for the loaded executable, but only one will have
-      // a consecutive region that is executable in the memory regions.
+      // either module is not really a mapped executable. If one but not the
+      // other is a real mapped executable, prefer the executable one. This
+      // can happen when a process mmap's in the file for an executable in
+      // order to read bytes from the executable file. A memory region mapping
+      // will exist for the mmap'ed version and for the loaded executable, but
+      // only one will have a consecutive region that is executable in the
+      // memory regions.
       auto dup_module = filtered_modules[iter->second];
       ConstString name(*ExpectedName);
-      if (!CheckForLinuxExecutable(name, linux_regions,
-                                   dup_module->BaseOfImage) &&
-          CheckForLinuxExecutable(name, linux_regions, module.BaseOfImage)) {
-        filtered_modules[iter->second] = &module;
+      bool is_executable =
+          CheckForLinuxExecutable(name, linux_regions, module.BaseOfImage);
+      bool dup_is_executable =
+          CheckForLinuxExecutable(name, linux_regions, dup_module->BaseOfImage);
+
+      if (is_executable != dup_is_executable) {
+        if (is_executable)
+          filtered_modules[iter->second] = &module;
         continue;
       }
       // This module has been seen. Modules are sometimes mentioned multiple
diff --git a/lldb/unittests/Process/minidump/MinidumpParserTest.cpp b/lldb/unittests/Process/minidump/MinidumpParserTest.cpp
index 69046af283eba..e3f23c5fe33ad 100644
--- a/lldb/unittests/Process/minidump/MinidumpParserTest.cpp
+++ b/lldb/unittests/Process/minidump/MinidumpParserTest.cpp
@@ -792,6 +792,47 @@ TEST_F(MinidumpParserTest, MinidumpDuplicateModuleMappedSecond) {
   EXPECT_EQ(0x400d0000u, filtered_modules[0]->BaseOfImage);
 }
 
+TEST_F(MinidumpParserTest, MinidumpDuplicateModuleMappedSecondHigh) {
+  ASSERT_THAT_ERROR(SetUpFromYaml(R"(
+--- !minidump
+Streams:
+  - Type:            ModuleList
+    Modules:
+      - Base of Image:   0x400d3000
+        Size of Image:   0x00002000
+        Module Name:     '/usr/lib/libc.so'
+        CodeView Record: ''
+      - Base of Image:   0x400d0000
+        Size of Image:   0x00001000
+        Module Name:     '/usr/lib/libc.so'
+        CodeView Record: ''
+  - Type:            LinuxMaps
+    Text:             |
+      400d0000-400d2000 r--p 00000000 b3:04 227        /usr/lib/libc.so
+      400d2000-400d3000 rw-p 00000000 00:00 0
+      400d3000-400d4000 r-xp 00010000 b3:04 227        /usr/lib/libc.so
+      400d4000-400d5000 rwxp 00001000 b3:04 227        /usr/lib/libc.so
+...
+)"),
+                    llvm::Succeeded());
+  // If we have a module mentioned twice in the module list, and we have full
+  // linux maps for all of the memory regions, make sure we pick the one that
+  // has a consecutive region with a matching path that has executable
+  // permissions. If clients open an object file with mmap, breakpad can create
+  // multiple mappings for a library erroneously and the lowest address isn't
+  // always the right address. In this case we check the consecutive memory
+  // regions whose path matches starting at the base of image address and make
+  // sure one of the regions is executable and prefer that one.
+  //
+  // This test will make sure that if the executable is first in the module
+  // list, that it will remain the correctly selected module in the filtered
+  // list, even if the non-executable module was loaded at a lower base address.
+  std::vector<const minidump::Module *> filtered_modules =
+      parser->GetFilteredModuleList();
+  ASSERT_EQ(1u, filtered_modules.size());
+  EXPECT_EQ(0x400d3000u, filtered_modules[0]->BaseOfImage);
+}
+
 TEST_F(MinidumpParserTest, MinidumpDuplicateModuleSeparateCode) {
   ASSERT_THAT_ERROR(SetUpFromYaml(R"(
 --- !minidump

From be40c12040a0d5551bf3430cbb184b5ef23e25fd Mon Sep 17 00:00:00 2001
From: Aaron En Ye Shi <enye.shi@gmail.com>
Date: Thu, 14 Jan 2021 17:52:27 +0000
Subject: [PATCH 13/17] [HIP] Add signbit(long double) decl

An _MSC_VER version of signbit(long double) is required for MSVC headers.

Fixes: SWDEV-256409

Differential Revision: https://reviews.llvm.org/D93062
---
 clang/lib/Headers/__clang_cuda_math_forward_declares.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang/lib/Headers/__clang_cuda_math_forward_declares.h b/clang/lib/Headers/__clang_cuda_math_forward_declares.h
index 8a270859e4a58..c0f1f47cc9930 100644
--- a/clang/lib/Headers/__clang_cuda_math_forward_declares.h
+++ b/clang/lib/Headers/__clang_cuda_math_forward_declares.h
@@ -160,6 +160,9 @@ __DEVICE__ double scalbln(double, long);
 __DEVICE__ float scalbln(float, long);
 __DEVICE__ double scalbn(double, int);
 __DEVICE__ float scalbn(float, int);
+#ifdef _MSC_VER
+__DEVICE__ bool signbit(long double);
+#endif
 __DEVICE__ bool signbit(double);
 __DEVICE__ bool signbit(float);
 __DEVICE__ double sin(double);

From 6ebeba88f51959d763a8f274cdfecea46d51d28c Mon Sep 17 00:00:00 2001
From: Arjun P <arjunpitchanathan@gmail.com>
Date: Thu, 14 Jan 2021 19:29:51 +0100
Subject: [PATCH 14/17] Support emptiness checks for unbounded
 FlatAffineConstraints.

With this, we have complete support for emptiness checks. This also paves the way for future support to check if two FlatAffineConstraints are equal.

Reviewed By: ftynse

Differential Revision: https://reviews.llvm.org/D94272
---
 mlir/include/mlir/Analysis/AffineStructures.h |   7 +
 mlir/include/mlir/Analysis/LinearTransform.h  |  48 +++++
 .../mlir/Analysis/Presburger/Fraction.h       |   2 +
 .../include/mlir/Analysis/Presburger/Matrix.h |   6 +
 .../mlir/Analysis/Presburger/Simplex.h        |  30 ++-
 mlir/lib/Analysis/AffineStructures.cpp        | 151 ++++++++++++-
 mlir/lib/Analysis/CMakeLists.txt              |   2 +
 mlir/lib/Analysis/LinearTransform.cpp         | 156 ++++++++++++++
 mlir/lib/Analysis/Presburger/CMakeLists.txt   |   2 +-
 mlir/lib/Analysis/Presburger/Matrix.cpp       |  13 ++
 mlir/lib/Analysis/Presburger/Simplex.cpp      |  31 ++-
 .../Analysis/AffineStructuresTest.cpp         | 203 +++++++++++++++---
 mlir/unittests/Analysis/CMakeLists.txt        |   1 +
 .../Analysis/LinearTransformTest.cpp          |  87 ++++++++
 14 files changed, 697 insertions(+), 42 deletions(-)
 create mode 100644 mlir/include/mlir/Analysis/LinearTransform.h
 create mode 100644 mlir/lib/Analysis/LinearTransform.cpp
 create mode 100644 mlir/unittests/Analysis/LinearTransformTest.cpp

diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h
index 25071db100e32..fa80db7d4b639 100644
--- a/mlir/include/mlir/Analysis/AffineStructures.h
+++ b/mlir/include/mlir/Analysis/AffineStructures.h
@@ -13,6 +13,7 @@
 #ifndef MLIR_ANALYSIS_AFFINE_STRUCTURES_H
 #define MLIR_ANALYSIS_AFFINE_STRUCTURES_H
 
+#include "mlir/Analysis/Presburger/Matrix.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/Support/LogicalResult.h"
@@ -153,6 +154,12 @@ class FlatAffineConstraints {
   /// false if a solution exists or all tests were inconclusive.
   bool isIntegerEmpty() const;
 
+  // Returns a matrix where each row is a vector along which the polytope is
+  // bounded. The span of the returned vectors is guaranteed to contain all
+  // such vectors. The returned vectors are NOT guaranteed to be linearly
+  // independent. This function should not be called on empty sets.
+  Matrix getBoundedDirections() const;
+
   /// Find a sample point satisfying the constraints. This uses a branch and
   /// bound algorithm with generalized basis reduction, which always works if
   /// the set is bounded. This should not be called for unbounded sets.
diff --git a/mlir/include/mlir/Analysis/LinearTransform.h b/mlir/include/mlir/Analysis/LinearTransform.h
new file mode 100644
index 0000000000000..0850f5a006097
--- /dev/null
+++ b/mlir/include/mlir/Analysis/LinearTransform.h
@@ -0,0 +1,48 @@
+//===- LinearTransform.h - MLIR LinearTransform Class -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Support for linear transforms and applying them to FlatAffineConstraints.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_ANALYSIS_LINEARTRANSFORM_H
+#define MLIR_ANALYSIS_LINEARTRANSFORM_H
+
+#include "mlir/Analysis/AffineStructures.h"
+#include "mlir/Analysis/Presburger/Matrix.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace mlir {
+
+class LinearTransform {
+public:
+  explicit LinearTransform(Matrix &&oMatrix);
+  explicit LinearTransform(const Matrix &oMatrix);
+
+  // Returns a linear transform T such that MT is M in column echelon form.
+  // Also returns the number of non-zero columns in MT.
+  //
+  // Specifically, T is such that in every column the first non-zero row is
+  // strictly below that of the previous column, and all columns which have only
+  // zeros are at the end.
+  static std::pair<unsigned, LinearTransform>
+  makeTransformToColumnEchelon(Matrix m);
+
+  // Returns a FlatAffineConstraints having a constraint vector vT for every
+  // constraint vector v in fac, where T is this transform.
+  FlatAffineConstraints applyTo(const FlatAffineConstraints &fac);
+
+  // Post-multiply the given vector v with this transform, say T, returning vT.
+  SmallVector<int64_t, 8> applyTo(ArrayRef<int64_t> v);
+
+private:
+  Matrix matrix;
+};
+
+} // namespace mlir
+#endif // MLIR_ANALYSIS_LINEARTRANSFORM_H
diff --git a/mlir/include/mlir/Analysis/Presburger/Fraction.h b/mlir/include/mlir/Analysis/Presburger/Fraction.h
index 09996c486ef33..61b0915e559ec 100644
--- a/mlir/include/mlir/Analysis/Presburger/Fraction.h
+++ b/mlir/include/mlir/Analysis/Presburger/Fraction.h
@@ -64,6 +64,8 @@ inline bool operator<=(Fraction x, Fraction y) { return compare(x, y) <= 0; }
 
 inline bool operator==(Fraction x, Fraction y) { return compare(x, y) == 0; }
 
+inline bool operator!=(Fraction x, Fraction y) { return compare(x, y) != 0; }
+
 inline bool operator>(Fraction x, Fraction y) { return compare(x, y) > 0; }
 
 inline bool operator>=(Fraction x, Fraction y) { return compare(x, y) >= 0; }
diff --git a/mlir/include/mlir/Analysis/Presburger/Matrix.h b/mlir/include/mlir/Analysis/Presburger/Matrix.h
index 7bc29f81a8346..8ed40bb9c0266 100644
--- a/mlir/include/mlir/Analysis/Presburger/Matrix.h
+++ b/mlir/include/mlir/Analysis/Presburger/Matrix.h
@@ -58,6 +58,12 @@ class Matrix {
   /// Add `scale` multiples of the source row to the target row.
   void addToRow(unsigned sourceRow, unsigned targetRow, int64_t scale);
 
+  /// Add `scale` multiples of the source column to the target column.
+  void addToColumn(unsigned sourceColumn, unsigned targetColumn, int64_t scale);
+
+  /// Negate the specified column.
+  void negateColumn(unsigned column);
+
   /// Resize the matrix to the specified dimensions. If a dimension is smaller,
   /// the values are truncated; if it is bigger, the new values are default
   /// initialized.
diff --git a/mlir/include/mlir/Analysis/Presburger/Simplex.h b/mlir/include/mlir/Analysis/Presburger/Simplex.h
index 05d241e60958c..370035cbc7ba1 100644
--- a/mlir/include/mlir/Analysis/Presburger/Simplex.h
+++ b/mlir/include/mlir/Analysis/Presburger/Simplex.h
@@ -17,10 +17,12 @@
 #include "mlir/Analysis/AffineStructures.h"
 #include "mlir/Analysis/Presburger/Fraction.h"
 #include "mlir/Analysis/Presburger/Matrix.h"
+#include "mlir/IR/Location.h"
 #include "mlir/Support/LogicalResult.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/StringSaver.h"
 #include "llvm/Support/raw_ostream.h"
 
 namespace mlir {
@@ -84,7 +86,7 @@ class GBRSimplex;
 ///
 /// The unknowns in row position are represented in terms of the basis unknowns.
 /// If the basis unknowns are u_1, u_2, ... u_m, and a row in the tableau is
-/// d, c, a_1, a_2, ... a_m, this representats the unknown for that row as
+/// d, c, a_1, a_2, ... a_m, this represents the unknown for that row as
 /// (c + a_1*u_1 + a_2*u_2 + ... + a_m*u_m)/d. In our running example, if the
 /// basis is the initial basis of x, y, then the constraint 1 + 2x + 3y >= 0
 /// would be represented by the row [1, 1, 2, 3].
@@ -173,20 +175,25 @@ class Simplex {
   void intersectFlatAffineConstraints(const FlatAffineConstraints &fac);
 
   /// Compute the maximum or minimum value of the given row, depending on
-  /// direction. The specified row is never pivoted.
+  /// direction. The specified row is never pivoted. On return, the row may
+  /// have a negative sample value if the direction is down.
   ///
-  /// Returns a (num, den) pair denoting the optimum, or None if no
-  /// optimum exists, i.e., if the expression is unbounded in this direction.
+  /// Returns a Fraction denoting the optimum, or a null value if no optimum
+  /// exists, i.e., if the expression is unbounded in this direction.
   Optional<Fraction> computeRowOptimum(Direction direction, unsigned row);
 
   /// Compute the maximum or minimum value of the given expression, depending on
-  /// direction.
+  /// direction. Should not be called when the Simplex is empty.
   ///
-  /// Returns a (num, den) pair denoting the optimum, or a null value if no
-  /// optimum exists, i.e., if the expression is unbounded in this direction.
+  /// Returns a Fraction denoting the optimum, or a null value if no optimum
+  /// exists, i.e., if the expression is unbounded in this direction.
   Optional<Fraction> computeOptimum(Direction direction,
                                     ArrayRef<int64_t> coeffs);
 
+  /// Returns whether the perpendicular of the specified constraint is a
+  /// direction along which the polytope is bounded.
+  bool isBoundedAlongConstraint(unsigned constraintIndex);
+
   /// Returns whether the specified constraint has been marked as redundant.
   /// Constraints are numbered from 0 starting at the first added inequality.
   /// Equalities are added as a pair of inequalities and so correspond to two
@@ -299,6 +306,15 @@ class Simplex {
   /// sample value, false otherwise.
   LogicalResult restoreRow(Unknown &u);
 
+  /// Compute the maximum or minimum of the specified Unknown, depending on
+  /// direction. The specified unknown may be pivoted. If the unknown is
+  /// restricted, it will have a non-negative sample value on return.
+  /// Should not be called if the Simplex is empty.
+  ///
+  /// Returns a Fraction denoting the optimum, or a null value if no optimum
+  /// exists, i.e., if the expression is unbounded in this direction.
+  Optional<Fraction> computeOptimum(Direction direction, Unknown &u);
+
   /// Mark the specified unknown redundant. This operation is added to the undo
   /// log and will be undone by rollbacks. The specified unknown must be in row
   /// orientation.
diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp
index 51141e6f61841..12c90fbcfc54c 100644
--- a/mlir/lib/Analysis/AffineStructures.cpp
+++ b/mlir/lib/Analysis/AffineStructures.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Analysis/AffineStructures.h"
+#include "mlir/Analysis/LinearTransform.h"
 #include "mlir/Analysis/Presburger/Simplex.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
@@ -20,6 +21,7 @@
 #include "mlir/Support/LLVM.h"
 #include "mlir/Support/MathExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -1034,21 +1036,152 @@ bool FlatAffineConstraints::isEmptyByGCDTest() const {
   return false;
 }
 
-// First, try the GCD test heuristic.
+// Returns a matrix where each row is a vector along which the polytope is
+// bounded. The span of the returned vectors is guaranteed to contain all
+// such vectors. The returned vectors are NOT guaranteed to be linearly
+// independent. This function should not be called on empty sets.
 //
-// If that doesn't find the set empty, check if the set is unbounded. If it is,
-// we cannot use the GBR algorithm and we conservatively return false.
-//
-// If the set is bounded, we use the complete emptiness check for this case
-// provided by Simplex::findIntegerSample(), which gives a definitive answer.
+// It is sufficient to check the perpendiculars of the constraints, as the set
+// of perpendiculars which are bounded must span all bounded directions.
+Matrix FlatAffineConstraints::getBoundedDirections() const {
+  // Note that it is necessary to add the equalities too (which the constructor
+  // does) even though we don't need to check if they are bounded; whether an
+  // inequality is bounded or not depends on what other constraints, including
+  // equalities, are present.
+  Simplex simplex(*this);
+
+  assert(!simplex.isEmpty() && "It is not meaningful to ask whether a "
+                               "direction is bounded in an empty set.");
+
+  SmallVector<unsigned, 8> boundedIneqs;
+  // The constructor adds the inequalities to the simplex first, so this
+  // processes all the inequalities.
+  for (unsigned i = 0, e = getNumInequalities(); i < e; ++i) {
+    if (simplex.isBoundedAlongConstraint(i))
+      boundedIneqs.push_back(i);
+  }
+
+  // The direction vector is given by the coefficients and does not include the
+  // constant term, so the matrix has one fewer column.
+  unsigned dirsNumCols = getNumCols() - 1;
+  Matrix dirs(boundedIneqs.size() + getNumEqualities(), dirsNumCols);
+
+  // Copy the bounded inequalities.
+  unsigned row = 0;
+  for (unsigned i : boundedIneqs) {
+    for (unsigned col = 0; col < dirsNumCols; ++col)
+      dirs(row, col) = atIneq(i, col);
+    ++row;
+  }
+
+  // Copy the equalities. All the equalities' perpendiculars are bounded.
+  for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) {
+    for (unsigned col = 0; col < dirsNumCols; ++col)
+      dirs(row, col) = atEq(i, col);
+    ++row;
+  }
+
+  return dirs;
+}
+
+bool eqInvolvesSuffixDims(const FlatAffineConstraints &fac, unsigned eqIndex,
+                          unsigned numDims) {
+  for (unsigned e = fac.getNumDimIds(), j = e - numDims; j < e; ++j)
+    if (fac.atEq(eqIndex, j) != 0)
+      return true;
+  return false;
+}
+bool ineqInvolvesSuffixDims(const FlatAffineConstraints &fac,
+                            unsigned ineqIndex, unsigned numDims) {
+  for (unsigned e = fac.getNumDimIds(), j = e - numDims; j < e; ++j)
+    if (fac.atIneq(ineqIndex, j) != 0)
+      return true;
+  return false;
+}
+
+void removeConstraintsInvolvingSuffixDims(FlatAffineConstraints &fac,
+                                          unsigned unboundedDims) {
+  // We iterate backwards so that whether we remove constraint i - 1 or not, the
+  // next constraint to be tested is always i - 2.
+  for (unsigned i = fac.getNumEqualities(); i > 0; i--)
+    if (eqInvolvesSuffixDims(fac, i - 1, unboundedDims))
+      fac.removeEquality(i - 1);
+  for (unsigned i = fac.getNumInequalities(); i > 0; i--)
+    if (ineqInvolvesSuffixDims(fac, i - 1, unboundedDims))
+      fac.removeInequality(i - 1);
+}
+
+/// Let this set be S. If S is bounded then we directly call into the GBR
+/// sampling algorithm. Otherwise, there are some unbounded directions, i.e.,
+/// vectors v such that S extends to infinity along v or -v. In this case we
+/// use an algorithm described in the integer set library (isl) manual and used
+/// by the isl_set_sample function in that library. The algorithm is:
+///
+/// 1) Apply a unimodular transform T to S to obtain S*T, such that all
+/// dimensions in which S*T is bounded lie in the linear span of a prefix of the
+/// dimensions.
+///
+/// 2) Construct a set transformedSet by removing all constraints that involve
+/// the unbounded dimensions and also deleting the unbounded dimensions. Note
+/// that this is a bounded set.
+///
+/// 3) Check if transformedSet is empty using the GBR sampling algorithm.
+///
+/// 4) Return that S is empty iff transformedSet is empty.
+///
+/// Since T is unimodular, a vector v is a solution to S*T iff T*v is a
+/// solution to S. The following is a sketch of a proof that S*T is empty
+/// iff transformedSet is empty:
+///
+/// If transformedSet is empty, then S*T is certainly empty since transformedSet
+/// was obtained by removing constraints and deleting dimensions from S*T.
+///
+/// If transformedSet contains a sample, consider the set C obtained by
+/// substituting the sample for the bounded dimensions of S*T. All the
+/// constraints of S*T that did not involve unbounded dimensions are
+/// satisfied by this substitution.
+///
+/// In step 1, all dimensions in the linear span of the dimensions outside the
+/// prefix are unbounded in S*T. Substituting values for the bounded dimensions
+/// cannot make these dimensions bounded, and these are the only remaining
+/// dimensions in C, so C is unbounded along every vector. C is hence a
+/// full-dimensional cone and therefore always contains an integer point, which
+/// we can then substitute to get a full solution to S*T.
 bool FlatAffineConstraints::isIntegerEmpty() const {
+  // First, try the GCD test heuristic.
   if (isEmptyByGCDTest())
     return true;
 
   Simplex simplex(*this);
-  if (simplex.isUnbounded())
-    return false;
-  return !simplex.findIntegerSample().hasValue();
+  if (simplex.isEmpty())
+    return true;
+
+  // For a bounded set, we directly call into the GBR sampling algorithm.
+  if (!simplex.isUnbounded())
+    return !simplex.findIntegerSample().hasValue();
+
+  // The set is unbounded. We cannot directly use the GBR algorithm.
+  //
+  // m is a matrix containing, in each row, a vector along which S is
+  // bounded, such that the linear span of all these vectors contains all
+  // bounded directions in S.
+  Matrix m = getBoundedDirections();
+  // In column echelon form, each row of m occupies only the first rank(m)
+  // columns and has zeros on the other columns. The transform T that brings S
+  // to column echelon form is unimodular as well, so this is a suitable
+  // transform to use in step 1 of the algorithm.
+  std::pair<unsigned, LinearTransform> result =
+      LinearTransform::makeTransformToColumnEchelon(std::move(m));
+  FlatAffineConstraints transformedSet = result.second.applyTo(*this);
+
+  unsigned numBoundedDims = result.first;
+  unsigned numUnboundedDims = getNumIds() - numBoundedDims;
+  removeConstraintsInvolvingSuffixDims(transformedSet, numUnboundedDims);
+
+  // Remove all the unbounded dimensions.
+  transformedSet.removeIdRange(numBoundedDims, transformedSet.getNumIds());
+
+  return !Simplex(transformedSet).findIntegerSample().hasValue();
 }
 
 Optional<SmallVector<int64_t, 8>>
diff --git a/mlir/lib/Analysis/CMakeLists.txt b/mlir/lib/Analysis/CMakeLists.txt
index 3247ef1f56b02..585ba2aa8baf9 100644
--- a/mlir/lib/Analysis/CMakeLists.txt
+++ b/mlir/lib/Analysis/CMakeLists.txt
@@ -3,6 +3,7 @@ set(LLVM_OPTIONAL_SOURCES
   AffineStructures.cpp
   BufferAliasAnalysis.cpp
   CallGraph.cpp
+  LinearTransform.cpp
   Liveness.cpp
   LoopAnalysis.cpp
   NestedMatcher.cpp
@@ -36,6 +37,7 @@ add_mlir_library(MLIRAnalysis
 add_mlir_library(MLIRLoopAnalysis
   AffineAnalysis.cpp
   AffineStructures.cpp
+  LinearTransform.cpp
   LoopAnalysis.cpp
   NestedMatcher.cpp
   PresburgerSet.cpp
diff --git a/mlir/lib/Analysis/LinearTransform.cpp b/mlir/lib/Analysis/LinearTransform.cpp
new file mode 100644
index 0000000000000..7176cb01231f4
--- /dev/null
+++ b/mlir/lib/Analysis/LinearTransform.cpp
@@ -0,0 +1,156 @@
+//===- LinearTransform.cpp - MLIR LinearTransform Class -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Analysis/LinearTransform.h"
+#include "mlir/Analysis/AffineStructures.h"
+
+namespace mlir {
+
+LinearTransform::LinearTransform(Matrix &&oMatrix) : matrix(oMatrix) {}
+LinearTransform::LinearTransform(const Matrix &oMatrix) : matrix(oMatrix) {}
+
+// Set M(row, targetCol) to its remainder on division by M(row, sourceCol)
+// by subtracting from column targetCol an appropriate integer multiple of
+// sourceCol. This brings M(row, targetCol) to the range [0, M(row, sourceCol)).
+// Apply the same column operation to otherMatrix, with the same integer
+// multiple.
+static void modEntryColumnOperation(Matrix &m, unsigned row, unsigned sourceCol,
+                                    unsigned targetCol, Matrix &otherMatrix) {
+  assert(m(row, sourceCol) != 0 && "Cannot divide by zero!");
+  assert((m(row, sourceCol) > 0 && m(row, targetCol) > 0) &&
+         "Operands must be positive!");
+  int64_t ratio = m(row, targetCol) / m(row, sourceCol);
+  m.addToColumn(sourceCol, targetCol, -ratio);
+  otherMatrix.addToColumn(sourceCol, targetCol, -ratio);
+}
+
+std::pair<unsigned, LinearTransform>
+LinearTransform::makeTransformToColumnEchelon(Matrix m) {
+  // We start with an identity result matrix and perform operations on m
+  // until m is in column echelon form. We apply the same sequence of operations
+  // on resultMatrix to obtain a transform that takes m to column echelon
+  // form.
+  Matrix resultMatrix = Matrix::identity(m.getNumColumns());
+
+  unsigned echelonCol = 0;
+  // Invariant: in all rows above row, all columns from echelonCol onwards
+  // are all zero elements. In an iteration, if the current row has any non-zero
+  // elements echelonCol onwards, we bring one to echelonCol and use it to
+  // make all elements echelonCol + 1 onwards zero.
+  for (unsigned row = 0; row < m.getNumRows(); ++row) {
+    // Search row for a non-zero entry, starting at echelonCol.
+    unsigned nonZeroCol = echelonCol;
+    for (unsigned e = m.getNumColumns(); nonZeroCol < e; ++nonZeroCol) {
+      if (m(row, nonZeroCol) == 0)
+        continue;
+      break;
+    }
+
+    // Continue to the next row with the same echelonCol if this row is all
+    // zeros from echelonCol onwards.
+    if (nonZeroCol == m.getNumColumns())
+      continue;
+
+    // Bring the non-zero column to echelonCol. This doesn't affect rows
+    // above since they are all zero at these columns.
+    if (nonZeroCol != echelonCol) {
+      m.swapColumns(nonZeroCol, echelonCol);
+      resultMatrix.swapColumns(nonZeroCol, echelonCol);
+    }
+
+    // Make m(row, echelonCol) non-negative.
+    if (m(row, echelonCol) < 0) {
+      m.negateColumn(echelonCol);
+      resultMatrix.negateColumn(echelonCol);
+    }
+
+    // Make all the entries in row after echelonCol zero.
+    for (unsigned i = echelonCol + 1, e = m.getNumColumns(); i < e; ++i) {
+      // We make m(row, i) non-negative, and then apply the Euclidean GCD
+      // algorithm to (row, i) and (row, echelonCol). At the end, one of them
+      // has value equal to the gcd of the two entries, and the other is zero.
+
+      if (m(row, i) < 0) {
+        m.negateColumn(i);
+        resultMatrix.negateColumn(i);
+      }
+
+      unsigned targetCol = i, sourceCol = echelonCol;
+      // At every step, we set m(row, targetCol) %= m(row, sourceCol), and
+      // swap the indices sourceCol and targetCol. (not the columns themselves)
+      // This modulo is implemented as a subtraction
+      // m(row, targetCol) -= quotient * m(row, sourceCol),
+      // where quotient = floor(m(row, targetCol) / m(row, sourceCol)),
+      // which brings m(row, targetCol) to the range [0, m(row, sourceCol)).
+      //
+      // We are only allowed column operations; we perform the above
+      // for every row, i.e., the above subtraction is done as a column
+      // operation. This does not affect any rows above us since they are
+      // guaranteed to be zero at these columns.
+      while (m(row, targetCol) != 0 && m(row, sourceCol) != 0) {
+        modEntryColumnOperation(m, row, sourceCol, targetCol, resultMatrix);
+        std::swap(targetCol, sourceCol);
+      }
+
+      // One of (row, echelonCol) and (row, i) is zero and the other is the gcd.
+      // Make it so that (row, echelonCol) holds the non-zero value.
+      if (m(row, echelonCol) == 0) {
+        m.swapColumns(i, echelonCol);
+        resultMatrix.swapColumns(i, echelonCol);
+      }
+    }
+
+    ++echelonCol;
+  }
+
+  return {echelonCol, LinearTransform(std::move(resultMatrix))};
+}
+
+SmallVector<int64_t, 8> LinearTransform::applyTo(ArrayRef<int64_t> v) {
+  assert(v.size() == matrix.getNumRows() &&
+         "vector dimension should be matrix output dimension");
+
+  SmallVector<int64_t, 8> result;
+  result.reserve(v.size());
+  for (unsigned col = 0, e = matrix.getNumColumns(); col < e; ++col) {
+    int64_t elem = 0;
+    for (unsigned i = 0, e = matrix.getNumRows(); i < e; ++i)
+      elem += v[i] * matrix(i, col);
+    result.push_back(elem);
+  }
+  return result;
+}
+
+FlatAffineConstraints
+LinearTransform::applyTo(const FlatAffineConstraints &fac) {
+  FlatAffineConstraints result(fac.getNumDimIds());
+
+  for (unsigned i = 0, e = fac.getNumEqualities(); i < e; ++i) {
+    ArrayRef<int64_t> eq = fac.getEquality(i);
+
+    int64_t c = eq.back();
+
+    SmallVector<int64_t, 8> newEq = applyTo(eq.drop_back());
+    newEq.push_back(c);
+    result.addEquality(newEq);
+  }
+
+  for (unsigned i = 0, e = fac.getNumInequalities(); i < e; ++i) {
+    ArrayRef<int64_t> ineq = fac.getInequality(i);
+
+    int64_t c = ineq.back();
+
+    SmallVector<int64_t, 8> newIneq = applyTo(ineq.drop_back());
+    newIneq.push_back(c);
+    result.addInequality(newIneq);
+  }
+
+  return result;
+}
+
+} // namespace mlir
diff --git a/mlir/lib/Analysis/Presburger/CMakeLists.txt b/mlir/lib/Analysis/Presburger/CMakeLists.txt
index 49cdd5ac14312..2561013696d9b 100644
--- a/mlir/lib/Analysis/Presburger/CMakeLists.txt
+++ b/mlir/lib/Analysis/Presburger/CMakeLists.txt
@@ -1,4 +1,4 @@
 add_mlir_library(MLIRPresburger
   Simplex.cpp
   Matrix.cpp
-  )
\ No newline at end of file
+  )
diff --git a/mlir/lib/Analysis/Presburger/Matrix.cpp b/mlir/lib/Analysis/Presburger/Matrix.cpp
index 213f1111e2a3a..4a5a53921548c 100644
--- a/mlir/lib/Analysis/Presburger/Matrix.cpp
+++ b/mlir/lib/Analysis/Presburger/Matrix.cpp
@@ -79,6 +79,19 @@ void Matrix::addToRow(unsigned sourceRow, unsigned targetRow, int64_t scale) {
   return;
 }
 
+void Matrix::addToColumn(unsigned sourceColumn, unsigned targetColumn,
+                         int64_t scale) {
+  if (scale == 0)
+    return;
+  for (unsigned row = 0, e = getNumRows(); row < e; ++row)
+    at(row, targetColumn) += scale * at(row, sourceColumn);
+}
+
+void Matrix::negateColumn(unsigned column) {
+  for (unsigned row = 0, e = getNumRows(); row < e; ++row)
+    at(row, column) = -at(row, column);
+}
+
 void Matrix::print(raw_ostream &os) const {
   for (unsigned row = 0; row < nRows; ++row) {
     for (unsigned column = 0; column < nColumns; ++column)
diff --git a/mlir/lib/Analysis/Presburger/Simplex.cpp b/mlir/lib/Analysis/Presburger/Simplex.cpp
index 47e199baba2af..2cfe5929e21dc 100644
--- a/mlir/lib/Analysis/Presburger/Simplex.cpp
+++ b/mlir/lib/Analysis/Presburger/Simplex.cpp
@@ -9,6 +9,7 @@
 #include "mlir/Analysis/Presburger/Simplex.h"
 #include "mlir/Analysis/Presburger/Matrix.h"
 #include "mlir/Support/MathExtras.h"
+#include "llvm/ADT/Optional.h"
 
 namespace mlir {
 using Direction = Simplex::Direction;
@@ -482,7 +483,7 @@ Optional<Fraction> Simplex::computeRowOptimum(Direction direction,
 /// or None if it is unbounded.
 Optional<Fraction> Simplex::computeOptimum(Direction direction,
                                            ArrayRef<int64_t> coeffs) {
-  assert(!empty && "Tableau should not be empty");
+  assert(!empty && "Simplex should not be empty");
 
   unsigned snapshot = getSnapshot();
   unsigned conIndex = addRow(coeffs);
@@ -492,6 +493,34 @@ Optional<Fraction> Simplex::computeOptimum(Direction direction,
   return optimum;
 }
 
+Optional<Fraction> Simplex::computeOptimum(Direction direction, Unknown &u) {
+  assert(!empty && "Simplex should not be empty!");
+  if (u.orientation == Orientation::Column) {
+    unsigned column = u.pos;
+    Optional<unsigned> pivotRow = findPivotRow({}, direction, column);
+    // If no pivot is returned, the constraint is unbounded in the specified
+    // direction.
+    if (!pivotRow)
+      return {};
+    pivot(*pivotRow, column);
+  }
+
+  unsigned row = u.pos;
+  Optional<Fraction> optimum = computeRowOptimum(direction, row);
+  if (u.restricted && direction == Direction::Down &&
+      (!optimum || *optimum < Fraction(0, 1)))
+    restoreRow(u);
+  return optimum;
+}
+
+bool Simplex::isBoundedAlongConstraint(unsigned constraintIndex) {
+  assert(!empty && "It is not meaningful to ask whether a direction is bounded "
+                   "in an empty set.");
+  // The constraint's perpendicular is already bounded below, since it is a
+  // constraint. If it is also bounded above, we can return true.
+  return computeOptimum(Direction::Up, con[constraintIndex]).hasValue();
+}
+
 /// Redundant constraints are those that are in row orientation and lie in
 /// rows 0 to nRedundant - 1.
 bool Simplex::isMarkedRedundant(unsigned constraintIndex) const {
diff --git a/mlir/unittests/Analysis/AffineStructuresTest.cpp b/mlir/unittests/Analysis/AffineStructuresTest.cpp
index 6fcb1c489cfcb..ac11c90ec15b3 100644
--- a/mlir/unittests/Analysis/AffineStructuresTest.cpp
+++ b/mlir/unittests/Analysis/AffineStructuresTest.cpp
@@ -15,22 +15,36 @@
 
 namespace mlir {
 
-/// If 'hasValue' is true, check that findIntegerSample returns a valid sample
+enum class TestFunction { Sample, Empty };
+
+/// If fn is TestFunction::Sample (default):
+/// If hasSample is true, check that findIntegerSample returns a valid sample
 /// for the FlatAffineConstraints fac.
+/// If hasSample is false, check that findIntegerSample returns None.
 ///
-/// If hasValue is false, check that findIntegerSample does not return None.
-static void checkSample(bool hasValue, const FlatAffineConstraints &fac) {
-  Optional<SmallVector<int64_t, 8>> maybeSample = fac.findIntegerSample();
-  if (!hasValue) {
-    EXPECT_FALSE(maybeSample.hasValue());
-    if (maybeSample.hasValue()) {
-      for (auto x : *maybeSample)
-        llvm::errs() << x << ' ';
-      llvm::errs() << '\n';
+/// If fn is TestFunction::Empty, check that isIntegerEmpty returns the
+/// opposite of hasSample.
+static void checkSample(bool hasSample, const FlatAffineConstraints &fac,
+                        TestFunction fn = TestFunction::Sample) {
+  Optional<SmallVector<int64_t, 8>> maybeSample;
+  switch (fn) {
+  case TestFunction::Sample:
+    maybeSample = fac.findIntegerSample();
+    if (!hasSample) {
+      EXPECT_FALSE(maybeSample.hasValue());
+      if (maybeSample.hasValue()) {
+        for (auto x : *maybeSample)
+          llvm::errs() << x << ' ';
+        llvm::errs() << '\n';
+      }
+    } else {
+      ASSERT_TRUE(maybeSample.hasValue());
+      EXPECT_TRUE(fac.containsPoint(*maybeSample));
     }
-  } else {
-    ASSERT_TRUE(maybeSample.hasValue());
-    EXPECT_TRUE(fac.containsPoint(*maybeSample));
+    break;
+  case TestFunction::Empty:
+    EXPECT_EQ(!hasSample, fac.isIntegerEmpty());
+    break;
   }
 }
 
@@ -52,9 +66,11 @@ makeFACFromConstraints(unsigned dims, ArrayRef<SmallVector<int64_t, 4>> ineqs,
 /// orderings may cause the algorithm to proceed differently. At least some of
 ///.these permutations should make it past the heuristics and test the
 /// implementation of the GBR algorithm itself.
-static void checkPermutationsSample(bool hasValue, unsigned nDim,
+/// Use TestFunction fn to test.
+static void checkPermutationsSample(bool hasSample, unsigned nDim,
                                     ArrayRef<SmallVector<int64_t, 4>> ineqs,
-                                    ArrayRef<SmallVector<int64_t, 4>> eqs) {
+                                    ArrayRef<SmallVector<int64_t, 4>> eqs,
+                                    TestFunction fn = TestFunction::Sample) {
   SmallVector<unsigned, 4> perm(nDim);
   std::iota(perm.begin(), perm.end(), 0);
   auto permute = [&perm](ArrayRef<int64_t> coeffs) {
@@ -71,8 +87,8 @@ static void checkPermutationsSample(bool hasValue, unsigned nDim,
     for (const auto &eq : eqs)
       permutedEqs.push_back(permute(eq));
 
-    checkSample(hasValue,
-                makeFACFromConstraints(nDim, permutedIneqs, permutedEqs));
+    checkSample(hasSample,
+                makeFACFromConstraints(nDim, permutedIneqs, permutedEqs), fn);
   } while (std::next_permutation(perm.begin(), perm.end()));
 }
 
@@ -206,19 +222,158 @@ TEST(FlatAffineConstraintsTest, IsIntegerEmptyTest) {
   EXPECT_FALSE(
       makeFACFromConstraints(1, {{5, -1}, {-5, 9}}, {}).isIntegerEmpty());
 
-  // An unbounded set, which isIntegerEmpty should detect as unbounded and
-  // return without calling findIntegerSample.
+  // Unbounded sets.
+  EXPECT_TRUE(makeFACFromConstraints(3,
+                                     {
+                                         {2, 0, 0, -1}, // 2x >= 1
+                                         {-2, 0, 0, 1}, // 2x <= 1
+                                         {0, 2, 0, -1}, // 2y >= 1
+                                         {0, -2, 0, 1}, // 2y <= 1
+                                         {0, 0, 2, -1}, // 2z >= 1
+                                     },
+                                     {})
+                  .isIntegerEmpty());
+
   EXPECT_FALSE(makeFACFromConstraints(3,
                                       {
-                                          {2, 0, 0, -1},
-                                          {-2, 0, 0, 1},
-                                          {0, 2, 0, -1},
-                                          {0, -2, 0, 1},
-                                          {0, 0, 2, -1},
+                                          {2, 0, 0, -1},  // 2x >= 1
+                                          {-3, 0, 0, 3},  // 3x <= 3
+                                          {0, 0, 5, -6},  // 5z >= 6
+                                          {0, 0, -7, 17}, // 7z <= 17
+                                          {0, 3, 0, -2},  // 3y >= 2
                                       },
                                       {})
                    .isIntegerEmpty());
 
+  // 2D cone with apex at (10000, 10000) and
+  // edges passing through (1/3, 0) and (2/3, 0).
+  EXPECT_FALSE(
+      makeFACFromConstraints(
+          2, {{300000, -299999, -100000}, {-300000, 299998, 200000}}, {})
+          .isIntegerEmpty());
+
+  // Cartesian product of a tetrahedron and a 2D cone.
+  // The tetrahedron has vertices at
+  // (1/3, 0, 0), (2/3, 0, 0), (2/3, 0, 10000), and (10000, 10000, 10000).
+  // The first three points form a triangular base on the xz plane with the
+  // apex at the fourth point, which is the only integer point.
+  // The cone has apex at (10000, 10000) and
+  // edges passing through (1/3, 0) and (2/3, 0).
+  checkPermutationsSample(
+      true /* not empty */, 5,
+      {
+          // Tetrahedron constraints:
+          {0, 1, 0, 0, 0, 0},  // y >= 0
+          {0, -1, 1, 0, 0, 0}, // z >= y
+                               // -300000x + 299998y + 100000 + z <= 0.
+          {300000, -299998, -1, 0, 0, -100000},
+          // -150000x + 149999y + 100000 >= 0.
+          {-150000, 149999, 0, 0, 0, 100000},
+
+          // Triangle constraints:
+          // 300000p - 299999q >= 100000
+          {0, 0, 0, 300000, -299999, -100000},
+          // -300000p + 299998q + 200000 >= 0
+          {0, 0, 0, -300000, 299998, 200000},
+      },
+      {}, TestFunction::Empty);
+
+  // Cartesian product of same tetrahedron as above and {(p, q) : 1/3 <= p <=
+  // 2/3}. Since the second set is empty, the whole set is too.
+  checkPermutationsSample(
+      false /* empty */, 5,
+      {
+          // Tetrahedron constraints:
+          {0, 1, 0, 0, 0, 0},  // y >= 0
+          {0, -1, 1, 0, 0, 0}, // z >= y
+                               // -300000x + 299998y + 100000 + z <= 0.
+          {300000, -299998, -1, 0, 0, -100000},
+          // -150000x + 149999y + 100000 >= 0.
+          {-150000, 149999, 0, 0, 0, 100000},
+
+          // Second set constraints:
+          // 3p >= 1
+          {0, 0, 0, 3, 0, -1},
+          // 3p <= 2
+          {0, 0, 0, -3, 0, 2},
+      },
+      {}, TestFunction::Empty);
+
+  // Cartesian product of same tetrahedron as above and
+  // {(p, q, r) : 1 <= p <= 2 and p = 3q + 3r}, over (x, y, z, p, q, r).
+  // Since the second set is empty, the whole set is too.
+  checkPermutationsSample(
+      false /* empty */, 6,
+      {
+          // Tetrahedron constraints:
+          {0, 1, 0, 0, 0, 0, 0},  // y >= 0
+          {0, -1, 1, 0, 0, 0, 0}, // z >= y
+                                  // -300000x + 299998y + 100000 + z <= 0.
+          {300000, -299998, -1, 0, 0, 0, -100000},
+          // -150000x + 149999y + 100000 >= 0.
+          {-150000, 149999, 0, 0, 0, 0, 100000},
+
+          // Second set constraints:
+          // p >= 1
+          {0, 0, 0, 1, 0, 0, -1},
+          // p <= 2
+          {0, 0, 0, -1, 0, 0, 2},
+      },
+      {
+          {0, 0, 0, 1, -3, -3, 0}, // p = 3q + 3r
+      },
+      TestFunction::Empty);
+
+  // Cartesian product of a tetrahedron and a 2D cone.
+  // The tetrahedron is empty and has vertices at
+  // (1/3, 0, 0), (2/3, 0, 0), (2/3, 0, 100), and (100, 100 - 1/3, 100).
+  // The cone has apex at (10000, 10000) and
+  // edges passing through (1/3, 0) and (2/3, 0).
+  // Since the tetrahedron is empty, the Cartesian product is too.
+  checkPermutationsSample(false /* empty */, 5,
+                          {
+                              // Tetrahedron constraints:
+                              {0, 1, 0, 0, 0, 0},
+                              {0, -300, 299, 0, 0, 0},
+                              {300 * 299, -89400, -299, 0, 0, -100 * 299},
+                              {-897, 894, 0, 0, 0, 598},
+
+                              // Triangle constraints:
+                              // 300000p - 299999q >= 100000
+                              {0, 0, 0, 300000, -299999, -100000},
+                              // -300000p + 299998q + 200000 >= 0
+                              {0, 0, 0, -300000, 299998, 200000},
+                          },
+                          {}, TestFunction::Empty);
+
+  // Cartesian product of same tetrahedron as above and
+  // {(p, q) : 1/3 <= p <= 2/3}.
+  checkPermutationsSample(false /* empty */, 5,
+                          {
+                              // Tetrahedron constraints:
+                              {0, 1, 0, 0, 0, 0},
+                              {0, -300, 299, 0, 0, 0},
+                              {300 * 299, -89400, -299, 0, 0, -100 * 299},
+                              {-897, 894, 0, 0, 0, 598},
+
+                              // Second set constraints:
+                              // 3p >= 1
+                              {0, 0, 0, 3, 0, -1},
+                              // 3p <= 2
+                              {0, 0, 0, -3, 0, 2},
+                          },
+                          {}, TestFunction::Empty);
+
+  EXPECT_FALSE(makeFACFromConstraints(3,
+                                      {
+                                          {2, 0, 0, -1}, // 2x >= 1
+                                      },
+                                      {{
+                                          {1, -1, 0, -1}, // y = x - 1
+                                          {0, 1, -1, 0},  // z = y
+                                      }})
+                   .isIntegerEmpty());
+
   // FlatAffineConstraints::isEmpty() does not detect the following sets to be
   // empty.
 
diff --git a/mlir/unittests/Analysis/CMakeLists.txt b/mlir/unittests/Analysis/CMakeLists.txt
index 6317aeb8df892..0df0af866d662 100644
--- a/mlir/unittests/Analysis/CMakeLists.txt
+++ b/mlir/unittests/Analysis/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_mlir_unittest(MLIRAnalysisTests
   AffineStructuresTest.cpp
+  LinearTransformTest.cpp
   PresburgerSetTest.cpp
 )
 
diff --git a/mlir/unittests/Analysis/LinearTransformTest.cpp b/mlir/unittests/Analysis/LinearTransformTest.cpp
new file mode 100644
index 0000000000000..598c84920d5df
--- /dev/null
+++ b/mlir/unittests/Analysis/LinearTransformTest.cpp
@@ -0,0 +1,87 @@
+//===- LinearTransformTest.cpp - Tests for LinearTransform ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Analysis/LinearTransform.h"
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace mlir {
+
+/// Checks the rank and the column echelon form computed for the matrix `m`.
+void testColumnEchelonForm(const Matrix &m, unsigned expectedRank) {
+  unsigned lastAllowedNonZeroCol = 0;
+  std::pair<unsigned, LinearTransform> result =
+      LinearTransform::makeTransformToColumnEchelon(m);
+  EXPECT_EQ(result.first, expectedRank);
+  // In column echelon form, each row's last non-zero value can be at most one
+  // column to the right of the last non-zero column among the previous rows.
+  for (unsigned row = 0, nRows = m.getNumRows(); row < nRows; ++row) {
+    SmallVector<int64_t, 8> rowVec = result.second.applyTo(m.getRow(row));
+    for (unsigned col = lastAllowedNonZeroCol + 1, nCols = m.getNumColumns();
+         col < nCols; ++col) {
+      EXPECT_EQ(rowVec[col], 0);
+      if (rowVec[col] != 0) {
+        llvm::errs() << "Failed at input matrix:\n";
+        m.dump();
+      }
+    }
+    // Check the bound first: every column may already be a pivot column.
+    if (lastAllowedNonZeroCol < rowVec.size() &&
+        rowVec[lastAllowedNonZeroCol] != 0)
+      lastAllowedNonZeroCol++;
+  }
+  // It now indexes the first all-zeros column, so it must equal the rank.
+  EXPECT_EQ(lastAllowedNonZeroCol, result.first);
+}
+
+TEST(LinearTransformTest, transformToColumnEchelonTest) {
+  // m1, m2, m3 are rank 1 matrices -- the rows are equal up to sign.
+  Matrix m1(2, 2);
+  m1(0, 0) = 4;
+  m1(0, 1) = -7;
+  m1(1, 0) = 4;
+  m1(1, 1) = -7;
+  testColumnEchelonForm(m1, 1u);
+
+  Matrix m2(2, 2);
+  m2(0, 0) = -4;
+  m2(0, 1) = 7;
+  m2(1, 0) = 4;
+  m2(1, 1) = -7;
+  testColumnEchelonForm(m2, 1u);
+
+  Matrix m3(2, 2);
+  m3(0, 0) = -4;
+  m3(0, 1) = -7;
+  m3(1, 0) = -4;
+  m3(1, 1) = -7;
+  testColumnEchelonForm(m3, 1u);
+
+  // m4, m5, m6 are rank 2 matrices -- the rows are linearly independent.
+  Matrix m4(2, 2);
+  m4(0, 0) = 4;
+  m4(0, 1) = -7;
+  m4(1, 0) = -4;
+  m4(1, 1) = -7;
+  testColumnEchelonForm(m4, 2u);
+
+  Matrix m5(2, 2);
+  m5(0, 0) = -4;
+  m5(0, 1) = 7;
+  m5(1, 0) = 4;
+  m5(1, 1) = 7;
+  testColumnEchelonForm(m5, 2u);
+
+  Matrix m6(2, 2);
+  m6(0, 0) = -4;
+  m6(0, 1) = -7;
+  m6(1, 0) = 4;
+  m6(1, 1) = -7;
+  testColumnEchelonForm(m6, 2u);
+}
+} // namespace mlir

From 763c1f9933463c40c39c04b68bbe4d296823b003 Mon Sep 17 00:00:00 2001
From: Shilei Tian <tianshilei1992@gmail.com>
Date: Thu, 14 Jan 2021 13:34:18 -0500
Subject: [PATCH 15/17] [OpenMP] Drop the static library libomptarget-nvptx

For NVPTX target, OpenMP provides a static library `libomptarget-nvptx`
built by NVCC, and another bitcode `libomptarget-nvptx-sm_{$sm}.bc` generated by
Clang. When compiling an OpenMP program, the `.bc` file will be fed to `clang`
in the second run on the program that compiles the target part. Then the generated
PTX file will be fed to `ptxas` to generate the object file, and finally the driver
invokes `nvlink` to generate the binary, where the static library will be appended
to `nvlink`.

One question is, why do we need two libraries? The only difference is, the static
library contains `omp_data.cu` and the bitcode library doesn't. It's unclear why
they were implemented in this way, but per D94565, there is no issue if we also
include the file into the bitcode library. Therefore, we can safely drop the
static library.

This patch is about the change in OpenMP. The driver will be updated as well if
this patch is accepted.

Reviewed By: jdoerfert, JonChesterfield

Differential Revision: https://reviews.llvm.org/D94573
---
 .../deviceRTLs/nvptx/CMakeLists.txt           | 96 ++++---------------
 1 file changed, 19 insertions(+), 77 deletions(-)

diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index ea11c8114166e..200c6401d6284 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -10,31 +10,6 @@
 #
 ##===----------------------------------------------------------------------===##
 
-set(LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER "" CACHE STRING
-  "Path to alternate NVCC host compiler to be used by the NVPTX device RTL.")
-
-if(LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER)
-  find_program(ALTERNATE_CUDA_HOST_COMPILER NAMES ${LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER})
-  if(NOT ALTERNATE_CUDA_HOST_COMPILER)
-    libomptarget_say("Not building CUDA offloading device RTL: invalid NVPTX alternate host compiler.")
-  endif()
-  set(CUDA_HOST_COMPILER ${ALTERNATE_CUDA_HOST_COMPILER} CACHE FILEPATH "" FORCE)
-endif()
-
-# We can't use clang as nvcc host preprocessor, so we attempt to replace it with
-# gcc.
-if(CUDA_HOST_COMPILER MATCHES clang)
-
-  find_program(LIBOMPTARGET_NVPTX_ALTERNATE_GCC_HOST_COMPILER NAMES gcc)
-
-  if(NOT LIBOMPTARGET_NVPTX_ALTERNATE_GCC_HOST_COMPILER)
-    libomptarget_say("Not building CUDA offloading device RTL: clang is not supported as NVCC host compiler.")
-    libomptarget_say("Please include gcc in your path or set LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER to the full path of of valid compiler.")
-    return()
-  endif()
-  set(CUDA_HOST_COMPILER "${LIBOMPTARGET_NVPTX_ALTERNATE_GCC_HOST_COMPILER}" CACHE FILEPATH "" FORCE)
-endif()
-
 get_filename_component(devicertl_base_directory
   ${CMAKE_CURRENT_SOURCE_DIR}
   DIRECTORY)
@@ -44,28 +19,6 @@ set(devicertl_nvptx_directory
   ${devicertl_base_directory}/nvptx)
 
 if(LIBOMPTARGET_DEP_CUDA_FOUND)
-  libomptarget_say("Building CUDA offloading device RTL.")
-
-  # We really don't have any host code, so we don't need to care about
-  # propagating host flags.
-  set(CUDA_PROPAGATE_HOST_FLAGS OFF)
-
-  set(cuda_src_files
-      ${devicertl_common_directory}/src/cancel.cu
-      ${devicertl_common_directory}/src/critical.cu
-      ${devicertl_common_directory}/src/data_sharing.cu
-      ${devicertl_common_directory}/src/libcall.cu
-      ${devicertl_common_directory}/src/loop.cu
-      ${devicertl_common_directory}/src/omp_data.cu
-      ${devicertl_common_directory}/src/omptarget.cu
-      ${devicertl_common_directory}/src/parallel.cu
-      ${devicertl_common_directory}/src/reduction.cu
-      ${devicertl_common_directory}/src/support.cu
-      ${devicertl_common_directory}/src/sync.cu
-      ${devicertl_common_directory}/src/task.cu
-      src/target_impl.cu
-  )
-
   # Build library support for the highest compute capability the system supports
   # and always build support for sm_35 by default
   if (${LIBOMPTARGET_DEP_CUDA_ARCH} EQUAL 35)
@@ -94,24 +47,6 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
   # Activate RTL message dumps if requested by the user.
   set(LIBOMPTARGET_NVPTX_DEBUG FALSE CACHE BOOL
     "Activate NVPTX device RTL debug messages.")
-  if(${LIBOMPTARGET_NVPTX_DEBUG})
-    set(CUDA_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1 -g --ptxas-options=-v)
-  endif()
-
-  # NVPTX runtime library has to be statically linked. Dynamic linking is not
-  # yet supported by the CUDA toolchain on the device.
-  set(BUILD_SHARED_LIBS OFF)
-  set(CUDA_SEPARABLE_COMPILATION ON)
-  list(APPEND CUDA_NVCC_FLAGS -I${devicertl_base_directory}
-                              -I${devicertl_nvptx_directory}/src)
-  cuda_add_library(omptarget-nvptx STATIC ${cuda_src_files}
-      OPTIONS ${CUDA_ARCH} ${CUDA_DEBUG} ${MAX_SM_DEFINITION})
-
-  # Install device RTL under the lib destination folder.
-  install(TARGETS omptarget-nvptx ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}")
-
-  target_link_libraries(omptarget-nvptx ${CUDA_LIBRARIES})
-
 
   # Check if we can create an LLVM bitcode implementation of the runtime library
   # that could be inlined in the user application. For that we need to find
@@ -124,18 +59,25 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
 
   include(LibomptargetNVPTXBitcodeLibrary)
 
-  set(bclib_default FALSE)
-  if (${LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED})
-    set(bclib_default TRUE)
-  endif()
-  set(LIBOMPTARGET_NVPTX_ENABLE_BCLIB ${bclib_default} CACHE BOOL
-    "Enable CUDA LLVM bitcode offloading device RTL.")
-  if (${LIBOMPTARGET_NVPTX_ENABLE_BCLIB})
-    if (NOT ${LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED})
-      libomptarget_error_say("Cannot build CUDA LLVM bitcode offloading device RTL!")
-    endif()
+  if (LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED)
     libomptarget_say("Building CUDA LLVM bitcode offloading device RTL.")
 
+    set(cuda_src_files
+      ${devicertl_common_directory}/src/cancel.cu
+      ${devicertl_common_directory}/src/critical.cu
+      ${devicertl_common_directory}/src/data_sharing.cu
+      ${devicertl_common_directory}/src/libcall.cu
+      ${devicertl_common_directory}/src/loop.cu
+      ${devicertl_common_directory}/src/omp_data.cu
+      ${devicertl_common_directory}/src/omptarget.cu
+      ${devicertl_common_directory}/src/parallel.cu
+      ${devicertl_common_directory}/src/reduction.cu
+      ${devicertl_common_directory}/src/support.cu
+      ${devicertl_common_directory}/src/sync.cu
+      ${devicertl_common_directory}/src/task.cu
+      src/target_impl.cu
+    )
+
     # Set flags for LLVM Bitcode compilation.
     set(bc_flags ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER_FLAGS}
                  -I${devicertl_base_directory}
@@ -195,7 +137,7 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
       # Copy library to destination.
       add_custom_command(TARGET omptarget-nvptx-${sm}-bc POST_BUILD
                          COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc
-                         $<TARGET_FILE_DIR:omptarget-nvptx>)
+                         ${LIBOMPTARGET_LIBRARY_DIR})
 
       # Install bitcode library under the lib destination folder.
       install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc DESTINATION "${OPENMP_INSTALL_LIBDIR}")
@@ -204,5 +146,5 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
 
   add_subdirectory(test)
 else()
-  libomptarget_say("Not building CUDA offloading device RTL: CUDA tools not found in the system.")
+  libomptarget_say("Not building CUDA offloading device RTL: tools to build bc lib not found in the system.")
 endif()

From 4fffbc150cca1638051b8ad2a20f4b8240df0869 Mon Sep 17 00:00:00 2001
From: Zequan Wu <zequanwu@google.com>
Date: Wed, 13 Jan 2021 19:14:25 -0800
Subject: [PATCH 16/17] [clang][MSVC] Fix missing MSInheritanceAttr in template
 specialization.

Fix PR48687.

Differential Revision: https://reviews.llvm.org/D94646
---
 clang/lib/Sema/SemaTemplate.cpp                        |  5 +++++
 .../test/CodeGenCXX/microsoft-abi-member-pointers.cpp  | 10 ++++++++++
 2 files changed, 15 insertions(+)

diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 64259767d98a6..12880b95b9c63 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -9764,6 +9764,11 @@ DeclResult Sema::ActOnExplicitInstantiation(
       dllExportImportClassTemplateSpecialization(*this, Def);
     }
 
+    if (Def->hasAttr<MSInheritanceAttr>()) {
+      Specialization->addAttr(Def->getAttr<MSInheritanceAttr>());
+      Consumer.AssignInheritanceModel(Specialization);
+    }
+
     // Set the template specialization kind. Make sure it is set before
     // instantiating the members which will trigger ASTConsumer callbacks.
     Specialization->setTemplateSpecializationKind(TSK);
diff --git a/clang/test/CodeGenCXX/microsoft-abi-member-pointers.cpp b/clang/test/CodeGenCXX/microsoft-abi-member-pointers.cpp
index bfe620df5ce32..527363a6ff8fd 100644
--- a/clang/test/CodeGenCXX/microsoft-abi-member-pointers.cpp
+++ b/clang/test/CodeGenCXX/microsoft-abi-member-pointers.cpp
@@ -148,6 +148,16 @@ const C table[] = {
 // CHECK-SAME:  %"struct.pr43803::C" { { i32, i32, i32 } { i32 8, i32 0, i32 0 }, [4 x i8] undef }]
 }
 
+namespace pr48687 {
+template <typename T> struct A {
+  T value;
+  static constexpr auto address = &A<T>::value;
+};
+extern template class A<float>;
+template class A<float>;
+// CHECK: @"?address@?$A@M@pr48687@@2QQ12@MQ12@" = weak_odr dso_local constant i32 0, comdat, align 4
+}
+
 struct PR26313_Y;
 typedef void (PR26313_Y::*PR26313_FUNC)();
 struct PR26313_X {

From 202d359753d1f130a228c3ad52dfaabf384250d1 Mon Sep 17 00:00:00 2001
From: Hiroshi Yamauchi <yamauchi@google.com>
Date: Mon, 11 Jan 2021 11:02:37 -0800
Subject: [PATCH 17/17] [X86] Add the FSRM feature (Fast Short Rep Mov) to
 Zen3.

Note -x86-use-fsrm-for-memcpy is still disabled by default and there's no
default behavior change.

Differential Revision: https://reviews.llvm.org/D94436
---
 llvm/lib/Target/X86/X86.td                  | 3 ++-
 llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 9096d9d544529..c492d686c52e1 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1071,7 +1071,8 @@ def ProcessorFeatures {
   list<SubtargetFeature> ZN2Tuning = ZNTuning;
   list<SubtargetFeature> ZN2Features =
     !listconcat(ZNFeatures, ZN2AdditionalFeatures);
-  list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureINVPCID,
+  list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM,
+                                                  FeatureINVPCID,
                                                   FeaturePKU,
                                                   FeatureVAES,
                                                   FeatureVPCLMULQDQ];
diff --git a/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll b/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll
index 9480d74723fcc..77e97626b1c60 100644
--- a/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll
+++ b/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll
@@ -4,6 +4,7 @@
 ; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=haswell < %s | FileCheck %s --check-prefix=NOFSRM
 ; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=icelake-client < %s | FileCheck %s --check-prefix=FSRM
 ; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=icelake-server < %s | FileCheck %s --check-prefix=FSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=znver3 < %s | FileCheck %s --check-prefix=FSRM
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind