From 0afdbb4d2dead42df14361ca9f5613d56667481c Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Thu, 14 Jan 2021 16:39:16 +0000 Subject: [PATCH 01/17] [flang][driver] Use __FLANG_VERISION__ in f18.cpp (nfc) Just a minor improvement suggested in a post-commit review here: https://reviews.llvm.org/D94422 --- flang/tools/f18/f18.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/flang/tools/f18/f18.cpp b/flang/tools/f18/f18.cpp index bdf48d0711422b..9a10aeda7b2498 100644 --- a/flang/tools/f18/f18.cpp +++ b/flang/tools/f18/f18.cpp @@ -389,8 +389,7 @@ void Link(std::vector &liblist, std::vector &objects, int printVersion() { llvm::errs() << "\nf18 compiler (under development), version " - << FLANG_VERSION_MAJOR << "." << FLANG_VERSION_MINOR << "." - << FLANG_VERSION_PATCHLEVEL << "\n"; + << FLANG_VERSION_STRING << "\n"; return exitStatus; } From e21bf875c0f709a721d98450203781a605483a1d Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 13 Jan 2021 11:02:10 -0800 Subject: [PATCH 02/17] [NFC] Disallow unused prefixes under MC/ARM Differential Revision: https://reviews.llvm.org/D94620 --- llvm/test/MC/ARM/ldr-pseudo-cond-darwin.s | 4 ++-- llvm/test/MC/ARM/ldr-pseudo-cond.s | 4 ++-- llvm/test/MC/ARM/lit.local.cfg | 8 ++++++++ llvm/test/MC/ARM/lsl-zero-errors.s | 6 +++--- llvm/test/MC/ARM/lsl-zero.s | 6 +++--- llvm/test/MC/ARM/mve-fp-registers.s | 22 +++++++++++----------- llvm/test/MC/ARM/thumbv8m.s | 12 ++++++------ 7 files changed, 35 insertions(+), 27 deletions(-) diff --git a/llvm/test/MC/ARM/ldr-pseudo-cond-darwin.s b/llvm/test/MC/ARM/ldr-pseudo-cond-darwin.s index 915b883bc75565..34dda323527654 100644 --- a/llvm/test/MC/ARM/ldr-pseudo-cond-darwin.s +++ b/llvm/test/MC/ARM/ldr-pseudo-cond-darwin.s @@ -1,5 +1,5 @@ -@RUN: llvm-mc -triple armv7-base-apple-darwin %s | FileCheck --check-prefix=CHECK-ARM --check-prefix=CHECK %s -@RUN: llvm-mc -triple thumbv7-base-apple-darwin %s | FileCheck --check-prefix=CHECK-THUMB2 --check-prefix=CHECK %s 
+@RUN: llvm-mc -triple armv7-base-apple-darwin %s | FileCheck --check-prefix=CHECK %s +@RUN: llvm-mc -triple thumbv7-base-apple-darwin %s | FileCheck --check-prefix=CHECK %s @ @ Check that ldr to constant pool correctly transfers the condition codes diff --git a/llvm/test/MC/ARM/ldr-pseudo-cond.s b/llvm/test/MC/ARM/ldr-pseudo-cond.s index fa78311965c592..2785247f2256c2 100644 --- a/llvm/test/MC/ARM/ldr-pseudo-cond.s +++ b/llvm/test/MC/ARM/ldr-pseudo-cond.s @@ -1,5 +1,5 @@ -@RUN: llvm-mc -triple armv7-unknown-linux-gnueabi %s | FileCheck --check-prefix=CHECK-ARM --check-prefix=CHECK %s -@RUN: llvm-mc -triple thumbv7-unknown-linux-gnueabi %s | FileCheck --check-prefix=CHECK-THUMB2 --check-prefix=CHECK %s +@RUN: llvm-mc -triple armv7-unknown-linux-gnueabi %s | FileCheck --check-prefix=CHECK %s +@RUN: llvm-mc -triple thumbv7-unknown-linux-gnueabi %s | FileCheck --check-prefix=CHECK %s @ @ Check that ldr to constant pool correctly transfers the condition codes diff --git a/llvm/test/MC/ARM/lit.local.cfg b/llvm/test/MC/ARM/lit.local.cfg index 236e1d34416659..b305cf706e7781 100644 --- a/llvm/test/MC/ARM/lit.local.cfg +++ b/llvm/test/MC/ARM/lit.local.cfg @@ -1,2 +1,10 @@ +from lit.llvm.subst import ToolSubst + if not 'ARM' in config.root.targets: config.unsupported = True + +fc = ToolSubst('FileCheck', unresolved='fatal') +# Insert this first. Then, we'll first update the blank FileCheck command; then, +# the default substitution of FileCheck will replace it to its full path. 
+config.substitutions.insert(0, (fc.regex, + 'FileCheck --allow-unused-prefixes=false')) diff --git a/llvm/test/MC/ARM/lsl-zero-errors.s b/llvm/test/MC/ARM/lsl-zero-errors.s index 937b50f62da04a..ad39470a4a5011 100644 --- a/llvm/test/MC/ARM/lsl-zero-errors.s +++ b/llvm/test/MC/ARM/lsl-zero-errors.s @@ -1,6 +1,6 @@ -// RUN: not llvm-mc -triple=thumbv7 -show-encoding < %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONARM --check-prefix=CHECK-THUMBV7 %s -// RUN: not llvm-mc -triple=thumbv8 -show-encoding < %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONARM --check-prefix=CHECK-THUMBV8 %s -// RUN: llvm-mc -triple=armv7 -show-encoding < %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM %s +// RUN: not llvm-mc -triple=thumbv7 -show-encoding < %s 2>&1 | FileCheck --check-prefixes=CHECK-NONARM,CHECK-THUMBV7 %s +// RUN: not llvm-mc -triple=thumbv8 -show-encoding < %s 2>&1 | FileCheck --check-prefixes=CHECK-NONARM,CHECK-THUMBV8 %s +// RUN: llvm-mc -triple=armv7 -show-encoding < %s 2>&1 | FileCheck --check-prefix=CHECK-ARM %s // lsl #0 is actually mov, so here we check that it behaves the same as // mov with regards to the permitted registers diff --git a/llvm/test/MC/ARM/lsl-zero.s b/llvm/test/MC/ARM/lsl-zero.s index 6e64e001236245..81a599d6841763 100644 --- a/llvm/test/MC/ARM/lsl-zero.s +++ b/llvm/test/MC/ARM/lsl-zero.s @@ -1,6 +1,6 @@ -// RUN: llvm-mc -triple=thumbv7 -show-encoding < %s 2>/dev/null | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONARM --check-prefix=CHECK-THUMBV7 %s -// RUN: llvm-mc -triple=thumbv8 -show-encoding < %s 2>/dev/null | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONARM --check-prefix=CHECK-THUMBV8 %s -// RUN: llvm-mc -triple=armv7 -show-encoding < %s 2>/dev/null | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM %s +// RUN: llvm-mc -triple=thumbv7 -show-encoding < %s 2>/dev/null | FileCheck --check-prefix=CHECK-NONARM %s +// RUN: llvm-mc -triple=thumbv8 -show-encoding < %s 
2>/dev/null | FileCheck --check-prefix=CHECK-NONARM %s +// RUN: llvm-mc -triple=armv7 -show-encoding < %s 2>/dev/null | FileCheck --check-prefix=CHECK-ARM %s // lsl #0 is actually mov, so here we check that it behaves the same as // mov with regards to the permitted registers and how it behaves in an diff --git a/llvm/test/MC/ARM/mve-fp-registers.s b/llvm/test/MC/ARM/mve-fp-registers.s index 745c464183fa66..886de8c4797e7e 100644 --- a/llvm/test/MC/ARM/mve-fp-registers.s +++ b/llvm/test/MC/ARM/mve-fp-registers.s @@ -10,36 +10,36 @@ // All of these instructions are rejected if no VFP or MVE features are // present. // RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding 2>%t < %s -// RUN: FileCheck %s < %t --check-prefix=NOFP16 --check-prefix=NOFP32 --check-prefix=NOFP64 +// RUN: FileCheck %s < %t --check-prefixes=NOFP16,NOFP32,NOFP64 // VFP and NEON implementations by default have FP32 and FP64, but not FP16. // The VFPv3 FP16 extension just added conversion instructions, which we don't // care about here. -// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp2 2>%t < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP32 --check-prefix=FP64 +// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp2 2>%t < %s | FileCheck %s --check-prefixes=FP32,FP64 // RUN: FileCheck %s < %t --check-prefix=NOFP16 -// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+fp-armv8,+neon 2>%t < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP32 --check-prefix=FP64 +// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+fp-armv8,+neon 2>%t < %s | FileCheck %s --check-prefixes=FP32,FP64 // RUN: FileCheck %s < %t --check-prefix=NOFP16 // The v8.2A FP16 extension added loads, stores and moves for FP16. 
-// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+fp-armv8,+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP16 --check-prefix=FP32 --check-prefix=FP64 +// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+fp-armv8,+fullfp16 < %s | FileCheck %s --check-prefixes=FP16,FP32,FP64 // M-profile FPUs (e.g. Cortex-M4/M7/M33) do not have FP16 instructions, and // the FP64 instructions are optional. They are also limited to 16 D registers, // but we don't test that here. -// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp4d16sp 2>%t < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP32 -// RUN: FileCheck %s < %t --check-prefix=NOFP16 --check-prefix=NOFP64 -// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp4,-d32 2>%t < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP32 --check-prefix=FP64 +// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp4d16sp 2>%t < %s | FileCheck %s --check-prefix=FP32 +// RUN: FileCheck %s < %t --check-prefixes=NOFP16,NOFP64 +// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp4,-d32 2>%t < %s | FileCheck %s --check-prefixes=FP32,FP64 // RUN: FileCheck %s < %t --check-prefix=NOFP16 // Integer-only MVE, which can be combined with different options for scalar // FPU (or lack thereof), and has all of the move and store instructions // regardless of the scalar FPU. 
-// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+mve 2>%t < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP16 --check-prefix=FP32 --check-prefix=FP64 -// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+mve,+fp-armv8-sp,+fullfp16 2>%t < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP16 --check-prefix=FP32 --check-prefix=FP64 -// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+mve,+fp-armv8,+fullfp16 2>%t < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP16 --check-prefix=FP32 --check-prefix=FP64 +// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+mve 2>%t < %s | FileCheck %s --check-prefixes=FP16,FP32,FP64 +// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+mve,+fp-armv8-sp,+fullfp16 2>%t < %s | FileCheck %s --check-prefixes=FP16,FP32,FP64 +// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+mve,+fp-armv8,+fullfp16 2>%t < %s | FileCheck %s --check-prefixes=FP16,FP32,FP64 // Maximal v8.1M target: MVE with FP, and scalar FP with double-precision. 
-// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP16 --check-prefix=FP32 --check-prefix=FP64 +// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=FP16,FP32,FP64 vldmia r0, {d0} # FP32: vldmia r0, {d0} @ encoding: [0x90,0xec,0x02,0x0b] diff --git a/llvm/test/MC/ARM/thumbv8m.s b/llvm/test/MC/ARM/thumbv8m.s index 88ca22fcdb9378..0e9ab4a9b3bf91 100644 --- a/llvm/test/MC/ARM/thumbv8m.s +++ b/llvm/test/MC/ARM/thumbv8m.s @@ -1,12 +1,12 @@ // RUN: not llvm-mc -triple=thumbv8m.base -show-encoding < %s 2>%t \ -// RUN: | FileCheck --check-prefix=CHECK-BASELINE --check-prefix=CHECK %s -// RUN: FileCheck --check-prefix=UNDEF-BASELINE --check-prefix=UNDEF < %t %s +// RUN: | FileCheck --check-prefix=CHECK %s +// RUN: FileCheck --check-prefixes=UNDEF-BASELINE,UNDEF < %t %s // RUN: not llvm-mc -triple=thumbv8m.main -show-encoding < %s 2>%t \ -// RUN: | FileCheck --check-prefix=CHECK-MAINLINE --check-prefix=CHECK %s -// RUN: FileCheck --check-prefix=UNDEF-MAINLINE --check-prefix=UNDEF < %t %s +// RUN: | FileCheck --check-prefixes=CHECK-MAINLINE,CHECK %s +// RUN: FileCheck --check-prefixes=UNDEF-MAINLINE,UNDEF < %t %s // RUN: not llvm-mc -triple=thumbv8m.main -mattr=+dsp -show-encoding < %s 2>%t \ -// RUN: | FileCheck --check-prefix=CHECK-MAINLINE_DSP --check-prefix=CHECK %s -// RUN: FileCheck --check-prefix=UNDEF-MAINLINE_DSP --check-prefix=UNDEF < %t %s +// RUN: | FileCheck --check-prefixes=CHECK-MAINLINE_DSP,CHECK %s +// RUN: FileCheck --check-prefixes=UNDEF-MAINLINE_DSP,UNDEF < %t %s // Simple check that baseline is v6M and mainline is v7M // UNDEF-BASELINE: error: instruction requires: thumb2 From 2f395b7092bdac0e39bb4e2bb5e6b03e521a45dd Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Sun, 10 Jan 2021 16:23:03 +0100 Subject: [PATCH 03/17] [clangd] Make AST-based signals available to runWithPreamble. 
Many useful signals can be derived from a valid AST which is regularly updated by the ASTWorker. `runWithPreamble` does not have access to the ParsedAST but it can be provided access to some signals derived from a (possibly stale) AST. Differential Revision: https://reviews.llvm.org/D94424 --- clang-tools-extra/clangd/ASTSignals.cpp | 42 +++++++++++ clang-tools-extra/clangd/ASTSignals.h | 39 ++++++++++ clang-tools-extra/clangd/CMakeLists.txt | 1 + clang-tools-extra/clangd/TUScheduler.cpp | 73 +++++++++++------- clang-tools-extra/clangd/TUScheduler.h | 3 + .../clangd/unittests/ASTSignalsTests.cpp | 75 +++++++++++++++++++ .../clangd/unittests/CMakeLists.txt | 1 + .../clangd/unittests/TUSchedulerTests.cpp | 47 +++++++++++- 8 files changed, 252 insertions(+), 29 deletions(-) create mode 100644 clang-tools-extra/clangd/ASTSignals.cpp create mode 100644 clang-tools-extra/clangd/ASTSignals.h create mode 100644 clang-tools-extra/clangd/unittests/ASTSignalsTests.cpp diff --git a/clang-tools-extra/clangd/ASTSignals.cpp b/clang-tools-extra/clangd/ASTSignals.cpp new file mode 100644 index 00000000000000..da849287bbf698 --- /dev/null +++ b/clang-tools-extra/clangd/ASTSignals.cpp @@ -0,0 +1,42 @@ +//===--- ASTSignals.cpp - LSP server -----------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ASTSignals.h" +#include "AST.h" +#include "FindTarget.h" + +namespace clang { +namespace clangd { +ASTSignals ASTSignals::derive(const ParsedAST &AST) { + ASTSignals Signals; + const SourceManager &SM = AST.getSourceManager(); + findExplicitReferences(AST.getASTContext(), [&](ReferenceLoc Ref) { + for (const NamedDecl *ND : Ref.Targets) { + if (!isInsideMainFile(Ref.NameLoc, SM)) + continue; + SymbolID ID = getSymbolID(ND); + if (!ID) + continue; + unsigned &SymbolCount = Signals.ReferencedSymbols[ID]; + SymbolCount++; + // Process namespace only when we see the symbol for the first time. + if (SymbolCount != 1) + continue; + if (const auto *NSD = dyn_cast(ND->getDeclContext())) { + if (NSD->isAnonymousNamespace()) + continue; + std::string NS = printNamespaceScope(*NSD); + if (!NS.empty()) + Signals.RelatedNamespaces[NS]++; + } + } + }); + return Signals; +} +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/ASTSignals.h b/clang-tools-extra/clangd/ASTSignals.h new file mode 100644 index 00000000000000..bc70cd17310a13 --- /dev/null +++ b/clang-tools-extra/clangd/ASTSignals.h @@ -0,0 +1,39 @@ +//===--- ASTSignals.h - LSP server -------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_ASTSIGNALS_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_ASTSIGNALS_H + +#include "ParsedAST.h" +#include "index/SymbolID.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" + +namespace clang { +namespace clangd { + +/// Signals derived from a valid AST of a file. 
+/// Provides information that can only be extracted from the AST to actions that +/// can't access an AST. The signals are computed and updated asynchronously by +/// the ASTWorker and thus they are always stale and also can be absent. +/// Example usage: Information about the declarations used in a file affects +/// code-completion ranking in that file. +struct ASTSignals { + /// Number of occurrences of each symbol present in the file. + llvm::DenseMap ReferencedSymbols; + /// Namespaces whose symbols are used in the file, and the number of such + /// distinct symbols. + llvm::StringMap RelatedNamespaces; + + static ASTSignals derive(const ParsedAST &AST); +}; + +} // namespace clangd +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_ASTSIGNALS_H diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 9e62e094802745..1d12e7e2355df7 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -46,6 +46,7 @@ include_directories(BEFORE "${CMAKE_CURRENT_BINARY_DIR}/../clang-tidy") add_clang_library(clangDaemon AST.cpp + ASTSignals.cpp ClangdLSPServer.cpp ClangdServer.cpp CodeComplete.cpp diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp index 7a858664faa5a0..16c186c34738d0 100644 --- a/clang-tools-extra/clangd/TUScheduler.cpp +++ b/clang-tools-extra/clangd/TUScheduler.cpp @@ -392,7 +392,8 @@ class ASTWorker { TUScheduler::ASTActionInvalidation); bool blockUntilIdle(Deadline Timeout) const; - std::shared_ptr getPossiblyStalePreamble() const; + std::shared_ptr getPossiblyStalePreamble( + std::shared_ptr *ASTSignals = nullptr) const; /// Used to inform ASTWorker about a new preamble build by PreambleThread. /// Diagnostics are only published through this callback. 
This ensures they @@ -437,6 +438,8 @@ class ASTWorker { void generateDiagnostics(std::unique_ptr Invocation, ParseInputs Inputs, std::vector CIDiags); + void updateASTSignals(ParsedAST &AST); + // Must be called exactly once on processing thread. Will return after // stop() is called on a separate thread and all pending requests are // processed. @@ -499,6 +502,7 @@ class ASTWorker { /// Signalled whenever a new request has been scheduled or processing of a /// request has completed. mutable std::condition_variable RequestsCV; + std::shared_ptr LatestASTSignals; /* GUARDED_BY(Mutex) */ /// Latest build preamble for current TU. /// None means no builds yet, null means there was an error while building. /// Only written by ASTWorker's thread. @@ -830,6 +834,16 @@ void ASTWorker::updatePreamble(std::unique_ptr CI, RequestsCV.notify_all(); } +void ASTWorker::updateASTSignals(ParsedAST &AST) { + auto Signals = std::make_shared(ASTSignals::derive(AST)); + // Existing readers of ASTSignals will have their copy preserved until the + // read is completed. The last reader deletes the old ASTSignals. + { + std::lock_guard Lock(Mutex); + std::swap(LatestASTSignals, Signals); + } +} + void ASTWorker::generateDiagnostics( std::unique_ptr Invocation, ParseInputs Inputs, std::vector CIDiags) { @@ -908,6 +922,7 @@ void ASTWorker::generateDiagnostics( if (*AST) { trace::Span Span("Running main AST callback"); Callbacks.onMainAST(FileName, **AST, RunPublish); + updateASTSignals(**AST); } else { // Failed to build the AST, at least report diagnostics from the // command line if there were any. @@ -925,9 +940,11 @@ void ASTWorker::generateDiagnostics( } } -std::shared_ptr -ASTWorker::getPossiblyStalePreamble() const { +std::shared_ptr ASTWorker::getPossiblyStalePreamble( + std::shared_ptr *ASTSignals) const { std::lock_guard Lock(Mutex); + if (ASTSignals) + *ASTSignals = LatestASTSignals; return LatestPreamble ? 
*LatestPreamble : nullptr; } @@ -1364,38 +1381,40 @@ void TUScheduler::runWithPreamble(llvm::StringRef Name, PathRef File, if (!PreambleTasks) { trace::Span Tracer(Name); SPAN_ATTACH(Tracer, "file", File); + std::shared_ptr Signals; std::shared_ptr Preamble = - It->second->Worker->getPossiblyStalePreamble(); + It->second->Worker->getPossiblyStalePreamble(&Signals); WithContext WithProvidedContext(Opts.ContextProvider(File)); Action(InputsAndPreamble{It->second->Contents, It->second->Worker->getCurrentCompileCommand(), - Preamble.get()}); + Preamble.get(), Signals.get()}); return; } std::shared_ptr Worker = It->second->Worker.lock(); - auto Task = - [Worker, Consistency, Name = Name.str(), File = File.str(), - Contents = It->second->Contents, - Command = Worker->getCurrentCompileCommand(), - Ctx = Context::current().derive(kFileBeingProcessed, std::string(File)), - Action = std::move(Action), this]() mutable { - std::shared_ptr Preamble; - if (Consistency == PreambleConsistency::Stale) { - // Wait until the preamble is built for the first time, if preamble - // is required. This avoids extra work of processing the preamble - // headers in parallel multiple times. - Worker->waitForFirstPreamble(); - } - Preamble = Worker->getPossiblyStalePreamble(); - - std::lock_guard BarrierLock(Barrier); - WithContext Guard(std::move(Ctx)); - trace::Span Tracer(Name); - SPAN_ATTACH(Tracer, "file", File); - WithContext WithProvidedContext(Opts.ContextProvider(File)); - Action(InputsAndPreamble{Contents, Command, Preamble.get()}); - }; + auto Task = [Worker, Consistency, Name = Name.str(), File = File.str(), + Contents = It->second->Contents, + Command = Worker->getCurrentCompileCommand(), + Ctx = Context::current().derive(kFileBeingProcessed, + std::string(File)), + Action = std::move(Action), this]() mutable { + std::shared_ptr Preamble; + if (Consistency == PreambleConsistency::Stale) { + // Wait until the preamble is built for the first time, if preamble + // is required. 
This avoids extra work of processing the preamble + // headers in parallel multiple times. + Worker->waitForFirstPreamble(); + } + std::shared_ptr Signals; + Preamble = Worker->getPossiblyStalePreamble(&Signals); + + std::lock_guard BarrierLock(Barrier); + WithContext Guard(std::move(Ctx)); + trace::Span Tracer(Name); + SPAN_ATTACH(Tracer, "file", File); + WithContext WithProvidedContext(Opts.ContextProvider(File)); + Action(InputsAndPreamble{Contents, Command, Preamble.get(), Signals.get()}); + }; PreambleTasks->runAsync("task:" + llvm::sys::path::filename(File), std::move(Task)); diff --git a/clang-tools-extra/clangd/TUScheduler.h b/clang-tools-extra/clangd/TUScheduler.h index cc38db8071aba5..5a8f4d3b817a9a 100644 --- a/clang-tools-extra/clangd/TUScheduler.h +++ b/clang-tools-extra/clangd/TUScheduler.h @@ -9,6 +9,7 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_TUSCHEDULER_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_TUSCHEDULER_H +#include "ASTSignals.h" #include "Compiler.h" #include "Diagnostics.h" #include "GlobalCompilationDatabase.h" @@ -43,6 +44,8 @@ struct InputsAndPreamble { const tooling::CompileCommand &Command; // This can be nullptr if no preamble is available. const PreambleData *Preamble; + // This can be nullptr if no ASTSignals are available. + const ASTSignals *Signals; }; /// Determines whether diagnostics should be generated for a file snapshot. diff --git a/clang-tools-extra/clangd/unittests/ASTSignalsTests.cpp b/clang-tools-extra/clangd/unittests/ASTSignalsTests.cpp new file mode 100644 index 00000000000000..2d8c1846a8ae88 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/ASTSignalsTests.cpp @@ -0,0 +1,75 @@ +//===-- ASTSignalsTests.cpp -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "AST.h" + +#include "ParsedAST.h" +#include "TestIndex.h" +#include "TestTU.h" +#include "llvm/ADT/StringRef.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { +namespace { + +using ::testing::_; +using ::testing::Pair; +using ::testing::UnorderedElementsAre; + +TEST(ASTSignals, Derive) { + TestTU TU = TestTU::withCode(R"cpp( + namespace ns1 { + namespace ns2 { + namespace { + int func() { + tar::X a; + a.Y = 1; + return ADD(tar::kConst, a.Y, tar::foo()) + fooInNS2() + tar::foo(); + } + } // namespace + } // namespace ns2 + } // namespace ns1 + )cpp"); + + TU.HeaderCode = R"cpp( + #define ADD(x, y, z) (x + y + z) + namespace tar { // A related namespace. + int kConst = 5; + int foo(); + void bar(); // Unused symbols are not recorded. + class X { + public: int Y; + }; + } // namespace tar + namespace ns1::ns2 { int fooInNS2(); }} + )cpp"; + ASTSignals Signals = ASTSignals::derive(TU.build()); + std::vector> NS; + for (const auto &P : Signals.RelatedNamespaces) + NS.emplace_back(P.getKey(), P.getValue()); + EXPECT_THAT(NS, UnorderedElementsAre(Pair("ns1::", 1), Pair("ns1::ns2::", 1), + Pair("tar::", /*foo, kConst, X*/ 3))); + + std::vector> Sym; + for (const auto &P : Signals.ReferencedSymbols) + Sym.emplace_back(P.getFirst(), P.getSecond()); + EXPECT_THAT( + Sym, + UnorderedElementsAre( + Pair(ns("tar").ID, 4), Pair(ns("ns1").ID, 1), + Pair(ns("ns1::ns2").ID, 1), Pair(_ /*int func();*/, 1), + Pair(cls("tar::X").ID, 1), Pair(var("tar::kConst").ID, 1), + Pair(func("tar::foo").ID, 2), Pair(func("ns1::ns2::fooInNS2").ID, 1), + Pair(sym("Y", index::SymbolKind::Variable, "@N@tar@S@X@FI@\\0").ID, + 2), + Pair(_ /*a*/, 3))); +} +} // namespace +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt 
b/clang-tools-extra/clangd/unittests/CMakeLists.txt index 10f10f200471da..adf4ac827cce34 100644 --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -35,6 +35,7 @@ add_custom_target(ClangdUnitTests) add_unittest(ClangdUnitTests ClangdTests Annotations.cpp ASTTests.cpp + ASTSignalsTests.cpp BackgroundIndexTests.cpp CallHierarchyTests.cpp CanonicalIncludesTests.cpp diff --git a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp index c87c1be6f8e9a2..0c9455f0eaf63b 100644 --- a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp +++ b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp @@ -14,6 +14,7 @@ #include "Preamble.h" #include "TUScheduler.h" #include "TestFS.h" +#include "TestIndex.h" #include "support/Cancellation.h" #include "support/Context.h" #include "support/Path.h" @@ -48,6 +49,7 @@ using ::testing::ElementsAre; using ::testing::Eq; using ::testing::Field; using ::testing::IsEmpty; +using ::testing::Pair; using ::testing::Pointee; using ::testing::SizeIs; using ::testing::UnorderedElementsAre; @@ -679,12 +681,12 @@ TEST_F(TUSchedulerTests, EmptyPreamble) { cantFail(std::move(Preamble)).Preamble->Preamble.getBounds().Size, 0u); }); - // Wait for the preamble is being built. + // Wait while the preamble is being built. ASSERT_TRUE(S.blockUntilIdle(timeoutSeconds(10))); // Update the file which results in an empty preamble. S.update(Foo, getInputs(Foo, WithEmptyPreamble), WantDiagnostics::Auto); - // Wait for the preamble is being built. + // Wait while the preamble is being built. 
ASSERT_TRUE(S.blockUntilIdle(timeoutSeconds(10))); S.runWithPreamble( "getEmptyPreamble", Foo, TUScheduler::Stale, @@ -696,6 +698,47 @@ TEST_F(TUSchedulerTests, EmptyPreamble) { }); } +TEST_F(TUSchedulerTests, ASTSignalsSmokeTests) { + TUScheduler S(CDB, optsForTest()); + auto Foo = testPath("foo.cpp"); + auto Header = testPath("foo.h"); + + FS.Files[Header] = "namespace tar { int foo(); }"; + const char *Contents = R"cpp( + #include "foo.h" + namespace ns { + int func() { + return tar::foo()); + } + } // namespace ns + )cpp"; + // Update the file which results in an empty preamble. + S.update(Foo, getInputs(Foo, Contents), WantDiagnostics::Yes); + // Wait while the preamble is being built. + ASSERT_TRUE(S.blockUntilIdle(timeoutSeconds(10))); + Notification TaskRun; + S.runWithPreamble( + "ASTSignals", Foo, TUScheduler::Stale, + [&](Expected IP) { + ASSERT_FALSE(!IP); + std::vector> NS; + for (const auto &P : IP->Signals->RelatedNamespaces) + NS.emplace_back(P.getKey(), P.getValue()); + EXPECT_THAT(NS, + UnorderedElementsAre(Pair("ns::", 1), Pair("tar::", 1))); + + std::vector> Sym; + for (const auto &P : IP->Signals->ReferencedSymbols) + Sym.emplace_back(P.getFirst(), P.getSecond()); + EXPECT_THAT(Sym, UnorderedElementsAre(Pair(ns("tar").ID, 1), + Pair(ns("ns").ID, 1), + Pair(func("tar::foo").ID, 1), + Pair(func("ns::func").ID, 1))); + TaskRun.notify(); + }); + TaskRun.wait(); +} + TEST_F(TUSchedulerTests, RunWaitsForPreamble) { // Testing strategy: we update the file and schedule a few preamble reads at // the same time. All reads should get the same non-null preamble. From 8b09cf7956d8abc722fa736874e4cea667a9d3cb Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Thu, 14 Jan 2021 18:38:42 +0100 Subject: [PATCH 04/17] [clangd] Trivial: Documentation fix in ASTSignals. 
--- clang-tools-extra/clangd/ASTSignals.cpp | 2 +- clang-tools-extra/clangd/ASTSignals.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/ASTSignals.cpp b/clang-tools-extra/clangd/ASTSignals.cpp index da849287bbf698..b8cc7f05927a76 100644 --- a/clang-tools-extra/clangd/ASTSignals.cpp +++ b/clang-tools-extra/clangd/ASTSignals.cpp @@ -1,4 +1,4 @@ -//===--- ASTSignals.cpp - LSP server -----------------------------*- C++-*-===// +//===--- ASTSignals.cpp ------------------------------------------*- C++-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang-tools-extra/clangd/ASTSignals.h b/clang-tools-extra/clangd/ASTSignals.h index bc70cd17310a13..fd31be38ce8bae 100644 --- a/clang-tools-extra/clangd/ASTSignals.h +++ b/clang-tools-extra/clangd/ASTSignals.h @@ -1,4 +1,4 @@ -//===--- ASTSignals.h - LSP server -------------------------------*- C++-*-===// +//===--- ASTSignals.h --------------------------------------------*- C++-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
From b4e083b0ef7ca86851b5b1d043004ae632a63f8d Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 14 Jan 2021 17:39:58 +0000 Subject: [PATCH 05/17] [gn build] Port 2f395b7092bd --- llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn | 1 + .../gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn index c07a61fe61e437..88a51958061807 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn @@ -67,6 +67,7 @@ static_library("clangd") { ] sources = [ "AST.cpp", + "ASTSignals.cpp", "ClangdLSPServer.cpp", "ClangdServer.cpp", "CodeComplete.cpp", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn index 34ba224fa7e039..26cc183b132eb8 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn @@ -42,6 +42,7 @@ unittest("ClangdTests") { target_gen_dir, ] sources = [ + "ASTSignalsTests.cpp", "ASTTests.cpp", "Annotations.cpp", "BackgroundIndexTests.cpp", From 4864d9f7e91fdd58a84e4ae576f1ad16f71f9d91 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Wed, 13 Jan 2021 14:12:23 -0800 Subject: [PATCH 06/17] [flang] Fix some module file issues exposed by Whizard Generic type-bound interfaces for user-defined operators need to be formatted as "OPERATOR(.op.)", not just ".op." PRIVATE generics need to be marked as such. Declaration ordering: when a generic interface shadows a derived type of the same name, it needs to be emitted to the module file at the point of definition of the derived type; otherwise, the derived type's definition may appear after its first use. 
The module symbol for a module read from a module file needs to be marked as coming from a module file before semantic processing is performed on the contents of the module so that any special handling for declarations in module files can be properly activated. IMPORT statements were sometimes missing for use-associated symbols in surrounding scopes; fine-tune NeedImport(). Differential Revision: https://reviews.llvm.org/D94636 --- flang/lib/Semantics/mod-file.cpp | 79 +++++++++++++++++++++--------- flang/test/Semantics/modfile35.f90 | 6 +-- flang/test/Semantics/modfile37.f90 | 32 ++++++++++++ flang/test/Semantics/modfile38.f90 | 35 +++++++++++++ 4 files changed, 125 insertions(+), 27 deletions(-) create mode 100644 flang/test/Semantics/modfile37.f90 create mode 100644 flang/test/Semantics/modfile38.f90 diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index 23733f944d8cdf..af3267a1c9a08f 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -198,6 +198,15 @@ bool ModFileWriter::PutSymbols(const Scope &scope) { } } +static llvm::raw_ostream &PutGenericName( + llvm::raw_ostream &os, const Symbol &symbol) { + if (IsGenericDefinedOp(symbol)) { + return os << "operator(" << symbol.name() << ')'; + } else { + return os << symbol.name(); + } +} + // Emit a symbol to decls_, except for bindings in a derived type (type-bound // procedures, type-bound generics, final procedures) which go to typeBindings. 
void ModFileWriter::PutSymbol( @@ -210,8 +219,8 @@ void ModFileWriter::PutSymbol( if (symbol.owner().IsDerivedType()) { // generic binding for (const Symbol &proc : x.specificProcs()) { - typeBindings << "generic::" << symbol.name() << "=>" - << proc.name() << '\n'; + PutGenericName(typeBindings << "generic::", symbol) + << "=>" << proc.name() << '\n'; } } else { PutGeneric(symbol); @@ -392,15 +401,6 @@ static bool IsIntrinsicOp(const Symbol &symbol) { } } -static llvm::raw_ostream &PutGenericName( - llvm::raw_ostream &os, const Symbol &symbol) { - if (IsGenericDefinedOp(symbol)) { - return os << "operator(" << symbol.name() << ')'; - } else { - return os << symbol.name(); - } -} - void ModFileWriter::PutGeneric(const Symbol &symbol) { const auto &genericOwner{symbol.owner()}; auto &details{symbol.get()}; @@ -427,9 +427,11 @@ void ModFileWriter::PutUse(const Symbol &symbol) { PutGenericName(uses_ << "=>", use); } uses_ << '\n'; - PutUseExtraAttr(Attr::PRIVATE, symbol, use); PutUseExtraAttr(Attr::VOLATILE, symbol, use); PutUseExtraAttr(Attr::ASYNCHRONOUS, symbol, use); + if (symbol.attrs().test(Attr::PRIVATE)) { + PutGenericName(useExtraAttrs_ << "private::", symbol) << '\n'; + } } // We have "USE local => use" in this module. If attr was added locally @@ -442,6 +444,31 @@ void ModFileWriter::PutUseExtraAttr( } } +// When a generic interface has the same name as a derived type +// in the same scope, the generic shadows the derived type. +// If the derived type were declared first, emit the generic +// interface at the position of derived type's declaration. +// (ReplaceName() is not used for this purpose because doing so +// would confusingly position error messages pertaining to the generic +// interface upon the derived type's declaration.) 
+static inline SourceName NameInModuleFile(const Symbol &symbol) { + if (const auto *generic{symbol.detailsIf()}) { + if (const auto *derivedTypeOverload{generic->derivedType()}) { + if (derivedTypeOverload->name().begin() < symbol.name().begin()) { + return derivedTypeOverload->name(); + } + } + } else if (const auto *use{symbol.detailsIf()}) { + if (use->symbol().attrs().test(Attr::PRIVATE)) { + // Avoid the use in sorting of names created to access private + // specific procedures as a result of generic resolution; + // they're not in the cooked source. + return use->symbol().name(); + } + } + return symbol.name(); +} + // Collect the symbols of this scope sorted by their original order, not name. // Namelists are an exception: they are sorted after other symbols. void CollectSymbols( @@ -465,7 +492,7 @@ void CollectSymbols( // Sort most symbols by name: use of Symbol::ReplaceName ensures the source // location of a symbol's name is the first "real" use. std::sort(sorted.begin(), sorted.end(), [](SymbolRef x, SymbolRef y) { - return x->name().begin() < y->name().begin(); + return NameInModuleFile(x).begin() < NameInModuleFile(y).begin(); }); sorted.insert(sorted.end(), namelist.begin(), namelist.end()); for (const auto &pair : scope.commonBlocks()) { @@ -819,13 +846,15 @@ Scope *ModFileReader::Read(const SourceName &name, Scope *ancestor) { } else { parentScope = ancestor; } - ResolveNames(context_, *parseTree); - const auto &it{parentScope->find(name)}; - if (it == parentScope->end()) { + auto pair{parentScope->try_emplace(name, UnknownDetails{})}; + if (!pair.second) { return nullptr; } - auto &modSymbol{*it->second}; + Symbol &modSymbol{*pair.first->second}; modSymbol.set(Symbol::Flag::ModFile); + ResolveNames(context_, *parseTree); + CHECK(modSymbol.has()); + CHECK(modSymbol.test(Symbol::Flag::ModFile)); return modSymbol.scope(); } @@ -974,14 +1003,16 @@ bool SubprogramSymbolCollector::NeedImport( const SourceName &name, const Symbol &symbol) { if 
(!isInterface_) { return false; - } else if (symbol.owner() != scope_.parent()) { - // detect import from parent of use-associated symbol - // can be null in the case of a use-associated derived type's parent type - const auto *found{scope_.FindSymbol(name)}; - CHECK(found || symbol.has()); - return found && found->has() && found->owner() != scope_; - } else { + } else if (symbol.owner().Contains(scope_)) { return true; + } else if (const Symbol * found{scope_.FindSymbol(name)}) { + // detect import from ancestor of use-associated symbol + return found->has() && found->owner() != scope_; + } else { + // "found" can be null in the case of a use-associated derived type's parent + // type + CHECK(symbol.has()); + return false; } } diff --git a/flang/test/Semantics/modfile35.f90 b/flang/test/Semantics/modfile35.f90 index 928b6c5472bbae..1c50bfed25e3ce 100644 --- a/flang/test/Semantics/modfile35.f90 +++ b/flang/test/Semantics/modfile35.f90 @@ -205,13 +205,13 @@ subroutine test4(x, y, a) ! contains ! procedure,pass(x)::p1=>f1 ! procedure::p3=>f3 -! generic::.binary.=>p1 -! generic::.unary.=>p3 +! generic::operator(.binary.)=>p1 +! generic::operator(.unary.)=>p3 ! end type ! type,extends(t1)::t2 ! contains ! procedure,pass(y)::p2=>f2 -! generic::.binary.=>p2 +! generic::operator(.binary.)=>p2 ! end type !contains ! pure function f1(x,y) diff --git a/flang/test/Semantics/modfile37.f90 b/flang/test/Semantics/modfile37.f90 new file mode 100644 index 00000000000000..61f64fbb9169c7 --- /dev/null +++ b/flang/test/Semantics/modfile37.f90 @@ -0,0 +1,32 @@ +! RUN: %S/test_modfile.sh %s %t %f18 + +! Ensure that a dummy procedure's interface's imports +! appear in the module file. 
+ +module m + type :: t + end type + contains + subroutine s1(s2) + interface + subroutine s2(x) + import + class(t) :: x + end subroutine + end interface + end subroutine +end module +!Expect: m.mod +!module m +!type::t +!end type +!contains +!subroutine s1(s2) +!interface +!subroutine s2(x) +!import::t +!class(t)::x +!end +!end interface +!end +!end diff --git a/flang/test/Semantics/modfile38.f90 b/flang/test/Semantics/modfile38.f90 new file mode 100644 index 00000000000000..c234568444b3dc --- /dev/null +++ b/flang/test/Semantics/modfile38.f90 @@ -0,0 +1,35 @@ +! RUN: %S/test_modfile.sh %s %t %f18 + +! Ensure that an interface with the same name as a derived type +! does not cause that shadowed name to be emitted later than its +! uses in the module file. + +module m + type :: t + end type + type :: t2 + type(t) :: c + end type + interface t + module procedure f + end interface + contains + type(t) function f + end function +end module + +!Expect: m.mod +!module m +!interface t +!procedure::f +!end interface +!type::t +!end type +!type::t2 +!type(t)::c +!end type +!contains +!function f() +!type(t)::f +!end +!end From 35c8a6cbf5ff0b525e2c01e5d746067bdda1dde7 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 13 Jan 2021 10:39:25 -0800 Subject: [PATCH 07/17] [NFC] Disallow unused prefixes under MC/AArch64 Differential Revision: https://reviews.llvm.org/D94616 --- llvm/test/MC/AArch64/armv8.7a-ls64.s | 4 ++-- llvm/test/MC/AArch64/armv8.7a-xs.s | 6 +++--- llvm/test/MC/AArch64/lit.local.cfg | 8 ++++++++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/llvm/test/MC/AArch64/armv8.7a-ls64.s b/llvm/test/MC/AArch64/armv8.7a-ls64.s index c647ecce53f099..d4684e38cbea28 100644 --- a/llvm/test/MC/AArch64/armv8.7a-ls64.s +++ b/llvm/test/MC/AArch64/armv8.7a-ls64.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+ls64 < %s 2>%t | FileCheck %s -// RUN: FileCheck --check-prefix=CHECK-ERR --check-prefix=CHECK-LS64-ERR %s < 
%t +// RUN: FileCheck --check-prefix=CHECK-ERR %s < %t // RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t -// RUN: FileCheck --check-prefix=CHECK-ERR --check-prefix=CHECK-NO-LS64-ERR %s < %t +// RUN: FileCheck --check-prefixes=CHECK-ERR,CHECK-NO-LS64-ERR %s < %t ld64b x0, [x13] st64b x14, [x13] diff --git a/llvm/test/MC/AArch64/armv8.7a-xs.s b/llvm/test/MC/AArch64/armv8.7a-xs.s index 6193c1f15f534e..e3a1e12aae9a5f 100644 --- a/llvm/test/MC/AArch64/armv8.7a-xs.s +++ b/llvm/test/MC/AArch64/armv8.7a-xs.s @@ -1,9 +1,9 @@ // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a,+xs < %s 2>%t | FileCheck %s -// RUN: FileCheck --check-prefix=CHECK-ERR --check-prefix=CHECK-XS-ERR %s < %t +// RUN: FileCheck --check-prefix=CHECK-ERR %s < %t // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.7a < %s 2>%t | FileCheck %s -// RUN: FileCheck --check-prefix=CHECK-ERR --check-prefix=CHECK-XS-ERR %s < %t +// RUN: FileCheck --check-prefix=CHECK-ERR %s < %t // RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.4a < %s 2> %t -// RUN: FileCheck --check-prefix=CHECK-ERR --check-prefix=CHECK-NO-XS-ERR %s < %t +// RUN: FileCheck --check-prefixes=CHECK-ERR,CHECK-NO-XS-ERR %s < %t dsb #16 dsb #20 diff --git a/llvm/test/MC/AArch64/lit.local.cfg b/llvm/test/MC/AArch64/lit.local.cfg index 5822b72266874f..ab829130e2696a 100644 --- a/llvm/test/MC/AArch64/lit.local.cfg +++ b/llvm/test/MC/AArch64/lit.local.cfg @@ -1,2 +1,10 @@ +from lit.llvm.subst import ToolSubst + if 'AArch64' not in config.root.targets: config.unsupported = True + +fc = ToolSubst('FileCheck', unresolved='fatal') +# Insert this first. Then, we'll first update the blank FileCheck command; then, +# the default substitution of FileCheck will replace it to its full path. 
+config.substitutions.insert(0, (fc.regex, + 'FileCheck --allow-unused-prefixes=false')) From a03ffa98503bb6d5a990e61df060ed480c3e3f3b Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 13 Jan 2021 21:46:25 -0800 Subject: [PATCH 08/17] [NewPM] Fix placement of LoopFlatten https://reviews.llvm.org/D90402 was inconsistent with where it put LoopFlatten between the two pass managers. It also missed adding it to the non-O1 function simplification pipeline. PR48738 Reviewed By: SjoerdMeijer Differential Revision: https://reviews.llvm.org/D94650 --- llvm/lib/Passes/PassBuilder.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index d5c0c47bd9a64c..7f3f132ab82b16 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -562,8 +562,6 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); LPM1.addPass(SimpleLoopUnswitchPass()); - if (EnableLoopFlatten) - FPM.addPass(LoopFlattenPass()); LPM2.addPass(LoopIdiomRecognizePass()); LPM2.addPass(IndVarSimplifyPass()); @@ -594,6 +592,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, DebugLogging)); FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); + if (EnableLoopFlatten) + FPM.addPass(LoopFlattenPass()); // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA. // *All* loop passes must preserve it, in order to be able to use it. FPM.addPass(createFunctionToLoopPassAdaptor( @@ -756,6 +756,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, DebugLogging)); FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); + if (EnableLoopFlatten) + FPM.addPass(LoopFlattenPass()); // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass, // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. 
// *All* loop passes must preserve it, in order to be able to use it. From b99782cf7850a481fa36fd95ae04923739e0da6d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 14 Jan 2021 17:51:06 +0000 Subject: [PATCH 09/17] [X86][AVX] Adjust unsigned saturation downconvert negative test D87145 was showing that this test (added in D45315) could always be constant folded (with suitable value tracking). What we actually needed was smax(smin()) negative test coverage, the invert of negative_test2_smax_usat_trunc_wb_256_mem, so I've tweaked the test to provide that instead. --- llvm/test/CodeGen/X86/avx512-trunc.ll | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/llvm/test/CodeGen/X86/avx512-trunc.ll b/llvm/test/CodeGen/X86/avx512-trunc.ll index 0b2a47c2772cc1..d61ada4e5d0550 100644 --- a/llvm/test/CodeGen/X86/avx512-trunc.ll +++ b/llvm/test/CodeGen/X86/avx512-trunc.ll @@ -1007,10 +1007,8 @@ define <16 x i16> @smax_usat_trunc_dw_512(<16 x i32> %i) { define void @negative_test1_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) { ; KNL-LABEL: negative_test1_smax_usat_trunc_wb_256_mem: ; KNL: ## %bb.0: -; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; KNL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0 +; KNL-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; KNL-NEXT: vpmovdb %zmm0, (%rdi) ; KNL-NEXT: vzeroupper @@ -1018,17 +1016,15 @@ define void @negative_test1_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* ; ; SKX-LABEL: negative_test1_smax_usat_trunc_wb_256_mem: ; SKX: ## %bb.0: -; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 -; SKX-NEXT: 
vpcmpeqd %ymm1, %ymm1, %ymm1 -; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 +; SKX-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0 +; SKX-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0 ; SKX-NEXT: vpmovwb %ymm0, (%rdi) ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq - %x1 = icmp sgt <16 x i16> %i, - %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> - %x3 = icmp slt <16 x i16> %x2, - %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> + %x1 = icmp slt <16 x i16> %i, + %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> + %x3 = icmp sgt <16 x i16> %x2, + %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> %x6 = trunc <16 x i16> %x5 to <16 x i8> store <16 x i8> %x6, <16 x i8>* %res, align 1 ret void From 5d165f0b893d4fc5fb5caeb2b05c566dd26e4d89 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Thu, 14 Jan 2021 18:13:22 +0000 Subject: [PATCH 10/17] [libomptarget][amdgpu] Fix kernel launch tracing to match previous behavior Restore control of kernel launch tracing to be >= 1 as it was before export LIBOMPTARGET_KERNEL_TRACE=1 Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D94695 --- openmp/libomptarget/plugins/amdgpu/src/rtl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp index bd450f9898faf7..9453171e137807 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -1762,7 +1762,7 @@ int32_t __tgt_rtl_run_target_team_region_locked( loop_tripcount, // From run_region arg KernelInfo->device_id); - if (print_kernel_trace == 4) + if (print_kernel_trace >= 1) // enum modes are SPMD, GENERIC, NONE 0,1,2 fprintf(stderr, "DEVID:%2d SGN:%1d ConstWGSize:%-4d args:%2d teamsXthrds:(%4dX%4d) " From 868da2ea939baf8c71a6dcb878cf6094ede9486e Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 3 Sep 2020 19:22:33 +0100 Subject: [PATCH 11/17] [SelectionDAG] Remove an early-out from 
computeKnownBits for smin/smax Even if we know nothing about LHS, it can still be useful to know that smax(LHS, RHS) >= RHS and smin(LHS, RHS) <= RHS. Differential Revision: https://reviews.llvm.org/D87145 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1 - llvm/test/CodeGen/X86/known-bits-vector.ll | 12 ++---------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 7084ab68524b56..82da553954d2f6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3416,7 +3416,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - if (Known.isUnknown()) break; // Early-out Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); if (IsMax) Known = KnownBits::smax(Known, Known2); diff --git a/llvm/test/CodeGen/X86/known-bits-vector.ll b/llvm/test/CodeGen/X86/known-bits-vector.ll index 3b6912a9d94610..05bf984101abc5 100644 --- a/llvm/test/CodeGen/X86/known-bits-vector.ll +++ b/llvm/test/CodeGen/X86/known-bits-vector.ll @@ -435,11 +435,7 @@ define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) { ; X32-NEXT: vpminsd {{\.LCPI.*}}, %xmm0, %xmm0 ; X32-NEXT: vpmaxsd {{\.LCPI.*}}, %xmm0, %xmm0 ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3] -; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; X32-NEXT: vpsrld $16, %xmm0, %xmm0 -; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; X32-NEXT: vsubps {{\.LCPI.*}}, %xmm0, %xmm0 -; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; X32-NEXT: vcvtdq2ps %xmm0, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: knownbits_smax_smin_shuffle_uitofp: @@ -447,11 +443,7 @@ define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) { ; X64-NEXT: vpminsd 
{{.*}}(%rip), %xmm0, %xmm0 ; X64-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3] -; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; X64-NEXT: vpsrld $16, %xmm0, %xmm0 -; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; X64-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 -; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; X64-NEXT: vcvtdq2ps %xmm0, %xmm0 ; X64-NEXT: retq %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> ) %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> ) From 85dfcaadc5f0920dc8ecbece6c786701b8f45ab4 Mon Sep 17 00:00:00 2001 From: Joseph Tremoulet Date: Thu, 14 Jan 2021 13:17:02 -0500 Subject: [PATCH 12/17] [LLDB] MinidumpParser: Prefer executable module even at higher address When a program maps one of its own modules for reading, and then crashes, breakpad can emit two entries for that module in the ModuleList. We have logic to identify this case by checking permissions on mapped memory regions and report just the module with an executable region. As currently written, though, the check is asymmetric -- the entry with the executable region must be the second one encountered for the preference to kick in. This change makes the logic symmetric, so that the first-encountered module will similarly be preferred if it has an executable region but the second-encountered module does not. This happens for example when the module in question is the executable itself, which breakpad likes to report first -- we need to ignore the other entry for that module when we see it later, even though it may be mapped at a lower virtual address. 
Reviewed By: clayborg Differential Revision: https://reviews.llvm.org/D94629 --- .../Process/minidump/MinidumpParser.cpp | 26 +++++++----- .../Process/minidump/MinidumpParserTest.cpp | 41 +++++++++++++++++++ 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp b/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp index e16f86cca1c21d..61106ebcc43036 100644 --- a/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp +++ b/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp @@ -391,19 +391,23 @@ std::vector MinidumpParser::GetFilteredModuleList() { filtered_modules.push_back(&module); } else { // We have a duplicate module entry. Check the linux regions to see if - // the module we already have is not really a mapped executable. If it - // isn't check to see if the current duplicate module entry is a real - // mapped executable, and if so, replace it. This can happen when a - // process mmap's in the file for an executable in order to read bytes - // from the executable file. A memory region mapping will exist for the - // mmap'ed version and for the loaded executable, but only one will have - // a consecutive region that is executable in the memory regions. + // either module is not really a mapped executable. If one but not the + // other is a real mapped executable, prefer the executable one. This + // can happen when a process mmap's in the file for an executable in + // order to read bytes from the executable file. A memory region mapping + // will exist for the mmap'ed version and for the loaded executable, but + // only one will have a consecutive region that is executable in the + // memory regions. 
auto dup_module = filtered_modules[iter->second]; ConstString name(*ExpectedName); - if (!CheckForLinuxExecutable(name, linux_regions, - dup_module->BaseOfImage) && - CheckForLinuxExecutable(name, linux_regions, module.BaseOfImage)) { - filtered_modules[iter->second] = &module; + bool is_executable = + CheckForLinuxExecutable(name, linux_regions, module.BaseOfImage); + bool dup_is_executable = + CheckForLinuxExecutable(name, linux_regions, dup_module->BaseOfImage); + + if (is_executable != dup_is_executable) { + if (is_executable) + filtered_modules[iter->second] = &module; continue; } // This module has been seen. Modules are sometimes mentioned multiple diff --git a/lldb/unittests/Process/minidump/MinidumpParserTest.cpp b/lldb/unittests/Process/minidump/MinidumpParserTest.cpp index 69046af283eba0..e3f23c5fe33ada 100644 --- a/lldb/unittests/Process/minidump/MinidumpParserTest.cpp +++ b/lldb/unittests/Process/minidump/MinidumpParserTest.cpp @@ -792,6 +792,47 @@ TEST_F(MinidumpParserTest, MinidumpDuplicateModuleMappedSecond) { EXPECT_EQ(0x400d0000u, filtered_modules[0]->BaseOfImage); } +TEST_F(MinidumpParserTest, MinidumpDuplicateModuleMappedSecondHigh) { + ASSERT_THAT_ERROR(SetUpFromYaml(R"( +--- !minidump +Streams: + - Type: ModuleList + Modules: + - Base of Image: 0x400d3000 + Size of Image: 0x00002000 + Module Name: '/usr/lib/libc.so' + CodeView Record: '' + - Base of Image: 0x400d0000 + Size of Image: 0x00001000 + Module Name: '/usr/lib/libc.so' + CodeView Record: '' + - Type: LinuxMaps + Text: | + 400d0000-400d2000 r--p 00000000 b3:04 227 /usr/lib/libc.so + 400d2000-400d3000 rw-p 00000000 00:00 0 + 400d3000-400d4000 r-xp 00010000 b3:04 227 /usr/lib/libc.so + 400d4000-400d5000 rwxp 00001000 b3:04 227 /usr/lib/libc.so +... 
+)"), + llvm::Succeeded()); + // If we have a module mentioned twice in the module list, and we have full + // linux maps for all of the memory regions, make sure we pick the one that + // has a consecutive region with a matching path that has executable + // permissions. If clients open an object file with mmap, breakpad can create + // multiple mappings for a library erroneously and the lowest address isn't + // always the right address. In this case we check the consecutive memory + // regions whose path matches starting at the base of image address and make + // sure one of the regions is executable and prefer that one. + // + // This test will make sure that if the executable is first in the module + // list, that it will remain the correctly selected module in the filtered + // list, even if the non-executable module was loaded at a lower base address. + std::vector filtered_modules = + parser->GetFilteredModuleList(); + ASSERT_EQ(1u, filtered_modules.size()); + EXPECT_EQ(0x400d3000u, filtered_modules[0]->BaseOfImage); +} + TEST_F(MinidumpParserTest, MinidumpDuplicateModuleSeparateCode) { ASSERT_THAT_ERROR(SetUpFromYaml(R"( --- !minidump From be40c12040a0d5551bf3430cbb184b5ef23e25fd Mon Sep 17 00:00:00 2001 From: Aaron En Ye Shi Date: Thu, 14 Jan 2021 17:52:27 +0000 Subject: [PATCH 13/17] [HIP] Add signbit(long double) decl An _MSC_VER version of signbit(long double) is required for MSVC headers. 
Fixes: SWDEV-256409 Differential Revision: https://reviews.llvm.org/D93062 --- clang/lib/Headers/__clang_cuda_math_forward_declares.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/lib/Headers/__clang_cuda_math_forward_declares.h b/clang/lib/Headers/__clang_cuda_math_forward_declares.h index 8a270859e4a589..c0f1f47cc99302 100644 --- a/clang/lib/Headers/__clang_cuda_math_forward_declares.h +++ b/clang/lib/Headers/__clang_cuda_math_forward_declares.h @@ -160,6 +160,9 @@ __DEVICE__ double scalbln(double, long); __DEVICE__ float scalbln(float, long); __DEVICE__ double scalbn(double, int); __DEVICE__ float scalbn(float, int); +#ifdef _MSC_VER +__DEVICE__ bool signbit(long double); +#endif __DEVICE__ bool signbit(double); __DEVICE__ bool signbit(float); __DEVICE__ double sin(double); From 6ebeba88f51959d763a8f274cdfecea46d51d28c Mon Sep 17 00:00:00 2001 From: Arjun P Date: Thu, 14 Jan 2021 19:29:51 +0100 Subject: [PATCH 14/17] Support emptiness checks for unbounded FlatAffineConstraints. With this, we have complete support for emptiness checks. This also paves the way for future support to check if two FlatAffineConstraints are equal. 
Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D94272 --- mlir/include/mlir/Analysis/AffineStructures.h | 7 + mlir/include/mlir/Analysis/LinearTransform.h | 48 +++++ .../mlir/Analysis/Presburger/Fraction.h | 2 + .../include/mlir/Analysis/Presburger/Matrix.h | 6 + .../mlir/Analysis/Presburger/Simplex.h | 30 ++- mlir/lib/Analysis/AffineStructures.cpp | 151 ++++++++++++- mlir/lib/Analysis/CMakeLists.txt | 2 + mlir/lib/Analysis/LinearTransform.cpp | 156 ++++++++++++++ mlir/lib/Analysis/Presburger/CMakeLists.txt | 2 +- mlir/lib/Analysis/Presburger/Matrix.cpp | 13 ++ mlir/lib/Analysis/Presburger/Simplex.cpp | 31 ++- .../Analysis/AffineStructuresTest.cpp | 203 +++++++++++++++--- mlir/unittests/Analysis/CMakeLists.txt | 1 + .../Analysis/LinearTransformTest.cpp | 87 ++++++++ 14 files changed, 697 insertions(+), 42 deletions(-) create mode 100644 mlir/include/mlir/Analysis/LinearTransform.h create mode 100644 mlir/lib/Analysis/LinearTransform.cpp create mode 100644 mlir/unittests/Analysis/LinearTransformTest.cpp diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h index 25071db100e320..fa80db7d4b639b 100644 --- a/mlir/include/mlir/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Analysis/AffineStructures.h @@ -13,6 +13,7 @@ #ifndef MLIR_ANALYSIS_AFFINE_STRUCTURES_H #define MLIR_ANALYSIS_AFFINE_STRUCTURES_H +#include "mlir/Analysis/Presburger/Matrix.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/OpDefinition.h" #include "mlir/Support/LogicalResult.h" @@ -153,6 +154,12 @@ class FlatAffineConstraints { /// false if a solution exists or all tests were inconclusive. bool isIntegerEmpty() const; + // Returns a matrix where each row is a vector along which the polytope is + // bounded. The span of the returned vectors is guaranteed to contain all + // such vectors. The returned vectors are NOT guaranteed to be linearly + // independent. This function should not be called on empty sets. 
+ Matrix getBoundedDirections() const; + /// Find a sample point satisfying the constraints. This uses a branch and /// bound algorithm with generalized basis reduction, which always works if /// the set is bounded. This should not be called for unbounded sets. diff --git a/mlir/include/mlir/Analysis/LinearTransform.h b/mlir/include/mlir/Analysis/LinearTransform.h new file mode 100644 index 00000000000000..0850f5a0060971 --- /dev/null +++ b/mlir/include/mlir/Analysis/LinearTransform.h @@ -0,0 +1,48 @@ +//===- LinearTransform.h - MLIR LinearTransform Class -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Support for linear transforms and applying them to FlatAffineConstraints. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_ANALYSIS_LINEARTRANSFORM_H +#define MLIR_ANALYSIS_LINEARTRANSFORM_H + +#include "mlir/Analysis/AffineStructures.h" +#include "mlir/Analysis/Presburger/Matrix.h" +#include "llvm/ADT/SmallVector.h" + +namespace mlir { + +class LinearTransform { +public: + explicit LinearTransform(Matrix &&oMatrix); + explicit LinearTransform(const Matrix &oMatrix); + + // Returns a linear transform T such that MT is M in column echelon form. + // Also returns the number of non-zero columns in MT. + // + // Specifically, T is such that in every column the first non-zero row is + // strictly below that of the previous column, and all columns which have only + // zeros are at the end. + static std::pair + makeTransformToColumnEchelon(Matrix m); + + // Returns a FlatAffineConstraints having a constraint vector vT for every + // constraint vector v in fac, where T is this transform. 
+ FlatAffineConstraints applyTo(const FlatAffineConstraints &fac); + + // Post-multiply the given vector v with this transform, say T, returning vT. + SmallVector applyTo(ArrayRef v); + +private: + Matrix matrix; +}; + +} // namespace mlir +#endif // MLIR_ANALYSIS_LINEARTRANSFORM_H diff --git a/mlir/include/mlir/Analysis/Presburger/Fraction.h b/mlir/include/mlir/Analysis/Presburger/Fraction.h index 09996c486ef332..61b0915e559ec6 100644 --- a/mlir/include/mlir/Analysis/Presburger/Fraction.h +++ b/mlir/include/mlir/Analysis/Presburger/Fraction.h @@ -64,6 +64,8 @@ inline bool operator<=(Fraction x, Fraction y) { return compare(x, y) <= 0; } inline bool operator==(Fraction x, Fraction y) { return compare(x, y) == 0; } +inline bool operator!=(Fraction x, Fraction y) { return compare(x, y) != 0; } + inline bool operator>(Fraction x, Fraction y) { return compare(x, y) > 0; } inline bool operator>=(Fraction x, Fraction y) { return compare(x, y) >= 0; } diff --git a/mlir/include/mlir/Analysis/Presburger/Matrix.h b/mlir/include/mlir/Analysis/Presburger/Matrix.h index 7bc29f81a8346c..8ed40bb9c02665 100644 --- a/mlir/include/mlir/Analysis/Presburger/Matrix.h +++ b/mlir/include/mlir/Analysis/Presburger/Matrix.h @@ -58,6 +58,12 @@ class Matrix { /// Add `scale` multiples of the source row to the target row. void addToRow(unsigned sourceRow, unsigned targetRow, int64_t scale); + /// Add `scale` multiples of the source column to the target column. + void addToColumn(unsigned sourceColumn, unsigned targetColumn, int64_t scale); + + /// Negate the specified column. + void negateColumn(unsigned column); + /// Resize the matrix to the specified dimensions. If a dimension is smaller, /// the values are truncated; if it is bigger, the new values are default /// initialized. 
diff --git a/mlir/include/mlir/Analysis/Presburger/Simplex.h b/mlir/include/mlir/Analysis/Presburger/Simplex.h index 05d241e60958c8..370035cbc7ba1f 100644 --- a/mlir/include/mlir/Analysis/Presburger/Simplex.h +++ b/mlir/include/mlir/Analysis/Presburger/Simplex.h @@ -17,10 +17,12 @@ #include "mlir/Analysis/AffineStructures.h" #include "mlir/Analysis/Presburger/Fraction.h" #include "mlir/Analysis/Presburger/Matrix.h" +#include "mlir/IR/Location.h" #include "mlir/Support/LogicalResult.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/StringSaver.h" #include "llvm/Support/raw_ostream.h" namespace mlir { @@ -84,7 +86,7 @@ class GBRSimplex; /// /// The unknowns in row position are represented in terms of the basis unknowns. /// If the basis unknowns are u_1, u_2, ... u_m, and a row in the tableau is -/// d, c, a_1, a_2, ... a_m, this representats the unknown for that row as +/// d, c, a_1, a_2, ... a_m, this represents the unknown for that row as /// (c + a_1*u_1 + a_2*u_2 + ... + a_m*u_m)/d. In our running example, if the /// basis is the initial basis of x, y, then the constraint 1 + 2x + 3y >= 0 /// would be represented by the row [1, 1, 2, 3]. @@ -173,20 +175,25 @@ class Simplex { void intersectFlatAffineConstraints(const FlatAffineConstraints &fac); /// Compute the maximum or minimum value of the given row, depending on - /// direction. The specified row is never pivoted. + /// direction. The specified row is never pivoted. On return, the row may + /// have a negative sample value if the direction is down. /// - /// Returns a (num, den) pair denoting the optimum, or None if no - /// optimum exists, i.e., if the expression is unbounded in this direction. + /// Returns a Fraction denoting the optimum, or a null value if no optimum + /// exists, i.e., if the expression is unbounded in this direction. 
Optional computeRowOptimum(Direction direction, unsigned row); /// Compute the maximum or minimum value of the given expression, depending on - /// direction. + /// direction. Should not be called when the Simplex is empty. /// - /// Returns a (num, den) pair denoting the optimum, or a null value if no - /// optimum exists, i.e., if the expression is unbounded in this direction. + /// Returns a Fraction denoting the optimum, or a null value if no optimum + /// exists, i.e., if the expression is unbounded in this direction. Optional computeOptimum(Direction direction, ArrayRef coeffs); + /// Returns whether the perpendicular of the specified constraint is a + /// direction along which the polytope is bounded. + bool isBoundedAlongConstraint(unsigned constraintIndex); + /// Returns whether the specified constraint has been marked as redundant. /// Constraints are numbered from 0 starting at the first added inequality. /// Equalities are added as a pair of inequalities and so correspond to two @@ -299,6 +306,15 @@ class Simplex { /// sample value, false otherwise. LogicalResult restoreRow(Unknown &u); + /// Compute the maximum or minimum of the specified Unknown, depending on + /// direction. The specified unknown may be pivoted. If the unknown is + /// restricted, it will have a non-negative sample value on return. + /// Should not be called if the Simplex is empty. + /// + /// Returns a Fraction denoting the optimum, or a null value if no optimum + /// exists, i.e., if the expression is unbounded in this direction. + Optional computeOptimum(Direction direction, Unknown &u); + /// Mark the specified unknown redundant. This operation is added to the undo /// log and will be undone by rollbacks. The specified unknown must be in row /// orientation. 
diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index 51141e6f61841f..12c90fbcfc54c4 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Analysis/AffineStructures.h" +#include "mlir/Analysis/LinearTransform.h" #include "mlir/Analysis/Presburger/Simplex.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" @@ -20,6 +21,7 @@ #include "mlir/Support/LLVM.h" #include "mlir/Support/MathExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -1034,21 +1036,152 @@ bool FlatAffineConstraints::isEmptyByGCDTest() const { return false; } -// First, try the GCD test heuristic. +// Returns a matrix where each row is a vector along which the polytope is +// bounded. The span of the returned vectors is guaranteed to contain all +// such vectors. The returned vectors are NOT guaranteed to be linearly +// independent. This function should not be called on empty sets. // -// If that doesn't find the set empty, check if the set is unbounded. If it is, -// we cannot use the GBR algorithm and we conservatively return false. -// -// If the set is bounded, we use the complete emptiness check for this case -// provided by Simplex::findIntegerSample(), which gives a definitive answer. +// It is sufficient to check the perpendiculars of the constraints, as the set +// of perpendiculars which are bounded must span all bounded directions. 
+Matrix FlatAffineConstraints::getBoundedDirections() const { + // Note that it is necessary to add the equalities too (which the constructor + // does) even though we don't need to check if they are bounded; whether an + // inequality is bounded or not depends on what other constraints, including + // equalities, are present. + Simplex simplex(*this); + + assert(!simplex.isEmpty() && "It is not meaningful to ask whether a " + "direction is bounded in an empty set."); + + SmallVector boundedIneqs; + // The constructor adds the inequalities to the simplex first, so this + // processes all the inequalities. + for (unsigned i = 0, e = getNumInequalities(); i < e; ++i) { + if (simplex.isBoundedAlongConstraint(i)) + boundedIneqs.push_back(i); + } + + // The direction vector is given by the coefficients and does not include the + // constant term, so the matrix has one fewer column. + unsigned dirsNumCols = getNumCols() - 1; + Matrix dirs(boundedIneqs.size() + getNumEqualities(), dirsNumCols); + + // Copy the bounded inequalities. + unsigned row = 0; + for (unsigned i : boundedIneqs) { + for (unsigned col = 0; col < dirsNumCols; ++col) + dirs(row, col) = atIneq(i, col); + ++row; + } + + // Copy the equalities. All the equalities' perpendiculars are bounded. 
+ for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) { + for (unsigned col = 0; col < dirsNumCols; ++col) + dirs(row, col) = atEq(i, col); + ++row; + } + + return dirs; +} + +bool eqInvolvesSuffixDims(const FlatAffineConstraints &fac, unsigned eqIndex, + unsigned numDims) { + for (unsigned e = fac.getNumDimIds(), j = e - numDims; j < e; ++j) + if (fac.atEq(eqIndex, j) != 0) + return true; + return false; +} +bool ineqInvolvesSuffixDims(const FlatAffineConstraints &fac, + unsigned ineqIndex, unsigned numDims) { + for (unsigned e = fac.getNumDimIds(), j = e - numDims; j < e; ++j) + if (fac.atIneq(ineqIndex, j) != 0) + return true; + return false; +} + +void removeConstraintsInvolvingSuffixDims(FlatAffineConstraints &fac, + unsigned unboundedDims) { + // We iterate backwards so that whether we remove constraint i - 1 or not, the + // next constraint to be tested is always i - 2. + for (unsigned i = fac.getNumEqualities(); i > 0; i--) + if (eqInvolvesSuffixDims(fac, i - 1, unboundedDims)) + fac.removeEquality(i - 1); + for (unsigned i = fac.getNumInequalities(); i > 0; i--) + if (ineqInvolvesSuffixDims(fac, i - 1, unboundedDims)) + fac.removeInequality(i - 1); +} + +/// Let this set be S. If S is bounded then we directly call into the GBR +/// sampling algorithm. Otherwise, there are some unbounded directions, i.e., +/// vectors v such that S extends to infinity along v or -v. In this case we +/// use an algorithm described in the integer set library (isl) manual and used +/// by the isl_set_sample function in that library. The algorithm is: +/// +/// 1) Apply a unimodular transform T to S to obtain S*T, such that all +/// dimensions in which S*T is bounded lie in the linear span of a prefix of the +/// dimensions. +/// +/// 2) Construct a set transformedSet by removing all constraints that involve +/// the unbounded dimensions and also deleting the unbounded dimensions. Note +/// that this is a bounded set. 
+/// +/// 3) Check if transformedSet is empty using the GBR sampling algorithm. +/// +/// 4) Return that S is empty iff transformedSet is empty. +/// +/// Since T is unimodular, a vector v is a solution to S*T iff T*v is a +/// solution to S. The following is a sketch of a proof that S*T is empty +/// iff transformedSet is empty: +/// +/// If transformedSet is empty, then S*T is certainly empty since transformedSet +/// was obtained by removing constraints and deleting dimensions from S*T. +/// +/// If transformedSet contains a sample, consider the set C obtained by +/// substituting the sample for the bounded dimensions of S*T. All the +/// constraints of S*T that did not involve unbounded dimensions are +/// satisfied by this substitution. +/// +/// In step 1, all dimensions in the linear span of the dimensions outside the +/// prefix are unbounded in S*T. Substituting values for the bounded dimensions +/// cannot make these dimensions bounded, and these are the only remaining +/// dimensions in C, so C is unbounded along every vector. C is hence a +/// full-dimensional cone and therefore always contains an integer point, which +/// we can then substitute to get a full solution to S*T. bool FlatAffineConstraints::isIntegerEmpty() const { + // First, try the GCD test heuristic. if (isEmptyByGCDTest()) return true; Simplex simplex(*this); - if (simplex.isUnbounded()) - return false; - return !simplex.findIntegerSample().hasValue(); + if (simplex.isEmpty()) + return true; + + // For a bounded set, we directly call into the GBR sampling algorithm. + if (!simplex.isUnbounded()) + return !simplex.findIntegerSample().hasValue(); + + // The set is unbounded. We cannot directly use the GBR algorithm. + // + // m is a matrix containing, in each row, a vector in which S is + // bounded, such that the linear span of all these dimensions contains all + // bounded dimensions in S. 
+ Matrix m = getBoundedDirections(); + // In column echelon form, each row of m occupies only the first rank(m) + // columns and has zeros on the other columns. The transform T that brings S + // to column echelon form is unimodular as well, so this is a suitable + // transform to use in step 1 of the algorithm. + std::pair result = + LinearTransform::makeTransformToColumnEchelon(std::move(m)); + FlatAffineConstraints transformedSet = result.second.applyTo(*this); + + unsigned numBoundedDims = result.first; + unsigned numUnboundedDims = getNumIds() - numBoundedDims; + removeConstraintsInvolvingSuffixDims(transformedSet, numUnboundedDims); + + // Remove all the unbounded dimensions. + transformedSet.removeIdRange(numBoundedDims, transformedSet.getNumIds()); + + return !Simplex(transformedSet).findIntegerSample().hasValue(); } Optional> diff --git a/mlir/lib/Analysis/CMakeLists.txt b/mlir/lib/Analysis/CMakeLists.txt index 3247ef1f56b02d..585ba2aa8baf98 100644 --- a/mlir/lib/Analysis/CMakeLists.txt +++ b/mlir/lib/Analysis/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_OPTIONAL_SOURCES AffineStructures.cpp BufferAliasAnalysis.cpp CallGraph.cpp + LinearTransform.cpp Liveness.cpp LoopAnalysis.cpp NestedMatcher.cpp @@ -36,6 +37,7 @@ add_mlir_library(MLIRAnalysis add_mlir_library(MLIRLoopAnalysis AffineAnalysis.cpp AffineStructures.cpp + LinearTransform.cpp LoopAnalysis.cpp NestedMatcher.cpp PresburgerSet.cpp diff --git a/mlir/lib/Analysis/LinearTransform.cpp b/mlir/lib/Analysis/LinearTransform.cpp new file mode 100644 index 00000000000000..7176cb01231f42 --- /dev/null +++ b/mlir/lib/Analysis/LinearTransform.cpp @@ -0,0 +1,156 @@ +//===- LinearTransform.cpp - MLIR LinearTransform Class -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/LinearTransform.h" +#include "mlir/Analysis/AffineStructures.h" + +namespace mlir { + +LinearTransform::LinearTransform(Matrix &&oMatrix) : matrix(oMatrix) {} +LinearTransform::LinearTransform(const Matrix &oMatrix) : matrix(oMatrix) {} + +// Set M(row, targetCol) to its remainder on division by M(row, sourceCol) +// by subtracting from column targetCol an appropriate integer multiple of +// sourceCol. This brings M(row, targetCol) to the range [0, M(row, sourceCol)). +// Apply the same column operation to otherMatrix, with the same integer +// multiple. +static void modEntryColumnOperation(Matrix &m, unsigned row, unsigned sourceCol, + unsigned targetCol, Matrix &otherMatrix) { + assert(m(row, sourceCol) != 0 && "Cannot divide by zero!"); + assert((m(row, sourceCol) > 0 && m(row, targetCol) > 0) && + "Operands must be positive!"); + int64_t ratio = m(row, targetCol) / m(row, sourceCol); + m.addToColumn(sourceCol, targetCol, -ratio); + otherMatrix.addToColumn(sourceCol, targetCol, -ratio); +} + +std::pair +LinearTransform::makeTransformToColumnEchelon(Matrix m) { + // We start with an identity result matrix and perform operations on m + // until m is in column echelon form. We apply the same sequence of operations + // on resultMatrix to obtain a transform that takes m to column echelon + // form. + Matrix resultMatrix = Matrix::identity(m.getNumColumns()); + + unsigned echelonCol = 0; + // Invariant: in all rows above row, all columns from echelonCol onwards + // are all zero elements. In an iteration, if the current row has any non-zero + // elements echelonCol onwards, we bring one to echelonCol and use it to + // make all elements echelonCol + 1 onwards zero. + for (unsigned row = 0; row < m.getNumRows(); ++row) { + // Search row for a non-zero entry, starting at echelonCol. 
+ unsigned nonZeroCol = echelonCol; + for (unsigned e = m.getNumColumns(); nonZeroCol < e; ++nonZeroCol) { + if (m(row, nonZeroCol) == 0) + continue; + break; + } + + // Continue to the next row with the same echelonCol if this row is all + // zeros from echelonCol onwards. + if (nonZeroCol == m.getNumColumns()) + continue; + + // Bring the non-zero column to echelonCol. This doesn't affect rows + // above since they are all zero at these columns. + if (nonZeroCol != echelonCol) { + m.swapColumns(nonZeroCol, echelonCol); + resultMatrix.swapColumns(nonZeroCol, echelonCol); + } + + // Make m(row, echelonCol) non-negative. + if (m(row, echelonCol) < 0) { + m.negateColumn(echelonCol); + resultMatrix.negateColumn(echelonCol); + } + + // Make all the entries in row after echelonCol zero. + for (unsigned i = echelonCol + 1, e = m.getNumColumns(); i < e; ++i) { + // We make m(row, i) non-negative, and then apply the Euclidean GCD + // algorithm to (row, i) and (row, echelonCol). At the end, one of them + // has value equal to the gcd of the two entries, and the other is zero. + + if (m(row, i) < 0) { + m.negateColumn(i); + resultMatrix.negateColumn(i); + } + + unsigned targetCol = i, sourceCol = echelonCol; + // At every step, we set m(row, targetCol) %= m(row, sourceCol), and + // swap the indices sourceCol and targetCol. (not the columns themselves) + // This modulo is implemented as a subtraction + // m(row, targetCol) -= quotient * m(row, sourceCol), + // where quotient = floor(m(row, targetCol) / m(row, sourceCol)), + // which brings m(row, targetCol) to the range [0, m(row, sourceCol)). + // + // We are only allowed column operations; we perform the above + // for every row, i.e., the above subtraction is done as a column + // operation. This does not affect any rows above us since they are + // guaranteed to be zero at these columns. 
+ while (m(row, targetCol) != 0 && m(row, sourceCol) != 0) { + modEntryColumnOperation(m, row, sourceCol, targetCol, resultMatrix); + std::swap(targetCol, sourceCol); + } + + // One of (row, echelonCol) and (row, i) is zero and the other is the gcd. + // Make it so that (row, echelonCol) holds the non-zero value. + if (m(row, echelonCol) == 0) { + m.swapColumns(i, echelonCol); + resultMatrix.swapColumns(i, echelonCol); + } + } + + ++echelonCol; + } + + return {echelonCol, LinearTransform(std::move(resultMatrix))}; +} + +SmallVector LinearTransform::applyTo(ArrayRef v) { + assert(v.size() == matrix.getNumRows() && + "vector dimension should be matrix output dimension"); + + SmallVector result; + result.reserve(v.size()); + for (unsigned col = 0, e = matrix.getNumColumns(); col < e; ++col) { + int64_t elem = 0; + for (unsigned i = 0, e = matrix.getNumRows(); i < e; ++i) + elem += v[i] * matrix(i, col); + result.push_back(elem); + } + return result; +} + +FlatAffineConstraints +LinearTransform::applyTo(const FlatAffineConstraints &fac) { + FlatAffineConstraints result(fac.getNumDimIds()); + + for (unsigned i = 0, e = fac.getNumEqualities(); i < e; ++i) { + ArrayRef eq = fac.getEquality(i); + + int64_t c = eq.back(); + + SmallVector newEq = applyTo(eq.drop_back()); + newEq.push_back(c); + result.addEquality(newEq); + } + + for (unsigned i = 0, e = fac.getNumInequalities(); i < e; ++i) { + ArrayRef ineq = fac.getInequality(i); + + int64_t c = ineq.back(); + + SmallVector newIneq = applyTo(ineq.drop_back()); + newIneq.push_back(c); + result.addInequality(newIneq); + } + + return result; +} + +} // namespace mlir diff --git a/mlir/lib/Analysis/Presburger/CMakeLists.txt b/mlir/lib/Analysis/Presburger/CMakeLists.txt index 49cdd5ac14312b..2561013696d9bd 100644 --- a/mlir/lib/Analysis/Presburger/CMakeLists.txt +++ b/mlir/lib/Analysis/Presburger/CMakeLists.txt @@ -1,4 +1,4 @@ add_mlir_library(MLIRPresburger Simplex.cpp Matrix.cpp - ) \ No newline at end of file + ) diff --git 
a/mlir/lib/Analysis/Presburger/Matrix.cpp b/mlir/lib/Analysis/Presburger/Matrix.cpp index 213f1111e2a3a8..4a5a53921548c7 100644 --- a/mlir/lib/Analysis/Presburger/Matrix.cpp +++ b/mlir/lib/Analysis/Presburger/Matrix.cpp @@ -79,6 +79,19 @@ void Matrix::addToRow(unsigned sourceRow, unsigned targetRow, int64_t scale) { return; } +void Matrix::addToColumn(unsigned sourceColumn, unsigned targetColumn, + int64_t scale) { + if (scale == 0) + return; + for (unsigned row = 0, e = getNumRows(); row < e; ++row) + at(row, targetColumn) += scale * at(row, sourceColumn); +} + +void Matrix::negateColumn(unsigned column) { + for (unsigned row = 0, e = getNumRows(); row < e; ++row) + at(row, column) = -at(row, column); +} + void Matrix::print(raw_ostream &os) const { for (unsigned row = 0; row < nRows; ++row) { for (unsigned column = 0; column < nColumns; ++column) diff --git a/mlir/lib/Analysis/Presburger/Simplex.cpp b/mlir/lib/Analysis/Presburger/Simplex.cpp index 47e199baba2af5..2cfe5929e21dc9 100644 --- a/mlir/lib/Analysis/Presburger/Simplex.cpp +++ b/mlir/lib/Analysis/Presburger/Simplex.cpp @@ -9,6 +9,7 @@ #include "mlir/Analysis/Presburger/Simplex.h" #include "mlir/Analysis/Presburger/Matrix.h" #include "mlir/Support/MathExtras.h" +#include "llvm/ADT/Optional.h" namespace mlir { using Direction = Simplex::Direction; @@ -482,7 +483,7 @@ Optional Simplex::computeRowOptimum(Direction direction, /// or None if it is unbounded. 
Optional Simplex::computeOptimum(Direction direction, ArrayRef coeffs) { - assert(!empty && "Tableau should not be empty"); + assert(!empty && "Simplex should not be empty"); unsigned snapshot = getSnapshot(); unsigned conIndex = addRow(coeffs); @@ -492,6 +493,34 @@ Optional Simplex::computeOptimum(Direction direction, return optimum; } +Optional Simplex::computeOptimum(Direction direction, Unknown &u) { + assert(!empty && "Simplex should not be empty!"); + if (u.orientation == Orientation::Column) { + unsigned column = u.pos; + Optional pivotRow = findPivotRow({}, direction, column); + // If no pivot is returned, the constraint is unbounded in the specified + // direction. + if (!pivotRow) + return {}; + pivot(*pivotRow, column); + } + + unsigned row = u.pos; + Optional optimum = computeRowOptimum(direction, row); + if (u.restricted && direction == Direction::Down && + (!optimum || *optimum < Fraction(0, 1))) + restoreRow(u); + return optimum; +} + +bool Simplex::isBoundedAlongConstraint(unsigned constraintIndex) { + assert(!empty && "It is not meaningful to ask whether a direction is bounded " + "in an empty set."); + // The constraint's perpendicular is already bounded below, since it is a + // constraint. If it is also bounded above, we can return true. + return computeOptimum(Direction::Up, con[constraintIndex]).hasValue(); +} + /// Redundant constraints are those that are in row orientation and lie in /// rows 0 to nRedundant - 1. 
bool Simplex::isMarkedRedundant(unsigned constraintIndex) const { diff --git a/mlir/unittests/Analysis/AffineStructuresTest.cpp b/mlir/unittests/Analysis/AffineStructuresTest.cpp index 6fcb1c489cfcb4..ac11c90ec15b3e 100644 --- a/mlir/unittests/Analysis/AffineStructuresTest.cpp +++ b/mlir/unittests/Analysis/AffineStructuresTest.cpp @@ -15,22 +15,36 @@ namespace mlir { -/// If 'hasValue' is true, check that findIntegerSample returns a valid sample +enum class TestFunction { Sample, Empty }; + +/// If fn is TestFunction::Sample (default): +/// If hasSample is true, check that findIntegerSample returns a valid sample /// for the FlatAffineConstraints fac. +/// If hasSample is false, check that findIntegerSample returns None. /// -/// If hasValue is false, check that findIntegerSample does not return None. -static void checkSample(bool hasValue, const FlatAffineConstraints &fac) { - Optional> maybeSample = fac.findIntegerSample(); - if (!hasValue) { - EXPECT_FALSE(maybeSample.hasValue()); - if (maybeSample.hasValue()) { - for (auto x : *maybeSample) - llvm::errs() << x << ' '; - llvm::errs() << '\n'; +/// If fn is TestFunction::Empty, check that isIntegerEmpty returns the +/// opposite of hasSample. 
+static void checkSample(bool hasSample, const FlatAffineConstraints &fac, + TestFunction fn = TestFunction::Sample) { + Optional> maybeSample; + switch (fn) { + case TestFunction::Sample: + maybeSample = fac.findIntegerSample(); + if (!hasSample) { + EXPECT_FALSE(maybeSample.hasValue()); + if (maybeSample.hasValue()) { + for (auto x : *maybeSample) + llvm::errs() << x << ' '; + llvm::errs() << '\n'; + } + } else { + ASSERT_TRUE(maybeSample.hasValue()); + EXPECT_TRUE(fac.containsPoint(*maybeSample)); } - } else { - ASSERT_TRUE(maybeSample.hasValue()); - EXPECT_TRUE(fac.containsPoint(*maybeSample)); + break; + case TestFunction::Empty: + EXPECT_EQ(!hasSample, fac.isIntegerEmpty()); + break; } } @@ -52,9 +66,11 @@ makeFACFromConstraints(unsigned dims, ArrayRef> ineqs, /// orderings may cause the algorithm to proceed differently. At least some of ///.these permutations should make it past the heuristics and test the /// implementation of the GBR algorithm itself. -static void checkPermutationsSample(bool hasValue, unsigned nDim, +/// Use TestFunction fn to test. 
+static void checkPermutationsSample(bool hasSample, unsigned nDim, ArrayRef> ineqs, - ArrayRef> eqs) { + ArrayRef> eqs, + TestFunction fn = TestFunction::Sample) { SmallVector perm(nDim); std::iota(perm.begin(), perm.end(), 0); auto permute = [&perm](ArrayRef coeffs) { @@ -71,8 +87,8 @@ static void checkPermutationsSample(bool hasValue, unsigned nDim, for (const auto &eq : eqs) permutedEqs.push_back(permute(eq)); - checkSample(hasValue, - makeFACFromConstraints(nDim, permutedIneqs, permutedEqs)); + checkSample(hasSample, + makeFACFromConstraints(nDim, permutedIneqs, permutedEqs), fn); } while (std::next_permutation(perm.begin(), perm.end())); } @@ -206,19 +222,158 @@ TEST(FlatAffineConstraintsTest, IsIntegerEmptyTest) { EXPECT_FALSE( makeFACFromConstraints(1, {{5, -1}, {-5, 9}}, {}).isIntegerEmpty()); - // An unbounded set, which isIntegerEmpty should detect as unbounded and - // return without calling findIntegerSample. + // Unbounded sets. + EXPECT_TRUE(makeFACFromConstraints(3, + { + {2, 0, 0, -1}, // 2x >= 1 + {-2, 0, 0, 1}, // 2x <= 1 + {0, 2, 0, -1}, // 2y >= 1 + {0, -2, 0, 1}, // 2y <= 1 + {0, 0, 2, -1}, // 2z >= 1 + }, + {}) + .isIntegerEmpty()); + EXPECT_FALSE(makeFACFromConstraints(3, { - {2, 0, 0, -1}, - {-2, 0, 0, 1}, - {0, 2, 0, -1}, - {0, -2, 0, 1}, - {0, 0, 2, -1}, + {2, 0, 0, -1}, // 2x >= 1 + {-3, 0, 0, 3}, // 3x <= 3 + {0, 0, 5, -6}, // 5z >= 6 + {0, 0, -7, 17}, // 7z <= 17 + {0, 3, 0, -2}, // 3y >= 2 }, {}) .isIntegerEmpty()); + // 2D cone with apex at (10000, 10000) and + // edges passing through (1/3, 0) and (2/3, 0). + EXPECT_FALSE( + makeFACFromConstraints( + 2, {{300000, -299999, -100000}, {-300000, 299998, 200000}}, {}) + .isIntegerEmpty()); + + // Cartesian product of a tetrahedron and a 2D cone. + // The tetrahedron has vertices at + // (1/3, 0, 0), (2/3, 0, 0), (2/3, 0, 10000), and (10000, 10000, 10000). 
+ // The first three points form a triangular base on the xz plane with the + // apex at the fourth point, which is the only integer point. + // The cone has apex at (10000, 10000) and + // edges passing through (1/3, 0) and (2/3, 0). + checkPermutationsSample( + true /* not empty */, 5, + { + // Tetrahedron constraints: + {0, 1, 0, 0, 0, 0}, // y >= 0 + {0, -1, 1, 0, 0, 0}, // z >= y + // -300000x + 299998y + 100000 + z <= 0. + {300000, -299998, -1, 0, 0, -100000}, + // -150000x + 149999y + 100000 >= 0. + {-150000, 149999, 0, 0, 0, 100000}, + + // Triangle constraints: + // 300000p - 299999q >= 100000 + {0, 0, 0, 300000, -299999, -100000}, + // -300000p + 299998q + 200000 >= 0 + {0, 0, 0, -300000, 299998, 200000}, + }, + {}, TestFunction::Empty); + + // Cartesian product of same tetrahedron as above and {(p, q) : 1/3 <= p <= + // 2/3}. Since the second set is empty, the whole set is too. + checkPermutationsSample( + false /* empty */, 5, + { + // Tetrahedron constraints: + {0, 1, 0, 0, 0, 0}, // y >= 0 + {0, -1, 1, 0, 0, 0}, // z >= y + // -300000x + 299998y + 100000 + z <= 0. + {300000, -299998, -1, 0, 0, -100000}, + // -150000x + 149999y + 100000 >= 0. + {-150000, 149999, 0, 0, 0, 100000}, + + // Second set constraints: + // 3p >= 1 + {0, 0, 0, 3, 0, -1}, + // 3p <= 2 + {0, 0, 0, -3, 0, 2}, + }, + {}, TestFunction::Empty); + + // Cartesian product of same tetrahedron as above and + // {(p, q, r) : 1 <= p <= 2 and p = 3q + 3r}. + // Since the second set is empty, the whole set is too. + checkPermutationsSample( + false /* empty */, 5, + { + // Tetrahedron constraints: + {0, 1, 0, 0, 0, 0, 0}, // y >= 0 + {0, -1, 1, 0, 0, 0, 0}, // z >= y + // -300000x + 299998y + 100000 + z <= 0. + {300000, -299998, -1, 0, 0, 0, -100000}, + // -150000x + 149999y + 100000 >= 0. 
+ {-150000, 149999, 0, 0, 0, 0, 100000}, + + // Second set constraints: + // p >= 1 + {0, 0, 0, 1, 0, 0, -1}, + // p <= 2 + {0, 0, 0, -1, 0, 0, 2}, + }, + { + {0, 0, 0, 1, -3, -3, 0}, // p = 3q + 3r + }, + TestFunction::Empty); + + // Cartesian product of a tetrahedron and a 2D cone. + // The tetrahedron is empty and has vertices at + // (1/3, 0, 0), (2/3, 0, 0), (2/3, 0, 100), and (100, 100 - 1/3, 100). + // The cone has apex at (10000, 10000) and + // edges passing through (1/3, 0) and (2/3, 0). + // Since the tetrahedron is empty, the Cartesian product is too. + checkPermutationsSample(false /* empty */, 5, + { + // Tetrahedron constraints: + {0, 1, 0, 0, 0, 0}, + {0, -300, 299, 0, 0, 0}, + {300 * 299, -89400, -299, 0, 0, -100 * 299}, + {-897, 894, 0, 0, 0, 598}, + + // Triangle constraints: + // 300000p - 299999q >= 100000 + {0, 0, 0, 300000, -299999, -100000}, + // -300000p + 299998q + 200000 >= 0 + {0, 0, 0, -300000, 299998, 200000}, + }, + {}, TestFunction::Empty); + + // Cartesian product of same tetrahedron as above and + // {(p, q) : 1/3 <= p <= 2/3}. + checkPermutationsSample(false /* empty */, 5, + { + // Tetrahedron constraints: + {0, 1, 0, 0, 0, 0}, + {0, -300, 299, 0, 0, 0}, + {300 * 299, -89400, -299, 0, 0, -100 * 299}, + {-897, 894, 0, 0, 0, 598}, + + // Second set constraints: + // 3p >= 1 + {0, 0, 0, 3, 0, -1}, + // 3p <= 2 + {0, 0, 0, -3, 0, 2}, + }, + {}, TestFunction::Empty); + + EXPECT_FALSE(makeFACFromConstraints(3, + { + {2, 0, 0, -1}, // 2x >= 1 + }, + {{ + {1, -1, 0, -1}, // y = x - 1 + {0, 1, -1, 0}, // z = y + }}) + .isIntegerEmpty()); + // FlatAffineConstraints::isEmpty() does not detect the following sets to be // empty. 
diff --git a/mlir/unittests/Analysis/CMakeLists.txt b/mlir/unittests/Analysis/CMakeLists.txt index 6317aeb8df892d..0df0af866d6627 100644 --- a/mlir/unittests/Analysis/CMakeLists.txt +++ b/mlir/unittests/Analysis/CMakeLists.txt @@ -1,5 +1,6 @@ add_mlir_unittest(MLIRAnalysisTests AffineStructuresTest.cpp + LinearTransformTest.cpp PresburgerSetTest.cpp ) diff --git a/mlir/unittests/Analysis/LinearTransformTest.cpp b/mlir/unittests/Analysis/LinearTransformTest.cpp new file mode 100644 index 00000000000000..598c84920d5df8 --- /dev/null +++ b/mlir/unittests/Analysis/LinearTransformTest.cpp @@ -0,0 +1,87 @@ +//===- LinearTransformTest.cpp - Tests for LinearTransform ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/LinearTransform.h" +#include +#include + +namespace mlir { + +void testColumnEchelonForm(const Matrix &m, unsigned expectedRank) { + unsigned lastAllowedNonZeroCol = 0; + std::pair result = + LinearTransform::makeTransformToColumnEchelon(m); + unsigned rank = result.first; + EXPECT_EQ(rank, expectedRank); + LinearTransform transform = result.second; + // In column echelon form, each row's last non-zero value can be at most one + // column to the right of the last non-zero column among the previous rows. 
+ for (unsigned row = 0, nRows = m.getNumRows(); row < nRows; ++row) { + SmallVector rowVec = transform.applyTo(m.getRow(row)); + for (unsigned col = lastAllowedNonZeroCol + 1, nCols = m.getNumColumns(); + col < nCols; ++col) { + EXPECT_EQ(rowVec[col], 0); + if (rowVec[col] != 0) { + llvm::errs() << "Failed at input matrix:\n"; + m.dump(); + } + } + if (rowVec[lastAllowedNonZeroCol] != 0) + lastAllowedNonZeroCol++; + } + // The final value of lastAllowedNonZeroCol is the index of the first + // all-zeros column, so it must be equal to the rank. + EXPECT_EQ(lastAllowedNonZeroCol, rank); +} + +TEST(LinearTransformTest, transformToColumnEchelonTest) { + // m1, m2, m3 are rank 1 matrices -- the first and second rows are identical. + Matrix m1(2, 2); + m1(0, 0) = 4; + m1(0, 1) = -7; + m1(1, 0) = 4; + m1(1, 1) = -7; + testColumnEchelonForm(m1, 1u); + + Matrix m2(2, 2); + m2(0, 0) = -4; + m2(0, 1) = 7; + m2(1, 0) = 4; + m2(1, 1) = -7; + testColumnEchelonForm(m2, 1u); + + Matrix m3(2, 2); + m3(0, 0) = -4; + m3(0, 1) = -7; + m3(1, 0) = -4; + m3(1, 1) = -7; + testColumnEchelonForm(m3, 1u); + + // m4, m5, m6 are rank 2 matrices -- the first and second rows are different. + Matrix m4(2, 2); + m4(0, 0) = 4; + m4(0, 1) = -7; + m4(1, 0) = -4; + m4(1, 1) = -7; + testColumnEchelonForm(m4, 2u); + + Matrix m5(2, 2); + m5(0, 0) = -4; + m5(0, 1) = 7; + m5(1, 0) = 4; + m5(1, 1) = 7; + testColumnEchelonForm(m5, 2u); + + Matrix m6(2, 2); + m6(0, 0) = -4; + m6(0, 1) = -7; + m6(1, 0) = 4; + m6(1, 1) = -7; + testColumnEchelonForm(m5, 2u); +} +} // namespace mlir From 763c1f9933463c40c39c04b68bbe4d296823b003 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 14 Jan 2021 13:34:18 -0500 Subject: [PATCH 15/17] [OpenMP] Drop the static library libomptarget-nvptx For NVPTX target, OpenMP provides a static library `libomptarget-nvptx` built by NVCC, and another bitcode `libomptarget-nvptx-sm_{$sm}.bc` generated by Clang. 
When compiling an OpenMP program, the `.bc` file will be fed to `clang` in the second run on the program that compiles the target part. Then the generated PTX file will be fed to `ptxas` to generate the object file, and finally the driver invokes `nvlink` to generate the binary, where the static library will be appended to `nvlink`. One question is, why do we need two libraries? The only difference is, the static library contains `omp_data.cu` and the bitcode library doesn't. It's unclear why they were implemented in this way, but per D94565, there is no issue if we also include the file into the bitcode library. Therefore, we can safely drop the static library. This patch is about the change in OpenMP. The driver will be updated as well if this patch is accepted. Reviewed By: jdoerfert, JonChesterfield Differential Revision: https://reviews.llvm.org/D94573 --- .../deviceRTLs/nvptx/CMakeLists.txt | 96 ++++--------------- 1 file changed, 19 insertions(+), 77 deletions(-) diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt index ea11c8114166e3..200c6401d62848 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -10,31 +10,6 @@ # ##===----------------------------------------------------------------------===## -set(LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER "" CACHE STRING - "Path to alternate NVCC host compiler to be used by the NVPTX device RTL.") - -if(LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER) - find_program(ALTERNATE_CUDA_HOST_COMPILER NAMES ${LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER}) - if(NOT ALTERNATE_CUDA_HOST_COMPILER) - libomptarget_say("Not building CUDA offloading device RTL: invalid NVPTX alternate host compiler.") - endif() - set(CUDA_HOST_COMPILER ${ALTERNATE_CUDA_HOST_COMPILER} CACHE FILEPATH "" FORCE) -endif() - -# We can't use clang as nvcc host preprocessor, so we attempt to replace it with -# gcc. 
-if(CUDA_HOST_COMPILER MATCHES clang) - - find_program(LIBOMPTARGET_NVPTX_ALTERNATE_GCC_HOST_COMPILER NAMES gcc) - - if(NOT LIBOMPTARGET_NVPTX_ALTERNATE_GCC_HOST_COMPILER) - libomptarget_say("Not building CUDA offloading device RTL: clang is not supported as NVCC host compiler.") - libomptarget_say("Please include gcc in your path or set LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER to the full path of of valid compiler.") - return() - endif() - set(CUDA_HOST_COMPILER "${LIBOMPTARGET_NVPTX_ALTERNATE_GCC_HOST_COMPILER}" CACHE FILEPATH "" FORCE) -endif() - get_filename_component(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) @@ -44,28 +19,6 @@ set(devicertl_nvptx_directory ${devicertl_base_directory}/nvptx) if(LIBOMPTARGET_DEP_CUDA_FOUND) - libomptarget_say("Building CUDA offloading device RTL.") - - # We really don't have any host code, so we don't need to care about - # propagating host flags. - set(CUDA_PROPAGATE_HOST_FLAGS OFF) - - set(cuda_src_files - ${devicertl_common_directory}/src/cancel.cu - ${devicertl_common_directory}/src/critical.cu - ${devicertl_common_directory}/src/data_sharing.cu - ${devicertl_common_directory}/src/libcall.cu - ${devicertl_common_directory}/src/loop.cu - ${devicertl_common_directory}/src/omp_data.cu - ${devicertl_common_directory}/src/omptarget.cu - ${devicertl_common_directory}/src/parallel.cu - ${devicertl_common_directory}/src/reduction.cu - ${devicertl_common_directory}/src/support.cu - ${devicertl_common_directory}/src/sync.cu - ${devicertl_common_directory}/src/task.cu - src/target_impl.cu - ) - # Build library support for the highest compute capability the system supports # and always build support for sm_35 by default if (${LIBOMPTARGET_DEP_CUDA_ARCH} EQUAL 35) @@ -94,24 +47,6 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND) # Activate RTL message dumps if requested by the user. 
set(LIBOMPTARGET_NVPTX_DEBUG FALSE CACHE BOOL "Activate NVPTX device RTL debug messages.") - if(${LIBOMPTARGET_NVPTX_DEBUG}) - set(CUDA_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1 -g --ptxas-options=-v) - endif() - - # NVPTX runtime library has to be statically linked. Dynamic linking is not - # yet supported by the CUDA toolchain on the device. - set(BUILD_SHARED_LIBS OFF) - set(CUDA_SEPARABLE_COMPILATION ON) - list(APPEND CUDA_NVCC_FLAGS -I${devicertl_base_directory} - -I${devicertl_nvptx_directory}/src) - cuda_add_library(omptarget-nvptx STATIC ${cuda_src_files} - OPTIONS ${CUDA_ARCH} ${CUDA_DEBUG} ${MAX_SM_DEFINITION}) - - # Install device RTL under the lib destination folder. - install(TARGETS omptarget-nvptx ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}") - - target_link_libraries(omptarget-nvptx ${CUDA_LIBRARIES}) - # Check if we can create an LLVM bitcode implementation of the runtime library # that could be inlined in the user application. For that we need to find @@ -124,18 +59,25 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND) include(LibomptargetNVPTXBitcodeLibrary) - set(bclib_default FALSE) - if (${LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED}) - set(bclib_default TRUE) - endif() - set(LIBOMPTARGET_NVPTX_ENABLE_BCLIB ${bclib_default} CACHE BOOL - "Enable CUDA LLVM bitcode offloading device RTL.") - if (${LIBOMPTARGET_NVPTX_ENABLE_BCLIB}) - if (NOT ${LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED}) - libomptarget_error_say("Cannot build CUDA LLVM bitcode offloading device RTL!") - endif() + if (LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED) libomptarget_say("Building CUDA LLVM bitcode offloading device RTL.") + set(cuda_src_files + ${devicertl_common_directory}/src/cancel.cu + ${devicertl_common_directory}/src/critical.cu + ${devicertl_common_directory}/src/data_sharing.cu + ${devicertl_common_directory}/src/libcall.cu + ${devicertl_common_directory}/src/loop.cu + ${devicertl_common_directory}/src/omp_data.cu + ${devicertl_common_directory}/src/omptarget.cu + ${devicertl_common_directory}/src/parallel.cu + 
${devicertl_common_directory}/src/reduction.cu + ${devicertl_common_directory}/src/support.cu + ${devicertl_common_directory}/src/sync.cu + ${devicertl_common_directory}/src/task.cu + src/target_impl.cu + ) + # Set flags for LLVM Bitcode compilation. set(bc_flags ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER_FLAGS} -I${devicertl_base_directory} @@ -195,7 +137,7 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND) # Copy library to destination. add_custom_command(TARGET omptarget-nvptx-${sm}-bc POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc - $) + ${LIBOMPTARGET_LIBRARY_DIR}) # Install bitcode library under the lib destination folder. install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc DESTINATION "${OPENMP_INSTALL_LIBDIR}") @@ -204,5 +146,5 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND) add_subdirectory(test) else() - libomptarget_say("Not building CUDA offloading device RTL: CUDA tools not found in the system.") + libomptarget_say("Not building CUDA offloading device RTL: tools to build bc lib not found in the system.") endif() From 4fffbc150cca1638051b8ad2a20f4b8240df0869 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Wed, 13 Jan 2021 19:14:25 -0800 Subject: [PATCH 16/17] [clang][MSVC] Fix missing MSInheritanceAttr in template specialization. Fix PR48687. 
Differential Revision: https://reviews.llvm.org/D94646 --- clang/lib/Sema/SemaTemplate.cpp | 5 +++++ .../test/CodeGenCXX/microsoft-abi-member-pointers.cpp | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 64259767d98a68..12880b95b9c634 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -9764,6 +9764,11 @@ DeclResult Sema::ActOnExplicitInstantiation( dllExportImportClassTemplateSpecialization(*this, Def); } + if (Def->hasAttr()) { + Specialization->addAttr(Def->getAttr()); + Consumer.AssignInheritanceModel(Specialization); + } + // Set the template specialization kind. Make sure it is set before // instantiating the members which will trigger ASTConsumer callbacks. Specialization->setTemplateSpecializationKind(TSK); diff --git a/clang/test/CodeGenCXX/microsoft-abi-member-pointers.cpp b/clang/test/CodeGenCXX/microsoft-abi-member-pointers.cpp index bfe620df5ce32c..527363a6ff8fdc 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-member-pointers.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-member-pointers.cpp @@ -148,6 +148,16 @@ const C table[] = { // CHECK-SAME: %"struct.pr43803::C" { { i32, i32, i32 } { i32 8, i32 0, i32 0 }, [4 x i8] undef }] } +namespace pr48687 { +template struct A { + T value; + static constexpr auto address = &A::value; +}; +extern template class A; +template class A; +// CHECK: @"?address@?$A@M@pr48687@@2QQ12@MQ12@" = weak_odr dso_local constant i32 0, comdat, align 4 +} + struct PR26313_Y; typedef void (PR26313_Y::*PR26313_FUNC)(); struct PR26313_X { From 202d359753d1f130a228c3ad52dfaabf384250d1 Mon Sep 17 00:00:00 2001 From: Hiroshi Yamauchi Date: Mon, 11 Jan 2021 11:02:37 -0800 Subject: [PATCH 17/17] [X86] Add the FSRM feature (Fast Short Rep Mov) to Zen3. Note -x86-use-fsrm-for-memcpy is still disabled by default and there's no default behavior change. 
Differential Revision: https://reviews.llvm.org/D94436 --- llvm/lib/Target/X86/X86.td | 3 ++- llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 9096d9d544529f..c492d686c52e11 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1071,7 +1071,8 @@ def ProcessorFeatures { list ZN2Tuning = ZNTuning; list ZN2Features = !listconcat(ZNFeatures, ZN2AdditionalFeatures); - list ZN3AdditionalFeatures = [FeatureINVPCID, + list ZN3AdditionalFeatures = [FeatureFSRM, + FeatureINVPCID, FeaturePKU, FeatureVAES, FeatureVPCLMULQDQ]; diff --git a/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll b/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll index 9480d74723fcc3..77e97626b1c601 100644 --- a/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll +++ b/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll @@ -4,6 +4,7 @@ ; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=haswell < %s | FileCheck %s --check-prefix=NOFSRM ; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=icelake-client < %s | FileCheck %s --check-prefix=FSRM ; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=icelake-server < %s | FileCheck %s --check-prefix=FSRM +; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=znver3 < %s | FileCheck %s --check-prefix=FSRM declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind