From 20abff0481d598c850d2690083f90700fc8c9603 Mon Sep 17 00:00:00 2001
From: jasonliu <jasonliu.development@gmail.com>
Date: Mon, 10 Aug 2020 19:35:50 +0000
Subject: [PATCH 1/9] [XCOFF][AIX] Use TE storage mapping class when large code
 model is enabled

Summary:
Use TE SMC instead of TC SMC in large code model mode,
so that large code model TOC entries could get placed after all
the small code model TOC entries, which reduces the chance of TOC overflow.

Reviewed By: Xiangling_L

Differential Revision: https://reviews.llvm.org/D85455
---
 .../llvm/CodeGen/TargetLoweringObjectFileImpl.h   |  3 ++-
 .../llvm/Target/TargetLoweringObjectFile.h        |  3 ++-
 llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp |  7 +++++--
 llvm/lib/MC/MCSectionXCOFF.cpp                    |  1 +
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp      |  7 ++++---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp         |  2 +-
 .../CodeGen/PowerPC/aix-lower-block-address.ll    | 15 +++++++++------
 .../PowerPC/aix-lower-constant-pool-index.ll      | 15 +++++++++------
 llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll | 15 +++++++++------
 .../CodeGen/PowerPC/lower-globaladdr32-aix-asm.ll | 11 +++++++----
 .../CodeGen/PowerPC/lower-globaladdr64-aix-asm.ll | 11 +++++++----
 11 files changed, 56 insertions(+), 34 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index a333d971bbcb9b..73668f393d20a9 100644
--- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -251,7 +251,8 @@ class TargetLoweringObjectFileXCOFF : public TargetLoweringObjectFile {
   MCSection *
   getSectionForFunctionDescriptor(const Function *F,
                                   const TargetMachine &TM) const override;
-  MCSection *getSectionForTOCEntry(const MCSymbol *Sym) const override;
+  MCSection *getSectionForTOCEntry(const MCSymbol *Sym,
+                                   const TargetMachine &TM) const override;
 
   /// For external functions, this will always return a function descriptor
   /// csect.
diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index 0cb888e7507816..e087f467091ebc 100644
--- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -226,7 +226,8 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
   /// On targets that support TOC entries, return a section for the entry given
   /// the symbol it refers to.
   /// TODO: Implement this interface for existing ELF targets.
-  virtual MCSection *getSectionForTOCEntry(const MCSymbol *S) const {
+  virtual MCSection *getSectionForTOCEntry(const MCSymbol *S,
+                                           const TargetMachine &TM) const {
     return nullptr;
   }
 
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 317166096f1bfe..8a0aacc4cee2bf 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2202,8 +2202,11 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForFunctionDescriptor(
 }
 
 MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
-    const MCSymbol *Sym) const {
+    const MCSymbol *Sym, const TargetMachine &TM) const {
+  // Use TE storage-mapping class when large code model is enabled so that
+  // the chance of needing -bbigtoc is decreased.
   return getContext().getXCOFFSection(
-      cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), XCOFF::XMC_TC,
+      cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(),
+      TM.getCodeModel() == CodeModel::Large ? XCOFF::XMC_TE : XCOFF::XMC_TC,
       XCOFF::XTY_SD, XCOFF::C_HIDEXT, SectionKind::getData());
 }
diff --git a/llvm/lib/MC/MCSectionXCOFF.cpp b/llvm/lib/MC/MCSectionXCOFF.cpp
index 1fa495239f7486..17b7b60a04ab89 100644
--- a/llvm/lib/MC/MCSectionXCOFF.cpp
+++ b/llvm/lib/MC/MCSectionXCOFF.cpp
@@ -45,6 +45,7 @@ void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
       printCsectDirective(OS);
       break;
     case XCOFF::XMC_TC:
+    case XCOFF::XMC_TE:
       break;
     case XCOFF::XMC_TC0:
       OS << "\t.toc\n";
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 23eb02f29ed3cb..93a162a92ac079 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -20,8 +20,8 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/BinaryFormat/ELF.h"
-#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
@@ -30,6 +30,7 @@
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionXCOFF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
@@ -122,8 +123,8 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
   void emitTCEntry(const MCSymbol &S) override {
     if (const MCSymbolXCOFF *XSym = dyn_cast<MCSymbolXCOFF>(&S)) {
       MCSymbolXCOFF *TCSym =
-          cast<MCSymbolXCOFF>(Streamer.getContext().getOrCreateSymbol(
-              XSym->getSymbolTableName() + "[TC]"));
+          cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly())
+              ->getQualNameSymbol();
       OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n';
 
       if (TCSym->hasRename())
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index f7a0b607e61fc7..5358a8a36b33f9 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1826,7 +1826,7 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
   for (auto &I : TOC) {
     // Setup the csect for the current TC entry.
     MCSectionXCOFF *TCEntry = cast<MCSectionXCOFF>(
-        getObjFileLowering().getSectionForTOCEntry(I.first));
+        getObjFileLowering().getSectionForTOCEntry(I.first, TM));
     OutStreamer->SwitchSection(TCEntry);
 
     OutStreamer->emitLabel(I.second);
diff --git a/llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll b/llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll
index 8d65fbe1495339..4db879aec0f138 100644
--- a/llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll
@@ -15,16 +15,16 @@
 ; RUN: --check-prefix=64LARGE-MIR %s
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff \
-; RUN: -code-model=small < %s | FileCheck --check-prefixes=32SMALL-ASM,CHECK %s
+; RUN: -code-model=small < %s | FileCheck --check-prefixes=32SMALL-ASM,SMALL-ASM %s
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff \
-; RUN: -code-model=large < %s | FileCheck --check-prefixes=32LARGE-ASM,CHECK %s
+; RUN: -code-model=large < %s | FileCheck --check-prefixes=32LARGE-ASM,LARGE-ASM %s
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff \
-; RUN: -code-model=small < %s | FileCheck --check-prefixes=64SMALL-ASM,CHECK %s
+; RUN: -code-model=small < %s | FileCheck --check-prefixes=64SMALL-ASM,SMALL-ASM %s
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff \
-; RUN: -code-model=large < %s | FileCheck --check-prefixes=64LARGE-ASM,CHECK %s
+; RUN: -code-model=large < %s | FileCheck --check-prefixes=64LARGE-ASM,LARGE-ASM %s
 
 define void @foo() {
 entry:
@@ -68,5 +68,8 @@ __here:
 ; 64LARGE-ASM:         addis [[REG1:[0-9]+]], L..C0@u(2)
 ; 64LARGE-ASM:         ld [[REG2:[0-9]+]], L..C0@l([[REG1]])
 
-; CHECK: .toc
-; CHECK: .tc L..tmp0[TC],L..tmp0
+; SMALL-ASM: .toc
+; SMALL-ASM: .tc L..tmp0[TC],L..tmp0
+
+; LARGE-ASM: .toc
+; LARGE-ASM: .tc L..tmp0[TE],L..tmp0
diff --git a/llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll b/llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll
index 1737097d368c9a..d125b89889ed82 100644
--- a/llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll
@@ -15,16 +15,16 @@
 ; RUN: --check-prefix=64LARGE-MIR %s
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff \
-; RUN: -code-model=small < %s | FileCheck --check-prefixes=32SMALL-ASM,CHECK %s
+; RUN: -code-model=small < %s | FileCheck --check-prefixes=32SMALL-ASM,SMALL-ASM %s
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff \
-; RUN: -code-model=large < %s | FileCheck --check-prefixes=32LARGE-ASM,CHECK %s
+; RUN: -code-model=large < %s | FileCheck --check-prefixes=32LARGE-ASM,LARGE-ASM %s
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff \
-; RUN: -code-model=small < %s | FileCheck --check-prefixes=64SMALL-ASM,CHECK %s
+; RUN: -code-model=small < %s | FileCheck --check-prefixes=64SMALL-ASM,SMALL-ASM %s
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff \
-; RUN: -code-model=large < %s | FileCheck --check-prefixes=64LARGE-ASM,CHECK %s
+; RUN: -code-model=large < %s | FileCheck --check-prefixes=64LARGE-ASM,LARGE-ASM %s
 
 define float @test_float() {
 entry:
@@ -83,5 +83,8 @@ entry:
 ; 64LARGE-ASM:         lfs 1, 0([[REG2]])
 ; 64LARGE-ASM:         blr
 
-; CHECK: .toc
-; CHECK: .tc L..CPI0_0[TC],L..CPI0_0
+; SMALL-ASM: .toc
+; SMALL-ASM: .tc L..CPI0_0[TC],L..CPI0_0
+
+; LARGE-ASM: .toc
+; LARGE-ASM: .tc L..CPI0_0[TE],L..CPI0_0
diff --git a/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll b/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll
index 1163d61d463c84..78ef175e114801 100644
--- a/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll
@@ -15,16 +15,16 @@
 ; RUN: --check-prefix=64LARGE-MIR %s
 
 ; RUN: llc -mtriple powerpc-ibm-aix-xcoff -code-model=small < %s | FileCheck \
-; RUN: --check-prefixes=32SMALL-ASM,CHECK %s
+; RUN: --check-prefixes=32SMALL-ASM,SMALL-ASM %s
 
 ; RUN: llc -mtriple powerpc-ibm-aix-xcoff -code-model=large < %s | FileCheck \
-; RUN: --check-prefixes=32LARGE-ASM,CHECK %s
+; RUN: --check-prefixes=32LARGE-ASM,LARGE-ASM %s
 
 ; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -code-model=small < %s | FileCheck \
-; RUN: --check-prefixes=64SMALL-ASM,CHECK %s
+; RUN: --check-prefixes=64SMALL-ASM,SMALL-ASM %s
 
 ; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -code-model=large < %s | FileCheck \
-; RUN: --check-prefixes=64LARGE-ASM,CHECK %s
+; RUN: --check-prefixes=64LARGE-ASM,LARGE-ASM %s
 
 ; RUN: llc -mtriple powerpc-ibm-aix-xcoff -function-sections < %s | FileCheck \
 ; RUN: --check-prefixes=FUNC-ASM,CHECK %s
@@ -206,5 +206,8 @@
 ; FUNC-ASM:         .vbyte  4, L..BB0_4-L..JTI0_0
 ; FUNC-ASM:         .vbyte  4, L..BB0_5-L..JTI0_0
 
-; CHECK: .toc
-; CHECK: .tc L..JTI0_0[TC],L..JTI0_0
+; SMALL-ASM: .toc
+; SMALL-ASM: .tc L..JTI0_0[TC],L..JTI0_0
+
+; LARGE-ASM: .toc
+; LARGE-ASM: .tc L..JTI0_0[TE],L..JTI0_0
diff --git a/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix-asm.ll b/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix-asm.ll
index 2ef2f9b5342d0f..04aa0d58fa800d 100644
--- a/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix-asm.ll
+++ b/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix-asm.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff \
-; RUN: -code-model=small < %s | FileCheck %s --check-prefixes=CHECK,SMALL
+; RUN: -code-model=small < %s | FileCheck %s --check-prefix=SMALL
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff \
-; RUN: -code-model=large < %s | FileCheck %s --check-prefixes=CHECK,LARGE
+; RUN: -code-model=large < %s | FileCheck %s --check-prefix=LARGE
 
 @a = common global i32 0
 
@@ -41,5 +41,8 @@ define void @test_store(i32 %0) {
 ; LARGE: stw [[REG3:[0-9]+]], 0([[REG2]])
 ; LARGE: blr
 
-; CHECK: .tc a[TC],a
-; CHECK: .tc b[TC],b
+; SMALL: .tc a[TC],a
+; SMALL: .tc b[TC],b
+
+; LARGE: .tc a[TE],a
+; LARGE: .tc b[TE],b
diff --git a/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix-asm.ll b/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix-asm.ll
index ea32c433dfa7de..14c24f039ff0f7 100644
--- a/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix-asm.ll
+++ b/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix-asm.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff \
-; RUN: -code-model=small < %s | FileCheck %s --check-prefixes=CHECK,SMALL
+; RUN: -code-model=small < %s | FileCheck %s --check-prefix=SMALL
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff \
-; RUN: -code-model=large < %s | FileCheck %s --check-prefixes=CHECK,LARGE
+; RUN: -code-model=large < %s | FileCheck %s --check-prefix=LARGE
 
 @a = common global i32 0
 
@@ -41,5 +41,8 @@ define void @test_store(i32 zeroext %0) {
 ; LARGE: stw [[REG3:[0-9]+]], 0([[REG2]])
 ; LARGE: blr
 
-; CHECK: .tc a[TC],a
-; CHECK: .tc b[TC],b
+; SMALL: .tc a[TC],a
+; SMALL: .tc b[TC],b
+
+; LARGE: .tc a[TE],a
+; LARGE: .tc b[TE],b

From a680ea2c720751a3d724fd8282515924acb3bd32 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@ubisoft.com>
Date: Mon, 10 Aug 2020 16:03:03 -0400
Subject: [PATCH 2/9] Fix "last accessed time" test failing on Windows

Before this patch, the tests in llvm/test/tools/llvm-objcopy/ELF/strip-preserve-atime.test used to fail on my machine, because the "last accessed time" is disabled in the OS by default since Windows XP. One needs to explicitly enable it for the feature to work. Otherwise the last access time is the last write time. Please see: https://superuser.com/questions/251263/the-last-access-date-is-not-changed-even-after-reading-the-file-on-windows-7

    Differential Revision: https://reviews.llvm.org/D85669
---
 llvm/test/tools/llvm-objcopy/ELF/strip-preserve-atime.test | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/test/tools/llvm-objcopy/ELF/strip-preserve-atime.test b/llvm/test/tools/llvm-objcopy/ELF/strip-preserve-atime.test
index f23ece4d4455e5..b145599d9ff331 100644
--- a/llvm/test/tools/llvm-objcopy/ELF/strip-preserve-atime.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/strip-preserve-atime.test
@@ -1,6 +1,7 @@
 # Note: ls -lu prints the accessed timestamp
 # NetBSD: noatime mounts currently inhibit 'touch -a' updates
-# UNSUPPORTED: system-netbsd
+# Windows: the last access time is disabled by default in the OS
+# UNSUPPORTED: system-netbsd, system-windows
 
 # Preserve dates when stripping to an output file.
 # RUN: yaml2obj %s -o %t.1.o

From 50dd75c8e0b81815c6da209d5a3dd7bf899d1262 Mon Sep 17 00:00:00 2001
From: David Tenty <daltenty@ibm.com>
Date: Mon, 10 Aug 2020 16:07:23 -0400
Subject: [PATCH 3/9] [AIX] Try to not use LLVM tools while building runtimes

Since 64-bit XCOFF and the big AR format is not yet supported in some of these tools, this patch avoids additional setup of these tools. This patch is not intended to prevent picking up the LLVM tools if they happen to be available otherwise.

Reviewed By: hubert.reinterpretcast

Differential Revision: https://reviews.llvm.org/D85329
---
 llvm/runtimes/CMakeLists.txt | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt
index dc74a46c70b556..8f5815e8705708 100644
--- a/llvm/runtimes/CMakeLists.txt
+++ b/llvm/runtimes/CMakeLists.txt
@@ -274,6 +274,11 @@ else() # if this is included from LLVM's CMake
       endif()
     endforeach()
 
+    # 64-bit XCOFF and big AR format is not yet supported in some of these tools.
+    if(NOT target MATCHES aix)
+      set(${target}_toolchain_tools lld llvm-ar llvm-lipo llvm-ranlib llvm-nm llvm-objcopy llvm-objdump llvm-strip)
+    endif()
+
     llvm_ExternalProject_Add(builtins-${target}
                              ${compiler_rt_path}/lib/builtins
                              DEPENDS ${ARG_DEPENDS}
@@ -287,7 +292,7 @@ else() # if this is included from LLVM's CMake
                                         -DCMAKE_ASM_COMPILER_WORKS=ON
                                         -DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON
                                         ${${target}_extra_args}
-                             TOOLCHAIN_TOOLS clang lld llvm-ar llvm-lipo llvm-ranlib llvm-nm llvm-objcopy llvm-objdump llvm-strip
+                             TOOLCHAIN_TOOLS clang ${target}_toolchain_tools
                              USE_TOOLCHAIN
                              ${EXTRA_ARGS})
   endfunction()
@@ -487,6 +492,11 @@ else() # if this is included from LLVM's CMake
       list(APPEND EXTRA_ARGS STRIP_TOOL ${CMAKE_CURRENT_BINARY_DIR}/llvm-strip-link)
     endif()
 
+    # 64-bit XCOFF and big AR format is not yet supported in some of these tools.
+    if(NOT target MATCHES aix)
+      set(${name}_toolchain_tools lld llvm-ar llvm-lipo llvm-ranlib llvm-nm llvm-objcopy llvm-objdump llvm-strip)
+    endif()
+
     llvm_ExternalProject_Add(runtimes-${name}
                              ${CMAKE_CURRENT_SOURCE_DIR}
                              DEPENDS ${${name}_deps} ${CXX_HEADER_TARGET}
@@ -505,7 +515,7 @@ else() # if this is included from LLVM's CMake
                                         -DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON
                                         -DLLVM_RUNTIMES_TARGET=${name}
                                         ${${name}_extra_args}
-                             TOOLCHAIN_TOOLS clang lld llvm-ar llvm-lipo llvm-ranlib llvm-nm llvm-objcopy llvm-objdump llvm-strip
+                             TOOLCHAIN_TOOLS clang ${{name}_toolchain_tools}
                              EXTRA_TARGETS ${${name}_extra_targets}
                                            ${${name}_test_targets}
                              USE_TOOLCHAIN

From 566a66703f020996d07191323ae8ad6f7ad827b3 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Mon, 10 Aug 2020 22:39:07 +0200
Subject: [PATCH 4/9] [InstSimplify] Add test for expand binop undef issue
 (NFC)

Add test case from https://reviews.llvm.org/D83360#2146539.
---
 llvm/test/Transforms/InstSimplify/select.ll | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/llvm/test/Transforms/InstSimplify/select.ll b/llvm/test/Transforms/InstSimplify/select.ll
index b1264138a15eab..49133ddcaa9b9a 100644
--- a/llvm/test/Transforms/InstSimplify/select.ll
+++ b/llvm/test/Transforms/InstSimplify/select.ll
@@ -919,3 +919,16 @@ define <2 x i32> @all_constant_true_undef_false_constexpr_vec() {
   %s = select i1 ptrtoint (<2 x i32> ()* @all_constant_true_undef_false_constexpr_vec to i1), <2 x i32> undef, <2 x i32><i32 -1, i32 ptrtoint (<2 x i32> ()* @all_constant_true_undef_false_constexpr_vec to i32)>
   ret <2 x i32> %s
 }
+
+define i1 @expand_binop_undef(i32 %x, i32 %y) {
+; CHECK-LABEL: @expand_binop_undef(
+; CHECK-NEXT:    [[CMP15:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP15]]
+;
+  %cmp9.not.1 = icmp eq i32 %x, %y
+  %cmp15 = icmp slt i32 %x, %y
+  %spec.select39 = select i1 %cmp9.not.1, i1 undef, i1 %cmp15
+  %spec.select40 = xor i1 %cmp9.not.1, 1
+  %spec.select  = and i1 %spec.select39, %spec.select40
+  ret i1 %spec.select
+}

From 2c48e3629cfb25ac1034117fa8945fd0d342f2ae Mon Sep 17 00:00:00 2001
From: Christian Sigg <csigg@google.com>
Date: Mon, 10 Aug 2020 10:13:57 +0200
Subject: [PATCH 5/9] [MLIR] Adding gpu.host_register op and lower it to a
 runtime call.

Reviewed By: herhut

Differential Revision: https://reviews.llvm.org/D85631
---
 .../StandardToLLVM/ConvertStandardToLLVM.h    |  4 ++
 mlir/include/mlir/Dialect/GPU/GPUOps.td       | 12 +++++
 .../ConvertLaunchFuncToRuntimeCalls.cpp       | 53 +++++++++++++++++--
 .../StandardToLLVM/StandardToLLVM.cpp         | 32 +++++------
 .../test/mlir-cuda-runner/all-reduce-and.mlir |  5 +-
 .../test/mlir-cuda-runner/all-reduce-max.mlir |  5 +-
 .../test/mlir-cuda-runner/all-reduce-min.mlir |  5 +-
 mlir/test/mlir-cuda-runner/all-reduce-op.mlir |  3 +-
 mlir/test/mlir-cuda-runner/all-reduce-or.mlir |  5 +-
 .../mlir-cuda-runner/all-reduce-region.mlir   |  3 +-
 .../test/mlir-cuda-runner/all-reduce-xor.mlir |  5 +-
 mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir  |  3 +-
 .../mlir-cuda-runner/multiple-all-reduce.mlir |  7 ++-
 mlir/test/mlir-cuda-runner/shuffle.mlir       |  7 ++-
 mlir/test/mlir-cuda-runner/two-modules.mlir   |  3 +-
 mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir  |  3 +-
 mlir/test/mlir-rocm-runner/two-modules.mlir   |  3 +-
 mlir/test/mlir-rocm-runner/vecadd.mlir        |  7 ++-
 .../mlir-rocm-runner/vector-transferops.mlir  |  5 +-
 .../cuda-runtime-wrappers.cpp                 | 33 +++++-------
 .../rocm-runtime-wrappers.cpp                 | 36 +++++--------
 21 files changed, 132 insertions(+), 107 deletions(-)

diff --git a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h
index 25bff67d4a6e37..2f8f87fa0e41e4 100644
--- a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h
+++ b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h
@@ -440,6 +440,10 @@ class ConvertToLLVMPattern : public ConversionPattern {
                                 ConversionPatternRewriter &rewriter,
                                 SmallVectorImpl<Value> &sizes) const;
 
+  /// Computes the size of type in bytes.
+  Value getSizeInBytes(Location loc, Type type,
+                       ConversionPatternRewriter &rewriter) const;
+
   /// Computes total size in bytes of to store the given shape.
   Value getCumulativeSizeInBytes(Location loc, Type elementType,
                                  ArrayRef<Value> shape,
diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index c0a6ac101d7ba7..288031c598ff4b 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -741,4 +741,16 @@ def GPU_ModuleEndOp : GPU_Op<"module_end", [
   let printer = [{ p << getOperationName(); }];
 }
 
+def GPU_HostRegisterOp : GPU_Op<"host_register">,
+    Arguments<(ins AnyUnrankedMemRef:$value)> {
+  let summary = "Registers a memref for access from device.";
+  let description = [{
+    This op registers the host memory pointed to by a memref to be accessed from
+    a device.
+  }];
+
+  let assemblyFormat = "$value attr-dict `:` type($value)";
+  let verifier = [{ return success(); }];
+}
+
 #endif // GPU_OPS
diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
index e14c9fbc671836..8aa843308cffa0 100644
--- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
+++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
@@ -117,6 +117,26 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern<OpTy> {
       "mgpuStreamSynchronize",
       llvmVoidType,
       {llvmPointerType /* void *stream */}};
+  FunctionCallBuilder hostRegisterCallBuilder = {
+      "mgpuMemHostRegisterMemRef",
+      llvmVoidType,
+      {llvmIntPtrType /* intptr_t rank */,
+       llvmPointerType /* void *memrefDesc */,
+       llvmIntPtrType /* intptr_t elementSizeBytes */}};
+};
+
+/// A rewrite patter to convert gpu.host_register operations into a GPU runtime
+/// call. Currently it supports CUDA and ROCm (HIP).
+class ConvertHostRegisterOpToGpuRuntimeCallPattern
+    : public ConvertOpToGpuRuntimeCallPattern<gpu::HostRegisterOp> {
+public:
+  ConvertHostRegisterOpToGpuRuntimeCallPattern(LLVMTypeConverter &typeConverter)
+      : ConvertOpToGpuRuntimeCallPattern<gpu::HostRegisterOp>(typeConverter) {}
+
+private:
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override;
 };
 
 /// A rewrite patter to convert gpu.launch_func operations into a sequence of
@@ -192,6 +212,33 @@ LLVM::CallOp FunctionCallBuilder::create(Location loc, OpBuilder &builder,
       builder.getSymbolRefAttr(function), arguments);
 }
 
+// Returns whether value is of LLVM type.
+static bool isLLVMType(Value value) {
+  return value.getType().isa<LLVM::LLVMType>();
+}
+
+LogicalResult ConvertHostRegisterOpToGpuRuntimeCallPattern::matchAndRewrite(
+    Operation *op, ArrayRef<Value> operands,
+    ConversionPatternRewriter &rewriter) const {
+  if (!llvm::all_of(operands, isLLVMType))
+    return rewriter.notifyMatchFailure(
+        op, "Cannot convert if operands aren't of LLVM type.");
+
+  Location loc = op->getLoc();
+
+  auto memRefType = cast<gpu::HostRegisterOp>(op).value().getType();
+  auto elementType = memRefType.cast<UnrankedMemRefType>().getElementType();
+  auto elementSize = getSizeInBytes(loc, elementType, rewriter);
+
+  auto arguments =
+      typeConverter.promoteOperands(loc, op->getOperands(), operands, rewriter);
+  arguments.push_back(elementSize);
+  hostRegisterCallBuilder.create(loc, rewriter, arguments);
+
+  rewriter.eraseOp(op);
+  return success();
+}
+
 // Creates a struct containing all kernel parameters on the stack and returns
 // an array of type-erased pointers to the fields of the struct. The array can
 // then be passed to the CUDA / ROCm (HIP) kernel launch calls.
@@ -269,11 +316,6 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateKernelNameConstant(
       LLVM::Linkage::Internal);
 }
 
-// Returns whether value is of LLVM type.
-static bool isLLVMType(Value value) {
-  return value.getType().isa<LLVM::LLVMType>();
-}
-
 // Emits LLVM IR to launch a kernel function. Expects the module that contains
 // the compiled kernel function as a cubin in the 'nvvm.cubin' attribute, or a
 // hsaco in the 'rocdl.hsaco' attribute of the kernel function in the IR.
@@ -351,6 +393,7 @@ mlir::createGpuToLLVMConversionPass(StringRef gpuBinaryAnnotation) {
 void mlir::populateGpuToLLVMConversionPatterns(
     LLVMTypeConverter &converter, OwningRewritePatternList &patterns,
     StringRef gpuBinaryAnnotation) {
+  patterns.insert<ConvertHostRegisterOpToGpuRuntimeCallPattern>(converter);
   patterns.insert<ConvertLaunchFuncOpToGpuRuntimeCallPattern>(
       converter, gpuBinaryAnnotation);
   patterns.insert<EraseGpuModuleOpPattern>(&converter.getContext());
diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
index 5f8a3874340083..08d1c32d13c5df 100644
--- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
+++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
@@ -927,30 +927,32 @@ void ConvertToLLVMPattern::getMemRefDescriptorSizes(
                         : createIndexConstant(rewriter, loc, s));
 }
 
-Value ConvertToLLVMPattern::getCumulativeSizeInBytes(
-    Location loc, Type elementType, ArrayRef<Value> sizes,
-    ConversionPatternRewriter &rewriter) const {
-  // Compute the total number of memref elements.
-  Value cumulativeSizeInBytes =
-      sizes.empty() ? createIndexConstant(rewriter, loc, 1) : sizes.front();
-  for (unsigned i = 1, e = sizes.size(); i < e; ++i)
-    cumulativeSizeInBytes = rewriter.create<LLVM::MulOp>(
-        loc, getIndexType(), ArrayRef<Value>{cumulativeSizeInBytes, sizes[i]});
-
+Value ConvertToLLVMPattern::getSizeInBytes(
+    Location loc, Type type, ConversionPatternRewriter &rewriter) const {
   // Compute the size of an individual element. This emits the MLIR equivalent
   // of the following sizeof(...) implementation in LLVM IR:
   //   %0 = getelementptr %elementType* null, %indexType 1
   //   %1 = ptrtoint %elementType* %0 to %indexType
   // which is a common pattern of getting the size of a type in bytes.
-  auto convertedPtrType = typeConverter.convertType(elementType)
-                              .cast<LLVM::LLVMType>()
-                              .getPointerTo();
+  auto convertedPtrType =
+      typeConverter.convertType(type).cast<LLVM::LLVMType>().getPointerTo();
   auto nullPtr = rewriter.create<LLVM::NullOp>(loc, convertedPtrType);
   auto gep = rewriter.create<LLVM::GEPOp>(
       loc, convertedPtrType,
       ArrayRef<Value>{nullPtr, createIndexConstant(rewriter, loc, 1)});
-  auto elementSize =
-      rewriter.create<LLVM::PtrToIntOp>(loc, getIndexType(), gep);
+  return rewriter.create<LLVM::PtrToIntOp>(loc, getIndexType(), gep);
+}
+
+Value ConvertToLLVMPattern::getCumulativeSizeInBytes(
+    Location loc, Type elementType, ArrayRef<Value> sizes,
+    ConversionPatternRewriter &rewriter) const {
+  // Compute the total number of memref elements.
+  Value cumulativeSizeInBytes =
+      sizes.empty() ? createIndexConstant(rewriter, loc, 1) : sizes.front();
+  for (unsigned i = 1, e = sizes.size(); i < e; ++i)
+    cumulativeSizeInBytes = rewriter.create<LLVM::MulOp>(
+        loc, getIndexType(), ArrayRef<Value>{cumulativeSizeInBytes, sizes[i]});
+  auto elementSize = this->getSizeInBytes(loc, elementType, rewriter);
   return rewriter.create<LLVM::MulOp>(
       loc, getIndexType(), ArrayRef<Value>{cumulativeSizeInBytes, elementSize});
 }
diff --git a/mlir/test/mlir-cuda-runner/all-reduce-and.mlir b/mlir/test/mlir-cuda-runner/all-reduce-and.mlir
index f89f9141572486..ef8d50580d9e12 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-and.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-and.mlir
@@ -25,9 +25,9 @@ func @main() {
   %c6 = constant 6 : index
 
   %cast_data = memref_cast %data : memref<2x6xi32> to memref<*xi32>
-  call @mgpuMemHostRegisterInt32(%cast_data) : (memref<*xi32>) -> ()
+  gpu.host_register %cast_data : memref<*xi32>
   %cast_sum = memref_cast %sum : memref<2xi32> to memref<*xi32>
-  call @mgpuMemHostRegisterInt32(%cast_sum) : (memref<*xi32>) -> ()
+  gpu.host_register %cast_sum : memref<*xi32>
 
   store %cst0, %data[%c0, %c0] : memref<2x6xi32>
   store %cst1, %data[%c0, %c1] : memref<2x6xi32>
@@ -58,6 +58,5 @@ func @main() {
   return
 }
 
-func @mgpuMemHostRegisterInt32(%ptr : memref<*xi32>)
 func @print_memref_i32(memref<*xi32>)
 
diff --git a/mlir/test/mlir-cuda-runner/all-reduce-max.mlir b/mlir/test/mlir-cuda-runner/all-reduce-max.mlir
index 4adf8a73d924ca..be8087a55ea224 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-max.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-max.mlir
@@ -25,9 +25,9 @@ func @main() {
   %c6 = constant 6 : index
 
   %cast_data = memref_cast %data : memref<2x6xi32> to memref<*xi32>
-  call @mgpuMemHostRegisterInt32(%cast_data) : (memref<*xi32>) -> ()
+  gpu.host_register %cast_data : memref<*xi32>
   %cast_sum = memref_cast %sum : memref<2xi32> to memref<*xi32>
-  call @mgpuMemHostRegisterInt32(%cast_sum) : (memref<*xi32>) -> ()
+  gpu.host_register %cast_sum : memref<*xi32>
 
   store %cst0, %data[%c0, %c0] : memref<2x6xi32>
   store %cst1, %data[%c0, %c1] : memref<2x6xi32>
@@ -58,6 +58,5 @@ func @main() {
   return
 }
 
-func @mgpuMemHostRegisterInt32(%ptr : memref<*xi32>)
 func @print_memref_i32(memref<*xi32>)
 
diff --git a/mlir/test/mlir-cuda-runner/all-reduce-min.mlir b/mlir/test/mlir-cuda-runner/all-reduce-min.mlir
index 8cb3116e9d0dd8..ad03ed5497f009 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-min.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-min.mlir
@@ -25,9 +25,9 @@ func @main() {
   %c6 = constant 6 : index
 
   %cast_data = memref_cast %data : memref<2x6xi32> to memref<*xi32>
-  call @mgpuMemHostRegisterInt32(%cast_data) : (memref<*xi32>) -> ()
+  gpu.host_register %cast_data : memref<*xi32>
   %cast_sum = memref_cast %sum : memref<2xi32> to memref<*xi32>
-  call @mgpuMemHostRegisterInt32(%cast_sum) : (memref<*xi32>) -> ()
+  gpu.host_register %cast_sum : memref<*xi32>
 
   store %cst0, %data[%c0, %c0] : memref<2x6xi32>
   store %cst1, %data[%c0, %c1] : memref<2x6xi32>
@@ -58,6 +58,5 @@ func @main() {
   return
 }
 
-func @mgpuMemHostRegisterInt32(%ptr : memref<*xi32>)
 func @print_memref_i32(memref<*xi32>)
 
diff --git a/mlir/test/mlir-cuda-runner/all-reduce-op.mlir b/mlir/test/mlir-cuda-runner/all-reduce-op.mlir
index 72306674c3ff99..a639c699027bdd 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-op.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-op.mlir
@@ -11,7 +11,7 @@ func @main() {
   %sy = dim %dst, %c1 : memref<?x?x?xf32>
   %sz = dim %dst, %c0 : memref<?x?x?xf32>
   %cast_dst = memref_cast %dst : memref<?x?x?xf32> to memref<*xf32>
-  call @mgpuMemHostRegisterFloat(%cast_dst) : (memref<*xf32>) -> ()
+  gpu.host_register %cast_dst : memref<*xf32>
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
              threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %sy, %block_z = %sz) {
     %t0 = muli %tz, %block_y : index
@@ -28,5 +28,4 @@ func @main() {
   return
 }
 
-func @mgpuMemHostRegisterFloat(%ptr : memref<*xf32>)
 func @print_memref_f32(%ptr : memref<*xf32>)
diff --git a/mlir/test/mlir-cuda-runner/all-reduce-or.mlir b/mlir/test/mlir-cuda-runner/all-reduce-or.mlir
index 7d0ed929322e83..1ed85e61ded57a 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-or.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-or.mlir
@@ -25,9 +25,9 @@ func @main() {
   %c6 = constant 6 : index
 
   %cast_data = memref_cast %data : memref<2x6xi32> to memref<*xi32>
-  call @mgpuMemHostRegisterInt32(%cast_data) : (memref<*xi32>) -> ()
+  gpu.host_register %cast_data : memref<*xi32>
   %cast_sum = memref_cast %sum : memref<2xi32> to memref<*xi32>
-  call @mgpuMemHostRegisterInt32(%cast_sum) : (memref<*xi32>) -> ()
+  gpu.host_register %cast_sum : memref<*xi32>
 
   store %cst0, %data[%c0, %c0] : memref<2x6xi32>
   store %cst1, %data[%c0, %c1] : memref<2x6xi32>
@@ -58,6 +58,5 @@ func @main() {
   return
 }
 
-func @mgpuMemHostRegisterInt32(%ptr : memref<*xi32>)
 func @print_memref_i32(memref<*xi32>)
 
diff --git a/mlir/test/mlir-cuda-runner/all-reduce-region.mlir b/mlir/test/mlir-cuda-runner/all-reduce-region.mlir
index a9426c65897875..49f24db131c7c7 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-region.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-region.mlir
@@ -8,7 +8,7 @@ func @main() {
   %c0 = constant 0 : index
   %sx = dim %dst, %c0 : memref<?xf32>
   %cast_dst = memref_cast %dst : memref<?xf32> to memref<*xf32>
-  call @mgpuMemHostRegisterFloat(%cast_dst) : (memref<*xf32>) -> ()
+  gpu.host_register %cast_dst : memref<*xf32>
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
              threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
     %val = index_cast %tx : index to i32
@@ -25,5 +25,4 @@ func @main() {
   return
 }
 
-func @mgpuMemHostRegisterFloat(%ptr : memref<*xf32>)
 func @print_memref_f32(memref<*xf32>)
diff --git a/mlir/test/mlir-cuda-runner/all-reduce-xor.mlir b/mlir/test/mlir-cuda-runner/all-reduce-xor.mlir
index 67461783b2570b..88cd9036998f69 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-xor.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-xor.mlir
@@ -25,9 +25,9 @@ func @main() {
   %c6 = constant 6 : index
 
   %cast_data = memref_cast %data : memref<2x6xi32> to memref<*xi32>
-  call @mgpuMemHostRegisterInt32(%cast_data) : (memref<*xi32>) -> ()
+  gpu.host_register %cast_data : memref<*xi32>
   %cast_sum = memref_cast %sum : memref<2xi32> to memref<*xi32>
-  call @mgpuMemHostRegisterInt32(%cast_sum) : (memref<*xi32>) -> ()
+  gpu.host_register %cast_sum : memref<*xi32>
 
   store %cst0, %data[%c0, %c0] : memref<2x6xi32>
   store %cst1, %data[%c0, %c1] : memref<2x6xi32>
@@ -58,6 +58,5 @@ func @main() {
   return
 }
 
-func @mgpuMemHostRegisterInt32(%ptr : memref<*xi32>)
 func @print_memref_i32(memref<*xi32>)
 
diff --git a/mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir b/mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir
index 80339c36fb384f..fb458eb375d099 100644
--- a/mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir
+++ b/mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir
@@ -18,7 +18,7 @@ func @main() {
   %21 = constant 5 : i32
   %22 = memref_cast %arg0 : memref<5xf32> to memref<?xf32>
   %23 = memref_cast %22 : memref<?xf32> to memref<*xf32>
-  call @mgpuMemHostRegisterFloat(%23) : (memref<*xf32>) -> ()
+  gpu.host_register %23 : memref<*xf32>
   call @print_memref_f32(%23) : (memref<*xf32>) -> ()
   %24 = constant 1.0 : f32
   call @other_func(%24, %22) : (f32, memref<?xf32>) -> ()
@@ -26,5 +26,4 @@ func @main() {
   return
 }
 
-func @mgpuMemHostRegisterFloat(%ptr : memref<*xf32>)
 func @print_memref_f32(%ptr : memref<*xf32>)
diff --git a/mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir b/mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir
index b88d8e1b8ba1a9..f3fb3b219ce0c7 100644
--- a/mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir
+++ b/mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir
@@ -26,11 +26,11 @@ func @main() {
   %c6 = constant 6 : index
 
   %cast_data = memref_cast %data : memref<2x6xf32> to memref<*xf32>
-  call @mgpuMemHostRegisterFloat(%cast_data) : (memref<*xf32>) -> ()
+  gpu.host_register %cast_data : memref<*xf32>
   %cast_sum = memref_cast %sum : memref<2xf32> to memref<*xf32>
-  call @mgpuMemHostRegisterFloat(%cast_sum) : (memref<*xf32>) -> ()
+  gpu.host_register %cast_sum : memref<*xf32>
   %cast_mul = memref_cast %mul : memref<2xf32> to memref<*xf32>
-  call @mgpuMemHostRegisterFloat(%cast_mul) : (memref<*xf32>) -> ()
+  gpu.host_register %cast_mul : memref<*xf32>
 
   store %cst0, %data[%c0, %c0] : memref<2x6xf32>
   store %cst1, %data[%c0, %c1] : memref<2x6xf32>
@@ -66,5 +66,4 @@ func @main() {
   return
 }
 
-func @mgpuMemHostRegisterFloat(%ptr : memref<*xf32>)
 func @print_memref_f32(memref<*xf32>)
diff --git a/mlir/test/mlir-cuda-runner/shuffle.mlir b/mlir/test/mlir-cuda-runner/shuffle.mlir
index a4563cc0c381d5..9846455142d674 100644
--- a/mlir/test/mlir-cuda-runner/shuffle.mlir
+++ b/mlir/test/mlir-cuda-runner/shuffle.mlir
@@ -7,8 +7,8 @@ func @main() {
   %one = constant 1 : index
   %c0 = constant 0 : index
   %sx = dim %dst, %c0 : memref<?xf32>
-  %cast_dest = memref_cast %dst : memref<?xf32> to memref<*xf32>
-  call @mgpuMemHostRegisterFloat(%cast_dest) : (memref<*xf32>) -> ()
+  %cast_dst = memref_cast %dst : memref<?xf32> to memref<*xf32>
+  gpu.host_register %cast_dst : memref<*xf32>
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
              threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
     %t0 = index_cast %tx : index to i32
@@ -24,9 +24,8 @@ func @main() {
     store %value, %dst[%tx] : memref<?xf32>
     gpu.terminator
   }
-  call @print_memref_f32(%cast_dest) : (memref<*xf32>) -> ()
+  call @print_memref_f32(%cast_dst) : (memref<*xf32>) -> ()
   return
 }
 
-func @mgpuMemHostRegisterFloat(%ptr : memref<*xf32>)
 func @print_memref_f32(%ptr : memref<*xf32>)
diff --git a/mlir/test/mlir-cuda-runner/two-modules.mlir b/mlir/test/mlir-cuda-runner/two-modules.mlir
index 9bdda2ae9c667b..2ea58ae55b5e79 100644
--- a/mlir/test/mlir-cuda-runner/two-modules.mlir
+++ b/mlir/test/mlir-cuda-runner/two-modules.mlir
@@ -8,7 +8,7 @@ func @main() {
   %c0 = constant 0 : index
   %sx = dim %dst, %c0 : memref<?xi32>
   %cast_dst = memref_cast %dst : memref<?xi32> to memref<*xi32>
-  call @mgpuMemHostRegisterInt32(%cast_dst) : (memref<*xi32>) -> ()
+  gpu.host_register %cast_dst : memref<*xi32>
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
              threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
     %t0 = index_cast %tx : index to i32
@@ -25,5 +25,4 @@ func @main() {
   return
 }
 
-func @mgpuMemHostRegisterInt32(%memref : memref<*xi32>)
 func @print_memref_i32(%memref : memref<*xi32>)
diff --git a/mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir b/mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir
index 4b1137468b149c..68e31fede8dc37 100644
--- a/mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir
+++ b/mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir
@@ -18,7 +18,7 @@ func @main() {
   %21 = constant 5 : i32
   %22 = memref_cast %arg0 : memref<5xf32> to memref<?xf32>
   %cast = memref_cast %22 : memref<?xf32> to memref<*xf32>
-  call @mgpuMemHostRegisterFloat(%cast) : (memref<*xf32>) -> ()
+  gpu.host_register %cast : memref<*xf32>
   %23 = memref_cast %22 : memref<?xf32> to memref<*xf32>
   call @print_memref_f32(%23) : (memref<*xf32>) -> ()
   %24 = constant 1.0 : f32
@@ -28,6 +28,5 @@ func @main() {
   return
 }
 
-func @mgpuMemHostRegisterFloat(%ptr : memref<*xf32>)
 func @mgpuMemGetDeviceMemRef1dFloat(%ptr : memref<?xf32>) -> (memref<?xf32>)
 func @print_memref_f32(%ptr : memref<*xf32>)
diff --git a/mlir/test/mlir-rocm-runner/two-modules.mlir b/mlir/test/mlir-rocm-runner/two-modules.mlir
index d6b92229b58569..f196c8e8fefe57 100644
--- a/mlir/test/mlir-rocm-runner/two-modules.mlir
+++ b/mlir/test/mlir-rocm-runner/two-modules.mlir
@@ -8,7 +8,7 @@ func @main() {
   %c1 = constant 1 : index
   %sx = dim %dst, %c0 : memref<?xi32>
   %cast_dst = memref_cast %dst : memref<?xi32> to memref<*xi32>
-  call @mgpuMemHostRegisterInt32(%cast_dst) : (memref<*xi32>) -> ()
+  gpu.host_register %cast_dst : memref<*xi32>
   %dst_device = call @mgpuMemGetDeviceMemRef1dInt32(%dst) : (memref<?xi32>) -> (memref<?xi32>)
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
              threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %c1, %block_z = %c1) {
@@ -26,6 +26,5 @@ func @main() {
   return
 }
 
-func @mgpuMemHostRegisterInt32(%ptr : memref<*xi32>)
 func @mgpuMemGetDeviceMemRef1dInt32(%ptr : memref<?xi32>) -> (memref<?xi32>)
 func @print_memref_i32(%ptr : memref<*xi32>)
diff --git a/mlir/test/mlir-rocm-runner/vecadd.mlir b/mlir/test/mlir-rocm-runner/vecadd.mlir
index a86412ff8fef5f..df5c073f9b8114 100644
--- a/mlir/test/mlir-rocm-runner/vecadd.mlir
+++ b/mlir/test/mlir-rocm-runner/vecadd.mlir
@@ -26,9 +26,9 @@ func @main() {
   %6 = memref_cast %3 : memref<?xf32> to memref<*xf32>
   %7 = memref_cast %4 : memref<?xf32> to memref<*xf32>
   %8 = memref_cast %5 : memref<?xf32> to memref<*xf32>
-  call @mgpuMemHostRegisterFloat(%6) : (memref<*xf32>) -> ()
-  call @mgpuMemHostRegisterFloat(%7) : (memref<*xf32>) -> ()
-  call @mgpuMemHostRegisterFloat(%8) : (memref<*xf32>) -> ()
+  gpu.host_register %6 : memref<*xf32>
+  gpu.host_register %7 : memref<*xf32>
+  gpu.host_register %8 : memref<*xf32>
   %9 = call @mgpuMemGetDeviceMemRef1dFloat(%3) : (memref<?xf32>) -> (memref<?xf32>)
   %10 = call @mgpuMemGetDeviceMemRef1dFloat(%4) : (memref<?xf32>) -> (memref<?xf32>)
   %11 = call @mgpuMemGetDeviceMemRef1dFloat(%5) : (memref<?xf32>) -> (memref<?xf32>)
@@ -38,6 +38,5 @@ func @main() {
   return
 }
 
-func @mgpuMemHostRegisterFloat(%ptr : memref<*xf32>)
 func @mgpuMemGetDeviceMemRef1dFloat(%ptr : memref<?xf32>) -> (memref<?xf32>)
 func @print_memref_f32(%ptr : memref<*xf32>)
diff --git a/mlir/test/mlir-rocm-runner/vector-transferops.mlir b/mlir/test/mlir-rocm-runner/vector-transferops.mlir
index b028f91f839450..873897011464b0 100644
--- a/mlir/test/mlir-rocm-runner/vector-transferops.mlir
+++ b/mlir/test/mlir-rocm-runner/vector-transferops.mlir
@@ -55,8 +55,8 @@ func @main() {
   %cast0 = memref_cast %22 : memref<?xf32> to memref<*xf32>
   %cast1 = memref_cast %23 : memref<?xf32> to memref<*xf32>
 
-  call @mgpuMemHostRegisterFloat(%cast0) : (memref<*xf32>) -> ()
-  call @mgpuMemHostRegisterFloat(%cast1) : (memref<*xf32>) -> ()
+  gpu.host_register %cast0 : memref<*xf32>
+  gpu.host_register %cast1 : memref<*xf32>
 
   %24 = call @mgpuMemGetDeviceMemRef1dFloat(%22) : (memref<?xf32>) -> (memref<?xf32>)
   %26 = call @mgpuMemGetDeviceMemRef1dFloat(%23) : (memref<?xf32>) -> (memref<?xf32>)
@@ -71,6 +71,5 @@ func @main() {
   return
 }
 
-func @mgpuMemHostRegisterFloat(%ptr : memref<*xf32>)
 func @mgpuMemGetDeviceMemRef1dFloat(%ptr : memref<?xf32>) -> (memref<?xf32>)
 func @print_memref_f32(%ptr : memref<*xf32>)
diff --git a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
index 8e2dc029fa9f06..517fc9fc18f54a 100644
--- a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
+++ b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
@@ -75,17 +75,19 @@ extern "C" void mgpuMemHostRegister(void *ptr, uint64_t sizeBytes) {
   CUDA_REPORT_IF_ERROR(cuMemHostRegister(ptr, sizeBytes, /*flags=*/0));
 }
 
-// Allows to register a MemRef with the CUDA runtime. Initializes array with
-// value. Helpful until we have transfer functions implemented.
-template <typename T>
-void mgpuMemHostRegisterMemRef(const DynamicMemRefType<T> &memRef, T value) {
-  llvm::SmallVector<int64_t, 4> denseStrides(memRef.rank);
-  llvm::ArrayRef<int64_t> sizes(memRef.sizes, memRef.rank);
-  llvm::ArrayRef<int64_t> strides(memRef.strides, memRef.rank);
+// Allows to register a MemRef with the CUDA runtime. Helpful until we have
+// transfer functions implemented.
+extern "C" void
+mgpuMemHostRegisterMemRef(int64_t rank, StridedMemRefType<char, 1> *descriptor,
+                          int64_t elementSizeBytes) {
+
+  llvm::SmallVector<int64_t, 4> denseStrides(rank);
+  llvm::ArrayRef<int64_t> sizes(descriptor->sizes, rank);
+  llvm::ArrayRef<int64_t> strides(sizes.end(), rank);
 
   std::partial_sum(sizes.rbegin(), sizes.rend(), denseStrides.rbegin(),
                    std::multiplies<int64_t>());
-  auto count = denseStrides.front();
+  auto sizeBytes = denseStrides.front() * elementSizeBytes;
 
   // Only densely packed tensors are currently supported.
   std::rotate(denseStrides.begin(), denseStrides.begin() + 1,
@@ -93,17 +95,6 @@ void mgpuMemHostRegisterMemRef(const DynamicMemRefType<T> &memRef, T value) {
   denseStrides.back() = 1;
   assert(strides == llvm::makeArrayRef(denseStrides));
 
-  auto *pointer = memRef.data + memRef.offset;
-  std::fill_n(pointer, count, value);
-  mgpuMemHostRegister(pointer, count * sizeof(T));
-}
-
-extern "C" void mgpuMemHostRegisterFloat(int64_t rank, void *ptr) {
-  UnrankedMemRefType<float> memRef = {rank, ptr};
-  mgpuMemHostRegisterMemRef(DynamicMemRefType<float>(memRef), 1.23f);
-}
-
-extern "C" void mgpuMemHostRegisterInt32(int64_t rank, void *ptr) {
-  UnrankedMemRefType<int32_t> memRef = {rank, ptr};
-  mgpuMemHostRegisterMemRef(DynamicMemRefType<int32_t>(memRef), 123);
+  auto ptr = descriptor->data + descriptor->offset * elementSizeBytes;
+  mgpuMemHostRegister(ptr, sizeBytes);
 }
diff --git a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp b/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp
index a64815007661e6..9184c9fa20fa2b 100644
--- a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp
+++ b/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp
@@ -76,17 +76,19 @@ extern "C" void mgpuMemHostRegister(void *ptr, uint64_t sizeBytes) {
   HIP_REPORT_IF_ERROR(hipHostRegister(ptr, sizeBytes, /*flags=*/0));
 }
 
-// Allows to register a MemRef with the ROCM runtime. Initializes array with
-// value. Helpful until we have transfer functions implemented.
-template <typename T>
-void mgpuMemHostRegisterMemRef(T *pointer, llvm::ArrayRef<int64_t> sizes,
-                               llvm::ArrayRef<int64_t> strides, T value) {
-  assert(sizes.size() == strides.size());
-  llvm::SmallVector<int64_t, 4> denseStrides(strides.size());
+// Allows to register a MemRef with the ROCm runtime. Helpful until we have
+// transfer functions implemented.
+extern "C" void
+mgpuMemHostRegisterMemRef(int64_t rank, StridedMemRefType<char, 1> *descriptor,
+                          int64_t elementSizeBytes) {
+
+  llvm::SmallVector<int64_t, 4> denseStrides(rank);
+  llvm::ArrayRef<int64_t> sizes(descriptor->sizes, rank);
+  llvm::ArrayRef<int64_t> strides(sizes.end(), rank);
 
   std::partial_sum(sizes.rbegin(), sizes.rend(), denseStrides.rbegin(),
                    std::multiplies<int64_t>());
-  auto count = denseStrides.front();
+  auto sizeBytes = denseStrides.front() * elementSizeBytes;
 
   // Only densely packed tensors are currently supported.
   std::rotate(denseStrides.begin(), denseStrides.begin() + 1,
@@ -94,22 +96,8 @@ void mgpuMemHostRegisterMemRef(T *pointer, llvm::ArrayRef<int64_t> sizes,
   denseStrides.back() = 1;
   assert(strides == llvm::makeArrayRef(denseStrides));
 
-  std::fill_n(pointer, count, value);
-  mgpuMemHostRegister(pointer, count * sizeof(T));
-}
-
-extern "C" void mgpuMemHostRegisterFloat(int64_t rank, void *ptr) {
-  auto *desc = static_cast<StridedMemRefType<float, 1> *>(ptr);
-  auto sizes = llvm::ArrayRef<int64_t>(desc->sizes, rank);
-  auto strides = llvm::ArrayRef<int64_t>(desc->sizes + rank, rank);
-  mgpuMemHostRegisterMemRef(desc->data + desc->offset, sizes, strides, 1.23f);
-}
-
-extern "C" void mgpuMemHostRegisterInt32(int64_t rank, void *ptr) {
-  auto *desc = static_cast<StridedMemRefType<int32_t, 1> *>(ptr);
-  auto sizes = llvm::ArrayRef<int64_t>(desc->sizes, rank);
-  auto strides = llvm::ArrayRef<int64_t>(desc->sizes + rank, rank);
-  mgpuMemHostRegisterMemRef(desc->data + desc->offset, sizes, strides, 123);
+  auto ptr = descriptor->data + descriptor->offset * elementSizeBytes;
+  mgpuMemHostRegister(ptr, sizeBytes);
 }
 
 template <typename T>

From 62223ff1376b540dc9612239fdfb11b376d796d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= <david.bolvansky@gmail.com>
Date: Mon, 10 Aug 2020 23:04:20 +0200
Subject: [PATCH 6/9] [Diagnostics] Avoid false positives with
 -Wstring-concatenation

---
 clang/lib/Sema/SemaExpr.cpp     | 10 ++++++----
 clang/test/Sema/string-concat.c | 15 ++++++++++++++-
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 23b2fbd5cbbf2b..8f38238401fcf0 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6908,13 +6908,15 @@ Sema::ActOnInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList,
         << InitArgList[I]->getSourceRange();
     } else if (const auto *SL = dyn_cast<StringLiteral>(InitArgList[I])) {
       unsigned NumConcat = SL->getNumConcatenated();
-      const auto *SLNext =
-          dyn_cast<StringLiteral>(InitArgList[I + 1 < E ? I + 1 : 0]);
+      const auto *SLPrev =
+          dyn_cast<StringLiteral>(InitArgList[I == 0 ? E - 1 : I - 1]);
       // Diagnose missing comma in string array initialization.
       // Do not warn when all the elements in the initializer are concatenated
       // together. Do not warn for macros too.
-      if (NumConcat > 1 && E > 2 && !SL->getBeginLoc().isMacroID() && SLNext &&
-          NumConcat != SLNext->getNumConcatenated()) {
+      if (NumConcat > 1 && E > 2 && !SL->getBeginLoc().isMacroID() &&
+          SL->getString().find(" ") == llvm::StringRef::npos &&
+          isa<StringLiteral>(InitArgList[0]) && SLPrev &&
+          NumConcat != SLPrev->getNumConcatenated()) {
         SmallVector<FixItHint, 1> Hints;
         for (unsigned i = 0; i < NumConcat - 1; ++i)
           Hints.push_back(FixItHint::CreateInsertion(
diff --git a/clang/test/Sema/string-concat.c b/clang/test/Sema/string-concat.c
index 13e9656d2536f3..09de0a9bffbe28 100644
--- a/clang/test/Sema/string-concat.c
+++ b/clang/test/Sema/string-concat.c
@@ -108,7 +108,20 @@ const char *not_warn2[] = {
     "// Aaa\\\n"   " Bbb\\ \n"   " Ccc?" "?/\n",
     "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n",
     "// Aaa\\\r"   " Bbb\\ \r"   " Ccc?" "?/\r"
-  };
+};
+
+const char *not_warn3[] = {
+    "// \\param [in,out] aaa Bbb\n",
+    "// \\param[in,out] aaa Bbb\n",
+    "// \\param [in, out] aaa Bbb\n",
+    "// \\param [in,\n"
+    "//     out] aaa Bbb\n",
+    "// \\param [in,out]\n"
+    "//     aaa Bbb\n",
+    "// \\param [in,out] aaa\n"
+    "// Bbb\n"
+};
+
 
 // Do not warn when all the elements in the initializer are concatenated together.
 const char *all_elems_in_init_concatenated[] = {"a" "b" "c"};

From 9c8ae40860311e94de0a898101818f706228e958 Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra@google.com>
Date: Fri, 7 Aug 2020 17:18:38 -0700
Subject: [PATCH 7/9] [ARM] Speed up arm-cortex-cpus.c test

Trailing wildcard regex searches greedily continue searching through the whole
input and make the test unnecessarily slow.

Using equivalent plain text partial match speeds up the test execution time from
~35s to ~12s.

Differential Revision: https://reviews.llvm.org/D85575
---
 clang/test/Driver/arm-cortex-cpus.c | 52 ++++++++++++++---------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/clang/test/Driver/arm-cortex-cpus.c b/clang/test/Driver/arm-cortex-cpus.c
index 6de1040e942045..4481ba58fa64cf 100644
--- a/clang/test/Driver/arm-cortex-cpus.c
+++ b/clang/test/Driver/arm-cortex-cpus.c
@@ -510,19 +510,19 @@
 // ================== Check default Architecture on each ARM11 CPU
 // RUN: %clang -target arm-linux-gnueabi -mcpu=arm1136j-s -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV6 %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=arm1136jf-s -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV6 %s
-// CHECK-CPUV6: "-cc1"{{.*}} "-triple" "armv6-{{.*}}
+// CHECK-CPUV6: "-cc1"{{.*}} "-triple" "armv6-
 
 // RUN: %clang -target arm-linux-gnueabi -mcpu=mpcore -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV6K %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=mpcorenovfp -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV6K %s
-// CHECK-CPUV6K: "-cc1"{{.*}} "-triple" "armv6k-{{.*}}
+// CHECK-CPUV6K: "-cc1"{{.*}} "-triple" "armv6k-
 
 // RUN: %clang -target arm-linux-gnueabi -mcpu=arm1176jz-s -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV6KZ %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=arm1176jzf-s -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV6KZ %s
-// CHECK-CPUV6KZ: "-cc1"{{.*}} "-triple" "armv6kz-{{.*}}
+// CHECK-CPUV6KZ: "-cc1"{{.*}} "-triple" "armv6kz-
 
 // RUN: %clang -target arm-linux-gnueabi -mcpu=arm1156t2-s -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV6T2 %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=arm1156t2f-s -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV6T2 %s
-// CHECK-CPUV6T2: "-cc1"{{.*}} "-triple" "armv6t2-{{.*}}
+// CHECK-CPUV6T2: "-cc1"{{.*}} "-triple" "armv6t2-
 
 // ================== Check default Architecture on each Cortex CPU
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-a5 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7A %s
@@ -539,7 +539,7 @@
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-a12 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7A %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-a15 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7A %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-a17 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7A %s
-// CHECK-CPUV7A: "-cc1"{{.*}} "-triple" "armv7-{{.*}}
+// CHECK-CPUV7A: "-cc1"{{.*}} "-triple" "armv7-
 
 // RUN: %clang -target armeb-linux-gnueabi -mcpu=cortex-a5 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7A %s
 // RUN: %clang -target armeb-linux-gnueabi -mcpu=cortex-a7 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7A %s
@@ -555,7 +555,7 @@
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-a12 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7A %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-a15 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7A %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-a17 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7A %s
-// CHECK-BE-CPUV7A: "-cc1"{{.*}} "-triple" "armebv7-{{.*}}
+// CHECK-BE-CPUV7A: "-cc1"{{.*}} "-triple" "armebv7-
 
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-a5 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7A-THUMB %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-a7 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7A-THUMB %s
@@ -571,7 +571,7 @@
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-a12 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7A-THUMB %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-a15 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7A-THUMB %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-a17 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7A-THUMB %s
-// CHECK-CPUV7A-THUMB: "-cc1"{{.*}} "-triple" "thumbv7-{{.*}}
+// CHECK-CPUV7A-THUMB: "-cc1"{{.*}} "-triple" "thumbv7-
 
 // RUN: %clang -target armeb-linux-gnueabi -mcpu=cortex-a5 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7A-THUMB %s
 // RUN: %clang -target armeb-linux-gnueabi -mcpu=cortex-a7 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7A-THUMB %s
@@ -587,19 +587,19 @@
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-a12 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7A-THUMB %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-a15 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7A-THUMB %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-a17 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7A-THUMB %s
-// CHECK-BE-CPUV7A-THUMB: "-cc1"{{.*}} "-triple" "thumbebv7-{{.*}}
+// CHECK-BE-CPUV7A-THUMB: "-cc1"{{.*}} "-triple" "thumbebv7-
 
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-m0 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV6M %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-m0plus -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV6M %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-m1 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV6M %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=sc000 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV6M %s
-// CHECK-CPUV6M: "-cc1"{{.*}} "-triple" "thumbv6m-{{.*}}
+// CHECK-CPUV6M: "-cc1"{{.*}} "-triple" "thumbv6m-
 
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-m3 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7M %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-m3 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7M %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=sc300 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7M %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=sc300 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7M %s
-// CHECK-CPUV7M: "-cc1"{{.*}} "-triple" "thumbv7m-{{.*}}
+// CHECK-CPUV7M: "-cc1"{{.*}} "-triple" "thumbv7m-
 
 // RUN: %clang -target armeb-linux-gnueabi -mcpu=cortex-m3 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7M %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-m3 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7M %s
@@ -609,13 +609,13 @@
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-m7 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7EM %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-m4 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7EM %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-m7 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7EM %s
-// CHECK-CPUV7EM: "-cc1"{{.*}} "-triple" "thumbv7em-{{.*}}
+// CHECK-CPUV7EM: "-cc1"{{.*}} "-triple" "thumbv7em-
 
 // RUN: %clang -target armeb-linux-gnueabi -mcpu=cortex-m4 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7EM %s
 // RUN: %clang -target armeb-linux-gnueabi -mcpu=cortex-m7 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7EM %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-m4 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7EM %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-m7 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7EM %s
-// CHECK-BE-CPUV7EM: "-cc1"{{.*}} "-triple" "thumbebv7em-{{.*}}
+// CHECK-BE-CPUV7EM: "-cc1"{{.*}} "-triple" "thumbebv7em-
 
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r4 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7R %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r4f -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7R %s
@@ -627,7 +627,7 @@
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r5 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7R %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r7 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7R %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r8 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7R %s
-// CHECK-CPUV7R: "-cc1"{{.*}} "-triple" "armv7r-{{.*}}
+// CHECK-CPUV7R: "-cc1"{{.*}} "-triple" "armv7r-
 
 // RUN: %clang -target armeb-linux-gnueabi -mcpu=cortex-r4 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7R %s
 // RUN: %clang -target armeb-linux-gnueabi -mcpu=cortex-r4f -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7R %s
@@ -639,7 +639,7 @@
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r5 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7R %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r7 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7R %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r8 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7R %s
-// CHECK-BE-CPUV7R: "-cc1"{{.*}} "-triple" "armebv7r-{{.*}}
+// CHECK-BE-CPUV7R: "-cc1"{{.*}} "-triple" "armebv7r-
 
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r4 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7R-THUMB %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r4f -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7R-THUMB %s
@@ -651,7 +651,7 @@
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r5 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7R-THUMB %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r7 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7R-THUMB %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r8 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV7R-THUMB %s
-// CHECK-CPUV7R-THUMB: "-cc1"{{.*}} "-triple" "thumbv7r-{{.*}}
+// CHECK-CPUV7R-THUMB: "-cc1"{{.*}} "-triple" "thumbv7r-
 
 // RUN: %clang -target armeb-linux-gnueabi -mcpu=cortex-r4 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7R-THUMB %s
 // RUN: %clang -target armeb-linux-gnueabi -mcpu=cortex-r4f -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7R-THUMB %s
@@ -663,7 +663,7 @@
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r5 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7R-THUMB %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r7 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7R-THUMB %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r8 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV7R-THUMB %s
-// CHECK-BE-CPUV7R-THUMB: "-cc1"{{.*}} "-triple" "thumbebv7r-{{.*}}
+// CHECK-BE-CPUV7R-THUMB: "-cc1"{{.*}} "-triple" "thumbebv7r-
 
 // RUN: %clang -target arm -mcpu=cortex-a32 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A %s
 // RUN: %clang -target arm -mcpu=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A %s
@@ -680,7 +680,7 @@
 //
 // RUN: %clang -target arm -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A %s
 // RUN: %clang -target arm -mcpu=exynos-m3 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A %s
-// CHECK-CPUV8A: "-cc1"{{.*}} "-triple" "armv8-{{.*}}
+// CHECK-CPUV8A: "-cc1"{{.*}} "-triple" "armv8-
 
 // RUN: %clang -target arm -mcpu=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s
 // RUN: %clang -target arm -mcpu=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s
@@ -697,7 +697,7 @@
 // RUN: %clang -target arm -mcpu=exynos-m4 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s
 // RUN: %clang -target arm -mcpu=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s
 // RUN: %clang -target arm -mcpu=exynos-m5 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s
-// CHECK-CPUV82A: "-cc1"{{.*}} "-triple" "armv8.2a-{{.*}}
+// CHECK-CPUV82A: "-cc1"{{.*}} "-triple" "armv8.2a-
 
 // RUN: %clang -target armeb -mcpu=cortex-a32 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A %s
 // RUN: %clang -target armeb -mcpu=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A %s
@@ -714,7 +714,7 @@
 //
 // RUN: %clang -target armeb -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A %s
 // RUN: %clang -target arm -mcpu=exynos-m3 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A %s
-// CHECK-BE-CPUV8A: "-cc1"{{.*}} "-triple" "armebv8-{{.*}}
+// CHECK-BE-CPUV8A: "-cc1"{{.*}} "-triple" "armebv8-
 
 // RUN: %clang -target armeb -mcpu=cortex-a55 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A %s
 // RUN: %clang -target armeb -mcpu=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A %s
@@ -731,10 +731,10 @@
 // RUN: %clang -target arm -mcpu=exynos-m4 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A %s
 // RUN: %clang -target armeb -mcpu=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A %s
 // RUN: %clang -target arm -mcpu=exynos-m5 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A %s
-// CHECK-BE-CPUV82A: "-cc1"{{.*}} "-triple" "armebv8.2a-{{.*}}
+// CHECK-BE-CPUV82A: "-cc1"{{.*}} "-triple" "armebv8.2a-
 
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r52 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8R %s
-// CHECK-CPUV8R: "-cc1"{{.*}} "-triple" "armv8r-{{.*}}
+// CHECK-CPUV8R: "-cc1"{{.*}} "-triple" "armv8r-
 
 // RUN: %clang -target arm -mcpu=cortex-a32 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A-THUMB %s
 // RUN: %clang -target arm -mcpu=cortex-a35 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A-THUMB %s
@@ -751,7 +751,7 @@
 //
 // RUN: %clang -target arm -mcpu=exynos-m3 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A-THUMB %s
 // RUN: %clang -target arm -mcpu=exynos-m3 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A-THUMB %s
-// CHECK-CPUV8A-THUMB: "-cc1"{{.*}} "-triple" "thumbv8-{{.*}}
+// CHECK-CPUV8A-THUMB: "-cc1"{{.*}} "-triple" "thumbv8-
 
 // RUN: %clang -target arm -mcpu=cortex-a55 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A-THUMB %s
 // RUN: %clang -target arm -mcpu=cortex-a75 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A-THUMB %s
@@ -768,7 +768,7 @@
 // RUN: %clang -target arm -mcpu=exynos-m4 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A-THUMB %s
 // RUN: %clang -target arm -mcpu=exynos-m5 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A-THUMB %s
 // RUN: %clang -target arm -mcpu=exynos-m5 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A-THUMB %s
-// CHECK-CPUV82A-THUMB: "-cc1"{{.*}} "-triple" "thumbv8.2a-{{.*}}
+// CHECK-CPUV82A-THUMB: "-cc1"{{.*}} "-triple" "thumbv8.2a-
 
 // RUN: %clang -target armeb -mcpu=cortex-a32 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A-THUMB %s
 // RUN: %clang -target armeb -mcpu=cortex-a35 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A-THUMB %s
@@ -785,7 +785,7 @@
 //
 // RUN: %clang -target armeb -mcpu=exynos-m3 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A-THUMB %s
 // RUN: %clang -target arm -mcpu=exynos-m3 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A-THUMB %s
-// CHECK-BE-CPUV8A-THUMB: "-cc1"{{.*}} "-triple" "thumbebv8-{{.*}}
+// CHECK-BE-CPUV8A-THUMB: "-cc1"{{.*}} "-triple" "thumbebv8-
 
 // RUN: %clang -target armeb -mcpu=cortex-a55 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A-THUMB %s
 // RUN: %clang -target armeb -mcpu=cortex-a75 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A-THUMB %s
@@ -802,7 +802,7 @@
 // RUN: %clang -target arm -mcpu=exynos-m4 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A-THUMB %s
 // RUN: %clang -target armeb -mcpu=exynos-m5 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A-THUMB %s
 // RUN: %clang -target arm -mcpu=exynos-m5 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A-THUMB %s
-// CHECK-BE-CPUV82A-THUMB: "-cc1"{{.*}} "-triple" "thumbebv8.2a-{{.*}}
+// CHECK-BE-CPUV82A-THUMB: "-cc1"{{.*}} "-triple" "thumbebv8.2a-
 
 // RUN: %clang -target armv8a-arm-none-eabi -mcpu=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-A73 %s
 // RUN: %clang -target armv8a-arm-none-eabi -mcpu=cortex-a73 -mfpu=crypto-neon-fp-armv8 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-A73-MFPU %s
@@ -871,7 +871,7 @@
 // RUN: %clang -target arm-linux-gnueabi -mcpu=corteX-A12 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CASE-INSENSITIVE-CPUV7A %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=CorteX-a15 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CASE-INSENSITIVE-CPUV7A %s
 // RUN: %clang -target arm-linux-gnueabi -mcpu=CorteX-A17 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CASE-INSENSITIVE-CPUV7A %s
-// CHECK-CASE-INSENSITIVE-CPUV7A: "-cc1"{{.*}} "-triple" "armv7-{{.*}}
+// CHECK-CASE-INSENSITIVE-CPUV7A: "-cc1"{{.*}} "-triple" "armv7-
 
 // ================== Check whether -march accepts mixed-case values.
 // RUN: %clang -target arm -march=Armv5 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CASE-INSENSITIVE-V5 %s

From b2c9b631bb48087c988d798adc5465499b155a9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= <david.bolvansky@gmail.com>
Date: Mon, 10 Aug 2020 23:35:55 +0200
Subject: [PATCH 8/9] [Diagnostics] Move -Wstring-concatenation to -Wextra

---
 clang/include/clang/Basic/DiagnosticGroups.td   |  4 +++-
 .../include/clang/Basic/DiagnosticSemaKinds.td  |  2 +-
 clang/lib/Sema/SemaExpr.cpp                     |  1 -
 clang/test/Sema/string-concat.c                 | 17 ++---------------
 4 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 5ddd37e9972afe..2b13f9eca12d6f 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -547,6 +547,7 @@ def StaticLocalInInline : DiagGroup<"static-local-in-inline">;
 def GNUStaticFloatInit : DiagGroup<"gnu-static-float-init">;
 def StaticFloatInit : DiagGroup<"static-float-init", [GNUStaticFloatInit]>;
 def GNUStatementExpression : DiagGroup<"gnu-statement-expression">;
+def StringConcatation : DiagGroup<"string-concatenation">;
 def StringCompare : DiagGroup<"string-compare">;
 def StringPlusInt : DiagGroup<"string-plus-int">;
 def StringPlusChar : DiagGroup<"string-plus-char">;
@@ -880,7 +881,8 @@ def Extra : DiagGroup<"extra", [
     SignCompare,
     UnusedParameter,
     NullPointerArithmetic,
-    EmptyInitStatement
+    EmptyInitStatement,
+    StringConcatation
   ]>;
 
 def Most : DiagGroup<"most", [
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index d0bddd80b8fe56..f2e939da3050a6 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3005,7 +3005,7 @@ def warn_objc_string_literal_comparison : Warning<
 def warn_concatenated_literal_array_init : Warning<
   "suspicious concatenation of string literals in an array initialization; "
   "did you mean to separate the elements with a comma?">,
-  InGroup<DiagGroup<"string-concatenation">>;
+  InGroup<StringConcatation>, DefaultIgnore;
 def warn_concatenated_nsarray_literal : Warning<
   "concatenated NSString literal for an NSArray expression - "
   "possibly missing a comma">,
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 8f38238401fcf0..28ac1dfeb0823a 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6914,7 +6914,6 @@ Sema::ActOnInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList,
       // Do not warn when all the elements in the initializer are concatenated
       // together. Do not warn for macros too.
       if (NumConcat > 1 && E > 2 && !SL->getBeginLoc().isMacroID() &&
-          SL->getString().find(" ") == llvm::StringRef::npos &&
           isa<StringLiteral>(InitArgList[0]) && SLPrev &&
           NumConcat != SLPrev->getNumConcatenated()) {
         SmallVector<FixItHint, 1> Hints;
diff --git a/clang/test/Sema/string-concat.c b/clang/test/Sema/string-concat.c
index 09de0a9bffbe28..4e5ed4424e7c17 100644
--- a/clang/test/Sema/string-concat.c
+++ b/clang/test/Sema/string-concat.c
@@ -1,6 +1,6 @@
 
-// RUN: %clang_cc1 -x c -fsyntax-only -verify %s
-// RUN: %clang_cc1 -x c++ -fsyntax-only -verify %s
+// RUN: %clang_cc1 -x c -Wstring-concatenation -fsyntax-only -verify %s
+// RUN: %clang_cc1 -x c++ -Wstring-concatenation -fsyntax-only -verify %s
 
 const char *missing_comma[] = {
     "basic_filebuf",
@@ -110,19 +110,6 @@ const char *not_warn2[] = {
     "// Aaa\\\r"   " Bbb\\ \r"   " Ccc?" "?/\r"
 };
 
-const char *not_warn3[] = {
-    "// \\param [in,out] aaa Bbb\n",
-    "// \\param[in,out] aaa Bbb\n",
-    "// \\param [in, out] aaa Bbb\n",
-    "// \\param [in,\n"
-    "//     out] aaa Bbb\n",
-    "// \\param [in,out]\n"
-    "//     aaa Bbb\n",
-    "// \\param [in,out] aaa\n"
-    "// Bbb\n"
-};
-
-
 // Do not warn when all the elements in the initializer are concatenated together.
 const char *all_elems_in_init_concatenated[] = {"a" "b" "c"};
 

From aae349e2760e5e5986b34ca5ca9f2672377c41bf Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Mon, 10 Aug 2020 13:55:45 -0700
Subject: [PATCH 9/9] [InstSimplify][test] Remove unused parameter in vscale.ll

Reviewed By: huihuiz

Differential Revision: https://reviews.llvm.org/D85688
---
 llvm/test/Transforms/InstSimplify/vscale.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/InstSimplify/vscale.ll b/llvm/test/Transforms/InstSimplify/vscale.ll
index d396f02891969b..a96b943ec4efc4 100644
--- a/llvm/test/Transforms/InstSimplify/vscale.ll
+++ b/llvm/test/Transforms/InstSimplify/vscale.ll
@@ -86,7 +86,7 @@ define i32 @extractelement_idx_large_bound(<vscale x 4 x i32> %a) {
   ret i32 %r
 }
 
-define i32 @insert_extract_element_same_vec_idx_2(<vscale x 4 x i32> %a) {
+define i32 @insert_extract_element_same_vec_idx_2() {
 ; CHECK-LABEL: @insert_extract_element_same_vec_idx_2(
 ; CHECK-NEXT:    ret i32 1
 ;