From 57ffbe020af6469b7c2fdb599f2f7e5e5d0322f0 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 22 Dec 2020 11:00:57 -0500 Subject: [PATCH 01/10] glld/mac] Don't add names of unreferenced symbols to string table Before this, a hello world program would contain many many unnecessary entries in its string table. No behavior change, just makes the string table in the output smaller and more like ld64's. Differential Revision: https://reviews.llvm.org/D93711 --- lld/MachO/SyntheticSections.cpp | 22 +++++++++++----------- lld/test/MachO/symtab.s | 6 ++++-- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 8b2ebd36e1ae8e..2ed1f2eb34fbea 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -694,6 +694,11 @@ void SymtabSection::emitStabs() { } void SymtabSection::finalizeContents() { + auto addSymbol = [&](std::vector &symbols, Symbol *sym) { + uint32_t strx = stringTableSection.addString(sym->getName()); + symbols.push_back({sym, strx}); + }; + // Local symbols aren't in the SymbolTable, so we walk the list of object // files to gather them. for (InputFile *file : inputFiles) { @@ -702,10 +707,8 @@ void SymtabSection::finalizeContents() { // TODO: when we implement -dead_strip, we should filter out symbols // that belong to dead sections. if (auto *defined = dyn_cast(sym)) { - if (!defined->isExternal()) { - uint32_t strx = stringTableSection.addString(sym->getName()); - localSymbols.push_back({sym, strx}); - } + if (!defined->isExternal()) + addSymbol(localSymbols, sym); } } } @@ -713,19 +716,16 @@ void SymtabSection::finalizeContents() { // __dyld_private is a local symbol too. It's linker-created and doesn't // exist in any object file. - if (Defined* dyldPrivate = in.stubHelper->dyldPrivate) { - uint32_t strx = stringTableSection.addString(dyldPrivate->getName()); - localSymbols.push_back({dyldPrivate, strx}); - } + if (Defined* dyldPrivate = in.stubHelper->dyldPrivate) + addSymbol(localSymbols, dyldPrivate); for (Symbol *sym : symtab->getSymbols()) { - uint32_t strx = stringTableSection.addString(sym->getName()); if (auto *defined = dyn_cast(sym)) { assert(defined->isExternal()); - externalSymbols.push_back({sym, strx}); + addSymbol(externalSymbols, sym); } else if (auto *dysym = dyn_cast(sym)) { if (dysym->isReferenced()) - undefinedSymbols.push_back({sym, strx}); + addSymbol(undefinedSymbols, sym); } } diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s index d18986c9d91c0d..fa784a34e16a00 100644 --- a/lld/test/MachO/symtab.s +++ b/lld/test/MachO/symtab.s @@ -86,17 +86,19 @@ # CHECK-NEXT: iundefsym: 5 # CHECK-NEXT: nundefsym: 2 -## Verify that the first entry in the StringTable is a space. +## Verify that the first entry in the StringTable is a space, and that +## unreferenced symbols aren't emitted. # RUN: obj2yaml %t/test | FileCheck %s --check-prefix=YAML # YAML: StringTable: # YAML-NEXT: ' ' +# YAML-NOT: _unreferenced #--- libfoo.s .globl _dynamic _dynamic: #--- test.s -.globl _main, _external, _external_weak +.globl _main, _external, _external_weak, _unreferenced .data _external: From 0d15d4b6f43a3355c1d618766c8e550cfe1481d0 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 22 Dec 2020 14:13:39 -0500 Subject: [PATCH 02/10] [SLP] use operand index abstraction for number of operands I think this is NFC currently, but the bug would be exposed when we allow binary intrinsics (maxnum, etc) as candidates for reductions. 
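As a loose illustration of why the two counts must match (a toy sketch, not the SLPVectorizer code; Node, numberOfOperands(), and markFullyVisited() below are hypothetical names): if the traversal's in-bounds check uses one notion of "number of operands" while the "mark as fully visited" path uses the raw Instruction::getNumOperands(), the sentinel and the loop bound can disagree once the counts differ (e.g. a call's callee operand, or an offset first-operand index).

    #include <cassert>
    #include <vector>

    // Toy stand-in for an instruction: a call such as maxnum has two value
    // operands plus the callee, so its raw operand count is 3.
    struct Node {
      std::vector<int> Operands;
    };

    // Hypothetical abstraction mirroring OperationData::getNumberOfOperands():
    // only the operands that participate in the reduction are counted.
    unsigned numberOfOperands(const Node &) { return 2; }

    // "Nothing left to visit": must use the same bound the traversal loop
    // compares EdgeToVisit against, not Node::Operands.size().
    void markFullyVisited(const Node &N, unsigned &EdgeToVisit) {
      EdgeToVisit = numberOfOperands(N);
    }

    int main() {
      Node Call{{/*lhs*/ 1, /*rhs*/ 2, /*callee*/ 99}};
      unsigned EdgeToVisit = 0;
      markFullyVisited(Call, EdgeToVisit);
      // The in-bounds check and the sentinel now agree, so the node is not
      // visited again.
      assert(!(EdgeToVisit < numberOfOperands(Call)));
      return 0;
    }
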
The code in matchAssociativeReduction() is using OperationData::getNumberOfOperands() when comparing whether the "EdgeToVisit" iterator is in-bounds, so this code must use the same (potentially offset) operand value to set the "EdgeToVisit". --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b03fb203c6d75b..baa8ce2638a0d1 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6772,7 +6772,8 @@ class HorizontalReduction { // in this case. // Do not perform analysis of remaining operands of ParentStackElem.first // instruction, this whole instruction is an extra argument. - ParentStackElem.second = ParentStackElem.first->getNumOperands(); + OperationData OpData = getOperationData(ParentStackElem.first); + ParentStackElem.second = OpData.getNumberOfOperands(); } else { // We ran into something like: // ParentStackElem.first += ... + ExtraArg + ... From f6929c01952b3f144df620544ed937e801b9c945 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 22 Dec 2020 14:54:09 -0500 Subject: [PATCH 03/10] [SLP] add reduction tests for maxnum/minnum intrinsics; NFC --- .../Transforms/SLPVectorizer/X86/fmaxnum.ll | 147 ++++++++++++++++++ .../Transforms/SLPVectorizer/X86/fminnum.ll | 147 ++++++++++++++++++ 2 files changed, 294 insertions(+) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll index e03f3f808a4ff2..23f2196b2425b3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll @@ -338,4 +338,151 @@ define void @fmaxnum_16f32() #0 { ret void } +define float @reduction_v4f32_fast(float* %p) { +; CHECK-LABEL: @reduction_v4f32_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: ret float [[M3]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %m1 = tail call fast float @llvm.maxnum.f32(float %t1, float %t0) + %m2 = tail call fast float @llvm.maxnum.f32(float %t2, float %m1) + %m3 = tail call fast float @llvm.maxnum.f32(float %t3, float %m2) + ret float %m3 +} + +define float @reduction_v4f32_nnan(float* %p) { +; CHECK-LABEL: @reduction_v4f32_nnan( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr 
inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: ret float [[M3]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %m1 = tail call nnan float @llvm.maxnum.f32(float %t1, float %t0) + %m2 = tail call nnan float @llvm.maxnum.f32(float %t2, float %m1) + %m3 = tail call nnan float @llvm.maxnum.f32(float %t3, float %m2) + ret float %m3 +} + +define float @reduction_v8f32_fast(float* %p) { +; CHECK-LABEL: @reduction_v8f32_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds float, float* [[P]], i64 4 +; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds float, float* [[P]], i64 5 +; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds float, float* [[P]], i64 6 +; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds float, float* [[P]], i64 7 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[T4:%.*]] = load float, float* [[G4]], align 4 +; CHECK-NEXT: [[T5:%.*]] = load float, float* [[G5]], align 4 +; CHECK-NEXT: [[T6:%.*]] = load float, float* [[G6]], align 4 +; CHECK-NEXT: [[T7:%.*]] = load float, float* [[G7]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: [[M4:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T4]], float [[M3]]) +; CHECK-NEXT: [[M5:%.*]] = tail call fast float @llvm.maxnum.f32(float [[M4]], float [[T6]]) +; CHECK-NEXT: [[M6:%.*]] = tail call fast float @llvm.maxnum.f32(float [[M5]], float [[T5]]) +; CHECK-NEXT: [[M7:%.*]] = tail call fast float @llvm.maxnum.f32(float [[M6]], float [[T7]]) +; CHECK-NEXT: ret float [[M7]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %g4 = getelementptr inbounds float, float* %p, i64 4 + %g5 = getelementptr inbounds float, float* %p, i64 5 + %g6 = getelementptr inbounds float, float* %p, i64 6 + %g7 = getelementptr inbounds float, float* %p, i64 7 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %t4 = load float, 
float* %g4, align 4 + %t5 = load float, float* %g5, align 4 + %t6 = load float, float* %g6, align 4 + %t7 = load float, float* %g7, align 4 + %m1 = tail call fast float @llvm.maxnum.f32(float %t1, float %t0) + %m2 = tail call fast float @llvm.maxnum.f32(float %t2, float %m1) + %m3 = tail call fast float @llvm.maxnum.f32(float %t3, float %m2) + %m4 = tail call fast float @llvm.maxnum.f32(float %t4, float %m3) + %m5 = tail call fast float @llvm.maxnum.f32(float %m4, float %t6) + %m6 = tail call fast float @llvm.maxnum.f32(float %m5, float %t5) + %m7 = tail call fast float @llvm.maxnum.f32(float %m6, float %t7) + ret float %m7 +} + +define double @reduction_v2f64_fast(double* %p) { +; CHECK-LABEL: @reduction_v2f64_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1 +; CHECK-NEXT: [[T0:%.*]] = load double, double* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load double, double* [[G1]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast double @llvm.maxnum.f64(double [[T1]], double [[T0]]) +; CHECK-NEXT: ret double [[M1]] +; + %g1 = getelementptr inbounds double, double* %p, i64 1 + %t0 = load double, double* %p, align 4 + %t1 = load double, double* %g1, align 4 + %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0) + ret double %m1 +} + +define double @reduction_v4f64_fast(double* %p) { +; CHECK-LABEL: @reduction_v4f64_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load double, double* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load double, double* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load double, double* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load double, double* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast double @llvm.maxnum.f64(double [[T1]], double [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast double @llvm.maxnum.f64(double [[T2]], double [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast double @llvm.maxnum.f64(double [[T3]], double [[M2]]) +; CHECK-NEXT: ret double [[M3]] +; + %g1 = getelementptr inbounds double, double* %p, i64 1 + %g2 = getelementptr inbounds double, double* %p, i64 2 + %g3 = getelementptr inbounds double, double* %p, i64 3 + %t0 = load double, double* %p, align 4 + %t1 = load double, double* %g1, align 4 + %t2 = load double, double* %g2, align 4 + %t3 = load double, double* %g3, align 4 + %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0) + %m2 = tail call fast double @llvm.maxnum.f64(double %t2, double %m1) + %m3 = tail call fast double @llvm.maxnum.f64(double %t3, double %m2) + ret double %m3 +} + attributes #0 = { nounwind } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll index b830d826c2e960..81bcfb2f1e9b74 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll @@ -338,4 +338,151 @@ define void @fminnum_16f32() #0 { ret void } +define float @reduction_v4f32_fast(float* %p) { +; CHECK-LABEL: @reduction_v4f32_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 
+; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast float @llvm.minnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast float @llvm.minnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast float @llvm.minnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: ret float [[M3]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %m1 = tail call fast float @llvm.minnum.f32(float %t1, float %t0) + %m2 = tail call fast float @llvm.minnum.f32(float %t2, float %m1) + %m3 = tail call fast float @llvm.minnum.f32(float %t3, float %m2) + ret float %m3 +} + +define float @reduction_v4f32_nnan(float* %p) { +; CHECK-LABEL: @reduction_v4f32_nnan( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: ret float [[M3]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %m1 = tail call nnan float @llvm.minnum.f32(float %t1, float %t0) + %m2 = tail call nnan float @llvm.minnum.f32(float %t2, float %m1) + %m3 = tail call nnan float @llvm.minnum.f32(float %t3, float %m2) + ret float %m3 +} + +define float @reduction_v8f32_fast(float* %p) { +; CHECK-LABEL: @reduction_v8f32_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds float, float* [[P]], i64 4 +; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds float, float* [[P]], i64 5 +; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds float, float* [[P]], i64 6 +; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds float, float* [[P]], i64 7 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[T4:%.*]] = load float, float* [[G4]], align 4 +; CHECK-NEXT: [[T5:%.*]] = load float, float* [[G5]], align 4 +; CHECK-NEXT: [[T6:%.*]] = load float, 
float* [[G6]], align 4 +; CHECK-NEXT: [[T7:%.*]] = load float, float* [[G7]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast float @llvm.minnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast float @llvm.minnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast float @llvm.minnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: [[M4:%.*]] = tail call fast float @llvm.minnum.f32(float [[T4]], float [[M3]]) +; CHECK-NEXT: [[M5:%.*]] = tail call fast float @llvm.minnum.f32(float [[M4]], float [[T6]]) +; CHECK-NEXT: [[M6:%.*]] = tail call fast float @llvm.minnum.f32(float [[M5]], float [[T5]]) +; CHECK-NEXT: [[M7:%.*]] = tail call fast float @llvm.minnum.f32(float [[M6]], float [[T7]]) +; CHECK-NEXT: ret float [[M7]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %g4 = getelementptr inbounds float, float* %p, i64 4 + %g5 = getelementptr inbounds float, float* %p, i64 5 + %g6 = getelementptr inbounds float, float* %p, i64 6 + %g7 = getelementptr inbounds float, float* %p, i64 7 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %t4 = load float, float* %g4, align 4 + %t5 = load float, float* %g5, align 4 + %t6 = load float, float* %g6, align 4 + %t7 = load float, float* %g7, align 4 + %m1 = tail call fast float @llvm.minnum.f32(float %t1, float %t0) + %m2 = tail call fast float @llvm.minnum.f32(float %t2, float %m1) + %m3 = tail call fast float @llvm.minnum.f32(float %t3, float %m2) + %m4 = tail call fast float @llvm.minnum.f32(float %t4, float %m3) + %m5 = tail call fast float @llvm.minnum.f32(float %m4, float %t6) + %m6 = tail call fast float @llvm.minnum.f32(float %m5, float %t5) + %m7 = tail call fast float @llvm.minnum.f32(float %m6, float %t7) + ret float %m7 +} + +define double @reduction_v2f64_fast(double* %p) { +; CHECK-LABEL: @reduction_v2f64_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1 +; CHECK-NEXT: [[T0:%.*]] = load double, double* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load double, double* [[G1]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast double @llvm.minnum.f64(double [[T1]], double [[T0]]) +; CHECK-NEXT: ret double [[M1]] +; + %g1 = getelementptr inbounds double, double* %p, i64 1 + %t0 = load double, double* %p, align 4 + %t1 = load double, double* %g1, align 4 + %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0) + ret double %m1 +} + +define double @reduction_v4f64_fast(double* %p) { +; CHECK-LABEL: @reduction_v4f64_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load double, double* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load double, double* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load double, double* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load double, double* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast double @llvm.minnum.f64(double [[T1]], double [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast double @llvm.minnum.f64(double [[T2]], double [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast double @llvm.minnum.f64(double [[T3]], double [[M2]]) +; CHECK-NEXT: ret 
double [[M3]] +; + %g1 = getelementptr inbounds double, double* %p, i64 1 + %g2 = getelementptr inbounds double, double* %p, i64 2 + %g3 = getelementptr inbounds double, double* %p, i64 3 + %t0 = load double, double* %p, align 4 + %t1 = load double, double* %g1, align 4 + %t2 = load double, double* %g2, align 4 + %t3 = load double, double* %g3, align 4 + %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0) + %m2 = tail call fast double @llvm.minnum.f64(double %t2, double %m1) + %m3 = tail call fast double @llvm.minnum.f64(double %t3, double %m2) + ret double %m3 +} + attributes #0 = { nounwind } From 3dbe471a260392ec63dda8deb2709160afc56dde Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 22 Dec 2020 21:36:41 +0100 Subject: [PATCH 04/10] [clangd] Use atomics instead of locks to track periodic memory trimming Instead of always locking/unlocking a contended mutex, we now do one atomic read in the common case, and one read + one exchange if the timer has expried. Also use this for memory profiling which has similar/compatible requirements. Differential Revision: https://reviews.llvm.org/D93726 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 37 ++++--------------- clang-tools-extra/clangd/ClangdLSPServer.h | 11 ++---- .../clangd/support/Threading.cpp | 12 ++++++ clang-tools-extra/clangd/support/Threading.h | 29 +++++++++++++++ .../unittests/support/ThreadingTests.cpp | 21 +++++++++++ 5 files changed, 73 insertions(+), 37 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 0c42f95fb59474..c606ccae4fdc0d 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -1285,13 +1285,7 @@ void ClangdLSPServer::publishDiagnostics( } void ClangdLSPServer::maybeExportMemoryProfile() { - if (!trace::enabled()) - return; - // Profiling might be expensive, so we throttle it to happen once every 5 - // minutes. 
- static constexpr auto ProfileInterval = std::chrono::minutes(5); - auto Now = std::chrono::steady_clock::now(); - if (Now < NextProfileTime) + if (!trace::enabled() || !ShouldProfile()) return; static constexpr trace::Metric MemoryUsage( @@ -1300,27 +1294,11 @@ void ClangdLSPServer::maybeExportMemoryProfile() { MemoryTree MT; profile(MT); record(MT, "clangd_lsp_server", MemoryUsage); - NextProfileTime = Now + ProfileInterval; } void ClangdLSPServer::maybeCleanupMemory() { - // Memory cleanup is probably expensive, throttle it - static constexpr auto MemoryCleanupInterval = std::chrono::minutes(1); - - if (!Opts.MemoryCleanup) + if (!Opts.MemoryCleanup || !ShouldCleanupMemory()) return; - - // FIXME: this can probably be done without a mutex - // and the logic could be shared with maybeExportMemoryProfile - { - auto Now = std::chrono::steady_clock::now(); - std::lock_guard Lock(NextMemoryCleanupTimeMutex); - if (Now < NextMemoryCleanupTime) - return; - NextMemoryCleanupTime = Now + MemoryCleanupInterval; - } - - vlog("Calling memory cleanup callback"); Opts.MemoryCleanup(); } @@ -1481,10 +1459,15 @@ void ClangdLSPServer::onAST(const ASTParams &Params, ClangdLSPServer::ClangdLSPServer(class Transport &Transp, const ThreadsafeFS &TFS, const ClangdLSPServer::Options &Opts) - : BackgroundContext(Context::current().clone()), Transp(Transp), + : ShouldProfile(/*Period=*/std::chrono::minutes(5), + /*Delay=*/std::chrono::minutes(1)), + ShouldCleanupMemory(/*Period=*/std::chrono::minutes(1), + /*Delay=*/std::chrono::minutes(1)), + BackgroundContext(Context::current().clone()), Transp(Transp), MsgHandler(new MessageHandler(*this)), TFS(TFS), SupportedSymbolKinds(defaultSymbolKinds()), SupportedCompletionItemKinds(defaultCompletionItemKinds()), Opts(Opts) { + // clang-format off MsgHandler->bind("initialize", &ClangdLSPServer::onInitialize); MsgHandler->bind("initialized", &ClangdLSPServer::onInitialized); @@ -1529,10 +1512,6 @@ ClangdLSPServer::ClangdLSPServer(class Transport &Transp, if (Opts.FoldingRanges) MsgHandler->bind("textDocument/foldingRange", &ClangdLSPServer::onFoldingRange); // clang-format on - - // Delay first profile and memory cleanup until we've finished warming up. - NextMemoryCleanupTime = NextProfileTime = - std::chrono::steady_clock::now() + std::chrono::minutes(1); } ClangdLSPServer::~ClangdLSPServer() { diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h index b5f9d2c9d766ab..a41bc5666af333 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.h +++ b/clang-tools-extra/clangd/ClangdLSPServer.h @@ -19,6 +19,7 @@ #include "support/Context.h" #include "support/MemoryTree.h" #include "support/Path.h" +#include "support/Threading.h" #include "clang/Tooling/Core/Replacement.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringSet.h" @@ -186,18 +187,12 @@ class ClangdLSPServer : private ClangdServer::Callbacks { /// Runs profiling and exports memory usage metrics if tracing is enabled and /// profiling hasn't happened recently. void maybeExportMemoryProfile(); + PeriodicThrottler ShouldProfile; /// Run the MemoryCleanup callback if it's time. /// This method is thread safe. void maybeCleanupMemory(); - - /// Timepoint until which profiling is off. It is used to throttle profiling - /// requests. - std::chrono::steady_clock::time_point NextProfileTime; - - /// Next time we want to call the MemoryCleanup callback. 
- std::mutex NextMemoryCleanupTimeMutex; - std::chrono::steady_clock::time_point NextMemoryCleanupTime; + PeriodicThrottler ShouldCleanupMemory; /// Since initialization of CDBs and ClangdServer is done lazily, the /// following context captures the one used while creating ClangdLSPServer and diff --git a/clang-tools-extra/clangd/support/Threading.cpp b/clang-tools-extra/clangd/support/Threading.cpp index 5f95888ae3e2d7..7f3bd62be306ce 100644 --- a/clang-tools-extra/clangd/support/Threading.cpp +++ b/clang-tools-extra/clangd/support/Threading.cpp @@ -116,5 +116,17 @@ void wait(std::unique_lock &Lock, std::condition_variable &CV, CV.wait_until(Lock, D.time()); } +bool PeriodicThrottler::operator()() { + Rep Now = Stopwatch::now().time_since_epoch().count(); + Rep OldNext = Next.load(std::memory_order_acquire); + if (Now < OldNext) + return false; + // We're ready to run (but may be racing other threads). + // Work out the updated target time, and run if we successfully bump it. + Rep NewNext = Now + Period; + return Next.compare_exchange_strong(OldNext, NewNext, + std::memory_order_acq_rel); +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/support/Threading.h b/clang-tools-extra/clangd/support/Threading.h index 5155ac193fd18a..da9e3b8ea8b688 100644 --- a/clang-tools-extra/clangd/support/Threading.h +++ b/clang-tools-extra/clangd/support/Threading.h @@ -169,6 +169,35 @@ template class Memoize { } }; +/// Used to guard an operation that should run at most every N seconds. +/// +/// Usage: +/// mutable PeriodicThrottler ShouldLog(std::chrono::seconds(1)); +/// void calledFrequently() { +/// if (ShouldLog()) +/// log("this is not spammy"); +/// } +/// +/// This class is threadsafe. If multiple threads are involved, then the guarded +/// operation still needs to be threadsafe! +class PeriodicThrottler { + using Stopwatch = std::chrono::steady_clock; + using Rep = Stopwatch::duration::rep; + + Rep Period; + std::atomic Next; + +public: + /// If Period is zero, the throttler will return true every time. + PeriodicThrottler(Stopwatch::duration Period, Stopwatch::duration Delay = {}) + : Period(Period.count()), + Next((Stopwatch::now() + Delay).time_since_epoch().count()) {} + + /// Returns whether the operation should run at this time. + /// operator() is safe to call concurrently. + bool operator()(); +}; + } // namespace clangd } // namespace clang #endif diff --git a/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp b/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp index e265ad2eabeaea..87002d3cfa86a4 100644 --- a/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp +++ b/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp @@ -10,6 +10,7 @@ #include "llvm/ADT/DenseMap.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include #include namespace clang { @@ -121,5 +122,25 @@ TEST_F(ThreadingTest, MemoizeDeterministic) { ASSERT_THAT(ValueA.load(), testing::AnyOf('A', 'B')); } +// It's hard to write a real test of this class, std::chrono is awkward to mock. +// But test some degenerate cases at least. 
+TEST(PeriodicThrottlerTest, Minimal) { + PeriodicThrottler Once(std::chrono::hours(24)); + EXPECT_TRUE(Once()); + EXPECT_FALSE(Once()); + EXPECT_FALSE(Once()); + + PeriodicThrottler Later(std::chrono::hours(24), + /*Delay=*/std::chrono::hours(24)); + EXPECT_FALSE(Later()); + EXPECT_FALSE(Later()); + EXPECT_FALSE(Later()); + + PeriodicThrottler Always(std::chrono::seconds(0)); + EXPECT_TRUE(Always()); + EXPECT_TRUE(Always()); + EXPECT_TRUE(Always()); +} + } // namespace clangd } // namespace clang From df6cbd37f57fd330e413c394a4653ea55393fcef Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Tue, 22 Dec 2020 17:42:59 +0100 Subject: [PATCH 05/10] [mlir] Lower gpu.memcpy to GPU runtime calls. Reviewed By: herhut Differential Revision: https://reviews.llvm.org/D93204 --- .../ConvertLaunchFuncToRuntimeCalls.cpp | 65 +++++++++++++++++++ .../lower-memcpy-to-gpu-runtime-calls.mlir | 19 ++++++ .../cuda-runtime-wrappers.cpp | 7 ++ .../rocm-runtime-wrappers.cpp | 5 ++ 4 files changed, 96 insertions(+) create mode 100644 mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp index 3b4b39e57d557c..41a079c44eea58 100644 --- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp +++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp @@ -151,6 +151,12 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern { "mgpuMemFree", llvmVoidType, {llvmPointerType /* void *ptr */, llvmPointerType /* void *stream */}}; + FunctionCallBuilder memcpyCallBuilder = { + "mgpuMemcpy", + llvmVoidType, + {llvmPointerType /* void *dst */, llvmPointerType /* void *src */, + llvmIntPtrType /* intptr_t sizeBytes */, + llvmPointerType /* void *stream */}}; }; /// A rewrite pattern to convert gpu.host_register operations into a GPU runtime @@ -268,6 +274,20 @@ class EraseGpuModuleOpPattern : public OpRewritePattern { return success(); } }; + +/// A rewrite pattern to convert gpu.memcpy operations into a GPU runtime +/// call. Currently it supports CUDA and ROCm (HIP). +class ConvertMemcpyOpToGpuRuntimeCallPattern + : public ConvertOpToGpuRuntimeCallPattern { +public: + ConvertMemcpyOpToGpuRuntimeCallPattern(LLVMTypeConverter &typeConverter) + : ConvertOpToGpuRuntimeCallPattern(typeConverter) {} + +private: + LogicalResult + matchAndRewrite(gpu::MemcpyOp memcpyOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; } // namespace void GpuToLLVMConversionPass::runOnOperation() { @@ -643,6 +663,50 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite( return success(); } +LogicalResult ConvertMemcpyOpToGpuRuntimeCallPattern::matchAndRewrite( + gpu::MemcpyOp memcpyOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + auto memRefType = memcpyOp.src().getType().cast(); + + if (failed(areAllLLVMTypes(memcpyOp, operands, rewriter)) || + !isSupportedMemRefType(memRefType) || + failed(isAsyncWithOneDependency(rewriter, memcpyOp))) + return failure(); + + auto loc = memcpyOp.getLoc(); + auto adaptor = gpu::MemcpyOpAdaptor(operands, memcpyOp->getAttrDictionary()); + + MemRefDescriptor srcDesc(adaptor.src()); + + Value numElements = + memRefType.hasStaticShape() + ? createIndexConstant(rewriter, loc, memRefType.getNumElements()) + // For identity layouts (verified above), the number of elements is + // stride[0] * size[0]. 
+ : rewriter.create(loc, srcDesc.stride(rewriter, loc, 0), + srcDesc.size(rewriter, loc, 0)); + + Type elementPtrType = getElementPtrType(memRefType); + Value nullPtr = rewriter.create(loc, elementPtrType); + Value gepPtr = rewriter.create( + loc, elementPtrType, ArrayRef{nullPtr, numElements}); + auto sizeBytes = + rewriter.create(loc, getIndexType(), gepPtr); + + auto src = rewriter.create( + loc, llvmPointerType, srcDesc.alignedPtr(rewriter, loc)); + auto dst = rewriter.create( + loc, llvmPointerType, + MemRefDescriptor(adaptor.dst()).alignedPtr(rewriter, loc)); + + auto stream = adaptor.asyncDependencies().front(); + memcpyCallBuilder.create(loc, rewriter, {dst, src, sizeBytes, stream}); + + rewriter.replaceOp(memcpyOp, {stream}); + + return success(); +} + std::unique_ptr> mlir::createGpuToLLVMConversionPass(StringRef gpuBinaryAnnotation) { return std::make_unique(gpuBinaryAnnotation); @@ -658,6 +722,7 @@ void mlir::populateGpuToLLVMConversionPatterns( patterns.insert(converter); patterns.insert( diff --git a/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir new file mode 100644 index 00000000000000..790c92f92ec96f --- /dev/null +++ b/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir @@ -0,0 +1,19 @@ +// RUN: mlir-opt -allow-unregistered-dialect %s --gpu-to-llvm | FileCheck %s + +module attributes {gpu.container_module} { + + // CHECK: func @foo + func @foo(%dst : memref<7xf32, 1>, %src : memref<7xf32>) { + // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate + %t0 = gpu.wait async + // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint + // CHECK: %[[src:.*]] = llvm.bitcast + // CHECK: %[[dst:.*]] = llvm.bitcast + // CHECK: llvm.call @mgpuMemcpy(%[[dst]], %[[src]], %[[size_bytes]], %[[t0]]) + %t1 = gpu.memcpy async [%t0] %dst, %src : memref<7xf32, 1>, memref<7xf32> + // CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]]) + // CHECK: llvm.call @mgpuStreamDestroy(%[[t0]]) + gpu.wait [%t1] + return + } +} diff --git a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp index a6729b1c0b7d1d..72d172889d3019 100644 --- a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp +++ b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp @@ -117,6 +117,13 @@ extern "C" void mgpuMemFree(void *ptr, CUstream /*stream*/) { CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast(ptr))); } +extern "C" void mgpuMemcpy(void *dst, void *src, uint64_t sizeBytes, + CUstream stream) { + CUDA_REPORT_IF_ERROR(cuMemcpyAsync(reinterpret_cast(dst), + reinterpret_cast(src), + sizeBytes, stream)); +} + /// Helper functions for writing mlir example code // Allows to register byte array with the CUDA runtime. Helpful until we have diff --git a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp b/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp index aad7ae27ff8924..4f62f204f4a837 100644 --- a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp +++ b/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp @@ -118,6 +118,11 @@ extern "C" void mgpuMemFree(void *ptr, hipStream_t /*stream*/) { HIP_REPORT_IF_ERROR(hipMemFree(ptr)); } +extern "C" void mgpuMemcpy(void *dst, void *src, uint64_t sizeBytes, + hipStream_t stream) { + HIP_REPORT_IF_ERROR(hipMemcpyAsync(dst, src, sizeBytes, stream)); +} + /// Helper functions for writing mlir example code // Allows to register byte array with the ROCM runtime. 
Helpful until we have From f7a26127f21fb1ca8252879ca647835ea7c5903d Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 22 Dec 2020 22:58:39 +0100 Subject: [PATCH 06/10] [clangd] Release notes for b8c37153d5393aad96 --- clang-tools-extra/docs/ReleaseNotes.rst | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 450b80fd45814e..2960aad5a5569a 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -47,7 +47,17 @@ Major New Features Improvements to clangd ---------------------- -The improvements are... +- clangd's memory usage is significantly reduced on most Linux systems. + In particular, memory usage should not increase dramatically over time. + + The standard allocator on most systems is glibc's ptmalloc2, and it creates + disproportionately large heaps when handling clangd's allocation patterns. + By default, clangd will now periodically call ``malloc_trim`` to release free + pages on glibc systems. + + Users of other allocators (such as ``jemalloc`` or ``tcmalloc``) on glibc + systems can disable this using ``--malloc_trim=0`` or the CMake flag + ``-DCLANGD_MALLOC_TRIM=0``. Improvements to clang-doc ------------------------- From a781a706b961a348006b604cdff8b555e62a2fcb Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Tue, 22 Dec 2020 14:29:06 -0800 Subject: [PATCH 07/10] [WebAssembly][SIMD] Rename shuffle, swizzle, and load_splats These instructions previously used prefixes like v8x16 to signify that they were agnostic between float and int interpretations. We renamed these instructions to remove this form of prefix in https://github.com/WebAssembly/simd/issues/297 and https://github.com/WebAssembly/simd/issues/316 and this commit brings the names in LLVM up to date. 
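Concretely, the mnemonics change as follows (as reflected in the test updates below):

    v8x16.shuffle     -> i8x16.shuffle
    v8x16.swizzle     -> i8x16.swizzle
    v8x16.load_splat  -> v128.load8_splat
    v16x8.load_splat  -> v128.load16_splat
    v32x4.load_splat  -> v128.load32_splat
    v64x2.load_splat  -> v128.load64_splat
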
Differential Revision: https://reviews.llvm.org/D93722 --- .../MCTargetDesc/WebAssemblyMCTargetDesc.h | 8 +- .../WebAssembly/WebAssemblyInstrSIMD.td | 46 ++++----- .../CodeGen/WebAssembly/simd-build-vector.ll | 6 +- .../CodeGen/WebAssembly/simd-intrinsics.ll | 10 +- .../CodeGen/WebAssembly/simd-load-splat.ll | 2 +- .../WebAssembly/simd-load-store-alignment.ll | 36 +++---- .../WebAssembly/simd-nested-shuffles.ll | 2 +- llvm/test/CodeGen/WebAssembly/simd-offset.ll | 96 +++++++++---------- .../WebAssembly/simd-shift-complex-splats.ll | 2 +- .../WebAssembly/simd-shuffle-bitcast.ll | 2 +- llvm/test/CodeGen/WebAssembly/simd.ll | 48 +++++----- .../test/MC/Disassembler/WebAssembly/wasm.txt | 2 +- llvm/test/MC/WebAssembly/simd-encodings.s | 24 ++--- 13 files changed, 142 insertions(+), 142 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index fccee4b96ed513..4bc77aa68668bc 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -194,7 +194,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW8_U_XCHG_I64) WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I32) WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I64) - WASM_LOAD_STORE(LOAD_SPLAT_v8x16) + WASM_LOAD_STORE(LOAD8_SPLAT) WASM_LOAD_STORE(LOAD_LANE_v16i8) WASM_LOAD_STORE(STORE_LANE_v16i8) return 0; @@ -222,7 +222,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW16_U_XCHG_I64) WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I32) WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I64) - WASM_LOAD_STORE(LOAD_SPLAT_v16x8) + WASM_LOAD_STORE(LOAD16_SPLAT) WASM_LOAD_STORE(LOAD_LANE_v8i16) WASM_LOAD_STORE(STORE_LANE_v8i16) return 1; @@ -253,7 +253,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW32_U_CMPXCHG_I64) WASM_LOAD_STORE(MEMORY_ATOMIC_NOTIFY) WASM_LOAD_STORE(MEMORY_ATOMIC_WAIT32) - WASM_LOAD_STORE(LOAD_SPLAT_v32x4) + WASM_LOAD_STORE(LOAD32_SPLAT) WASM_LOAD_STORE(LOAD_ZERO_v4i32) WASM_LOAD_STORE(LOAD_LANE_v4i32) WASM_LOAD_STORE(STORE_LANE_v4i32) @@ -272,7 +272,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW_XCHG_I64) WASM_LOAD_STORE(ATOMIC_RMW_CMPXCHG_I64) WASM_LOAD_STORE(MEMORY_ATOMIC_WAIT64) - WASM_LOAD_STORE(LOAD_SPLAT_v64x2) + WASM_LOAD_STORE(LOAD64_SPLAT) WASM_LOAD_STORE(LOAD_EXTEND_S_v8i16) WASM_LOAD_STORE(LOAD_EXTEND_U_v8i16) WASM_LOAD_STORE(LOAD_EXTEND_S_v4i32) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 191cdea0c0aeee..e48bbaebd47e92 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -64,55 +64,55 @@ defm : LoadPatOffsetOnly; defm : LoadPatGlobalAddrOffOnly; } -// vNxM.load_splat -multiclass SIMDLoadSplat simdop> { +// v128.loadX_splat +multiclass SIMDLoadSplat simdop> { let mayLoad = 1, UseNamedOperandTable = 1 in { - defm LOAD_SPLAT_#vec#_A32 : + defm LOAD#size#_SPLAT_A32 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), [], - vec#".load_splat\t$dst, ${off}(${addr})$p2align", - vec#".load_splat\t$off$p2align", simdop>; - defm LOAD_SPLAT_#vec#_A64 : + "v128.load"#size#"_splat\t$dst, ${off}(${addr})$p2align", + "v128.load"#size#"_splat\t$off$p2align", simdop>; + defm LOAD#size#_SPLAT_A64 : SIMD_I<(outs 
V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr), (outs), (ins P2Align:$p2align, offset64_op:$off), [], - vec#".load_splat\t$dst, ${off}(${addr})$p2align", - vec#".load_splat\t$off$p2align", simdop>; + "v128.load"#size#"_splat\t$dst, ${off}(${addr})$p2align", + "v128.load"#size#"_splat\t$off$p2align", simdop>; } } -defm "" : SIMDLoadSplat<"v8x16", 7>; -defm "" : SIMDLoadSplat<"v16x8", 8>; -defm "" : SIMDLoadSplat<"v32x4", 9>; -defm "" : SIMDLoadSplat<"v64x2", 10>; +defm "" : SIMDLoadSplat<8, 7>; +defm "" : SIMDLoadSplat<16, 8>; +defm "" : SIMDLoadSplat<32, 9>; +defm "" : SIMDLoadSplat<64, 10>; def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>; def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>; -foreach args = [["v16i8", "v8x16"], ["v8i16", "v16x8"], ["v4i32", "v32x4"], - ["v2i64", "v64x2"], ["v4f32", "v32x4"], ["v2f64", "v64x2"]] in { +foreach args = [["v16i8", "8"], ["v8i16", "16"], ["v4i32", "32"], + ["v2i64", "64"], ["v4f32", "32"], ["v2f64", "64"]] in { defm : LoadPatNoOffset(args[0]), load_splat, - "LOAD_SPLAT_"#args[1]>; + "LOAD"#args[1]#"_SPLAT">; defm : LoadPatImmOff(args[0]), load_splat, regPlusImm, - "LOAD_SPLAT_"#args[1]>; + "LOAD"#args[1]#"_SPLAT">; defm : LoadPatImmOff(args[0]), load_splat, or_is_add, - "LOAD_SPLAT_"#args[1]>; + "LOAD"#args[1]#"_SPLAT">; defm : LoadPatOffsetOnly(args[0]), load_splat, - "LOAD_SPLAT_"#args[1]>; + "LOAD"#args[1]#"_SPLAT">; defm : LoadPatGlobalAddrOffOnly(args[0]), load_splat, - "LOAD_SPLAT_"#args[1]>; + "LOAD"#args[1]#"_SPLAT">; } // Load and extend @@ -401,10 +401,10 @@ defm SHUFFLE : vec_i8imm_op:$mC, vec_i8imm_op:$mD, vec_i8imm_op:$mE, vec_i8imm_op:$mF), [], - "v8x16.shuffle\t$dst, $x, $y, "# + "i8x16.shuffle\t$dst, $x, $y, "# "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "# "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF", - "v8x16.shuffle\t"# + "i8x16.shuffle\t"# "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "# "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF", 13>; @@ -433,14 +433,14 @@ def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y), (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>; } -// Swizzle lanes: v8x16.swizzle +// Swizzle lanes: i8x16.swizzle def wasm_swizzle_t : SDTypeProfile<1, 2, []>; def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>; defm SWIZZLE : SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins), [(set (v16i8 V128:$dst), (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))], - "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 14>; + "i8x16.swizzle\t$dst, $src, $mask", "i8x16.swizzle", 14>; def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)), (SWIZZLE V128:$src, V128:$mask)>; diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll index 4f758878737843..1360e0172d3fd2 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll @@ -178,7 +178,7 @@ define <8 x i16> @splat_common_arg_i16x8(i16 %a, i16 %b, i16 %c) { ; CHECK-LABEL: swizzle_one_i8x16: ; CHECK-NEXT: .functype swizzle_one_i8x16 (v128, v128) -> (v128) -; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 +; CHECK-NEXT: i8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 ; CHECK-NEXT: return $pop[[L0]] define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) { %m0 = extractelement <16 x i8> %mask, i32 0 @@ -189,7 +189,7 @@ define <16 x 
i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) { ; CHECK-LABEL: swizzle_all_i8x16: ; CHECK-NEXT: .functype swizzle_all_i8x16 (v128, v128) -> (v128) -; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 +; CHECK-NEXT: i8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 ; CHECK-NEXT: return $pop[[L0]] define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) { %m0 = extractelement <16 x i8> %mask, i32 0 @@ -256,7 +256,7 @@ define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) { ; CHECK-LABEL: mashup_swizzle_i8x16: ; CHECK-NEXT: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128) -; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 +; CHECK-NEXT: i8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 ; CHECK: i8x16.replace_lane ; CHECK: i8x16.replace_lane ; CHECK: i8x16.replace_lane diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll index 23a7bfbde927ae..da7343770de77c 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -13,7 +13,7 @@ target triple = "wasm32-unknown-unknown" ; ============================================================================== ; CHECK-LABEL: swizzle_v16i8: ; SIMD128-NEXT: .functype swizzle_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.swizzle $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: i8x16.swizzle $push[[R:[0-9]+]]=, $0, $1{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} declare <16 x i8> @llvm.wasm.swizzle(<16 x i8>, <16 x i8>) define <16 x i8> @swizzle_v16i8(<16 x i8> %x, <16 x i8> %y) { @@ -164,9 +164,9 @@ define <16 x i8> @narrow_unsigned_v16i8(<8 x i16> %low, <8 x i16> %high) { } ; CHECK-LABEL: shuffle_v16i8: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} declare <16 x i8> @llvm.wasm.shuffle( @@ -180,9 +180,9 @@ define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) { } ; CHECK-LABEL: shuffle_undef_v16i8: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) { diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll b/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll index 4e693c285a3faa..3d08a586edb5a0 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll @@ -9,7 +9,7 @@ target triple = "wasm32-unknown-unknown" ; CHECK-LABEL: load_splat: ; CHECK-NEXT: .functype load_splat (i32, i32) -> (i32) ; CHECK-NEXT: i32.load8_u $[[E:[0-9]+]]=, 0($0){{$}} -; CHECK-NEXT: v8x16.load_splat $push[[V:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load8_splat $push[[V:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: v128.store 0($1), $pop[[V]]{{$}} ; CHECK-NEXT: return $[[E]]{{$}} define i8 @load_splat(i8* %p, <16 x i8>* %out) { diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll b/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll index 8ebeb15ccc9a25..000b7730e3bf2a 100644 --- 
a/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll @@ -89,11 +89,11 @@ define void @store_v16i8_a32(<16 x i8> *%p, <16 x i8> %v) { ret void } -; 1 is the default alignment for v8x16.load_splat so no attribute is needed. +; 1 is the default alignment for v128.load8_splat so no attribute is needed. ; CHECK-LABEL: load_splat_v16i8_a1: ; CHECK-NEXT: .functype load_splat_v16i8_a1 (i32) -> (v128){{$}} -; CHECK-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load8_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_splat_v16i8_a1(i8* %p) { %e = load i8, i8* %p, align 1 @@ -106,7 +106,7 @@ define <16 x i8> @load_splat_v16i8_a1(i8* %p) { ; CHECK-LABEL: load_splat_v16i8_a2: ; CHECK-NEXT: .functype load_splat_v16i8_a2 (i32) -> (v128){{$}} -; CHECK-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load8_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_splat_v16i8_a2(i8* %p) { %e = load i8, i8* %p, align 2 @@ -304,7 +304,7 @@ define <8 x i16> @load_sext_v8i16_a16(<8 x i8>* %p) { ; CHECK-LABEL: load_splat_v8i16_a1: ; CHECK-NEXT: .functype load_splat_v8i16_a1 (i32) -> (v128){{$}} -; CHECK-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} +; CHECK-NEXT: v128.load16_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_splat_v8i16_a1(i16* %p) { %e = load i16, i16* %p, align 1 @@ -313,11 +313,11 @@ define <8 x i16> @load_splat_v8i16_a1(i16* %p) { ret <8 x i16> %v2 } -; 2 is the default alignment for v16x8.load_splat so no attribute is needed. +; 2 is the default alignment for v128.load16_splat so no attribute is needed. 
; CHECK-LABEL: load_splat_v8i16_a2: ; CHECK-NEXT: .functype load_splat_v8i16_a2 (i32) -> (v128){{$}} -; CHECK-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load16_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_splat_v8i16_a2(i16* %p) { %e = load i16, i16* %p, align 2 @@ -330,7 +330,7 @@ define <8 x i16> @load_splat_v8i16_a2(i16* %p) { ; CHECK-LABEL: load_splat_v8i16_a4: ; CHECK-NEXT: .functype load_splat_v8i16_a4 (i32) -> (v128){{$}} -; CHECK-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load16_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_splat_v8i16_a4(i16* %p) { %e = load i16, i16* %p, align 4 @@ -528,7 +528,7 @@ define <4 x i32> @load_sext_v4i32_a16(<4 x i16>* %p) { ; CHECK-LABEL: load_splat_v4i32_a1: ; CHECK-NEXT: .functype load_splat_v4i32_a1 (i32) -> (v128){{$}} -; CHECK-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} +; CHECK-NEXT: v128.load32_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_a1(i32* %addr) { %e = load i32, i32* %addr, align 1 @@ -539,7 +539,7 @@ define <4 x i32> @load_splat_v4i32_a1(i32* %addr) { ; CHECK-LABEL: load_splat_v4i32_a2: ; CHECK-NEXT: .functype load_splat_v4i32_a2 (i32) -> (v128){{$}} -; CHECK-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=1{{$}} +; CHECK-NEXT: v128.load32_splat $push[[R:[0-9]+]]=, 0($0):p2align=1{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_a2(i32* %addr) { %e = load i32, i32* %addr, align 2 @@ -548,11 +548,11 @@ define <4 x i32> @load_splat_v4i32_a2(i32* %addr) { ret <4 x i32> %v2 } -; 4 is the default alignment for v32x4.load_splat so no attribute is needed. +; 4 is the default alignment for v128.load32_splat so no attribute is needed. 
; CHECK-LABEL: load_splat_v4i32_a4: ; CHECK-NEXT: .functype load_splat_v4i32_a4 (i32) -> (v128){{$}} -; CHECK-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load32_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_a4(i32* %addr) { %e = load i32, i32* %addr, align 4 @@ -565,7 +565,7 @@ define <4 x i32> @load_splat_v4i32_a4(i32* %addr) { ; CHECK-LABEL: load_splat_v4i32_a8: ; CHECK-NEXT: .functype load_splat_v4i32_a8 (i32) -> (v128){{$}} -; CHECK-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load32_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_a8(i32* %addr) { %e = load i32, i32* %addr, align 8 @@ -660,7 +660,7 @@ define void @store_v2i64_a32(<2 x i64> *%p, <2 x i64> %v) { ; CHECK-LABEL: load_splat_v2i64_a1: ; CHECK-NEXT: .functype load_splat_v2i64_a1 (i32) -> (v128){{$}} -; CHECK-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} +; CHECK-NEXT: v128.load64_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_a1(i64* %p) { %e = load i64, i64* %p, align 1 @@ -671,7 +671,7 @@ define <2 x i64> @load_splat_v2i64_a1(i64* %p) { ; CHECK-LABEL: load_splat_v2i64_a2: ; CHECK-NEXT: .functype load_splat_v2i64_a2 (i32) -> (v128){{$}} -; CHECK-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=1{{$}} +; CHECK-NEXT: v128.load64_splat $push[[R:[0-9]+]]=, 0($0):p2align=1{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_a2(i64* %p) { %e = load i64, i64* %p, align 2 @@ -682,7 +682,7 @@ define <2 x i64> @load_splat_v2i64_a2(i64* %p) { ; CHECK-LABEL: load_splat_v2i64_a4: ; CHECK-NEXT: .functype load_splat_v2i64_a4 (i32) -> (v128){{$}} -; CHECK-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=2{{$}} +; CHECK-NEXT: v128.load64_splat $push[[R:[0-9]+]]=, 0($0):p2align=2{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_a4(i64* %p) { %e = load i64, i64* %p, align 4 @@ -691,11 +691,11 @@ define <2 x i64> @load_splat_v2i64_a4(i64* %p) { ret <2 x i64> %v2 } -; 8 is the default alignment for v64x2.load_splat so no attribute is needed. +; 8 is the default alignment for v128.load64_splat so no attribute is needed. 
; CHECK-LABEL: load_splat_v2i64_a8: ; CHECK-NEXT: .functype load_splat_v2i64_a8 (i32) -> (v128){{$}} -; CHECK-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load64_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_a8(i64* %p) { %e = load i64, i64* %p, align 8 @@ -708,7 +708,7 @@ define <2 x i64> @load_splat_v2i64_a8(i64* %p) { ; CHECK-LABEL: load_splat_v2i64_a16: ; CHECK-NEXT: .functype load_splat_v2i64_a16 (i32) -> (v128){{$}} -; CHECK-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load64_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_a16(i64* %p) { %e = load i64, i64* %p, align 16 diff --git a/llvm/test/CodeGen/WebAssembly/simd-nested-shuffles.ll b/llvm/test/CodeGen/WebAssembly/simd-nested-shuffles.ll index 597ab58e879e00..b72086a2d6cb91 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-nested-shuffles.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-nested-shuffles.ll @@ -6,7 +6,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" -; CHECK: v8x16.shuffle +; CHECK: i8x16.shuffle define <4 x i32> @foo(<4 x i32> %x) { %1 = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> diff --git a/llvm/test/CodeGen/WebAssembly/simd-offset.ll b/llvm/test/CodeGen/WebAssembly/simd-offset.ll index b2d32936df1305..fb41653a514a02 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-offset.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-offset.ll @@ -25,7 +25,7 @@ define <16 x i8> @load_splat_v16i8(i8* %p) { ; CHECK: .functype load_splat_v16i8 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v8x16.load_splat 0 +; CHECK-NEXT: v128.load8_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load i8, i8* %p %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 @@ -52,7 +52,7 @@ define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) { ; CHECK: .functype load_splat_v16i8_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v8x16.load_splat 16 +; CHECK-NEXT: v128.load8_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i8* %p to i32 %r = add nuw i32 %q, 16 @@ -80,7 +80,7 @@ define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) { ; CHECK: .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v8x16.load_splat 1 +; CHECK-NEXT: v128.load8_splat 1 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i8, i8* %p, i32 1 %e = load i8, i8* %s @@ -110,7 +110,7 @@ define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -1 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v8x16.load_splat 0 +; CHECK-NEXT: v128.load8_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i8, i8* %p, i32 -1 %e = load i8, i8* %s @@ -142,7 +142,7 @@ define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v8x16.load_splat 0 +; CHECK-NEXT: v128.load8_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i8* %p to i32 %r = add nsw i32 %q, 16 @@ -174,7 +174,7 @@ define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 1 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v8x16.load_splat 0 +; CHECK-NEXT: v128.load8_splat 0 ; CHECK-NEXT: # 
fallthrough-return %s = getelementptr i8, i8* %p, i32 1 %e = load i8, i8* %s @@ -200,7 +200,7 @@ define <16 x i8> @load_splat_v16i8_from_numeric_address() { ; CHECK: .functype load_splat_v16i8_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v8x16.load_splat 32 +; CHECK-NEXT: v128.load8_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to i8* %e = load i8, i8* %s @@ -227,7 +227,7 @@ define <16 x i8> @load_splat_v16i8_from_global_address() { ; CHECK: .functype load_splat_v16i8_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v8x16.load_splat gv_i8 +; CHECK-NEXT: v128.load8_splat gv_i8 ; CHECK-NEXT: # fallthrough-return %e = load i8, i8* @gv_i8 %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 @@ -366,7 +366,7 @@ define <8 x i16> @load_splat_v8i16(i16* %p) { ; CHECK: .functype load_splat_v8i16 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v16x8.load_splat 0 +; CHECK-NEXT: v128.load16_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load i16, i16* %p %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 @@ -428,7 +428,7 @@ define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) { ; CHECK: .functype load_splat_v8i16_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v16x8.load_splat 16 +; CHECK-NEXT: v128.load16_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i16* %p to i32 %r = add nuw i32 %q, 16 @@ -500,7 +500,7 @@ define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) { ; CHECK: .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v16x8.load_splat 2 +; CHECK-NEXT: v128.load16_splat 2 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i16, i16* %p, i32 1 %e = load i16, i16* %s @@ -568,7 +568,7 @@ define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -2 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v16x8.load_splat 0 +; CHECK-NEXT: v128.load16_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i16, i16* %p, i32 -1 %e = load i16, i16* %s @@ -644,7 +644,7 @@ define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v16x8.load_splat 0 +; CHECK-NEXT: v128.load16_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i16* %p to i32 %r = add nsw i32 %q, 16 @@ -726,7 +726,7 @@ define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 2 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v16x8.load_splat 0 +; CHECK-NEXT: v128.load16_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr i16, i16* %p, i32 1 %e = load i16, i16* %s @@ -796,7 +796,7 @@ define <8 x i16> @load_splat_v8i16_from_numeric_address() { ; CHECK: .functype load_splat_v8i16_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v16x8.load_splat 32 +; CHECK-NEXT: v128.load16_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to i16* %e = load i16, i16* %s @@ -861,7 +861,7 @@ define <8 x i16> @load_splat_v8i16_from_global_address() { ; CHECK: .functype load_splat_v8i16_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v16x8.load_splat gv_i16 +; CHECK-NEXT: v128.load16_splat gv_i16 ; 
CHECK-NEXT: # fallthrough-return %e = load i16, i16* @gv_i16 %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 @@ -1197,7 +1197,7 @@ define <4 x i32> @load_splat_v4i32(i32* %addr) { ; CHECK: .functype load_splat_v4i32 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load i32, i32* %addr, align 4 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 @@ -1259,7 +1259,7 @@ define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) { ; CHECK: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 16 +; CHECK-NEXT: v128.load32_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i32* %p to i32 %r = add nuw i32 %q, 16 @@ -1331,7 +1331,7 @@ define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) { ; CHECK: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 4 +; CHECK-NEXT: v128.load32_splat 4 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i32, i32* %p, i32 1 %e = load i32, i32* %s @@ -1399,7 +1399,7 @@ define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -4 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i32, i32* %p, i32 -1 %e = load i32, i32* %s @@ -1475,7 +1475,7 @@ define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i32* %p to i32 %r = add nsw i32 %q, 16 @@ -1557,7 +1557,7 @@ define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 4 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr i32, i32* %p, i32 1 %e = load i32, i32* %s @@ -1627,7 +1627,7 @@ define <4 x i32> @load_splat_v4i32_from_numeric_address() { ; CHECK: .functype load_splat_v4i32_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v32x4.load_splat 32 +; CHECK-NEXT: v128.load32_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to i32* %e = load i32, i32* %s @@ -1692,7 +1692,7 @@ define <4 x i32> @load_splat_v4i32_from_global_address() { ; CHECK: .functype load_splat_v4i32_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v32x4.load_splat gv_i32 +; CHECK-NEXT: v128.load32_splat gv_i32 ; CHECK-NEXT: # fallthrough-return %e = load i32, i32* @gv_i32 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 @@ -2027,7 +2027,7 @@ define <2 x i64> @load_splat_v2i64(i64* %p) { ; CHECK: .functype load_splat_v2i64 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load i64, i64* %p %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 @@ -2089,7 +2089,7 @@ define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) { ; CHECK: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; 
CHECK-NEXT: v64x2.load_splat 16 +; CHECK-NEXT: v128.load64_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i64* %p to i32 %r = add nuw i32 %q, 16 @@ -2161,7 +2161,7 @@ define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) { ; CHECK: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v64x2.load_splat 8 +; CHECK-NEXT: v128.load64_splat 8 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i64, i64* %p, i32 1 %e = load i64, i64* %s @@ -2229,7 +2229,7 @@ define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -8 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i64, i64* %p, i32 -1 %e = load i64, i64* %s @@ -2305,7 +2305,7 @@ define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i64* %p to i32 %r = add nsw i32 %q, 16 @@ -2387,7 +2387,7 @@ define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 8 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr i64, i64* %p, i32 1 %e = load i64, i64* %s @@ -2457,7 +2457,7 @@ define <2 x i64> @load_splat_v2i64_from_numeric_address() { ; CHECK: .functype load_splat_v2i64_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v64x2.load_splat 32 +; CHECK-NEXT: v128.load64_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to i64* %e = load i64, i64* %s @@ -2522,7 +2522,7 @@ define <2 x i64> @load_splat_v2i64_from_global_address() { ; CHECK: .functype load_splat_v2i64_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v64x2.load_splat gv_i64 +; CHECK-NEXT: v128.load64_splat gv_i64 ; CHECK-NEXT: # fallthrough-return %e = load i64, i64* @gv_i64 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 @@ -2697,7 +2697,7 @@ define <4 x float> @load_splat_v4f32(float* %p) { ; CHECK: .functype load_splat_v4f32 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load float, float* %p %v1 = insertelement <4 x float> undef, float %e, i32 0 @@ -2724,7 +2724,7 @@ define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) { ; CHECK: .functype load_splat_v4f32_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 16 +; CHECK-NEXT: v128.load32_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint float* %p to i32 %r = add nuw i32 %q, 16 @@ -2752,7 +2752,7 @@ define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) { ; CHECK: .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 4 +; CHECK-NEXT: v128.load32_splat 4 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds float, float* %p, i32 1 %e = load float, float* %s @@ -2782,7 +2782,7 @@ define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p ; CHECK-NEXT: 
local.get 0 ; CHECK-NEXT: i32.const -4 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds float, float* %p, i32 -1 %e = load float, float* %s @@ -2814,7 +2814,7 @@ define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint float* %p to i32 %r = add nsw i32 %q, 16 @@ -2846,7 +2846,7 @@ define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 4 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr float, float* %p, i32 1 %e = load float, float* %s @@ -2872,7 +2872,7 @@ define <4 x float> @load_splat_v4f32_from_numeric_address() { ; CHECK: .functype load_splat_v4f32_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v32x4.load_splat 32 +; CHECK-NEXT: v128.load32_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to float* %e = load float, float* %s @@ -2899,7 +2899,7 @@ define <4 x float> @load_splat_v4f32_from_global_address() { ; CHECK: .functype load_splat_v4f32_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v32x4.load_splat gv_f32 +; CHECK-NEXT: v128.load32_splat gv_f32 ; CHECK-NEXT: # fallthrough-return %e = load float, float* @gv_f32 %v1 = insertelement <4 x float> undef, float %e, i32 0 @@ -3038,7 +3038,7 @@ define <2 x double> @load_splat_v2f64(double* %p) { ; CHECK: .functype load_splat_v2f64 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load double, double* %p %v1 = insertelement <2 x double> undef, double %e, i32 0 @@ -3065,7 +3065,7 @@ define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) { ; CHECK: .functype load_splat_v2f64_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v64x2.load_splat 16 +; CHECK-NEXT: v128.load64_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint double* %p to i32 %r = add nuw i32 %q, 16 @@ -3093,7 +3093,7 @@ define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) { ; CHECK: .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v64x2.load_splat 8 +; CHECK-NEXT: v128.load64_splat 8 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds double, double* %p, i32 1 %e = load double, double* %s @@ -3123,7 +3123,7 @@ define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -8 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds double, double* %p, i32 -1 %e = load double, double* %s @@ -3155,7 +3155,7 @@ define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint double* %p to i32 %r = add nsw i32 %q, 16 @@ -3187,7 +3187,7 @@ 
define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 8 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr double, double* %p, i32 1 %e = load double, double* %s @@ -3213,7 +3213,7 @@ define <2 x double> @load_splat_v2f64_from_numeric_address() { ; CHECK: .functype load_splat_v2f64_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v64x2.load_splat 32 +; CHECK-NEXT: v128.load64_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to double* %e = load double, double* %s @@ -3240,7 +3240,7 @@ define <2 x double> @load_splat_v2f64_from_global_address() { ; CHECK: .functype load_splat_v2f64_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v64x2.load_splat gv_f64 +; CHECK-NEXT: v128.load64_splat gv_f64 ; CHECK-NEXT: # fallthrough-return %e = load double, double* @gv_f64 %v1 = insertelement <2 x double> undef, double %e, i32 0 diff --git a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll index 2473f0b27b7e8c..4582bc62216a7c 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll @@ -67,7 +67,7 @@ define <16 x i8> @shl_abs(<16 x i8> %v, i8 %a) { ; CHECK-NEXT: i8x16.splat $push1=, $1 ; CHECK-NEXT: i8x16.splat $push0=, $2 ; CHECK-NEXT: i8x16.add $push2=, $pop1, $pop0 -; CHECK-NEXT: v8x16.shuffle $push3=, $pop2, $0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: i8x16.shuffle $push3=, $pop2, $0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK-NEXT: i8x16.abs $push101=, $pop3 ; CHECK-NEXT: local.tee $push100=, $3=, $pop101 ; CHECK-NEXT: i8x16.extract_lane_u $push9=, $pop100, 0 diff --git a/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll b/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll index b3e2db98861d51..3e73b47c5a5b8e 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll @@ -21,7 +21,7 @@ define <4 x i32> @f32x4_splat(float %x) { ; CHECK-LABEL: not_a_vec: ; CHECK-NEXT: .functype not_a_vec (i64, i64) -> (v128){{$}} ; CHECK-NEXT: i64x2.splat $push[[L1:[0-9]+]]=, $0{{$}} -; CHECK-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $pop[[L1]], $2, 0, 1, 2, 3 +; CHECK-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $pop[[L1]], $2, 0, 1, 2, 3 ; CHECK-NEXT: return $pop[[R]] define <4 x i32> @not_a_vec(i128 %x) { %a = bitcast i128 %x to <4 x i32> diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll index 25e647f07230a5..c8053293ebac0f 100644 --- a/llvm/test/CodeGen/WebAssembly/simd.ll +++ b/llvm/test/CodeGen/WebAssembly/simd.ll @@ -202,9 +202,9 @@ define <16 x i8> @replace_zero_v16i8(<16 x i8> %v, i8 %x) { } ; CHECK-LABEL: shuffle_v16i8: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) { @@ -215,9 +215,9 @@ define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) { } ; CHECK-LABEL: shuffle_undef_v16i8: 
-; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) { @@ -472,9 +472,9 @@ define <8 x i16> @replace_zero_v8i16(<8 x i16> %v, i16 %x) { } ; CHECK-LABEL: shuffle_v8i16: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shuffle_v8i16(<8 x i16> %x, <8 x i16> %y) { @@ -484,9 +484,9 @@ define <8 x i16> @shuffle_v8i16(<8 x i16> %x, <8 x i16> %y) { } ; CHECK-LABEL: shuffle_undef_v8i16: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shuffle_undef_v8i16(<8 x i16> %x, <8 x i16> %y) { @@ -634,9 +634,9 @@ define <4 x i32> @replace_zero_v4i32(<4 x i32> %v, i32 %x) { } ; CHECK-LABEL: shuffle_v4i32: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shuffle_v4i32(<4 x i32> %x, <4 x i32> %y) { @@ -646,9 +646,9 @@ define <4 x i32> @shuffle_v4i32(<4 x i32> %x, <4 x i32> %y) { } ; CHECK-LABEL: shuffle_undef_v4i32: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shuffle_undef_v4i32(<4 x i32> %x, <4 x i32> %y) { @@ -785,9 +785,9 @@ define <2 x i64> @replace_zero_v2i64(<2 x i64> %v, i64 %x) { } ; CHECK-LABEL: shuffle_v2i64: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shuffle_v2i64(<2 x i64> %x, <2 x i64> %y) { @@ -796,9 +796,9 @@ define <2 x i64> @shuffle_v2i64(<2 x i64> %x, <2 x i64> %y) { } ; CHECK-LABEL: shuffle_undef_v2i64: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x 
i64> @shuffle_undef_v2i64(<2 x i64> %x, <2 x i64> %y) { @@ -934,9 +934,9 @@ define <4 x float> @replace_zero_v4f32(<4 x float> %v, float %x) { } ; CHECK-LABEL: shuffle_v4f32: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @shuffle_v4f32(<4 x float> %x, <4 x float> %y) { @@ -946,9 +946,9 @@ define <4 x float> @shuffle_v4f32(<4 x float> %x, <4 x float> %y) { } ; CHECK-LABEL: shuffle_undef_v4f32: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @shuffle_undef_v4f32(<4 x float> %x, <4 x float> %y) { @@ -1085,9 +1085,9 @@ define <2 x double> @replace_zero_v2f64(<2 x double> %v, double %x) { } ; CHECK-LABEL: shuffle_v2f64: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @shuffle_v2f64(<2 x double> %x, <2 x double> %y) { @@ -1097,9 +1097,9 @@ define <2 x double> @shuffle_v2f64(<2 x double> %x, <2 x double> %y) { } ; CHECK-LABEL: shuffle_undef_v2f64: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @shuffle_undef_v2f64(<2 x double> %x, <2 x double> %y) { diff --git a/llvm/test/MC/Disassembler/WebAssembly/wasm.txt b/llvm/test/MC/Disassembler/WebAssembly/wasm.txt index bb50b646ab5493..8201213e54b26c 100644 --- a/llvm/test/MC/Disassembler/WebAssembly/wasm.txt +++ b/llvm/test/MC/Disassembler/WebAssembly/wasm.txt @@ -36,7 +36,7 @@ # CHECK: v128.const 50462976, 117835012, 185207048, 252579084 0xFD 0x0C 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F -# CHECK: v8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +# CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 0xFD 0x0D 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F # Check LEB128 encoding of SIMD instructions diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index 509f4246475f0f..91e1f07fe44c47 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -24,17 +24,17 @@ main: # CHECK: i64x2.load32x2_u 32 # encoding: [0xfd,0x06,0x03,0x20] i64x2.load32x2_u 32 - # CHECK: v8x16.load_splat 48 # encoding: [0xfd,0x07,0x00,0x30] - v8x16.load_splat 48 + # CHECK: v128.load8_splat 48 # encoding: [0xfd,0x07,0x00,0x30] + v128.load8_splat 48 - # CHECK: v16x8.load_splat 48 # encoding: [0xfd,0x08,0x01,0x30] 
- v16x8.load_splat 48 + # CHECK: v128.load16_splat 48 # encoding: [0xfd,0x08,0x01,0x30] + v128.load16_splat 48 - # CHECK: v32x4.load_splat 48 # encoding: [0xfd,0x09,0x02,0x30] - v32x4.load_splat 48 + # CHECK: v128.load32_splat 48 # encoding: [0xfd,0x09,0x02,0x30] + v128.load32_splat 48 - # CHECK: v64x2.load_splat 48 # encoding: [0xfd,0x0a,0x03,0x30] - v64x2.load_splat 48 + # CHECK: v128.load64_splat 48 # encoding: [0xfd,0x0a,0x03,0x30] + v128.load64_splat 48 # CHECK: v128.store 48 # encoding: [0xfd,0x0b,0x04,0x30] v128.store 48 @@ -66,15 +66,15 @@ main: # CHECK-SAME: 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f] v128.const 0x1.60504030201p-911, 0x1.e0d0c0b0a0908p-783 - # CHECK: v8x16.shuffle 0, 17, 2, 19, 4, 21, 6, 23, + # CHECK: i8x16.shuffle 0, 17, 2, 19, 4, 21, 6, 23, # CHECK-SAME: 8, 25, 10, 27, 12, 29, 14, 31 # CHECK-SAME: # encoding: [0xfd,0x0d, # CHECK-SAME: 0x00,0x11,0x02,0x13,0x04,0x15,0x06,0x17, # CHECK-SAME: 0x08,0x19,0x0a,0x1b,0x0c,0x1d,0x0e,0x1f] - v8x16.shuffle 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31 + i8x16.shuffle 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31 - # CHECK: v8x16.swizzle # encoding: [0xfd,0x0e] - v8x16.swizzle + # CHECK: i8x16.swizzle # encoding: [0xfd,0x0e] + i8x16.swizzle # CHECK: i8x16.splat # encoding: [0xfd,0x0f] i8x16.splat From 8de43b926f0e960bbc5b6a53d1b613c46b7c774b Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 22 Dec 2020 11:22:21 +0100 Subject: [PATCH 08/10] [mlir] Remove instance methods from LLVMType LLVMType contains multiple instance methods that were introduced initially for compatibility with the LLVM API. These methods boil down to a `cast` followed by a type-specific call. Arguably, they are mostly used in an LLVM cast-follows-isa anti-pattern. This doesn't connect nicely to the rest of the MLIR infrastructure and actively prevents making the LLVM dialect type system more open, e.g., reusing built-in types when appropriate. Remove these instance methods and replace their uses with appropriate casts and methods on the derived classes. In some cases, the result may look slightly more verbose, but most call sites should actually use a stricter subtype of LLVMType anyway and avoid the isa/cast.
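For illustration, a minimal sketch of the migration pattern (not code taken from this patch; `type` and `elem` are placeholder names):

  // Before: instance methods on the generic LLVMType.
  LLVM::LLVMType elem;
  if (type.isStructTy() && type.getStructNumElements() == 2)
    elem = type.getStructElementType(1);

  // After: cast to the concrete subclass and use its own accessors.
  if (auto structTy = type.dyn_cast<LLVM::LLVMStructType>())
    if (structTy.getBody().size() == 2)
      elem = structTy.getBody()[1];

  // Similarly, elementType.getPointerTo(addrSpace) becomes
  // LLVM::LLVMPointerType::get(elementType, addrSpace).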
Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D93680 --- .../StandardToLLVM/ConvertStandardToLLVM.h | 3 +- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 28 ++- mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h | 75 ++----- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 9 +- .../Conversion/AsyncToLLVM/AsyncToLLVM.cpp | 10 +- .../ConvertLaunchFuncToRuntimeCalls.cpp | 15 +- .../lib/Conversion/GPUCommon/GPUOpsLowering.h | 18 +- .../GPUCommon/OpToFuncCallLowering.h | 9 +- .../ConvertLaunchFuncToVulkanCalls.cpp | 45 ++-- .../SPIRVToLLVM/ConvertSPIRVToLLVM.cpp | 8 +- .../StandardToLLVM/StandardToLLVM.cpp | 159 +++++++------ .../VectorToLLVM/ConvertVectorToLLVM.cpp | 22 +- .../VectorToROCDL/VectorToROCDL.cpp | 6 +- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 211 +++++++++--------- mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp | 170 ++++---------- mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 5 +- mlir/lib/ExecutionEngine/JitRunner.cpp | 19 +- mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp | 45 ++-- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 4 +- mlir/test/Dialect/LLVMIR/invalid.mlir | 8 +- 20 files changed, 402 insertions(+), 467 deletions(-) diff --git a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h index 7c069c9cd5566f..63ff16a84ab876 100644 --- a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h +++ b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h @@ -446,7 +446,8 @@ class UnrankedMemRefDescriptor : public StructBuilder { /// Builds IR extracting the pointer to the first element of the size array. static Value sizeBasePtr(OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, - Value memRefDescPtr, LLVM::LLVMType elemPtrPtrType); + Value memRefDescPtr, + LLVM::LLVMPointerType elemPtrPtrType); /// Builds IR extracting the size[index] from the descriptor. 
static Value size(OpBuilder &builder, Location loc, LLVMTypeConverter typeConverter, Value sizeBasePtr, diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index df022ef47b3317..552fe15e68997f 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -51,7 +51,7 @@ def LLVM_VoidResultTypeOpBuilder : [{ auto llvmType = resultType.dyn_cast(); (void)llvmType; assert(llvmType && "result must be an LLVM type"); - assert(llvmType.isVoidTy() && + assert(llvmType.isa() && "for zero-result operands, only 'void' is accepted as result type"); build($_builder, $_state, operands, attributes); }]>; @@ -288,7 +288,7 @@ def LLVM_LoadOp : LLVM_Op<"load">, MemoryOpWithAlignmentAndAttributes { OpBuilderDAG<(ins "Value":$addr, CArg<"unsigned", "0">:$alignment, CArg<"bool", "false">:$isVolatile, CArg<"bool", "false">:$isNonTemporal), [{ - auto type = addr.getType().cast().getPointerElementTy(); + auto type = addr.getType().cast().getElementType(); build($_builder, $_state, type, addr, alignment, isVolatile, isNonTemporal); }]>, OpBuilderDAG<(ins "Type":$t, "Value":$addr, @@ -443,8 +443,8 @@ def LLVM_CallOp : LLVM_Op<"call"> { OpBuilderDAG<(ins "LLVMFuncOp":$func, "ValueRange":$operands, CArg<"ArrayRef", "{}">:$attributes), [{ - LLVMType resultType = func.getType().getFunctionResultType(); - if (!resultType.isVoidTy()) + LLVMType resultType = func.getType().getReturnType(); + if (!resultType.isa()) $_state.addTypes(resultType); $_state.addAttribute("callee", $_builder.getSymbolRefAttr(func)); $_state.addAttributes(attributes); @@ -515,12 +515,10 @@ def LLVM_ShuffleVectorOp : LLVM_Op<"shufflevector", [NoSideEffect]> { OpBuilderDAG<(ins "Value":$v1, "Value":$v2, "ArrayAttr":$mask, CArg<"ArrayRef", "{}">:$attrs)>]; let verifier = [{ - auto wrappedVectorType1 = v1().getType().cast(); - auto wrappedVectorType2 = v2().getType().cast(); - if (!wrappedVectorType2.isVectorTy()) - return emitOpError("expected LLVM IR Dialect vector type for operand #2"); - if (wrappedVectorType1.getVectorElementType() != - wrappedVectorType2.getVectorElementType()) + auto wrappedVectorType1 = v1().getType().cast(); + auto wrappedVectorType2 = v2().getType().cast(); + if (wrappedVectorType1.getElementType() != + wrappedVectorType2.getElementType()) return emitOpError("expected matching LLVM IR Dialect element types"); return success(); }]; @@ -768,13 +766,13 @@ def LLVM_AddressOfOp : LLVM_Op<"mlir.addressof"> { CArg<"ArrayRef", "{}">:$attrs), [{ build($_builder, $_state, - global.getType().getPointerTo(global.addr_space()), + LLVM::LLVMPointerType::get(global.getType(), global.addr_space()), global.sym_name(), attrs);}]>, OpBuilderDAG<(ins "LLVMFuncOp":$func, CArg<"ArrayRef", "{}">:$attrs), [{ build($_builder, $_state, - func.getType().getPointerTo(), func.getName(), attrs);}]> + LLVM::LLVMPointerType::get(func.getType()), func.getName(), attrs);}]> ]; let extraClassDeclaration = [{ @@ -970,12 +968,12 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", // to match the signature of the function. Block *addEntryBlock(); - LLVMType getType() { + LLVMFunctionType getType() { return (*this)->getAttrOfType(getTypeAttrName()) - .getValue().cast(); + .getValue().cast(); } bool isVarArg() { - return getType().isFunctionVarArg(); + return getType().isVarArg(); } // Hook for OpTrait::FunctionLike, returns the number of function arguments`. 
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h index f92bdf9e3041ac..e1938c12c809e9 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h @@ -80,58 +80,6 @@ class LLVMType : public Type { LLVMDialect &getDialect(); - /// Returns the size of a primitive type (including vectors) in bits, for - /// example, the size of !llvm.i16 is 16 and the size of !llvm.vec<4 x i16> - /// is 64. Returns 0 for non-primitive (aggregates such as struct) or types - /// that don't have a size (such as void). - llvm::TypeSize getPrimitiveSizeInBits(); - - /// Floating-point type utilities. - bool isBFloatTy() { return isa(); } - bool isHalfTy() { return isa(); } - bool isFloatTy() { return isa(); } - bool isDoubleTy() { return isa(); } - bool isFP128Ty() { return isa(); } - bool isX86_FP80Ty() { return isa(); } - bool isFloatingPointTy() { - return isa() || isa() || - isa() || isa() || - isa() || isa(); - } - - /// Array type utilities. - LLVMType getArrayElementType(); - unsigned getArrayNumElements(); - bool isArrayTy(); - - /// Integer type utilities. - bool isIntegerTy() { return isa(); } - bool isIntegerTy(unsigned bitwidth); - unsigned getIntegerBitWidth(); - - /// Vector type utilities. - LLVMType getVectorElementType(); - unsigned getVectorNumElements(); - llvm::ElementCount getVectorElementCount(); - bool isVectorTy(); - - /// Function type utilities. - LLVMType getFunctionParamType(unsigned argIdx); - unsigned getFunctionNumParams(); - LLVMType getFunctionResultType(); - bool isFunctionTy(); - bool isFunctionVarArg(); - - /// Pointer type utilities. - LLVMType getPointerTo(unsigned addrSpace = 0); - LLVMType getPointerElementTy(); - bool isPointerTy(); - - /// Struct type utilities. - LLVMType getStructElementType(unsigned i); - unsigned getStructNumElements(); - bool isStructTy(); - /// Utilities used to generate floating point types. static LLVMType getDoubleTy(MLIRContext *context); static LLVMType getFloatTy(MLIRContext *context); @@ -148,9 +96,7 @@ class LLVMType : public Type { static LLVMType getInt8Ty(MLIRContext *context) { return getIntNTy(context, /*numBits=*/8); } - static LLVMType getInt8PtrTy(MLIRContext *context) { - return getInt8Ty(context).getPointerTo(); - } + static LLVMType getInt8PtrTy(MLIRContext *context); static LLVMType getInt16Ty(MLIRContext *context) { return getIntNTy(context, /*numBits=*/16); } @@ -184,7 +130,6 @@ class LLVMType : public Type { /// Void type utilities. static LLVMType getVoidTy(MLIRContext *context); - bool isVoidTy(); // Creation and setting of LLVM's identified struct types static LLVMType createStructTy(MLIRContext *context, @@ -585,6 +530,24 @@ LLVMType parseType(DialectAsmParser &parser); void printType(LLVMType type, DialectAsmPrinter &printer); } // namespace detail +//===----------------------------------------------------------------------===// +// Utility functions. +//===----------------------------------------------------------------------===// + +/// Returns `true` if the given type is compatible with the LLVM dialect. +inline bool isCompatibleType(Type type) { return type.isa(); } + +inline bool isCompatibleFloatingPointType(Type type) { + return type.isa(); +} + +/// Returns the size of the given primitive LLVM dialect-compatible type +/// (including vectors) in bits, for example, the size of !llvm.i16 is 16 and +/// the size of !llvm.vec<4 x i16> is 64. 
Returns 0 for non-primitive +/// (aggregates such as struct) or types that don't have a size (such as void). +llvm::TypeSize getPrimitiveTypeSizeInBits(Type type); + } // namespace LLVM } // namespace mlir diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 1f9b860eb52eb5..3c73cdf64eb707 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -109,10 +109,11 @@ def NVVM_ShflBflyOp : let verifier = [{ if (!(*this)->getAttrOfType("return_value_and_is_valid")) return success(); - auto type = getType().cast(); - if (!type.isStructTy() || type.getStructNumElements() != 2 || - !type.getStructElementType(1).isIntegerTy( - /*Bitwidth=*/1)) + auto type = getType().dyn_cast(); + auto elementType = (type && type.getBody().size() == 2) + ? type.getBody()[1].dyn_cast() + : nullptr; + if (!elementType || elementType.getBitWidth() != 1) return emitError("expected return type to be a two-element struct with " "i1 as the second element"); return success(); diff --git a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp index 273754fe2480c4..65545d8ab2de1d 100644 --- a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp +++ b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp @@ -79,7 +79,7 @@ struct AsyncAPI { static FunctionType executeFunctionType(MLIRContext *ctx) { auto hdl = LLVM::LLVMType::getInt8PtrTy(ctx); - auto resume = resumeFunctionType(ctx).getPointerTo(); + auto resume = LLVM::LLVMPointerType::get(resumeFunctionType(ctx)); return FunctionType::get(ctx, {hdl, resume}, {}); } @@ -91,13 +91,13 @@ struct AsyncAPI { static FunctionType awaitAndExecuteFunctionType(MLIRContext *ctx) { auto hdl = LLVM::LLVMType::getInt8PtrTy(ctx); - auto resume = resumeFunctionType(ctx).getPointerTo(); + auto resume = LLVM::LLVMPointerType::get(resumeFunctionType(ctx)); return FunctionType::get(ctx, {TokenType::get(ctx), hdl, resume}, {}); } static FunctionType awaitAllAndExecuteFunctionType(MLIRContext *ctx) { auto hdl = LLVM::LLVMType::getInt8PtrTy(ctx); - auto resume = resumeFunctionType(ctx).getPointerTo(); + auto resume = LLVM::LLVMPointerType::get(resumeFunctionType(ctx)); return FunctionType::get(ctx, {GroupType::get(ctx), hdl, resume}, {}); } @@ -507,7 +507,7 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { // A pointer to coroutine resume intrinsic wrapper. auto resumeFnTy = AsyncAPI::resumeFunctionType(ctx); auto resumePtr = builder.create( - loc, resumeFnTy.getPointerTo(), kResume); + loc, LLVM::LLVMPointerType::get(resumeFnTy), kResume); // Save the coroutine state: @llvm.coro.save auto coroSave = builder.create( @@ -750,7 +750,7 @@ class AwaitOpLoweringBase : public ConversionPattern { // A pointer to coroutine resume intrinsic wrapper. 
auto resumeFnTy = AsyncAPI::resumeFunctionType(ctx); auto resumePtr = builder.create( - loc, resumeFnTy.getPointerTo(), kResume); + loc, LLVM::LLVMPointerType::get(resumeFnTy), kResume); // Save the coroutine state: @llvm.coro.save auto coroSave = builder.create( diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp index 41a079c44eea58..bbb2bf1e04ff22 100644 --- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp +++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp @@ -55,14 +55,14 @@ class FunctionCallBuilder { FunctionCallBuilder(StringRef functionName, LLVM::LLVMType returnType, ArrayRef argumentTypes) : functionName(functionName), - functionType(LLVM::LLVMType::getFunctionTy(returnType, argumentTypes, - /*isVarArg=*/false)) {} + functionType(LLVM::LLVMFunctionType::get(returnType, argumentTypes, + /*isVarArg=*/false)) {} LLVM::CallOp create(Location loc, OpBuilder &builder, ArrayRef arguments) const; private: StringRef functionName; - LLVM::LLVMType functionType; + LLVM::LLVMFunctionType functionType; }; template @@ -76,7 +76,8 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern { LLVM::LLVMType llvmVoidType = LLVM::LLVMType::getVoidTy(context); LLVM::LLVMType llvmPointerType = LLVM::LLVMType::getInt8PtrTy(context); - LLVM::LLVMType llvmPointerPointerType = llvmPointerType.getPointerTo(); + LLVM::LLVMType llvmPointerPointerType = + LLVM::LLVMPointerType::get(llvmPointerType); LLVM::LLVMType llvmInt8Type = LLVM::LLVMType::getInt8Ty(context); LLVM::LLVMType llvmInt32Type = LLVM::LLVMType::getInt32Ty(context); LLVM::LLVMType llvmInt64Type = LLVM::LLVMType::getInt64Ty(context); @@ -312,7 +313,7 @@ LLVM::CallOp FunctionCallBuilder::create(Location loc, OpBuilder &builder, .create(loc, functionName, functionType); }(); return builder.create( - loc, const_cast(functionType).getFunctionResultType(), + loc, const_cast(functionType).getReturnType(), builder.getSymbolRefAttr(function), arguments); } @@ -518,7 +519,7 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray( auto one = builder.create(loc, llvmInt32Type, builder.getI32IntegerAttr(1)); auto structPtr = builder.create( - loc, structType.getPointerTo(), one, /*alignment=*/0); + loc, LLVM::LLVMPointerType::get(structType), one, /*alignment=*/0); auto arraySize = builder.create( loc, llvmInt32Type, builder.getI32IntegerAttr(numArguments)); auto arrayPtr = builder.create(loc, llvmPointerPointerType, @@ -529,7 +530,7 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray( auto index = builder.create( loc, llvmInt32Type, builder.getI32IntegerAttr(en.index())); auto fieldPtr = builder.create( - loc, argumentTypes[en.index()].getPointerTo(), structPtr, + loc, LLVM::LLVMPointerType::get(argumentTypes[en.index()]), structPtr, ArrayRef{zero, index.getResult()}); builder.create(loc, en.value(), fieldPtr); auto elementPtr = builder.create(loc, llvmPointerPointerType, diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h index bf17200e594f1f..914b7ee50cf934 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h @@ -51,8 +51,8 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { // Rewrite the original GPU function to an LLVM function. 
auto funcType = typeConverter->convertType(gpuFuncOp.getType()) - .template cast() - .getPointerElementTy(); + .template cast() + .getElementType(); // Remap proper input types. TypeConverter::SignatureConversion signatureConversion( @@ -94,10 +94,11 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { for (auto en : llvm::enumerate(workgroupBuffers)) { LLVM::GlobalOp global = en.value(); Value address = rewriter.create(loc, global); - auto elementType = global.getType().getArrayElementType(); + auto elementType = + global.getType().cast().getElementType(); Value memory = rewriter.create( - loc, elementType.getPointerTo(global.addr_space()), address, - ArrayRef{zero, zero}); + loc, LLVM::LLVMPointerType::get(elementType, global.addr_space()), + address, ArrayRef{zero, zero}); // Build a memref descriptor pointing to the buffer to plug with the // existing memref infrastructure. This may use more registers than @@ -123,9 +124,10 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { // Explicitly drop memory space when lowering private memory // attributions since NVVM models it as `alloca`s in the default // memory space and does not support `alloca`s with addrspace(5). - auto ptrType = typeConverter->convertType(type.getElementType()) - .template cast() - .getPointerTo(AllocaAddrSpace); + auto ptrType = LLVM::LLVMPointerType::get( + typeConverter->convertType(type.getElementType()) + .template cast(), + AllocaAddrSpace); Value numElements = rewriter.create( gpuFuncOp.getLoc(), int64Ty, rewriter.getI64IntegerAttr(type.getNumElements())); diff --git a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h index 9d08aeee190610..b2887aa1d78291 100644 --- a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h +++ b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h @@ -57,7 +57,8 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern { LLVMType resultType = castedOperands.front().getType().cast(); LLVMType funcType = getFunctionType(resultType, castedOperands); - StringRef funcName = getFunctionName(funcType.getFunctionResultType()); + StringRef funcName = getFunctionName( + funcType.cast().getReturnType()); if (funcName.empty()) return failure(); @@ -80,7 +81,7 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern { private: Value maybeCast(Value operand, PatternRewriter &rewriter) const { LLVM::LLVMType type = operand.getType().cast(); - if (!type.isHalfTy()) + if (!type.isa()) return operand; return rewriter.create( @@ -100,9 +101,9 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern { } StringRef getFunctionName(LLVM::LLVMType type) const { - if (type.isFloatTy()) + if (type.isa()) return f32Func; - if (type.isDoubleTy()) + if (type.isa()) return f64Func; return ""; } diff --git a/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp b/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp index 355bced96ae750..c676cd256d66a8 100644 --- a/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp +++ b/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp @@ -75,7 +75,7 @@ class VulkanLaunchFuncToVulkanCallsPass // int64_t sizes[Rank]; // omitted when rank == 0 // int64_t strides[Rank]; // omitted when rank == 0 // }; - auto llvmPtrToElementType = elemenType.getPointerTo(); + auto llvmPtrToElementType = LLVM::LLVMPointerType::get(elemenType); auto llvmArrayRankElementSizeType = LLVM::LLVMType::getArrayTy(getInt64Type(), rank); @@ -131,16 +131,18 @@ class 
VulkanLaunchFuncToVulkanCallsPass /// Returns a string representation from the given `type`. StringRef stringifyType(LLVM::LLVMType type) { - if (type.isFloatTy()) + if (type.isa()) return "Float"; - if (type.isHalfTy()) + if (type.isa()) return "Half"; - if (type.isIntegerTy(32)) - return "Int32"; - if (type.isIntegerTy(16)) - return "Int16"; - if (type.isIntegerTy(8)) - return "Int8"; + if (auto intType = type.dyn_cast()) { + if (intType.getBitWidth() == 32) + return "Int32"; + if (intType.getBitWidth() == 16) + return "Int16"; + if (intType.getBitWidth() == 8) + return "Int8"; + } llvm_unreachable("unsupported type"); } @@ -238,11 +240,11 @@ void VulkanLaunchFuncToVulkanCallsPass::createBindMemRefCalls( llvm::formatv("bindMemRef{0}D{1}", rank, stringifyType(type)).str(); // Special case for fp16 type. Since it is not a supported type in C we use // int16_t and bitcast the descriptor. - if (type.isHalfTy()) { + if (type.isa()) { auto memRefTy = getMemRefType(rank, LLVM::LLVMType::getInt16Ty(&getContext())); ptrToMemRefDescriptor = builder.create( - loc, memRefTy.getPointerTo(), ptrToMemRefDescriptor); + loc, LLVM::LLVMPointerType::get(memRefTy), ptrToMemRefDescriptor); } // Create call to `bindMemRef`. builder.create( @@ -257,11 +259,12 @@ void VulkanLaunchFuncToVulkanCallsPass::createBindMemRefCalls( LogicalResult VulkanLaunchFuncToVulkanCallsPass::deduceMemRefRankAndType( Value ptrToMemRefDescriptor, uint32_t &rank, LLVM::LLVMType &type) { auto llvmPtrDescriptorTy = - ptrToMemRefDescriptor.getType().dyn_cast(); + ptrToMemRefDescriptor.getType().dyn_cast(); if (!llvmPtrDescriptorTy) return failure(); - auto llvmDescriptorTy = llvmPtrDescriptorTy.getPointerElementTy(); + auto llvmDescriptorTy = + llvmPtrDescriptorTy.getElementType().dyn_cast(); // template // struct { // Elem *allocated; @@ -270,15 +273,19 @@ LogicalResult VulkanLaunchFuncToVulkanCallsPass::deduceMemRefRankAndType( // int64_t sizes[Rank]; // omitted when rank == 0 // int64_t strides[Rank]; // omitted when rank == 0 // }; - if (!llvmDescriptorTy || !llvmDescriptorTy.isStructTy()) + if (!llvmDescriptorTy) return failure(); - type = llvmDescriptorTy.getStructElementType(0).getPointerElementTy(); - if (llvmDescriptorTy.getStructNumElements() == 3) { + type = llvmDescriptorTy.getBody()[0] + .cast() + .getElementType(); + if (llvmDescriptorTy.getBody().size() == 3) { rank = 0; return success(); } - rank = llvmDescriptorTy.getStructElementType(3).getArrayNumElements(); + rank = llvmDescriptorTy.getBody()[3] + .cast() + .getNumElements(); return success(); } @@ -326,13 +333,13 @@ void VulkanLaunchFuncToVulkanCallsPass::declareVulkanFunctions(Location loc) { LLVM::LLVMType::getHalfTy(&getContext())}) { std::string fnName = "bindMemRef" + std::to_string(i) + "D" + std::string(stringifyType(type)); - if (type.isHalfTy()) + if (type.isa()) type = LLVM::LLVMType::getInt16Ty(&getContext()); if (!module.lookupSymbol(fnName)) { auto fnType = LLVM::LLVMType::getFunctionTy( getVoidType(), {getPointerType(), getInt32Type(), getInt32Type(), - getMemRefType(i, type).getPointerTo()}, + LLVM::LLVMPointerType::get(getMemRefType(i, type))}, /*isVarArg=*/false); builder.create(loc, fnName, fnType); } diff --git a/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp index cacb4787edd4ef..7da9c47f921992 100644 --- a/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp +++ b/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp @@ -66,8 +66,10 @@ static unsigned getBitWidth(Type type) { 
/// Returns the bit width of LLVMType integer or vector. static unsigned getLLVMTypeBitWidth(LLVM::LLVMType type) { - return type.isVectorTy() ? type.getVectorElementType().getIntegerBitWidth() - : type.getIntegerBitWidth(); + auto vectorType = type.dyn_cast(); + return (vectorType ? vectorType.getElementType() : type) + .cast() + .getBitWidth(); } /// Creates `IntegerAttribute` with all bits set for given type @@ -265,7 +267,7 @@ static Type convertPointerType(spirv::PointerType type, TypeConverter &converter) { auto pointeeType = converter.convertType(type.getPointeeType()).cast(); - return pointeeType.getPointerTo(); + return LLVM::LLVMPointerType::get(pointeeType); } /// Converts SPIR-V runtime array to LLVM array. Since LLVM allows indexing over diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 6fbcc220a86b08..e37e7e2dc0c114 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -215,7 +215,7 @@ Type LLVMTypeConverter::convertFunctionType(FunctionType type) { SignatureConversion conversion(type.getNumInputs()); LLVM::LLVMType converted = convertFunctionSignature(type, /*isVariadic=*/false, conversion); - return converted.getPointerTo(); + return LLVM::LLVMPointerType::get(converted); } @@ -267,7 +267,7 @@ LLVMTypeConverter::convertFunctionTypeCWrapper(FunctionType type) { if (!converted) return {}; if (t.isa()) - converted = converted.getPointerTo(); + converted = LLVM::LLVMPointerType::get(converted); inputs.push_back(converted); } @@ -324,7 +324,7 @@ LLVMTypeConverter::getMemRefDescriptorFields(MemRefType type, LLVM::LLVMType elementType = unwrap(convertType(type.getElementType())); if (!elementType) return {}; - auto ptrTy = elementType.getPointerTo(type.getMemorySpace()); + auto ptrTy = LLVM::LLVMPointerType::get(elementType, type.getMemorySpace()); auto indexTy = getIndexType(); SmallVector results = {ptrTy, ptrTy, indexTy}; @@ -396,7 +396,7 @@ Type LLVMTypeConverter::convertMemRefToBarePtr(BaseMemRefType type) { LLVM::LLVMType elementType = unwrap(convertType(type.getElementType())); if (!elementType) return {}; - return elementType.getPointerTo(type.getMemorySpace()); + return LLVM::LLVMPointerType::get(elementType, type.getMemorySpace()); } // Convert an n-D vector type to an LLVM vector type via (n-1)-D array type when @@ -460,7 +460,7 @@ StructBuilder::StructBuilder(Value v) : value(v) { Value StructBuilder::extractPtr(OpBuilder &builder, Location loc, unsigned pos) { - Type type = structType.cast().getStructElementType(pos); + Type type = structType.cast().getBody()[pos]; return builder.create(loc, type, value, builder.getI64ArrayAttr(pos)); } @@ -507,8 +507,9 @@ Value ComplexStructBuilder::imaginary(OpBuilder &builder, Location loc) { MemRefDescriptor::MemRefDescriptor(Value descriptor) : StructBuilder(descriptor) { assert(value != nullptr && "value cannot be null"); - indexType = value.getType().cast().getStructElementType( - kOffsetPosInMemRefDescriptor); + indexType = value.getType() + .cast() + .getBody()[kOffsetPosInMemRefDescriptor]; } /// Builds IR creating an `undef` value of the descriptor type. 
@@ -618,9 +619,9 @@ Value MemRefDescriptor::size(OpBuilder &builder, Location loc, unsigned pos) { Value MemRefDescriptor::size(OpBuilder &builder, Location loc, Value pos, int64_t rank) { auto indexTy = indexType.cast(); - auto indexPtrTy = indexTy.getPointerTo(); + auto indexPtrTy = LLVM::LLVMPointerType::get(indexTy); auto arrayTy = LLVM::LLVMType::getArrayTy(indexTy, rank); - auto arrayPtrTy = arrayTy.getPointerTo(); + auto arrayPtrTy = LLVM::LLVMPointerType::get(arrayTy); // Copy size values to stack-allocated memory. auto zero = createIndexAttrConstant(builder, loc, indexType, 0); @@ -675,8 +676,8 @@ void MemRefDescriptor::setConstantStride(OpBuilder &builder, Location loc, LLVM::LLVMPointerType MemRefDescriptor::getElementPtrType() { return value.getType() - .cast() - .getStructElementType(kAlignedPtrPosInMemRefDescriptor) + .cast() + .getBody()[kAlignedPtrPosInMemRefDescriptor] .cast(); } @@ -922,7 +923,7 @@ Value UnrankedMemRefDescriptor::offset(OpBuilder &builder, Location loc, Value offsetGep = builder.create( loc, elemPtrPtrType, elementPtrPtr, ValueRange({two})); offsetGep = builder.create( - loc, typeConverter.getIndexType().getPointerTo(), offsetGep); + loc, LLVM::LLVMPointerType::get(typeConverter.getIndexType()), offsetGep); return builder.create(loc, offsetGep); } @@ -939,19 +940,17 @@ void UnrankedMemRefDescriptor::setOffset(OpBuilder &builder, Location loc, Value offsetGep = builder.create( loc, elemPtrPtrType, elementPtrPtr, ValueRange({two})); offsetGep = builder.create( - loc, typeConverter.getIndexType().getPointerTo(), offsetGep); + loc, LLVM::LLVMPointerType::get(typeConverter.getIndexType()), offsetGep); builder.create(loc, offset, offsetGep); } -Value UnrankedMemRefDescriptor::sizeBasePtr(OpBuilder &builder, Location loc, - LLVMTypeConverter &typeConverter, - Value memRefDescPtr, - LLVM::LLVMType elemPtrPtrType) { - LLVM::LLVMType elemPtrTy = elemPtrPtrType.getPointerElementTy(); +Value UnrankedMemRefDescriptor::sizeBasePtr( + OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, + Value memRefDescPtr, LLVM::LLVMPointerType elemPtrPtrType) { + LLVM::LLVMType elemPtrTy = elemPtrPtrType.getElementType(); LLVM::LLVMType indexTy = typeConverter.getIndexType(); - LLVM::LLVMType structPtrTy = - LLVM::LLVMType::getStructTy(elemPtrTy, elemPtrTy, indexTy, indexTy) - .getPointerTo(); + LLVM::LLVMType structPtrTy = LLVM::LLVMPointerType::get( + LLVM::LLVMType::getStructTy(elemPtrTy, elemPtrTy, indexTy, indexTy)); Value structPtr = builder.create(loc, structPtrTy, memRefDescPtr); @@ -961,14 +960,15 @@ Value UnrankedMemRefDescriptor::sizeBasePtr(OpBuilder &builder, Location loc, createIndexAttrConstant(builder, loc, typeConverter.getIndexType(), 0); Value three = builder.create(loc, int32_type, builder.getI32IntegerAttr(3)); - return builder.create(loc, indexTy.getPointerTo(), structPtr, - ValueRange({zero, three})); + return builder.create(loc, LLVM::LLVMPointerType::get(indexTy), + structPtr, ValueRange({zero, three})); } Value UnrankedMemRefDescriptor::size(OpBuilder &builder, Location loc, LLVMTypeConverter typeConverter, Value sizeBasePtr, Value index) { - LLVM::LLVMType indexPtrTy = typeConverter.getIndexType().getPointerTo(); + LLVM::LLVMType indexPtrTy = + LLVM::LLVMPointerType::get(typeConverter.getIndexType()); Value sizeStoreGep = builder.create(loc, indexPtrTy, sizeBasePtr, ValueRange({index})); return builder.create(loc, sizeStoreGep); @@ -978,7 +978,8 @@ void UnrankedMemRefDescriptor::setSize(OpBuilder &builder, Location loc, LLVMTypeConverter 
typeConverter, Value sizeBasePtr, Value index, Value size) { - LLVM::LLVMType indexPtrTy = typeConverter.getIndexType().getPointerTo(); + LLVM::LLVMType indexPtrTy = + LLVM::LLVMPointerType::get(typeConverter.getIndexType()); Value sizeStoreGep = builder.create(loc, indexPtrTy, sizeBasePtr, ValueRange({index})); builder.create(loc, size, sizeStoreGep); @@ -987,7 +988,8 @@ void UnrankedMemRefDescriptor::setSize(OpBuilder &builder, Location loc, Value UnrankedMemRefDescriptor::strideBasePtr(OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, Value sizeBasePtr, Value rank) { - LLVM::LLVMType indexPtrTy = typeConverter.getIndexType().getPointerTo(); + LLVM::LLVMType indexPtrTy = + LLVM::LLVMPointerType::get(typeConverter.getIndexType()); return builder.create(loc, indexPtrTy, sizeBasePtr, ValueRange({rank})); } @@ -996,7 +998,8 @@ Value UnrankedMemRefDescriptor::stride(OpBuilder &builder, Location loc, LLVMTypeConverter typeConverter, Value strideBasePtr, Value index, Value stride) { - LLVM::LLVMType indexPtrTy = typeConverter.getIndexType().getPointerTo(); + LLVM::LLVMType indexPtrTy = + LLVM::LLVMPointerType::get(typeConverter.getIndexType()); Value strideStoreGep = builder.create( loc, indexPtrTy, strideBasePtr, ValueRange({index})); return builder.create(loc, strideStoreGep); @@ -1006,7 +1009,8 @@ void UnrankedMemRefDescriptor::setStride(OpBuilder &builder, Location loc, LLVMTypeConverter typeConverter, Value strideBasePtr, Value index, Value stride) { - LLVM::LLVMType indexPtrTy = typeConverter.getIndexType().getPointerTo(); + LLVM::LLVMType indexPtrTy = + LLVM::LLVMPointerType::get(typeConverter.getIndexType()); Value strideStoreGep = builder.create( loc, indexPtrTy, strideBasePtr, ValueRange({index})); builder.create(loc, stride, strideStoreGep); @@ -1100,7 +1104,7 @@ bool ConvertToLLVMPattern::isSupportedMemRefType(MemRefType type) const { Type ConvertToLLVMPattern::getElementPtrType(MemRefType type) const { auto elementType = type.getElementType(); auto structElementType = unwrap(typeConverter->convertType(elementType)); - return structElementType.getPointerTo(type.getMemorySpace()); + return LLVM::LLVMPointerType::get(structElementType, type.getMemorySpace()); } void ConvertToLLVMPattern::getMemRefDescriptorSizes( @@ -1158,8 +1162,8 @@ Value ConvertToLLVMPattern::getSizeInBytes( // %0 = getelementptr %elementType* null, %indexType 1 // %1 = ptrtoint %elementType* %0 to %indexType // which is a common pattern of getting the size of a type in bytes. 
- auto convertedPtrType = - typeConverter->convertType(type).cast().getPointerTo(); + auto convertedPtrType = LLVM::LLVMPointerType::get( + typeConverter->convertType(type).cast()); auto nullPtr = rewriter.create(loc, convertedPtrType); auto gep = rewriter.create( loc, convertedPtrType, @@ -1315,7 +1319,8 @@ static void wrapExternalFunction(OpBuilder &builder, Location loc, builder, loc, typeConverter, unrankedMemRefType, wrapperArgsRange.take_front(numToDrop)); - auto ptrTy = packed.getType().cast().getPointerTo(); + auto ptrTy = + LLVM::LLVMPointerType::get(packed.getType().cast()); Value one = builder.create( loc, typeConverter.convertType(builder.getIndexType()), builder.getIntegerAttr(builder.getIndexType(), 1)); @@ -1512,11 +1517,12 @@ static NDVectorTypeInfo extractNDVectorTypeInfo(VectorType vectorType, return info; info.arraySizes.reserve(vectorType.getRank() - 1); auto llvmTy = info.llvmArrayTy; - while (llvmTy.isArrayTy()) { - info.arraySizes.push_back(llvmTy.getArrayNumElements()); - llvmTy = llvmTy.getArrayElementType(); + while (llvmTy.isa()) { + info.arraySizes.push_back( + llvmTy.cast().getNumElements()); + llvmTy = llvmTy.cast().getElementType(); } - if (!llvmTy.isVectorTy()) + if (!llvmTy.isa()) return info; info.llvmVectorTy = llvmTy; return info; @@ -1644,7 +1650,7 @@ LogicalResult LLVM::detail::vectorOneToOneRewrite( return failure(); auto llvmArrayTy = operands[0].getType().cast(); - if (!llvmArrayTy.isArrayTy()) + if (!llvmArrayTy.isa()) return oneToOneRewrite(op, targetOp, operands, typeConverter, rewriter); auto callback = [op, targetOp, &rewriter](LLVM::LLVMType llvmVectorTy, @@ -2457,13 +2463,14 @@ struct GetGlobalMemrefOpLowering : public AllocLikeOpLowering { LLVM::LLVMType arrayTy = convertGlobalMemrefTypeToLLVM(type, *getTypeConverter()); auto addressOf = rewriter.create( - loc, arrayTy.getPointerTo(memSpace), getGlobalOp.name()); + loc, LLVM::LLVMPointerType::get(arrayTy, memSpace), getGlobalOp.name()); // Get the address of the first element in the array by creating a GEP with // the address of the GV as the base, and (rank + 1) number of 0 indices. 
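  // Example (illustrative, added for exposition): a memref<2x3xf32> global lowers to an LLVM array '2 x (3 x float)'; its rank is 2, so the GEP described above uses rank + 1 = 3 zero indices to address the first scalar element of the global.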
LLVM::LLVMType elementType = unwrap(typeConverter->convertType(type.getElementType())); - LLVM::LLVMType elementPtrType = elementType.getPointerTo(memSpace); + LLVM::LLVMType elementPtrType = + LLVM::LLVMPointerType::get(elementType, memSpace); SmallVector operands = {addressOf}; operands.insert(operands.end(), type.getRank() + 1, @@ -2504,9 +2511,9 @@ struct RsqrtOpLowering : public ConvertOpToLLVMPattern { auto floatType = getElementTypeOrSelf(resultType).cast(); auto floatOne = rewriter.getFloatAttr(floatType, 1.0); - if (!operandType.isArrayTy()) { + if (!operandType.isa()) { LLVM::ConstantOp one; - if (operandType.isVectorTy()) { + if (operandType.isa()) { one = rewriter.create( loc, operandType, SplatElementsAttr::get(resultType.cast(), floatOne)); @@ -2526,8 +2533,10 @@ struct RsqrtOpLowering : public ConvertOpToLLVMPattern { op.getOperation(), operands, *getTypeConverter(), [&](LLVM::LLVMType llvmVectorTy, ValueRange operands) { auto splatAttr = SplatElementsAttr::get( - mlir::VectorType::get({llvmVectorTy.getVectorNumElements()}, - floatType), + mlir::VectorType::get( + {llvmVectorTy.cast() + .getNumElements()}, + floatType), floatOne); auto one = rewriter.create(loc, llvmVectorTy, splatAttr); @@ -2614,12 +2623,13 @@ struct MemRefCastOpLowering : public ConvertOpToLLVMPattern { // ptr = ExtractValueOp src, 1 auto ptr = memRefDesc.memRefDescPtr(rewriter, loc); // castPtr = BitCastOp i8* to structTy* - auto castPtr = - rewriter - .create( - loc, targetStructType.cast().getPointerTo(), - ptr) - .getResult(); + auto castPtr = rewriter + .create( + loc, + LLVM::LLVMPointerType::get( + targetStructType.cast()), + ptr) + .getResult(); // struct = LoadOp castPtr auto loadOp = rewriter.create(loc, castPtr); rewriter.replaceOp(memRefCastOp, loadOp.getResult()); @@ -2654,8 +2664,8 @@ static void extractPointersAndOffset(Location loc, Type elementType = operandType.cast().getElementType(); LLVM::LLVMType llvmElementType = unwrap(typeConverter.convertType(elementType)); - LLVM::LLVMType elementPtrPtrType = - llvmElementType.getPointerTo(memorySpace).getPointerTo(); + LLVM::LLVMType elementPtrPtrType = LLVM::LLVMPointerType::get( + LLVM::LLVMPointerType::get(llvmElementType, memorySpace)); // Extract pointer to the underlying ranked memref descriptor and cast it to // ElemType**. @@ -2700,8 +2710,8 @@ struct MemRefReinterpretCastOpLowering MemRefType targetMemRefType = castOp.getResult().getType().cast(); auto llvmTargetDescriptorTy = typeConverter->convertType(targetMemRefType) - .dyn_cast_or_null(); - if (!llvmTargetDescriptorTy || !llvmTargetDescriptorTy.isStructTy()) + .dyn_cast_or_null(); + if (!llvmTargetDescriptorTy) return failure(); // Create descriptor. @@ -2804,8 +2814,8 @@ struct MemRefReshapeOpLowering // Set pointers and offset. LLVM::LLVMType llvmElementType = unwrap(typeConverter->convertType(elementType)); - LLVM::LLVMType elementPtrPtrType = - llvmElementType.getPointerTo(addressSpace).getPointerTo(); + auto elementPtrPtrType = LLVM::LLVMPointerType::get( + LLVM::LLVMPointerType::get(llvmElementType, addressSpace)); UnrankedMemRefDescriptor::setAllocatedPtr(rewriter, loc, underlyingDescPtr, elementPtrPtrType, allocatedPtr); UnrankedMemRefDescriptor::setAlignedPtr(rewriter, loc, *getTypeConverter(), @@ -2858,7 +2868,7 @@ struct MemRefReshapeOpLowering rewriter.setInsertionPointToStart(bodyBlock); // Copy size from shape to descriptor. 
- LLVM::LLVMType llvmIndexPtrType = indexType.getPointerTo(); + LLVM::LLVMType llvmIndexPtrType = LLVM::LLVMPointerType::get(indexType); Value sizeLoadGep = rewriter.create( loc, llvmIndexPtrType, shapeOperandPtr, ValueRange{indexArg}); Value size = rewriter.create(loc, sizeLoadGep); @@ -2950,14 +2960,14 @@ struct DimOpLowering : public ConvertOpToLLVMPattern { Value underlyingRankedDesc = unrankedDesc.memRefDescPtr(rewriter, loc); Value scalarMemRefDescPtr = rewriter.create( loc, - typeConverter->convertType(scalarMemRefType) - .cast() - .getPointerTo(addressSpace), + LLVM::LLVMPointerType::get( + typeConverter->convertType(scalarMemRefType).cast(), + addressSpace), underlyingRankedDesc); // Get pointer to offset field of memref descriptor. - Type indexPtrTy = - getTypeConverter()->getIndexType().getPointerTo(addressSpace); + Type indexPtrTy = LLVM::LLVMPointerType::get( + getTypeConverter()->getIndexType(), addressSpace); Value two = rewriter.create( loc, typeConverter->convertType(rewriter.getI32Type()), rewriter.getI32IntegerAttr(2)); @@ -3120,10 +3130,10 @@ struct IndexCastOpLowering : public ConvertOpToLLVMPattern { auto targetType = typeConverter->convertType(indexCastOp.getResult().getType()) - .cast(); - auto sourceType = transformed.in().getType().cast(); - unsigned targetBits = targetType.getIntegerBitWidth(); - unsigned sourceBits = sourceType.getIntegerBitWidth(); + .cast(); + auto sourceType = transformed.in().getType().cast(); + unsigned targetBits = targetType.getBitWidth(); + unsigned sourceBits = sourceType.getBitWidth(); if (targetBits == sourceBits) rewriter.replaceOp(indexCastOp, transformed.in()); @@ -3462,14 +3472,18 @@ struct SubViewOpLowering : public ConvertOpToLLVMPattern { // Copy the buffer pointer from the old descriptor to the new one. Value extracted = sourceMemRef.allocatedPtr(rewriter, loc); Value bitcastPtr = rewriter.create( - loc, targetElementTy.getPointerTo(viewMemRefType.getMemorySpace()), + loc, + LLVM::LLVMPointerType::get(targetElementTy, + viewMemRefType.getMemorySpace()), extracted); targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr); // Copy the buffer pointer from the old descriptor to the new one. 
extracted = sourceMemRef.alignedPtr(rewriter, loc); bitcastPtr = rewriter.create( - loc, targetElementTy.getPointerTo(viewMemRefType.getMemorySpace()), + loc, + LLVM::LLVMPointerType::get(targetElementTy, + viewMemRefType.getMemorySpace()), extracted); targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr); @@ -3662,7 +3676,9 @@ struct ViewOpLowering : public ConvertOpToLLVMPattern { Value allocatedPtr = sourceMemRef.allocatedPtr(rewriter, loc); auto srcMemRefType = viewOp.source().getType().cast(); Value bitcastPtr = rewriter.create( - loc, targetElementTy.getPointerTo(srcMemRefType.getMemorySpace()), + loc, + LLVM::LLVMPointerType::get(targetElementTy, + srcMemRefType.getMemorySpace()), allocatedPtr); targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr); @@ -3671,7 +3687,9 @@ struct ViewOpLowering : public ConvertOpToLLVMPattern { alignedPtr = rewriter.create(loc, alignedPtr.getType(), alignedPtr, adaptor.byte_shift()); bitcastPtr = rewriter.create( - loc, targetElementTy.getPointerTo(srcMemRefType.getMemorySpace()), + loc, + LLVM::LLVMPointerType::get(targetElementTy, + srcMemRefType.getMemorySpace()), alignedPtr); targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr); @@ -4064,7 +4082,8 @@ Value LLVMTypeConverter::promoteOneMemRefDescriptor(Location loc, Value operand, auto indexType = IndexType::get(context); // Alloca with proper alignment. We do not expect optimizations of this // alloca op and so we omit allocating at the entry block. - auto ptrType = operand.getType().cast().getPointerTo(); + auto ptrType = + LLVM::LLVMPointerType::get(operand.getType().cast()); Value one = builder.create(loc, int64Ty, IntegerAttr::get(indexType, 1)); Value allocated = diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index a982b90e0e93bc..bcc91e304e72de 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -193,7 +193,7 @@ static LogicalResult getBasePtr(ConversionPatternRewriter &rewriter, Value base; if (failed(getBase(rewriter, loc, memref, memRefType, base))) return failure(); - auto pType = type.template cast().getPointerTo(); + auto pType = LLVM::LLVMPointerType::get(type.template cast()); base = rewriter.create(loc, pType, base); ptr = rewriter.create(loc, pType, base); return success(); @@ -1100,14 +1100,14 @@ class VectorTypeCastOpConversion return failure(); auto llvmSourceDescriptorTy = - operands[0].getType().dyn_cast(); - if (!llvmSourceDescriptorTy || !llvmSourceDescriptorTy.isStructTy()) + operands[0].getType().dyn_cast(); + if (!llvmSourceDescriptorTy) return failure(); MemRefDescriptor sourceMemRef(operands[0]); auto llvmTargetDescriptorTy = typeConverter->convertType(targetMemRefType) - .dyn_cast_or_null(); - if (!llvmTargetDescriptorTy || !llvmTargetDescriptorTy.isStructTy()) + .dyn_cast_or_null(); + if (!llvmTargetDescriptorTy) return failure(); // Only contiguous source buffers supported atm. @@ -1231,15 +1231,15 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { // TODO: support alignment when possible. 
Value dataPtr = this->getStridedElementPtr( loc, memRefType, adaptor.source(), adaptor.indices(), rewriter); - auto vecTy = - toLLVMTy(xferOp.getVectorType()).template cast(); + auto vecTy = toLLVMTy(xferOp.getVectorType()) + .template cast(); Value vectorDataPtr; if (memRefType.getMemorySpace() == 0) - vectorDataPtr = - rewriter.create(loc, vecTy.getPointerTo(), dataPtr); + vectorDataPtr = rewriter.create( + loc, LLVM::LLVMPointerType::get(vecTy), dataPtr); else vectorDataPtr = rewriter.create( - loc, vecTy.getPointerTo(), dataPtr); + loc, LLVM::LLVMPointerType::get(vecTy), dataPtr); if (!xferOp.isMaskedDim(0)) return replaceTransferOpWithLoadOrStore(rewriter, @@ -1253,7 +1253,7 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { // // TODO: when the leaf transfer rank is k > 1, we need the last `k` // dimensions here. - unsigned vecWidth = vecTy.getVectorNumElements(); + unsigned vecWidth = vecTy.getNumElements(); unsigned lastIndex = llvm::size(xferOp.indices()) - 1; Value off = xferOp.indices()[lastIndex]; Value dim = rewriter.create(loc, xferOp.source(), lastIndex); diff --git a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp index 973b116ef498ca..1335f33e10aa73 100644 --- a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp +++ b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp @@ -78,9 +78,9 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { auto toLLVMTy = [&](Type t) { return this->getTypeConverter()->convertType(t); }; - LLVM::LLVMType vecTy = - toLLVMTy(xferOp.getVectorType()).template cast(); - unsigned vecWidth = vecTy.getVectorNumElements(); + auto vecTy = toLLVMTy(xferOp.getVectorType()) + .template cast(); + unsigned vecWidth = vecTy.getNumElements(); Location loc = xferOp->getLoc(); // The backend result vector scalarization have trouble scalarize diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 7b1300da1783f8..2bdbb877ec84c7 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -105,9 +105,10 @@ static ParseResult parseCmpOp(OpAsmParser &parser, OperationState &result) { auto argType = type.dyn_cast(); if (!argType) return parser.emitError(trailingTypeLoc, "expected LLVM IR dialect type"); - if (argType.isVectorTy()) - resultType = - LLVMType::getVectorTy(resultType, argType.getVectorNumElements()); + if (auto vecArgType = argType.dyn_cast()) + resultType = LLVMType::getVectorTy(resultType, vecArgType.getNumElements()); + assert(!argType.isa() && + "unhandled scalable vector"); result.addTypes({resultType}); return success(); @@ -118,7 +119,7 @@ static ParseResult parseCmpOp(OpAsmParser &parser, OperationState &result) { //===----------------------------------------------------------------------===// static void printAllocaOp(OpAsmPrinter &p, AllocaOp &op) { - auto elemTy = op.getType().cast().getPointerElementTy(); + auto elemTy = op.getType().cast().getElementType(); auto funcTy = FunctionType::get(op.getContext(), {op.arraySize().getType()}, {op.getType()}); @@ -363,14 +364,11 @@ static void printLoadOp(OpAsmPrinter &p, LoadOp &op) { // the resulting type wrapped in MLIR, or nullptr on error. 
static Type getLoadStoreElementType(OpAsmParser &parser, Type type, llvm::SMLoc trailingTypeLoc) { - auto llvmTy = type.dyn_cast(); + auto llvmTy = type.dyn_cast(); if (!llvmTy) - return parser.emitError(trailingTypeLoc, "expected LLVM IR dialect type"), - nullptr; - if (!llvmTy.isPointerTy()) return parser.emitError(trailingTypeLoc, "expected LLVM pointer type"), nullptr; - return llvmTy.getPointerElementTy(); + return llvmTy.getElementType(); } // ::= `llvm.load` `volatile` ssa-use attribute-dict? `:` type @@ -569,7 +567,7 @@ static ParseResult parseInvokeOp(OpAsmParser &parser, OperationState &result) { auto llvmFuncType = LLVM::LLVMType::getFunctionTy(llvmResultType, argTypes, /*isVarArg=*/false); - auto wrappedFuncType = llvmFuncType.getPointerTo(); + auto wrappedFuncType = LLVM::LLVMPointerType::get(llvmFuncType); auto funcArguments = llvm::makeArrayRef(operands).drop_front(); @@ -613,7 +611,7 @@ static LogicalResult verify(LandingpadOp op) { for (unsigned idx = 0, ie = op.getNumOperands(); idx < ie; idx++) { value = op.getOperand(idx); - bool isFilter = value.getType().cast().isArrayTy(); + bool isFilter = value.getType().isa(); if (isFilter) { // FIXME: Verify filter clauses when arrays are appropriately handled } else { @@ -646,7 +644,7 @@ static void printLandingpadOp(OpAsmPrinter &p, LandingpadOp &op) { for (auto value : op.getOperands()) { // Similar to llvm - if clause is an array type then it is filter // clause else catch clause - bool isArrayTy = value.getType().cast().isArrayTy(); + bool isArrayTy = value.getType().isa(); p << '(' << (isArrayTy ? "filter " : "catch ") << value << " : " << value.getType() << ") "; } @@ -728,37 +726,37 @@ static LogicalResult verify(CallOp &op) { fnType = fn.getType(); } - if (!fnType.isFunctionTy()) + + LLVMFunctionType funcType = fnType.dyn_cast(); + if (!funcType) return op.emitOpError("callee does not have a functional type: ") << fnType; // Verify that the operand and result types match the callee. 
- if (!fnType.isFunctionVarArg() && - fnType.getFunctionNumParams() != (op.getNumOperands() - isIndirect)) + if (!funcType.isVarArg() && + funcType.getNumParams() != (op.getNumOperands() - isIndirect)) return op.emitOpError() << "incorrect number of operands (" << (op.getNumOperands() - isIndirect) - << ") for callee (expecting: " << fnType.getFunctionNumParams() - << ")"; + << ") for callee (expecting: " << funcType.getNumParams() << ")"; - if (fnType.getFunctionNumParams() > (op.getNumOperands() - isIndirect)) + if (funcType.getNumParams() > (op.getNumOperands() - isIndirect)) return op.emitOpError() << "incorrect number of operands (" << (op.getNumOperands() - isIndirect) << ") for varargs callee (expecting at least: " - << fnType.getFunctionNumParams() << ")"; + << funcType.getNumParams() << ")"; - for (unsigned i = 0, e = fnType.getFunctionNumParams(); i != e; ++i) - if (op.getOperand(i + isIndirect).getType() != - fnType.getFunctionParamType(i)) + for (unsigned i = 0, e = funcType.getNumParams(); i != e; ++i) + if (op.getOperand(i + isIndirect).getType() != funcType.getParamType(i)) return op.emitOpError() << "operand type mismatch for operand " << i << ": " << op.getOperand(i + isIndirect).getType() - << " != " << fnType.getFunctionParamType(i); + << " != " << funcType.getParamType(i); if (op.getNumResults() && - op.getResult(0).getType() != fnType.getFunctionResultType()) + op.getResult(0).getType() != funcType.getReturnType()) return op.emitOpError() << "result type mismatch: " << op.getResult(0).getType() - << " != " << fnType.getFunctionResultType(); + << " != " << funcType.getReturnType(); return success(); } @@ -848,7 +846,7 @@ static ParseResult parseCallOp(OpAsmParser &parser, OperationState &result) { } auto llvmFuncType = LLVM::LLVMType::getFunctionTy(llvmResultType, argTypes, /*isVarArg=*/false); - auto wrappedFuncType = llvmFuncType.getPointerTo(); + auto wrappedFuncType = LLVM::LLVMPointerType::get(llvmFuncType); auto funcArguments = ArrayRef(operands).drop_front(); @@ -875,8 +873,8 @@ static ParseResult parseCallOp(OpAsmParser &parser, OperationState &result) { void LLVM::ExtractElementOp::build(OpBuilder &b, OperationState &result, Value vector, Value position, ArrayRef attrs) { - auto wrappedVectorType = vector.getType().cast(); - auto llvmType = wrappedVectorType.getVectorElementType(); + auto vectorType = vector.getType().cast(); + auto llvmType = vectorType.getElementType(); build(b, result, llvmType, vector, position); result.addAttributes(attrs); } @@ -903,11 +901,11 @@ static ParseResult parseExtractElementOp(OpAsmParser &parser, parser.resolveOperand(vector, type, result.operands) || parser.resolveOperand(position, positionType, result.operands)) return failure(); - auto wrappedVectorType = type.dyn_cast(); - if (!wrappedVectorType || !wrappedVectorType.isVectorTy()) + auto vectorType = type.dyn_cast(); + if (!vectorType) return parser.emitError( loc, "expected LLVM IR dialect vector type for operand #1"); - result.addTypes(wrappedVectorType.getVectorElementType()); + result.addTypes(vectorType.getElementType()); return success(); } @@ -930,8 +928,8 @@ static LLVM::LLVMType getInsertExtractValueElementType(OpAsmParser &parser, ArrayAttr positionAttr, llvm::SMLoc attributeLoc, llvm::SMLoc typeLoc) { - auto wrappedContainerType = containerType.dyn_cast(); - if (!wrappedContainerType) + auto llvmType = containerType.dyn_cast(); + if (!llvmType) return parser.emitError(typeLoc, "expected LLVM IR Dialect type"), nullptr; // Infer the element type from the structure 
type: iteratively step inside the @@ -945,26 +943,24 @@ static LLVM::LLVMType getInsertExtractValueElementType(OpAsmParser &parser, "expected an array of integer literals"), nullptr; int position = positionElementAttr.getInt(); - if (wrappedContainerType.isArrayTy()) { - if (position < 0 || static_cast(position) >= - wrappedContainerType.getArrayNumElements()) + if (auto arrayType = llvmType.dyn_cast()) { + if (position < 0 || + static_cast(position) >= arrayType.getNumElements()) return parser.emitError(attributeLoc, "position out of bounds"), nullptr; - wrappedContainerType = wrappedContainerType.getArrayElementType(); - } else if (wrappedContainerType.isStructTy()) { - if (position < 0 || static_cast(position) >= - wrappedContainerType.getStructNumElements()) + llvmType = arrayType.getElementType(); + } else if (auto structType = llvmType.dyn_cast()) { + if (position < 0 || + static_cast(position) >= structType.getBody().size()) return parser.emitError(attributeLoc, "position out of bounds"), nullptr; - wrappedContainerType = - wrappedContainerType.getStructElementType(position); + llvmType = structType.getBody()[position]; } else { - return parser.emitError(typeLoc, - "expected wrapped LLVM IR structure/array type"), + return parser.emitError(typeLoc, "expected LLVM IR structure/array type"), nullptr; } } - return wrappedContainerType; + return llvmType; } // ::= `llvm.extractvalue` ssa-use @@ -1021,11 +1017,11 @@ static ParseResult parseInsertElementOp(OpAsmParser &parser, parser.parseColonType(vectorType)) return failure(); - auto wrappedVectorType = vectorType.dyn_cast(); - if (!wrappedVectorType || !wrappedVectorType.isVectorTy()) + auto llvmVectorType = vectorType.dyn_cast(); + if (!llvmVectorType) return parser.emitError( loc, "expected LLVM IR dialect vector type for operand #1"); - auto valueType = wrappedVectorType.getVectorElementType(); + Type valueType = llvmVectorType.getElementType(); if (!valueType) return failure(); @@ -1145,12 +1141,14 @@ static LogicalResult verify(AddressOfOp op) { return op.emitOpError( "must reference a global defined by 'llvm.mlir.global' or 'llvm.func'"); - if (global && global.getType().getPointerTo(global.addr_space()) != - op.getResult().getType()) + if (global && + LLVM::LLVMPointerType::get(global.getType(), global.addr_space()) != + op.getResult().getType()) return op.emitOpError( "the type must be a pointer to the type of the referenced global"); - if (function && function.getType().getPointerTo() != op.getResult().getType()) + if (function && LLVM::LLVMPointerType::get(function.getType()) != + op.getResult().getType()) return op.emitOpError( "the type must be a pointer to the type of the referenced function"); @@ -1276,11 +1274,11 @@ static LogicalResult verifyCast(DialectCastOp op, LLVMType llvmType, if (vectorType.getRank() != 1) return op->emitOpError("only 1-d vector is allowed"); - auto llvmVector = llvmType.dyn_cast(); - if (llvmVector.isa()) + auto llvmVector = llvmType.dyn_cast(); + if (!llvmVector) return op->emitOpError("only fixed-sized vector is allowed"); - if (vectorType.getDimSize(0) != llvmVector.getVectorNumElements()) + if (vectorType.getDimSize(0) != llvmVector.getNumElements()) return op->emitOpError( "invalid cast between vectors with mismatching sizes"); @@ -1375,7 +1373,10 @@ static LogicalResult verifyCast(DialectCastOp op, LLVMType llvmType, "be an index-compatible integer"); auto ptrType = structType.getBody()[1].dyn_cast(); - if (!ptrType || !ptrType.getPointerElementTy().isIntegerTy(8)) + auto ptrElementType = 
+ ptrType ? ptrType.getElementType().dyn_cast() + : nullptr; + if (!ptrElementType || ptrElementType.getBitWidth() != 8) return op->emitOpError("expected second element of a memref descriptor " "to be an !llvm.ptr"); @@ -1503,9 +1504,11 @@ static LogicalResult verify(GlobalOp op) { return op.emitOpError("must appear at the module level"); if (auto strAttr = op.getValueOrNull().dyn_cast_or_null()) { - auto type = op.getType(); - if (!type.isArrayTy() || !type.getArrayElementType().isIntegerTy(8) || - type.getArrayNumElements() != strAttr.getValue().size()) + auto type = op.getType().dyn_cast(); + LLVMIntegerType elementType = + type ? type.getElementType().dyn_cast() : nullptr; + if (!elementType || elementType.getBitWidth() != 8 || + type.getNumElements() != strAttr.getValue().size()) return op.emitOpError( "requires an i8 array type of the length equal to that of the string " "attribute"); @@ -1534,9 +1537,9 @@ static LogicalResult verify(GlobalOp op) { void LLVM::ShuffleVectorOp::build(OpBuilder &b, OperationState &result, Value v1, Value v2, ArrayAttr mask, ArrayRef attrs) { - auto wrappedContainerType1 = v1.getType().cast(); - auto vType = LLVMType::getVectorTy( - wrappedContainerType1.getVectorElementType(), mask.size()); + auto containerType = v1.getType().cast(); + auto vType = + LLVMType::getVectorTy(containerType.getElementType(), mask.size()); build(b, result, vType, v1, v2, mask); result.addAttributes(attrs); } @@ -1566,12 +1569,12 @@ static ParseResult parseShuffleVectorOp(OpAsmParser &parser, parser.resolveOperand(v1, typeV1, result.operands) || parser.resolveOperand(v2, typeV2, result.operands)) return failure(); - auto wrappedContainerType1 = typeV1.dyn_cast(); - if (!wrappedContainerType1 || !wrappedContainerType1.isVectorTy()) + auto containerType = typeV1.dyn_cast(); + if (!containerType) return parser.emitError( loc, "expected LLVM IR dialect vector type for operand #1"); - auto vType = LLVMType::getVectorTy( - wrappedContainerType1.getVectorElementType(), maskAttr.size()); + auto vType = + LLVMType::getVectorTy(containerType.getElementType(), maskAttr.size()); result.addTypes(vType); return success(); } @@ -1588,9 +1591,9 @@ Block *LLVMFuncOp::addEntryBlock() { auto *entry = new Block; push_back(entry); - LLVMType type = getType(); - for (unsigned i = 0, e = type.getFunctionNumParams(); i < e; ++i) - entry->addArgument(type.getFunctionParamType(i)); + LLVMFunctionType type = getType(); + for (unsigned i = 0, e = type.getNumParams(); i < e; ++i) + entry->addArgument(type.getParamType(i)); return entry; } @@ -1608,7 +1611,7 @@ void LLVMFuncOp::build(OpBuilder &builder, OperationState &result, if (argAttrs.empty()) return; - unsigned numInputs = type.getFunctionNumParams(); + unsigned numInputs = type.cast().getNumParams(); assert(numInputs == argAttrs.size() && "expected as many argument attribute lists as arguments"); SmallString<8> argAttrName; @@ -1711,15 +1714,15 @@ static void printLLVMFuncOp(OpAsmPrinter &p, LLVMFuncOp op) { p << stringifyLinkage(op.linkage()) << ' '; p.printSymbolName(op.getName()); - LLVMType fnType = op.getType(); + LLVMFunctionType fnType = op.getType(); SmallVector argTypes; SmallVector resTypes; - argTypes.reserve(fnType.getFunctionNumParams()); - for (unsigned i = 0, e = fnType.getFunctionNumParams(); i < e; ++i) - argTypes.push_back(fnType.getFunctionParamType(i)); + argTypes.reserve(fnType.getNumParams()); + for (unsigned i = 0, e = fnType.getNumParams(); i < e; ++i) + argTypes.push_back(fnType.getParamType(i)); - LLVMType returnType = 
fnType.getFunctionResultType(); - if (!returnType.isVoidTy()) + LLVMType returnType = fnType.getReturnType(); + if (!returnType.isa()) resTypes.push_back(returnType); impl::printFunctionSignature(p, op, argTypes, op.isVarArg(), resTypes); @@ -1737,8 +1740,8 @@ static void printLLVMFuncOp(OpAsmPrinter &p, LLVMFuncOp op) { // attribute is present. This can check for preconditions of the // getNumArguments hook not failing. LogicalResult LLVMFuncOp::verifyType() { - auto llvmType = getTypeAttr().getValue().dyn_cast_or_null(); - if (!llvmType || !llvmType.isFunctionTy()) + auto llvmType = getTypeAttr().getValue().dyn_cast_or_null(); + if (!llvmType) return emitOpError("requires '" + getTypeAttrName() + "' attribute of wrapped LLVM function type"); @@ -1747,9 +1750,7 @@ LogicalResult LLVMFuncOp::verifyType() { // Hook for OpTrait::FunctionLike, returns the number of function arguments. // Depends on the type attribute being correct as checked by verifyType -unsigned LLVMFuncOp::getNumFuncArguments() { - return getType().getFunctionNumParams(); -} +unsigned LLVMFuncOp::getNumFuncArguments() { return getType().getNumParams(); } // Hook for OpTrait::FunctionLike, returns the number of function results. // Depends on the type attribute being correct as checked by verifyType @@ -1759,7 +1760,7 @@ unsigned LLVMFuncOp::getNumFuncResults() { // If we modeled a void return as one result, then it would be possible to // attach an MLIR result attribute to it, and it isn't clear what semantics we // would assign to that. - if (getType().getFunctionResultType().isVoidTy()) + if (getType().getReturnType().isa()) return 0; return 1; } @@ -1788,7 +1789,7 @@ static LogicalResult verify(LLVMFuncOp op) { if (op.isVarArg()) return op.emitOpError("only external functions can be variadic"); - unsigned numArguments = op.getType().getFunctionNumParams(); + unsigned numArguments = op.getType().getNumParams(); Block &entryBlock = op.front(); for (unsigned i = 0; i < numArguments; ++i) { Type argType = entryBlock.getArgument(i).getType(); @@ -1796,7 +1797,7 @@ static LogicalResult verify(LLVMFuncOp op) { if (!argLLVMType) return op.emitOpError("entry block argument #") << i << " is not of LLVM type"; - if (op.getType().getFunctionParamType(i) != argLLVMType) + if (op.getType().getParamType(i) != argLLVMType) return op.emitOpError("the type of entry block argument #") << i << " does not match the function signature"; } @@ -1896,7 +1897,8 @@ static ParseResult parseAtomicRMWOp(OpAsmParser &parser, parseAtomicOrdering(parser, result, "ordering") || parser.parseOptionalAttrDict(result.attributes) || parser.parseColonType(type) || - parser.resolveOperand(ptr, type.getPointerTo(), result.operands) || + parser.resolveOperand(ptr, LLVM::LLVMPointerType::get(type), + result.operands) || parser.resolveOperand(val, type, result.operands)) return failure(); @@ -1905,9 +1907,9 @@ static ParseResult parseAtomicRMWOp(OpAsmParser &parser, } static LogicalResult verify(AtomicRMWOp op) { - auto ptrType = op.ptr().getType().cast(); + auto ptrType = op.ptr().getType().cast(); auto valType = op.val().getType().cast(); - if (valType != ptrType.getPointerElementTy()) + if (valType != ptrType.getElementType()) return op.emitOpError("expected LLVM IR element type for operand #0 to " "match type for operand #1"); auto resType = op.res().getType().cast(); @@ -1915,17 +1917,21 @@ static LogicalResult verify(AtomicRMWOp op) { return op.emitOpError( "expected LLVM IR result type to match type for operand #1"); if (op.bin_op() == AtomicBinOp::fadd || 
op.bin_op() == AtomicBinOp::fsub) { - if (!valType.isFloatingPointTy()) + if (!mlir::LLVM::isCompatibleFloatingPointType(valType)) return op.emitOpError("expected LLVM IR floating point type"); } else if (op.bin_op() == AtomicBinOp::xchg) { - if (!valType.isIntegerTy(8) && !valType.isIntegerTy(16) && - !valType.isIntegerTy(32) && !valType.isIntegerTy(64) && - !valType.isBFloatTy() && !valType.isHalfTy() && !valType.isFloatTy() && - !valType.isDoubleTy()) + auto intType = valType.dyn_cast(); + unsigned intBitWidth = intType ? intType.getBitWidth() : 0; + if (intBitWidth != 8 && intBitWidth != 16 && intBitWidth != 32 && + intBitWidth != 64 && !valType.isa() && + !valType.isa() && !valType.isa() && + !valType.isa()) return op.emitOpError("unexpected LLVM IR type for 'xchg' bin_op"); } else { - if (!valType.isIntegerTy(8) && !valType.isIntegerTy(16) && - !valType.isIntegerTy(32) && !valType.isIntegerTy(64)) + auto intType = valType.dyn_cast(); + unsigned intBitWidth = intType ? intType.getBitWidth() : 0; + if (intBitWidth != 8 && intBitWidth != 16 && intBitWidth != 32 && + intBitWidth != 64) return op.emitOpError("expected LLVM IR integer type"); } return success(); @@ -1958,7 +1964,8 @@ static ParseResult parseAtomicCmpXchgOp(OpAsmParser &parser, parseAtomicOrdering(parser, result, "failure_ordering") || parser.parseOptionalAttrDict(result.attributes) || parser.parseColonType(type) || - parser.resolveOperand(ptr, type.getPointerTo(), result.operands) || + parser.resolveOperand(ptr, LLVM::LLVMPointerType::get(type), + result.operands) || parser.resolveOperand(cmp, type, result.operands) || parser.resolveOperand(val, type, result.operands)) return failure(); @@ -1971,18 +1978,20 @@ static ParseResult parseAtomicCmpXchgOp(OpAsmParser &parser, } static LogicalResult verify(AtomicCmpXchgOp op) { - auto ptrType = op.ptr().getType().cast(); - if (!ptrType.isPointerTy()) + auto ptrType = op.ptr().getType().cast(); + if (!ptrType) return op.emitOpError("expected LLVM IR pointer type for operand #0"); auto cmpType = op.cmp().getType().cast(); auto valType = op.val().getType().cast(); - if (cmpType != ptrType.getPointerElementTy() || cmpType != valType) + if (cmpType != ptrType.getElementType() || cmpType != valType) return op.emitOpError("expected LLVM IR element type for operand #0 to " "match type for all other operands"); - if (!valType.isPointerTy() && !valType.isIntegerTy(8) && - !valType.isIntegerTy(16) && !valType.isIntegerTy(32) && - !valType.isIntegerTy(64) && !valType.isBFloatTy() && - !valType.isHalfTy() && !valType.isFloatTy() && !valType.isDoubleTy()) + auto intType = valType.dyn_cast(); + unsigned intBitWidth = intType ? intType.getBitWidth() : 0; + if (!valType.isa() && intBitWidth != 8 && + intBitWidth != 16 && intBitWidth != 32 && intBitWidth != 64 && + !valType.isa() && !valType.isa() && + !valType.isa() && !valType.isa()) return op.emitOpError("unexpected LLVM IR type"); if (op.success_ordering() < AtomicOrdering::monotonic || op.failure_ordering() < AtomicOrdering::monotonic) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp index a89287b764e5d7..0616efb7ef3f99 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp @@ -36,129 +36,6 @@ LLVMDialect &LLVMType::getDialect() { return static_cast(Type::getDialect()); } -//----------------------------------------------------------------------------// -// Misc type utilities. 
- -llvm::TypeSize LLVMType::getPrimitiveSizeInBits() { - return llvm::TypeSwitch(*this) - .Case( - [](LLVMType) { return llvm::TypeSize::Fixed(16); }) - .Case([](LLVMType) { return llvm::TypeSize::Fixed(32); }) - .Case( - [](LLVMType) { return llvm::TypeSize::Fixed(64); }) - .Case([](LLVMIntegerType intTy) { - return llvm::TypeSize::Fixed(intTy.getBitWidth()); - }) - .Case([](LLVMType) { return llvm::TypeSize::Fixed(80); }) - .Case( - [](LLVMType) { return llvm::TypeSize::Fixed(128); }) - .Case([](LLVMVectorType t) { - llvm::TypeSize elementSize = - t.getElementType().getPrimitiveSizeInBits(); - llvm::ElementCount elementCount = t.getElementCount(); - assert(!elementSize.isScalable() && - "vector type should have fixed-width elements"); - return llvm::TypeSize(elementSize.getFixedSize() * - elementCount.getKnownMinValue(), - elementCount.isScalable()); - }) - .Default([](LLVMType ty) { - assert((ty.isa()) && - "unexpected missing support for primitive type"); - return llvm::TypeSize::Fixed(0); - }); -} - -//----------------------------------------------------------------------------// -// Integer type utilities. - -bool LLVMType::isIntegerTy(unsigned bitwidth) { - if (auto intType = dyn_cast()) - return intType.getBitWidth() == bitwidth; - return false; -} -unsigned LLVMType::getIntegerBitWidth() { - return cast().getBitWidth(); -} - -LLVMType LLVMType::getArrayElementType() { - return cast().getElementType(); -} - -//----------------------------------------------------------------------------// -// Array type utilities. - -unsigned LLVMType::getArrayNumElements() { - return cast().getNumElements(); -} - -bool LLVMType::isArrayTy() { return isa(); } - -//----------------------------------------------------------------------------// -// Vector type utilities. - -LLVMType LLVMType::getVectorElementType() { - return cast().getElementType(); -} - -unsigned LLVMType::getVectorNumElements() { - return cast().getNumElements(); -} -llvm::ElementCount LLVMType::getVectorElementCount() { - return cast().getElementCount(); -} - -bool LLVMType::isVectorTy() { return isa(); } - -//----------------------------------------------------------------------------// -// Function type utilities. - -LLVMType LLVMType::getFunctionParamType(unsigned argIdx) { - return cast().getParamType(argIdx); -} - -unsigned LLVMType::getFunctionNumParams() { - return cast().getNumParams(); -} - -LLVMType LLVMType::getFunctionResultType() { - return cast().getReturnType(); -} - -bool LLVMType::isFunctionTy() { return isa(); } - -bool LLVMType::isFunctionVarArg() { - return cast().isVarArg(); -} - -//----------------------------------------------------------------------------// -// Pointer type utilities. - -LLVMType LLVMType::getPointerTo(unsigned addrSpace) { - return LLVMPointerType::get(*this, addrSpace); -} - -LLVMType LLVMType::getPointerElementTy() { - return cast().getElementType(); -} - -bool LLVMType::isPointerTy() { return isa(); } - -//----------------------------------------------------------------------------// -// Struct type utilities. - -LLVMType LLVMType::getStructElementType(unsigned i) { - return cast().getBody()[i]; -} - -unsigned LLVMType::getStructNumElements() { - return cast().getBody().size(); -} - -bool LLVMType::isStructTy() { return isa(); } - //----------------------------------------------------------------------------// // Utilities used to generate floating point types. 
@@ -193,6 +70,10 @@ LLVMType LLVMType::getIntNTy(MLIRContext *context, unsigned numBits) { return LLVMIntegerType::get(context, numBits); } +LLVMType LLVMType::getInt8PtrTy(MLIRContext *context) { + return LLVMPointerType::get(LLVMIntegerType::get(context, 8)); +} + //----------------------------------------------------------------------------// // Utilities used to generate other miscellaneous types. @@ -221,8 +102,6 @@ LLVMType LLVMType::getVoidTy(MLIRContext *context) { return LLVMVoidType::get(context); } -bool LLVMType::isVoidTy() { return isa(); } - //----------------------------------------------------------------------------// // Creation and setting of LLVM's identified struct types @@ -470,7 +349,7 @@ LLVMStructType::verifyConstructionInvariants(Location loc, bool LLVMVectorType::isValidElementType(LLVMType type) { return type.isa() || - type.isFloatingPointTy(); + mlir::LLVM::isCompatibleFloatingPointType(type); } /// Support type casting functionality. @@ -536,3 +415,42 @@ LLVMScalableVectorType::getChecked(Location loc, LLVMType elementType, unsigned LLVMScalableVectorType::getMinNumElements() { return getImpl()->numElements; } + +//===----------------------------------------------------------------------===// +// Utility functions. +//===----------------------------------------------------------------------===// + +llvm::TypeSize mlir::LLVM::getPrimitiveTypeSizeInBits(Type type) { + assert(isCompatibleType(type) && + "expected a type compatible with the LLVM dialect"); + + return llvm::TypeSwitch(type) + .Case( + [](LLVMType) { return llvm::TypeSize::Fixed(16); }) + .Case([](LLVMType) { return llvm::TypeSize::Fixed(32); }) + .Case( + [](LLVMType) { return llvm::TypeSize::Fixed(64); }) + .Case([](LLVMIntegerType intTy) { + return llvm::TypeSize::Fixed(intTy.getBitWidth()); + }) + .Case([](LLVMType) { return llvm::TypeSize::Fixed(80); }) + .Case( + [](LLVMType) { return llvm::TypeSize::Fixed(128); }) + .Case([](LLVMVectorType t) { + llvm::TypeSize elementSize = + getPrimitiveTypeSizeInBits(t.getElementType()); + llvm::ElementCount elementCount = t.getElementCount(); + assert(!elementSize.isScalable() && + "vector type should have fixed-width elements"); + return llvm::TypeSize(elementSize.getFixedSize() * + elementCount.getKnownMinValue(), + elementCount.isScalable()); + }) + .Default([](Type ty) { + assert((ty.isa()) && + "unexpected missing support for primitive type"); + return llvm::TypeSize::Fixed(0); + }); +} diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index 707ff7c1b089ba..c202075fa2066b 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -57,8 +57,9 @@ static ParseResult parseNVVMShflSyncBflyOp(OpAsmParser &parser, for (auto &attr : result.attributes) { if (attr.first != "return_value_and_is_valid") continue; - if (type.isStructTy() && type.getStructNumElements() > 0) - type = type.getStructElementType(0); + auto structType = type.dyn_cast(); + if (structType && !structType.getBody().empty()) + type = structType.getBody()[0]; break; } diff --git a/mlir/lib/ExecutionEngine/JitRunner.cpp b/mlir/lib/ExecutionEngine/JitRunner.cpp index a323e68170c1b7..bfdae2b4588d59 100644 --- a/mlir/lib/ExecutionEngine/JitRunner.cpp +++ b/mlir/lib/ExecutionEngine/JitRunner.cpp @@ -196,19 +196,30 @@ template Error checkCompatibleReturnType(LLVM::LLVMFuncOp mainFunction); template <> Error checkCompatibleReturnType(LLVM::LLVMFuncOp mainFunction) { - if 
(!mainFunction.getType().getFunctionResultType().isIntegerTy(32)) + auto resultType = mainFunction.getType() + .cast() + .getReturnType() + .dyn_cast(); + if (!resultType || resultType.getBitWidth() != 32) return make_string_error("only single llvm.i32 function result supported"); return Error::success(); } template <> Error checkCompatibleReturnType(LLVM::LLVMFuncOp mainFunction) { - if (!mainFunction.getType().getFunctionResultType().isIntegerTy(64)) + auto resultType = mainFunction.getType() + .cast() + .getReturnType() + .dyn_cast(); + if (!resultType || resultType.getBitWidth() != 64) return make_string_error("only single llvm.i64 function result supported"); return Error::success(); } template <> Error checkCompatibleReturnType(LLVM::LLVMFuncOp mainFunction) { - if (!mainFunction.getType().getFunctionResultType().isFloatTy()) + if (!mainFunction.getType() + .cast() + .getReturnType() + .isa()) return make_string_error("only single llvm.f32 function result supported"); return Error::success(); } @@ -220,7 +231,7 @@ Error compileAndExecuteSingleReturnFunction(Options &options, ModuleOp module, if (!mainFunction || mainFunction.isExternal()) return make_string_error("entry point not found"); - if (mainFunction.getType().getFunctionNumParams() != 0) + if (mainFunction.getType().cast().getNumParams() != 0) return make_string_error("function inputs not supported"); if (Error error = checkCompatibleReturnType(mainFunction)) diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp index 7f89a41de5db72..9786751ef4b0db 100644 --- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp @@ -172,57 +172,57 @@ Type Importer::getStdTypeForAttr(LLVMType type) { if (!type) return nullptr; - if (type.isIntegerTy()) - return b.getIntegerType(type.getIntegerBitWidth()); + if (auto intType = type.dyn_cast()) + return b.getIntegerType(intType.getBitWidth()); - if (type.isFloatTy()) + if (type.isa()) return b.getF32Type(); - if (type.isDoubleTy()) + if (type.isa()) return b.getF64Type(); // LLVM vectors can only contain scalars. - if (type.isVectorTy()) { - auto numElements = type.getVectorElementCount(); + if (auto vectorType = type.dyn_cast()) { + auto numElements = vectorType.getElementCount(); if (numElements.isScalable()) { emitError(unknownLoc) << "scalable vectors not supported"; return nullptr; } - Type elementType = getStdTypeForAttr(type.getVectorElementType()); + Type elementType = getStdTypeForAttr(vectorType.getElementType()); if (!elementType) return nullptr; return VectorType::get(numElements.getKnownMinValue(), elementType); } // LLVM arrays can contain other arrays or vectors. - if (type.isArrayTy()) { + if (auto arrayType = type.dyn_cast()) { // Recover the nested array shape. SmallVector shape; - shape.push_back(type.getArrayNumElements()); - while (type.getArrayElementType().isArrayTy()) { - type = type.getArrayElementType(); - shape.push_back(type.getArrayNumElements()); + shape.push_back(arrayType.getNumElements()); + while (arrayType.getElementType().isa()) { + arrayType = arrayType.getElementType().cast(); + shape.push_back(arrayType.getNumElements()); } // If the innermost type is a vector, use the multi-dimensional vector as // attribute type. 
- if (type.getArrayElementType().isVectorTy()) { - LLVMType vectorType = type.getArrayElementType(); - auto numElements = vectorType.getVectorElementCount(); + if (auto vectorType = + arrayType.getElementType().dyn_cast()) { + auto numElements = vectorType.getElementCount(); if (numElements.isScalable()) { emitError(unknownLoc) << "scalable vectors not supported"; return nullptr; } shape.push_back(numElements.getKnownMinValue()); - Type elementType = getStdTypeForAttr(vectorType.getVectorElementType()); + Type elementType = getStdTypeForAttr(vectorType.getElementType()); if (!elementType) return nullptr; return VectorType::get(shape, elementType); } // Otherwise use a tensor. - Type elementType = getStdTypeForAttr(type.getArrayElementType()); + Type elementType = getStdTypeForAttr(arrayType.getElementType()); if (!elementType) return nullptr; return RankedTensorType::get(shape, elementType); @@ -261,7 +261,7 @@ Attribute Importer::getConstantAsAttr(llvm::Constant *value) { if (!attrType) return nullptr; - if (type.isIntegerTy()) { + if (type.isa()) { SmallVector values; values.reserve(cd->getNumElements()); for (unsigned i = 0, e = cd->getNumElements(); i < e; ++i) @@ -269,7 +269,7 @@ Attribute Importer::getConstantAsAttr(llvm::Constant *value) { return DenseElementsAttr::get(attrType, values); } - if (type.isFloatTy() || type.isDoubleTy()) { + if (type.isa() || type.isa()) { SmallVector values; values.reserve(cd->getNumElements()); for (unsigned i = 0, e = cd->getNumElements(); i < e; ++i) @@ -777,7 +777,8 @@ LogicalResult Importer::processFunction(llvm::Function *f) { instMap.clear(); unknownInstMap.clear(); - LLVMType functionType = processType(f->getFunctionType()); + auto functionType = + processType(f->getFunctionType()).dyn_cast(); if (!functionType) return failure(); @@ -805,8 +806,8 @@ LogicalResult Importer::processFunction(llvm::Function *f) { // Add function arguments to the entry block. for (auto kv : llvm::enumerate(f->args())) - instMap[&kv.value()] = blockList[0]->addArgument( - functionType.getFunctionParamType(kv.index())); + instMap[&kv.value()] = + blockList[0]->addArgument(functionType.getParamType(kv.index())); for (auto bbs : llvm::zip(*f, blockList)) { if (failed(processBasicBlock(&std::get<0>(bbs), std::get<1>(bbs)))) diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 8c650506e2d74d..ae0745b0be28f4 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -969,7 +969,7 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) { // NB: Attribute already verified to be boolean, so check if we can indeed // attach the attribute to this argument, based on its type. auto argTy = mlirArg.getType().dyn_cast(); - if (!argTy.isPointerTy()) + if (!argTy.isa()) return func.emitError( "llvm.noalias attribute attached to LLVM non-pointer argument"); if (attr.getValue()) @@ -981,7 +981,7 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) { // NB: Attribute already verified to be int, so check if we can indeed // attach the attribute to this argument, based on its type. 
auto argTy = mlirArg.getType().dyn_cast(); - if (!argTy.isPointerTy()) + if (!argTy.isa()) return func.emitError( "llvm.align attribute attached to LLVM non-pointer argument"); llvmArg.addAttrs( diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index 9461ebbd9ede92..d02c252c0bf360 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -98,7 +98,7 @@ func @gep_non_function_type(%pos : !llvm.i64, %base : !llvm.ptr) { // ----- func @load_non_llvm_type(%foo : memref) { - // expected-error@+1 {{expected LLVM IR dialect type}} + // expected-error@+1 {{expected LLVM pointer type}} llvm.load %foo : memref } @@ -112,7 +112,7 @@ func @load_non_ptr_type(%foo : !llvm.float) { // ----- func @store_non_llvm_type(%foo : memref, %bar : !llvm.float) { - // expected-error@+1 {{expected LLVM IR dialect type}} + // expected-error@+1 {{expected LLVM pointer type}} llvm.store %bar, %foo : memref } @@ -267,7 +267,7 @@ func @insertvalue_array_out_of_bounds() { // ----- func @insertvalue_wrong_nesting() { - // expected-error@+1 {{expected wrapped LLVM IR structure/array type}} + // expected-error@+1 {{expected LLVM IR structure/array type}} llvm.insertvalue %a, %b[0,0] : !llvm.struct<(i32)> } @@ -311,7 +311,7 @@ func @extractvalue_array_out_of_bounds() { // ----- func @extractvalue_wrong_nesting() { - // expected-error@+1 {{expected wrapped LLVM IR structure/array type}} + // expected-error@+1 {{expected LLVM IR structure/array type}} llvm.extractvalue %b[0,0] : !llvm.struct<(i32)> } From 1c19804ebf4c97666a5c7de86ca7432c6b020205 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 22 Dec 2020 15:14:30 -0500 Subject: [PATCH 09/10] [OpenMP] Add OpenMP Documentation for Libomptarget environment variables Add support to the OpenMP web pages for environment variables supported by Libomptarget and their usage. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D93723 --- openmp/docs/design/Runtimes.rst | 82 +++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst index 61491060ea0475..39ed256c48569f 100644 --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -16,6 +16,88 @@ the LLVM/OpenMP host runtime, aka. `libomp.so`, is available as a `pdf LLVM/OpenMP Target Host Runtime (``libomptarget``) -------------------------------------------------- +Environment Variables +^^^^^^^^^^^^^^^^^^^^^ + +``libomptarget`` uses environment variables to control different features of the +library at runtime. This allows the user to obtain useful runtime information as +well as enable or disable certain features. A full list of supported environment +variables is defined below. + + * ``LIBOMPTARGET_DEBUG=`` + * ``LIBOMPTARGET_PROFILE=`` + * ``LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD=`` + * ``LIBOMPTARGET_INFO=`` + +LIBOMPTARGET_DEBUG +"""""""""""""""""" + +``LIBOMPTARGET_DEBUG`` controls whether or not debugging information will be +displayed. This feature is only availible if ``libomptarget`` was built with +``-DOMPTARGET_DEBUG``. The debugging output provided is intended for use by +``libomptarget`` developers. More user-friendly output is presented when using +``LIBOMPTARGET_INFO``. + +LIBOMPTARGET_PROFILE +"""""""""""""""""""" +``LIBOMPTARGET_PROFILE`` allows ``libomptarget`` to generate time profile output +similar to Clang's ``-ftime-trace`` option. 
This generates a JSON file based on +`Chrome Tracing`_ that can be viewed with ``chrome://tracing`` or the +`Speedscope App`_. Building this feature depends on the `LLVM Support Library`_ +for time trace output. Using this library is enabled by default when building +using the CMake option ``OPENMP_ENABLE_LIBOMPTARGET_PROFILING``. The output will +be saved to the filename specified by the environment variable. + +.. _`Chrome Tracing`: https://www.chromium.org/developers/how-tos/trace-event-profiling-tool + +.. _`Speedscope App`: https://www.speedscope.app/ + +.. _`LLVM Support Library`: https://llvm.org/docs/SupportLibrary.html + +LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD +""""""""""""""""""""""""""""""""""""" + +``LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD`` sets the threshold size for which the +``libomptarget`` memory manager will handle the allocation. Any allocations +larger than this threshold will not use the memory manager and be freed after +the device kernel exits. The default threshold value is ``8Kb``. If +``LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD`` is set to ``0`` the memory manager +will be completely disabled. + +LIBOMPTARGET_INFO +""""""""""""""""" + +``LIBOMPTARGET_INFO`` allows the user to request different types of runtime +information from ``libomptarget``. ``LIBOMPTARGET_INFO`` uses a 32-bit field to +enable or disable different types of information. This includes information +about data-mappings and kernel execution. It is recommended to build your +application with debugging information enabled; this will enable filenames and +variable declarations in the information messages. OpenMP Debugging information +is enabled at any level of debugging so a full debug runtime is not required. +For minimal debugging information compile with `-gline-tables-only`, or compile +with `-g` for full debug information. A full list of flags supported by +``LIBOMPTARGET_INFO`` is given below. + + * Print all data arguments upon entering an OpenMP device kernel: ``0x01`` + * Indicate when a mapped address already exists in the device mapping table: + ``0x02`` + * Dump the contents of the device pointer map at kernel exit: ``0x04`` + * Print OpenMP kernel information from device plugins: ``0x10`` + +Any combination of these flags can be used by setting the appropriate bits. For +example, to enable printing all data active in an OpenMP target region along +with ``CUDA`` information, run the following ``bash`` command. + +.. code-block:: console + + $ env LIBOMPTARGET_INFO=$((0x1 | 0x10)) ./your-application + +Or, to enable every flag, run with every bit set. + +.. code-block:: console + + $ env LIBOMPTARGET_INFO=-1 ./your-application + LLVM/OpenMP Target Host Runtime Plugins (``libomptarget.rtl.XXXX``) ------------------------------------------------------------------- From 75a3f326c3d874853031d8bedd1d00127c835103 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Tue, 22 Dec 2020 10:35:15 -0800 Subject: [PATCH 10/10] [IR] Add an ImplicitLocOpBuilder helper class for building IR with the same loc. One common situation is to create a lot of IR at a well known location, e.g. when doing a big rewrite from one dialect to another where you're expanding ops out into lots of other ops. For these sorts of situations, it is annoying to pass the location into every create call. As we discussed in a few threads on the forum, a way to help with this is to produce a new sort of builder that holds a location and provides it to each of the create<> calls automatically.
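As a rough usage sketch (the helper function and the particular ops chosen here are illustrative only, not part of the patch; the builder API is the one introduced below), the pattern looks like this:

  // Sketch: the location is handed to the builder once, and every create<>
  // call reuses it instead of taking a Location argument.
  static void emitReturnZero(Location loc, Block *block) {
    auto b = ImplicitLocOpBuilder::atBlockEnd(loc, block);
    auto i32 = LLVM::LLVMType::getInt32Ty(b.getContext());
    Value zero = b.create<LLVM::ConstantOp>(i32, b.getI32IntegerAttr(0));
    b.create<LLVM::ReturnOp>(ValueRange({zero}));
  }

Compared with a plain OpBuilder, the only change for callers is supplying the Location up front when constructing the builder.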
This patch implements an ImplicitLocOpBuilder class that does this. We've had good experience with this in the CIRCT project, and it makes sense to upstream to MLIR. I picked a random pass to adopt it to show the impact, but I don't think there is any particular need to force adopt it in the codebase. Differential Revision: https://reviews.llvm.org/D93717 --- mlir/include/mlir/IR/ImplicitLocOpBuilder.h | 123 ++++++++++++++++++ .../Conversion/AsyncToLLVM/AsyncToLLVM.cpp | 96 +++++++------- 2 files changed, 170 insertions(+), 49 deletions(-) create mode 100644 mlir/include/mlir/IR/ImplicitLocOpBuilder.h diff --git a/mlir/include/mlir/IR/ImplicitLocOpBuilder.h b/mlir/include/mlir/IR/ImplicitLocOpBuilder.h new file mode 100644 index 00000000000000..2dc7c34f4e8553 --- /dev/null +++ b/mlir/include/mlir/IR/ImplicitLocOpBuilder.h @@ -0,0 +1,123 @@ +//===- ImplicitLocOpBuilder.h - Convenience OpBuilder -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Helper class to create ops with a modally set location. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_IR_IMPLICITLOCOPBUILDER_H +#define MLIR_IR_IMPLICITLOCOPBUILDER_H + +#include "mlir/IR/Builders.h" + +namespace mlir { + +/// ImplictLocOpBuilder maintains a 'current location', allowing use of the +/// create<> method without specifying the location. It is otherwise the same +/// as OpBuilder. +class ImplicitLocOpBuilder : public mlir::OpBuilder { +public: + /// Create an ImplicitLocOpBuilder using the insertion point and listener from + /// an existing OpBuilder. + ImplicitLocOpBuilder(Location loc, const OpBuilder &builder) + : OpBuilder(builder), curLoc(loc) {} + + /// OpBuilder has a bunch of convenience constructors - we support them all + /// with the additional Location. + template + ImplicitLocOpBuilder(Location loc, T &&operand, Listener *listener = nullptr) + : OpBuilder(std::forward(operand), listener), curLoc(loc) {} + + ImplicitLocOpBuilder(Location loc, Block *block, Block::iterator insertPoint, + Listener *listener = nullptr) + : OpBuilder(block, insertPoint, listener), curLoc(loc) {} + + /// Create a builder and set the insertion point to before the first operation + /// in the block but still inside the block. + static ImplicitLocOpBuilder atBlockBegin(Location loc, Block *block, + Listener *listener = nullptr) { + return ImplicitLocOpBuilder(loc, block, block->begin(), listener); + } + + /// Create a builder and set the insertion point to after the last operation + /// in the block but still inside the block. + static ImplicitLocOpBuilder atBlockEnd(Location loc, Block *block, + Listener *listener = nullptr) { + return ImplicitLocOpBuilder(loc, block, block->end(), listener); + } + + /// Create a builder and set the insertion point to before the block + /// terminator. + static ImplicitLocOpBuilder atBlockTerminator(Location loc, Block *block, + Listener *listener = nullptr) { + auto *terminator = block->getTerminator(); + assert(terminator != nullptr && "the block has no terminator"); + return ImplicitLocOpBuilder(loc, block, Block::iterator(terminator), + listener); + } + + /// Accessors for the implied location. 
+ Location getLoc() const { return curLoc; } + void setLoc(Location loc) { curLoc = loc; } + + // We allow clients to use the explicit-loc version of create as well. + using OpBuilder::create; + using OpBuilder::createOrFold; + + /// Create an operation of specific op type at the current insertion point and + /// location. + template + OpTy create(Args &&... args) { + return OpBuilder::create(curLoc, std::forward(args)...); + } + + /// Create an operation of specific op type at the current insertion point, + /// and immediately try to fold it. This functions populates 'results' with + /// the results after folding the operation. + template + void createOrFold(llvm::SmallVectorImpl &results, Args &&... args) { + OpBuilder::createOrFold(results, curLoc, std::forward(args)...); + } + + /// Overload to create or fold a single result operation. + template + typename std::enable_if(), + Value>::type + createOrFold(Args &&... args) { + return OpBuilder::createOrFold(curLoc, std::forward(args)...); + } + + /// Overload to create or fold a zero result operation. + template + typename std::enable_if(), + OpTy>::type + createOrFold(Args &&... args) { + return OpBuilder::createOrFold(curLoc, std::forward(args)...); + } + + /// This builder can also be used to emit diagnostics to the current location. + mlir::InFlightDiagnostic + emitError(const llvm::Twine &message = llvm::Twine()) { + return mlir::emitError(curLoc, message); + } + mlir::InFlightDiagnostic + emitWarning(const llvm::Twine &message = llvm::Twine()) { + return mlir::emitWarning(curLoc, message); + } + mlir::InFlightDiagnostic + emitRemark(const llvm::Twine &message = llvm::Twine()) { + return mlir::emitRemark(curLoc, message); + } + +private: + Location curLoc; +}; + +} // namespace mlir + +#endif // MLIR_IR_IMPLICITLOCOPBUILDER_H \ No newline at end of file diff --git a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp index 65545d8ab2de1d..2415924557db7b 100644 --- a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp +++ b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp @@ -13,7 +13,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/BlockAndValueMapping.h" -#include "mlir/IR/Builders.h" +#include "mlir/IR/ImplicitLocOpBuilder.h" #include "mlir/IR/TypeUtilities.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" @@ -112,12 +112,13 @@ struct AsyncAPI { // Adds Async Runtime C API declarations to the module. static void addAsyncRuntimeApiDeclarations(ModuleOp module) { - auto builder = OpBuilder::atBlockTerminator(module.getBody()); + auto builder = ImplicitLocOpBuilder::atBlockTerminator(module.getLoc(), + module.getBody()); auto addFuncDecl = [&](StringRef name, FunctionType type) { if (module.lookupSymbol(name)) return; - builder.create(module.getLoc(), name, type).setPrivate(); + builder.create(name, type).setPrivate(); }; MLIRContext *ctx = module.getContext(); @@ -149,13 +150,13 @@ static constexpr const char *kCoroFree = "llvm.coro.free"; static constexpr const char *kCoroResume = "llvm.coro.resume"; /// Adds an LLVM function declaration to a module. 
-static void addLLVMFuncDecl(ModuleOp module, OpBuilder &builder, StringRef name, - LLVM::LLVMType ret, +static void addLLVMFuncDecl(ModuleOp module, ImplicitLocOpBuilder &builder, + StringRef name, LLVM::LLVMType ret, ArrayRef params) { if (module.lookupSymbol(name)) return; LLVM::LLVMType type = LLVM::LLVMType::getFunctionTy(ret, params, false); - builder.create(module.getLoc(), name, type); + builder.create(name, type); } /// Adds coroutine intrinsics declarations to the module. @@ -163,7 +164,8 @@ static void addCoroutineIntrinsicsDeclarations(ModuleOp module) { using namespace mlir::LLVM; MLIRContext *ctx = module.getContext(); - OpBuilder builder(module.getBody()->getTerminator()); + ImplicitLocOpBuilder builder(module.getLoc(), + module.getBody()->getTerminator()); auto token = LLVMTokenType::get(ctx); auto voidTy = LLVMType::getVoidTy(ctx); @@ -196,7 +198,8 @@ static void addCRuntimeDeclarations(ModuleOp module) { using namespace mlir::LLVM; MLIRContext *ctx = module.getContext(); - OpBuilder builder(module.getBody()->getTerminator()); + ImplicitLocOpBuilder builder(module.getLoc(), + module.getBody()->getTerminator()); auto voidTy = LLVMType::getVoidTy(ctx); auto i64 = LLVMType::getInt64Ty(ctx); @@ -232,13 +235,13 @@ static void addResumeFunction(ModuleOp module) { resumeOp.setPrivate(); auto *block = resumeOp.addEntryBlock(); - OpBuilder blockBuilder = OpBuilder::atBlockEnd(block); + auto blockBuilder = ImplicitLocOpBuilder::atBlockEnd(loc, block); - blockBuilder.create(loc, TypeRange(), + blockBuilder.create(TypeRange(), blockBuilder.getSymbolRefAttr(kCoroResume), resumeOp.getArgument(0)); - blockBuilder.create(loc, ValueRange()); + blockBuilder.create(ValueRange()); } //===----------------------------------------------------------------------===// @@ -302,13 +305,12 @@ static CoroMachinery setupCoroMachinery(FuncOp func) { Block *entryBlock = func.addEntryBlock(); Location loc = func.getBody().getLoc(); - OpBuilder builder = OpBuilder::atBlockBegin(entryBlock); + auto builder = ImplicitLocOpBuilder::atBlockBegin(loc, entryBlock); // ------------------------------------------------------------------------ // // Allocate async tokens/values that we will return from a ramp function. // ------------------------------------------------------------------------ // - auto createToken = - builder.create(loc, kCreateToken, TokenType::get(ctx)); + auto createToken = builder.create(kCreateToken, TokenType::get(ctx)); // ------------------------------------------------------------------------ // // Initialize coroutine: allocate frame, get coroutine handle. @@ -316,28 +318,28 @@ static CoroMachinery setupCoroMachinery(FuncOp func) { // Constants for initializing coroutine frame. auto constZero = - builder.create(loc, i32, builder.getI32IntegerAttr(0)); + builder.create(i32, builder.getI32IntegerAttr(0)); auto constFalse = - builder.create(loc, i1, builder.getBoolAttr(false)); - auto nullPtr = builder.create(loc, i8Ptr); + builder.create(i1, builder.getBoolAttr(false)); + auto nullPtr = builder.create(i8Ptr); // Get coroutine id: @llvm.coro.id auto coroId = builder.create( - loc, token, builder.getSymbolRefAttr(kCoroId), + token, builder.getSymbolRefAttr(kCoroId), ValueRange({constZero, nullPtr, nullPtr, nullPtr})); // Get coroutine frame size: @llvm.coro.size.i64 auto coroSize = builder.create( - loc, i64, builder.getSymbolRefAttr(kCoroSizeI64), ValueRange()); + i64, builder.getSymbolRefAttr(kCoroSizeI64), ValueRange()); // Allocate memory for coroutine frame. 
- auto coroAlloc = builder.create( - loc, i8Ptr, builder.getSymbolRefAttr(kMalloc), - ValueRange(coroSize.getResult(0))); + auto coroAlloc = + builder.create(i8Ptr, builder.getSymbolRefAttr(kMalloc), + ValueRange(coroSize.getResult(0))); // Begin a coroutine: @llvm.coro.begin auto coroHdl = builder.create( - loc, i8Ptr, builder.getSymbolRefAttr(kCoroBegin), + i8Ptr, builder.getSymbolRefAttr(kCoroBegin), ValueRange({coroId.getResult(0), coroAlloc.getResult(0)})); Block *cleanupBlock = func.addBlock(); @@ -350,15 +352,14 @@ static CoroMachinery setupCoroMachinery(FuncOp func) { // Get a pointer to the coroutine frame memory: @llvm.coro.free. auto coroMem = builder.create( - loc, i8Ptr, builder.getSymbolRefAttr(kCoroFree), + i8Ptr, builder.getSymbolRefAttr(kCoroFree), ValueRange({coroId.getResult(0), coroHdl.getResult(0)})); // Free the memory. - builder.create(loc, TypeRange(), - builder.getSymbolRefAttr(kFree), + builder.create(TypeRange(), builder.getSymbolRefAttr(kFree), ValueRange(coroMem.getResult(0))); // Branch into the suspend block. - builder.create(loc, suspendBlock); + builder.create(suspendBlock); // ------------------------------------------------------------------------ // // Coroutine suspend block: mark the end of a coroutine and return allocated @@ -367,17 +368,17 @@ static CoroMachinery setupCoroMachinery(FuncOp func) { builder.setInsertionPointToStart(suspendBlock); // Mark the end of a coroutine: @llvm.coro.end. - builder.create(loc, i1, builder.getSymbolRefAttr(kCoroEnd), + builder.create(i1, builder.getSymbolRefAttr(kCoroEnd), ValueRange({coroHdl.getResult(0), constFalse})); // Return created `async.token` from the suspend block. This will be the // return value of a coroutine ramp function. - builder.create(loc, createToken.getResult(0)); + builder.create(createToken.getResult(0)); // Branch from the entry block to the cleanup block to create a valid CFG. builder.setInsertionPointToEnd(entryBlock); - builder.create(loc, cleanupBlock); + builder.create(cleanupBlock); // `async.await` op lowering will create resume blocks for async // continuations, and will conditionally branch to cleanup or suspend blocks. @@ -471,8 +472,6 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { MLIRContext *ctx = module.getContext(); Location loc = execute.getLoc(); - OpBuilder moduleBuilder(module.getBody()->getTerminator()); - // Collect all outlined function inputs. llvm::SetVector functionInputs(execute.dependencies().begin(), execute.dependencies().end()); @@ -484,13 +483,13 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { SmallVector inputTypes(typesRange.begin(), typesRange.end()); auto outputTypes = execute.getResultTypes(); - auto funcType = moduleBuilder.getFunctionType(inputTypes, outputTypes); + auto funcType = FunctionType::get(ctx, inputTypes, outputTypes); auto funcAttrs = ArrayRef(); // TODO: Derive outlined function name from the parent FuncOp (support // multiple nested async.execute operations). FuncOp func = FuncOp::create(loc, kAsyncFnPrefix, funcType, funcAttrs); - symbolTable.insert(func, moduleBuilder.getInsertionPoint()); + symbolTable.insert(func, Block::iterator(module.getBody()->getTerminator())); SymbolTable::setSymbolVisibility(func, SymbolTable::Visibility::Private); @@ -502,21 +501,21 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { // Async execute API (execution will be resumed in a thread managed by the // async runtime). 
Block *entryBlock = &func.getBlocks().front(); - OpBuilder builder = OpBuilder::atBlockTerminator(entryBlock); + auto builder = ImplicitLocOpBuilder::atBlockTerminator(loc, entryBlock); // A pointer to coroutine resume intrinsic wrapper. auto resumeFnTy = AsyncAPI::resumeFunctionType(ctx); auto resumePtr = builder.create( - loc, LLVM::LLVMPointerType::get(resumeFnTy), kResume); + LLVM::LLVMPointerType::get(resumeFnTy), kResume); // Save the coroutine state: @llvm.coro.save auto coroSave = builder.create( - loc, LLVM::LLVMTokenType::get(ctx), builder.getSymbolRefAttr(kCoroSave), + LLVM::LLVMTokenType::get(ctx), builder.getSymbolRefAttr(kCoroSave), ValueRange({coro.coroHandle})); // Call async runtime API to execute a coroutine in the managed thread. SmallVector executeArgs = {coro.coroHandle, resumePtr.res()}; - builder.create(loc, TypeRange(), kExecute, executeArgs); + builder.create(TypeRange(), kExecute, executeArgs); // Split the entry block before the terminator. auto *terminatorOp = entryBlock->getTerminator(); @@ -528,7 +527,7 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { // Await on all dependencies before starting to execute the body region. builder.setInsertionPointToStart(resume); for (size_t i = 0; i < execute.dependencies().size(); ++i) - builder.create(loc, func.getArgument(i)); + builder.create(func.getArgument(i)); // Map from function inputs defined above the execute op to the function // arguments. @@ -540,17 +539,16 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { // to async runtime to emplace the result token. for (Operation &op : execute.body().getOps()) { if (isa(op)) { - builder.create(loc, kEmplaceToken, TypeRange(), coro.asyncToken); + builder.create(kEmplaceToken, TypeRange(), coro.asyncToken); continue; } builder.clone(op, valueMapping); } // Replace the original `async.execute` with a call to outlined function. - OpBuilder callBuilder(execute); - auto callOutlinedFunc = - callBuilder.create(loc, func.getName(), execute.getResultTypes(), - functionInputs.getArrayRef()); + ImplicitLocOpBuilder callBuilder(loc, execute); + auto callOutlinedFunc = callBuilder.create( + func.getName(), execute.getResultTypes(), functionInputs.getArrayRef()); execute.replaceAllUsesWith(callOutlinedFunc.getResults()); execute.erase(); @@ -744,24 +742,24 @@ class AwaitOpLoweringBase : public ConversionPattern { if (isInCoroutine) { const CoroMachinery &coro = outlined->getSecond(); - OpBuilder builder(op, rewriter.getListener()); + ImplicitLocOpBuilder builder(loc, op, rewriter.getListener()); MLIRContext *ctx = op->getContext(); // A pointer to coroutine resume intrinsic wrapper. auto resumeFnTy = AsyncAPI::resumeFunctionType(ctx); auto resumePtr = builder.create( - loc, LLVM::LLVMPointerType::get(resumeFnTy), kResume); + LLVM::LLVMPointerType::get(resumeFnTy), kResume); // Save the coroutine state: @llvm.coro.save auto coroSave = builder.create( - loc, LLVM::LLVMTokenType::get(ctx), - builder.getSymbolRefAttr(kCoroSave), ValueRange(coro.coroHandle)); + LLVM::LLVMTokenType::get(ctx), builder.getSymbolRefAttr(kCoroSave), + ValueRange(coro.coroHandle)); // Call async runtime API to resume a coroutine in the managed thread when // the async await argument becomes ready. SmallVector awaitAndExecuteArgs = {operands[0], coro.coroHandle, resumePtr.res()}; - builder.create(loc, TypeRange(), coroAwaitFuncName, + builder.create(TypeRange(), coroAwaitFuncName, awaitAndExecuteArgs); Block *suspended = op->getBlock();