From 57ffbe020af6469b7c2fdb599f2f7e5e5d0322f0 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 22 Dec 2020 11:00:57 -0500 Subject: [PATCH 01/10] glld/mac] Don't add names of unreferenced symbols to string table Before this, a hello world program would contain many many unnecessary entries in its string table. No behavior change, just makes the string table in the output smaller and more like ld64's. Differential Revision: https://reviews.llvm.org/D93711 --- lld/MachO/SyntheticSections.cpp | 22 +++++++++++----------- lld/test/MachO/symtab.s | 6 ++++-- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 8b2ebd36e1ae8e..2ed1f2eb34fbea 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -694,6 +694,11 @@ void SymtabSection::emitStabs() { } void SymtabSection::finalizeContents() { + auto addSymbol = [&](std::vector &symbols, Symbol *sym) { + uint32_t strx = stringTableSection.addString(sym->getName()); + symbols.push_back({sym, strx}); + }; + // Local symbols aren't in the SymbolTable, so we walk the list of object // files to gather them. for (InputFile *file : inputFiles) { @@ -702,10 +707,8 @@ void SymtabSection::finalizeContents() { // TODO: when we implement -dead_strip, we should filter out symbols // that belong to dead sections. if (auto *defined = dyn_cast(sym)) { - if (!defined->isExternal()) { - uint32_t strx = stringTableSection.addString(sym->getName()); - localSymbols.push_back({sym, strx}); - } + if (!defined->isExternal()) + addSymbol(localSymbols, sym); } } } @@ -713,19 +716,16 @@ void SymtabSection::finalizeContents() { // __dyld_private is a local symbol too. It's linker-created and doesn't // exist in any object file. - if (Defined* dyldPrivate = in.stubHelper->dyldPrivate) { - uint32_t strx = stringTableSection.addString(dyldPrivate->getName()); - localSymbols.push_back({dyldPrivate, strx}); - } + if (Defined* dyldPrivate = in.stubHelper->dyldPrivate) + addSymbol(localSymbols, dyldPrivate); for (Symbol *sym : symtab->getSymbols()) { - uint32_t strx = stringTableSection.addString(sym->getName()); if (auto *defined = dyn_cast(sym)) { assert(defined->isExternal()); - externalSymbols.push_back({sym, strx}); + addSymbol(externalSymbols, sym); } else if (auto *dysym = dyn_cast(sym)) { if (dysym->isReferenced()) - undefinedSymbols.push_back({sym, strx}); + addSymbol(undefinedSymbols, sym); } } diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s index d18986c9d91c0d..fa784a34e16a00 100644 --- a/lld/test/MachO/symtab.s +++ b/lld/test/MachO/symtab.s @@ -86,17 +86,19 @@ # CHECK-NEXT: iundefsym: 5 # CHECK-NEXT: nundefsym: 2 -## Verify that the first entry in the StringTable is a space. +## Verify that the first entry in the StringTable is a space, and that +## unreferenced symbols aren't emitted. # RUN: obj2yaml %t/test | FileCheck %s --check-prefix=YAML # YAML: StringTable: # YAML-NEXT: ' ' +# YAML-NOT: _unreferenced #--- libfoo.s .globl _dynamic _dynamic: #--- test.s -.globl _main, _external, _external_weak +.globl _main, _external, _external_weak, _unreferenced .data _external: From 0d15d4b6f43a3355c1d618766c8e550cfe1481d0 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 22 Dec 2020 14:13:39 -0500 Subject: [PATCH 02/10] [SLP] use operand index abstraction for number of operands I think this is NFC currently, but the bug would be exposed when we allow binary intrinsics (maxnum, etc) as candidates for reductions. 
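As a loose illustration of why the two counts must match (a toy sketch, not the SLPVectorizer code; Node, numberOfOperands(), and markFullyVisited() below are hypothetical names): if the traversal's in-bounds check uses one notion of "number of operands" while the "mark as fully visited" path uses the raw Instruction::getNumOperands(), the sentinel and the loop bound can disagree once the counts differ (e.g. a call's callee operand, or an offset first-operand index).

    #include <cassert>
    #include <vector>

    // Toy stand-in for an instruction: a call such as maxnum has two value
    // operands plus the callee, so its raw operand count is 3.
    struct Node {
      std::vector<int> Operands;
    };

    // Hypothetical abstraction mirroring OperationData::getNumberOfOperands():
    // only the operands that participate in the reduction are counted.
    unsigned numberOfOperands(const Node &) { return 2; }

    // "Nothing left to visit": must use the same bound the traversal loop
    // compares EdgeToVisit against, not Node::Operands.size().
    void markFullyVisited(const Node &N, unsigned &EdgeToVisit) {
      EdgeToVisit = numberOfOperands(N);
    }

    int main() {
      Node Call{{/*lhs*/ 1, /*rhs*/ 2, /*callee*/ 99}};
      unsigned EdgeToVisit = 0;
      markFullyVisited(Call, EdgeToVisit);
      // The in-bounds check and the sentinel now agree, so the node is not
      // visited again.
      assert(!(EdgeToVisit < numberOfOperands(Call)));
      return 0;
    }
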
The code in matchAssociativeReduction() is using OperationData::getNumberOfOperands() when comparing whether the "EdgeToVisit" iterator is in-bounds, so this code must use the same (potentially offset) operand value to set the "EdgeToVisit". --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b03fb203c6d75b..baa8ce2638a0d1 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6772,7 +6772,8 @@ class HorizontalReduction { // in this case. // Do not perform analysis of remaining operands of ParentStackElem.first // instruction, this whole instruction is an extra argument. - ParentStackElem.second = ParentStackElem.first->getNumOperands(); + OperationData OpData = getOperationData(ParentStackElem.first); + ParentStackElem.second = OpData.getNumberOfOperands(); } else { // We ran into something like: // ParentStackElem.first += ... + ExtraArg + ... From f6929c01952b3f144df620544ed937e801b9c945 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 22 Dec 2020 14:54:09 -0500 Subject: [PATCH 03/10] [SLP] add reduction tests for maxnum/minnum intrinsics; NFC --- .../Transforms/SLPVectorizer/X86/fmaxnum.ll | 147 ++++++++++++++++++ .../Transforms/SLPVectorizer/X86/fminnum.ll | 147 ++++++++++++++++++ 2 files changed, 294 insertions(+) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll index e03f3f808a4ff2..23f2196b2425b3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll @@ -338,4 +338,151 @@ define void @fmaxnum_16f32() #0 { ret void } +define float @reduction_v4f32_fast(float* %p) { +; CHECK-LABEL: @reduction_v4f32_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: ret float [[M3]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %m1 = tail call fast float @llvm.maxnum.f32(float %t1, float %t0) + %m2 = tail call fast float @llvm.maxnum.f32(float %t2, float %m1) + %m3 = tail call fast float @llvm.maxnum.f32(float %t3, float %m2) + ret float %m3 +} + +define float @reduction_v4f32_nnan(float* %p) { +; CHECK-LABEL: @reduction_v4f32_nnan( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr 
inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: ret float [[M3]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %m1 = tail call nnan float @llvm.maxnum.f32(float %t1, float %t0) + %m2 = tail call nnan float @llvm.maxnum.f32(float %t2, float %m1) + %m3 = tail call nnan float @llvm.maxnum.f32(float %t3, float %m2) + ret float %m3 +} + +define float @reduction_v8f32_fast(float* %p) { +; CHECK-LABEL: @reduction_v8f32_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds float, float* [[P]], i64 4 +; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds float, float* [[P]], i64 5 +; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds float, float* [[P]], i64 6 +; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds float, float* [[P]], i64 7 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[T4:%.*]] = load float, float* [[G4]], align 4 +; CHECK-NEXT: [[T5:%.*]] = load float, float* [[G5]], align 4 +; CHECK-NEXT: [[T6:%.*]] = load float, float* [[G6]], align 4 +; CHECK-NEXT: [[T7:%.*]] = load float, float* [[G7]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: [[M4:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T4]], float [[M3]]) +; CHECK-NEXT: [[M5:%.*]] = tail call fast float @llvm.maxnum.f32(float [[M4]], float [[T6]]) +; CHECK-NEXT: [[M6:%.*]] = tail call fast float @llvm.maxnum.f32(float [[M5]], float [[T5]]) +; CHECK-NEXT: [[M7:%.*]] = tail call fast float @llvm.maxnum.f32(float [[M6]], float [[T7]]) +; CHECK-NEXT: ret float [[M7]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %g4 = getelementptr inbounds float, float* %p, i64 4 + %g5 = getelementptr inbounds float, float* %p, i64 5 + %g6 = getelementptr inbounds float, float* %p, i64 6 + %g7 = getelementptr inbounds float, float* %p, i64 7 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %t4 = load float, 
float* %g4, align 4 + %t5 = load float, float* %g5, align 4 + %t6 = load float, float* %g6, align 4 + %t7 = load float, float* %g7, align 4 + %m1 = tail call fast float @llvm.maxnum.f32(float %t1, float %t0) + %m2 = tail call fast float @llvm.maxnum.f32(float %t2, float %m1) + %m3 = tail call fast float @llvm.maxnum.f32(float %t3, float %m2) + %m4 = tail call fast float @llvm.maxnum.f32(float %t4, float %m3) + %m5 = tail call fast float @llvm.maxnum.f32(float %m4, float %t6) + %m6 = tail call fast float @llvm.maxnum.f32(float %m5, float %t5) + %m7 = tail call fast float @llvm.maxnum.f32(float %m6, float %t7) + ret float %m7 +} + +define double @reduction_v2f64_fast(double* %p) { +; CHECK-LABEL: @reduction_v2f64_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1 +; CHECK-NEXT: [[T0:%.*]] = load double, double* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load double, double* [[G1]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast double @llvm.maxnum.f64(double [[T1]], double [[T0]]) +; CHECK-NEXT: ret double [[M1]] +; + %g1 = getelementptr inbounds double, double* %p, i64 1 + %t0 = load double, double* %p, align 4 + %t1 = load double, double* %g1, align 4 + %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0) + ret double %m1 +} + +define double @reduction_v4f64_fast(double* %p) { +; CHECK-LABEL: @reduction_v4f64_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load double, double* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load double, double* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load double, double* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load double, double* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast double @llvm.maxnum.f64(double [[T1]], double [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast double @llvm.maxnum.f64(double [[T2]], double [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast double @llvm.maxnum.f64(double [[T3]], double [[M2]]) +; CHECK-NEXT: ret double [[M3]] +; + %g1 = getelementptr inbounds double, double* %p, i64 1 + %g2 = getelementptr inbounds double, double* %p, i64 2 + %g3 = getelementptr inbounds double, double* %p, i64 3 + %t0 = load double, double* %p, align 4 + %t1 = load double, double* %g1, align 4 + %t2 = load double, double* %g2, align 4 + %t3 = load double, double* %g3, align 4 + %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0) + %m2 = tail call fast double @llvm.maxnum.f64(double %t2, double %m1) + %m3 = tail call fast double @llvm.maxnum.f64(double %t3, double %m2) + ret double %m3 +} + attributes #0 = { nounwind } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll index b830d826c2e960..81bcfb2f1e9b74 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll @@ -338,4 +338,151 @@ define void @fminnum_16f32() #0 { ret void } +define float @reduction_v4f32_fast(float* %p) { +; CHECK-LABEL: @reduction_v4f32_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 
+; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast float @llvm.minnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast float @llvm.minnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast float @llvm.minnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: ret float [[M3]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %m1 = tail call fast float @llvm.minnum.f32(float %t1, float %t0) + %m2 = tail call fast float @llvm.minnum.f32(float %t2, float %m1) + %m3 = tail call fast float @llvm.minnum.f32(float %t3, float %m2) + ret float %m3 +} + +define float @reduction_v4f32_nnan(float* %p) { +; CHECK-LABEL: @reduction_v4f32_nnan( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: ret float [[M3]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %m1 = tail call nnan float @llvm.minnum.f32(float %t1, float %t0) + %m2 = tail call nnan float @llvm.minnum.f32(float %t2, float %m1) + %m3 = tail call nnan float @llvm.minnum.f32(float %t3, float %m2) + ret float %m3 +} + +define float @reduction_v8f32_fast(float* %p) { +; CHECK-LABEL: @reduction_v8f32_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds float, float* [[P]], i64 4 +; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds float, float* [[P]], i64 5 +; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds float, float* [[P]], i64 6 +; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds float, float* [[P]], i64 7 +; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT: [[T4:%.*]] = load float, float* [[G4]], align 4 +; CHECK-NEXT: [[T5:%.*]] = load float, float* [[G5]], align 4 +; CHECK-NEXT: [[T6:%.*]] = load float, 
float* [[G6]], align 4 +; CHECK-NEXT: [[T7:%.*]] = load float, float* [[G7]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast float @llvm.minnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast float @llvm.minnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast float @llvm.minnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT: [[M4:%.*]] = tail call fast float @llvm.minnum.f32(float [[T4]], float [[M3]]) +; CHECK-NEXT: [[M5:%.*]] = tail call fast float @llvm.minnum.f32(float [[M4]], float [[T6]]) +; CHECK-NEXT: [[M6:%.*]] = tail call fast float @llvm.minnum.f32(float [[M5]], float [[T5]]) +; CHECK-NEXT: [[M7:%.*]] = tail call fast float @llvm.minnum.f32(float [[M6]], float [[T7]]) +; CHECK-NEXT: ret float [[M7]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %g4 = getelementptr inbounds float, float* %p, i64 4 + %g5 = getelementptr inbounds float, float* %p, i64 5 + %g6 = getelementptr inbounds float, float* %p, i64 6 + %g7 = getelementptr inbounds float, float* %p, i64 7 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %t4 = load float, float* %g4, align 4 + %t5 = load float, float* %g5, align 4 + %t6 = load float, float* %g6, align 4 + %t7 = load float, float* %g7, align 4 + %m1 = tail call fast float @llvm.minnum.f32(float %t1, float %t0) + %m2 = tail call fast float @llvm.minnum.f32(float %t2, float %m1) + %m3 = tail call fast float @llvm.minnum.f32(float %t3, float %m2) + %m4 = tail call fast float @llvm.minnum.f32(float %t4, float %m3) + %m5 = tail call fast float @llvm.minnum.f32(float %m4, float %t6) + %m6 = tail call fast float @llvm.minnum.f32(float %m5, float %t5) + %m7 = tail call fast float @llvm.minnum.f32(float %m6, float %t7) + ret float %m7 +} + +define double @reduction_v2f64_fast(double* %p) { +; CHECK-LABEL: @reduction_v2f64_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1 +; CHECK-NEXT: [[T0:%.*]] = load double, double* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load double, double* [[G1]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast double @llvm.minnum.f64(double [[T1]], double [[T0]]) +; CHECK-NEXT: ret double [[M1]] +; + %g1 = getelementptr inbounds double, double* %p, i64 1 + %t0 = load double, double* %p, align 4 + %t1 = load double, double* %g1, align 4 + %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0) + ret double %m1 +} + +define double @reduction_v4f64_fast(double* %p) { +; CHECK-LABEL: @reduction_v4f64_fast( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3 +; CHECK-NEXT: [[T0:%.*]] = load double, double* [[P]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load double, double* [[G1]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load double, double* [[G2]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load double, double* [[G3]], align 4 +; CHECK-NEXT: [[M1:%.*]] = tail call fast double @llvm.minnum.f64(double [[T1]], double [[T0]]) +; CHECK-NEXT: [[M2:%.*]] = tail call fast double @llvm.minnum.f64(double [[T2]], double [[M1]]) +; CHECK-NEXT: [[M3:%.*]] = tail call fast double @llvm.minnum.f64(double [[T3]], double [[M2]]) +; CHECK-NEXT: ret 
double [[M3]] +; + %g1 = getelementptr inbounds double, double* %p, i64 1 + %g2 = getelementptr inbounds double, double* %p, i64 2 + %g3 = getelementptr inbounds double, double* %p, i64 3 + %t0 = load double, double* %p, align 4 + %t1 = load double, double* %g1, align 4 + %t2 = load double, double* %g2, align 4 + %t3 = load double, double* %g3, align 4 + %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0) + %m2 = tail call fast double @llvm.minnum.f64(double %t2, double %m1) + %m3 = tail call fast double @llvm.minnum.f64(double %t3, double %m2) + ret double %m3 +} + attributes #0 = { nounwind } From 3dbe471a260392ec63dda8deb2709160afc56dde Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 22 Dec 2020 21:36:41 +0100 Subject: [PATCH 04/10] [clangd] Use atomics instead of locks to track periodic memory trimming Instead of always locking/unlocking a contended mutex, we now do one atomic read in the common case, and one read + one exchange if the timer has expried. Also use this for memory profiling which has similar/compatible requirements. Differential Revision: https://reviews.llvm.org/D93726 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 37 ++++--------------- clang-tools-extra/clangd/ClangdLSPServer.h | 11 ++---- .../clangd/support/Threading.cpp | 12 ++++++ clang-tools-extra/clangd/support/Threading.h | 29 +++++++++++++++ .../unittests/support/ThreadingTests.cpp | 21 +++++++++++ 5 files changed, 73 insertions(+), 37 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 0c42f95fb59474..c606ccae4fdc0d 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -1285,13 +1285,7 @@ void ClangdLSPServer::publishDiagnostics( } void ClangdLSPServer::maybeExportMemoryProfile() { - if (!trace::enabled()) - return; - // Profiling might be expensive, so we throttle it to happen once every 5 - // minutes. 
- static constexpr auto ProfileInterval = std::chrono::minutes(5); - auto Now = std::chrono::steady_clock::now(); - if (Now < NextProfileTime) + if (!trace::enabled() || !ShouldProfile()) return; static constexpr trace::Metric MemoryUsage( @@ -1300,27 +1294,11 @@ void ClangdLSPServer::maybeExportMemoryProfile() { MemoryTree MT; profile(MT); record(MT, "clangd_lsp_server", MemoryUsage); - NextProfileTime = Now + ProfileInterval; } void ClangdLSPServer::maybeCleanupMemory() { - // Memory cleanup is probably expensive, throttle it - static constexpr auto MemoryCleanupInterval = std::chrono::minutes(1); - - if (!Opts.MemoryCleanup) + if (!Opts.MemoryCleanup || !ShouldCleanupMemory()) return; - - // FIXME: this can probably be done without a mutex - // and the logic could be shared with maybeExportMemoryProfile - { - auto Now = std::chrono::steady_clock::now(); - std::lock_guard Lock(NextMemoryCleanupTimeMutex); - if (Now < NextMemoryCleanupTime) - return; - NextMemoryCleanupTime = Now + MemoryCleanupInterval; - } - - vlog("Calling memory cleanup callback"); Opts.MemoryCleanup(); } @@ -1481,10 +1459,15 @@ void ClangdLSPServer::onAST(const ASTParams &Params, ClangdLSPServer::ClangdLSPServer(class Transport &Transp, const ThreadsafeFS &TFS, const ClangdLSPServer::Options &Opts) - : BackgroundContext(Context::current().clone()), Transp(Transp), + : ShouldProfile(/*Period=*/std::chrono::minutes(5), + /*Delay=*/std::chrono::minutes(1)), + ShouldCleanupMemory(/*Period=*/std::chrono::minutes(1), + /*Delay=*/std::chrono::minutes(1)), + BackgroundContext(Context::current().clone()), Transp(Transp), MsgHandler(new MessageHandler(*this)), TFS(TFS), SupportedSymbolKinds(defaultSymbolKinds()), SupportedCompletionItemKinds(defaultCompletionItemKinds()), Opts(Opts) { + // clang-format off MsgHandler->bind("initialize", &ClangdLSPServer::onInitialize); MsgHandler->bind("initialized", &ClangdLSPServer::onInitialized); @@ -1529,10 +1512,6 @@ ClangdLSPServer::ClangdLSPServer(class Transport &Transp, if (Opts.FoldingRanges) MsgHandler->bind("textDocument/foldingRange", &ClangdLSPServer::onFoldingRange); // clang-format on - - // Delay first profile and memory cleanup until we've finished warming up. - NextMemoryCleanupTime = NextProfileTime = - std::chrono::steady_clock::now() + std::chrono::minutes(1); } ClangdLSPServer::~ClangdLSPServer() { diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h index b5f9d2c9d766ab..a41bc5666af333 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.h +++ b/clang-tools-extra/clangd/ClangdLSPServer.h @@ -19,6 +19,7 @@ #include "support/Context.h" #include "support/MemoryTree.h" #include "support/Path.h" +#include "support/Threading.h" #include "clang/Tooling/Core/Replacement.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringSet.h" @@ -186,18 +187,12 @@ class ClangdLSPServer : private ClangdServer::Callbacks { /// Runs profiling and exports memory usage metrics if tracing is enabled and /// profiling hasn't happened recently. void maybeExportMemoryProfile(); + PeriodicThrottler ShouldProfile; /// Run the MemoryCleanup callback if it's time. /// This method is thread safe. void maybeCleanupMemory(); - - /// Timepoint until which profiling is off. It is used to throttle profiling - /// requests. - std::chrono::steady_clock::time_point NextProfileTime; - - /// Next time we want to call the MemoryCleanup callback. 
- std::mutex NextMemoryCleanupTimeMutex; - std::chrono::steady_clock::time_point NextMemoryCleanupTime; + PeriodicThrottler ShouldCleanupMemory; /// Since initialization of CDBs and ClangdServer is done lazily, the /// following context captures the one used while creating ClangdLSPServer and diff --git a/clang-tools-extra/clangd/support/Threading.cpp b/clang-tools-extra/clangd/support/Threading.cpp index 5f95888ae3e2d7..7f3bd62be306ce 100644 --- a/clang-tools-extra/clangd/support/Threading.cpp +++ b/clang-tools-extra/clangd/support/Threading.cpp @@ -116,5 +116,17 @@ void wait(std::unique_lock &Lock, std::condition_variable &CV, CV.wait_until(Lock, D.time()); } +bool PeriodicThrottler::operator()() { + Rep Now = Stopwatch::now().time_since_epoch().count(); + Rep OldNext = Next.load(std::memory_order_acquire); + if (Now < OldNext) + return false; + // We're ready to run (but may be racing other threads). + // Work out the updated target time, and run if we successfully bump it. + Rep NewNext = Now + Period; + return Next.compare_exchange_strong(OldNext, NewNext, + std::memory_order_acq_rel); +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/support/Threading.h b/clang-tools-extra/clangd/support/Threading.h index 5155ac193fd18a..da9e3b8ea8b688 100644 --- a/clang-tools-extra/clangd/support/Threading.h +++ b/clang-tools-extra/clangd/support/Threading.h @@ -169,6 +169,35 @@ template class Memoize { } }; +/// Used to guard an operation that should run at most every N seconds. +/// +/// Usage: +/// mutable PeriodicThrottler ShouldLog(std::chrono::seconds(1)); +/// void calledFrequently() { +/// if (ShouldLog()) +/// log("this is not spammy"); +/// } +/// +/// This class is threadsafe. If multiple threads are involved, then the guarded +/// operation still needs to be threadsafe! +class PeriodicThrottler { + using Stopwatch = std::chrono::steady_clock; + using Rep = Stopwatch::duration::rep; + + Rep Period; + std::atomic Next; + +public: + /// If Period is zero, the throttler will return true every time. + PeriodicThrottler(Stopwatch::duration Period, Stopwatch::duration Delay = {}) + : Period(Period.count()), + Next((Stopwatch::now() + Delay).time_since_epoch().count()) {} + + /// Returns whether the operation should run at this time. + /// operator() is safe to call concurrently. + bool operator()(); +}; + } // namespace clangd } // namespace clang #endif diff --git a/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp b/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp index e265ad2eabeaea..87002d3cfa86a4 100644 --- a/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp +++ b/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp @@ -10,6 +10,7 @@ #include "llvm/ADT/DenseMap.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include #include namespace clang { @@ -121,5 +122,25 @@ TEST_F(ThreadingTest, MemoizeDeterministic) { ASSERT_THAT(ValueA.load(), testing::AnyOf('A', 'B')); } +// It's hard to write a real test of this class, std::chrono is awkward to mock. +// But test some degenerate cases at least. 
+TEST(PeriodicThrottlerTest, Minimal) { + PeriodicThrottler Once(std::chrono::hours(24)); + EXPECT_TRUE(Once()); + EXPECT_FALSE(Once()); + EXPECT_FALSE(Once()); + + PeriodicThrottler Later(std::chrono::hours(24), + /*Delay=*/std::chrono::hours(24)); + EXPECT_FALSE(Later()); + EXPECT_FALSE(Later()); + EXPECT_FALSE(Later()); + + PeriodicThrottler Always(std::chrono::seconds(0)); + EXPECT_TRUE(Always()); + EXPECT_TRUE(Always()); + EXPECT_TRUE(Always()); +} + } // namespace clangd } // namespace clang From df6cbd37f57fd330e413c394a4653ea55393fcef Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Tue, 22 Dec 2020 17:42:59 +0100 Subject: [PATCH 05/10] [mlir] Lower gpu.memcpy to GPU runtime calls. Reviewed By: herhut Differential Revision: https://reviews.llvm.org/D93204 --- .../ConvertLaunchFuncToRuntimeCalls.cpp | 65 +++++++++++++++++++ .../lower-memcpy-to-gpu-runtime-calls.mlir | 19 ++++++ .../cuda-runtime-wrappers.cpp | 7 ++ .../rocm-runtime-wrappers.cpp | 5 ++ 4 files changed, 96 insertions(+) create mode 100644 mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp index 3b4b39e57d557c..41a079c44eea58 100644 --- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp +++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp @@ -151,6 +151,12 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern { "mgpuMemFree", llvmVoidType, {llvmPointerType /* void *ptr */, llvmPointerType /* void *stream */}}; + FunctionCallBuilder memcpyCallBuilder = { + "mgpuMemcpy", + llvmVoidType, + {llvmPointerType /* void *dst */, llvmPointerType /* void *src */, + llvmIntPtrType /* intptr_t sizeBytes */, + llvmPointerType /* void *stream */}}; }; /// A rewrite pattern to convert gpu.host_register operations into a GPU runtime @@ -268,6 +274,20 @@ class EraseGpuModuleOpPattern : public OpRewritePattern { return success(); } }; + +/// A rewrite pattern to convert gpu.memcpy operations into a GPU runtime +/// call. Currently it supports CUDA and ROCm (HIP). +class ConvertMemcpyOpToGpuRuntimeCallPattern + : public ConvertOpToGpuRuntimeCallPattern { +public: + ConvertMemcpyOpToGpuRuntimeCallPattern(LLVMTypeConverter &typeConverter) + : ConvertOpToGpuRuntimeCallPattern(typeConverter) {} + +private: + LogicalResult + matchAndRewrite(gpu::MemcpyOp memcpyOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; } // namespace void GpuToLLVMConversionPass::runOnOperation() { @@ -643,6 +663,50 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite( return success(); } +LogicalResult ConvertMemcpyOpToGpuRuntimeCallPattern::matchAndRewrite( + gpu::MemcpyOp memcpyOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + auto memRefType = memcpyOp.src().getType().cast(); + + if (failed(areAllLLVMTypes(memcpyOp, operands, rewriter)) || + !isSupportedMemRefType(memRefType) || + failed(isAsyncWithOneDependency(rewriter, memcpyOp))) + return failure(); + + auto loc = memcpyOp.getLoc(); + auto adaptor = gpu::MemcpyOpAdaptor(operands, memcpyOp->getAttrDictionary()); + + MemRefDescriptor srcDesc(adaptor.src()); + + Value numElements = + memRefType.hasStaticShape() + ? createIndexConstant(rewriter, loc, memRefType.getNumElements()) + // For identity layouts (verified above), the number of elements is + // stride[0] * size[0]. 
+ : rewriter.create(loc, srcDesc.stride(rewriter, loc, 0), + srcDesc.size(rewriter, loc, 0)); + + Type elementPtrType = getElementPtrType(memRefType); + Value nullPtr = rewriter.create(loc, elementPtrType); + Value gepPtr = rewriter.create( + loc, elementPtrType, ArrayRef{nullPtr, numElements}); + auto sizeBytes = + rewriter.create(loc, getIndexType(), gepPtr); + + auto src = rewriter.create( + loc, llvmPointerType, srcDesc.alignedPtr(rewriter, loc)); + auto dst = rewriter.create( + loc, llvmPointerType, + MemRefDescriptor(adaptor.dst()).alignedPtr(rewriter, loc)); + + auto stream = adaptor.asyncDependencies().front(); + memcpyCallBuilder.create(loc, rewriter, {dst, src, sizeBytes, stream}); + + rewriter.replaceOp(memcpyOp, {stream}); + + return success(); +} + std::unique_ptr> mlir::createGpuToLLVMConversionPass(StringRef gpuBinaryAnnotation) { return std::make_unique(gpuBinaryAnnotation); @@ -658,6 +722,7 @@ void mlir::populateGpuToLLVMConversionPatterns( patterns.insert(converter); patterns.insert( diff --git a/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir new file mode 100644 index 00000000000000..790c92f92ec96f --- /dev/null +++ b/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir @@ -0,0 +1,19 @@ +// RUN: mlir-opt -allow-unregistered-dialect %s --gpu-to-llvm | FileCheck %s + +module attributes {gpu.container_module} { + + // CHECK: func @foo + func @foo(%dst : memref<7xf32, 1>, %src : memref<7xf32>) { + // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate + %t0 = gpu.wait async + // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint + // CHECK: %[[src:.*]] = llvm.bitcast + // CHECK: %[[dst:.*]] = llvm.bitcast + // CHECK: llvm.call @mgpuMemcpy(%[[dst]], %[[src]], %[[size_bytes]], %[[t0]]) + %t1 = gpu.memcpy async [%t0] %dst, %src : memref<7xf32, 1>, memref<7xf32> + // CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]]) + // CHECK: llvm.call @mgpuStreamDestroy(%[[t0]]) + gpu.wait [%t1] + return + } +} diff --git a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp index a6729b1c0b7d1d..72d172889d3019 100644 --- a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp +++ b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp @@ -117,6 +117,13 @@ extern "C" void mgpuMemFree(void *ptr, CUstream /*stream*/) { CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast(ptr))); } +extern "C" void mgpuMemcpy(void *dst, void *src, uint64_t sizeBytes, + CUstream stream) { + CUDA_REPORT_IF_ERROR(cuMemcpyAsync(reinterpret_cast(dst), + reinterpret_cast(src), + sizeBytes, stream)); +} + /// Helper functions for writing mlir example code // Allows to register byte array with the CUDA runtime. Helpful until we have diff --git a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp b/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp index aad7ae27ff8924..4f62f204f4a837 100644 --- a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp +++ b/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp @@ -118,6 +118,11 @@ extern "C" void mgpuMemFree(void *ptr, hipStream_t /*stream*/) { HIP_REPORT_IF_ERROR(hipMemFree(ptr)); } +extern "C" void mgpuMemcpy(void *dst, void *src, uint64_t sizeBytes, + hipStream_t stream) { + HIP_REPORT_IF_ERROR(hipMemcpyAsync(dst, src, sizeBytes, stream)); +} + /// Helper functions for writing mlir example code // Allows to register byte array with the ROCM runtime. 
Helpful until we have From f7a26127f21fb1ca8252879ca647835ea7c5903d Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 22 Dec 2020 22:58:39 +0100 Subject: [PATCH 06/10] [clangd] Release notes for b8c37153d5393aad96 --- clang-tools-extra/docs/ReleaseNotes.rst | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 450b80fd45814e..2960aad5a5569a 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -47,7 +47,17 @@ Major New Features Improvements to clangd ---------------------- -The improvements are... +- clangd's memory usage is significantly reduced on most Linux systems. + In particular, memory usage should not increase dramatically over time. + + The standard allocator on most systems is glibc's ptmalloc2, and it creates + disproportionately large heaps when handling clangd's allocation patterns. + By default, clangd will now periodically call ``malloc_trim`` to release free + pages on glibc systems. + + Users of other allocators (such as ``jemalloc`` or ``tcmalloc``) on glibc + systems can disable this using ``--malloc_trim=0`` or the CMake flag + ``-DCLANGD_MALLOC_TRIM=0``. Improvements to clang-doc ------------------------- From a781a706b961a348006b604cdff8b555e62a2fcb Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Tue, 22 Dec 2020 14:29:06 -0800 Subject: [PATCH 07/10] [WebAssembly][SIMD] Rename shuffle, swizzle, and load_splats These instructions previously used prefixes like v8x16 to signify that they were agnostic between float and int interpretations. We renamed these instructions to remove this form of prefix in https://github.com/WebAssembly/simd/issues/297 and https://github.com/WebAssembly/simd/issues/316 and this commit brings the names in LLVM up to date. 
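Concretely, the mnemonics change as follows (as reflected in the test updates below):

    v8x16.shuffle     -> i8x16.shuffle
    v8x16.swizzle     -> i8x16.swizzle
    v8x16.load_splat  -> v128.load8_splat
    v16x8.load_splat  -> v128.load16_splat
    v32x4.load_splat  -> v128.load32_splat
    v64x2.load_splat  -> v128.load64_splat
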
Differential Revision: https://reviews.llvm.org/D93722 --- .../MCTargetDesc/WebAssemblyMCTargetDesc.h | 8 +- .../WebAssembly/WebAssemblyInstrSIMD.td | 46 ++++----- .../CodeGen/WebAssembly/simd-build-vector.ll | 6 +- .../CodeGen/WebAssembly/simd-intrinsics.ll | 10 +- .../CodeGen/WebAssembly/simd-load-splat.ll | 2 +- .../WebAssembly/simd-load-store-alignment.ll | 36 +++---- .../WebAssembly/simd-nested-shuffles.ll | 2 +- llvm/test/CodeGen/WebAssembly/simd-offset.ll | 96 +++++++++---------- .../WebAssembly/simd-shift-complex-splats.ll | 2 +- .../WebAssembly/simd-shuffle-bitcast.ll | 2 +- llvm/test/CodeGen/WebAssembly/simd.ll | 48 +++++----- .../test/MC/Disassembler/WebAssembly/wasm.txt | 2 +- llvm/test/MC/WebAssembly/simd-encodings.s | 24 ++--- 13 files changed, 142 insertions(+), 142 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index fccee4b96ed513..4bc77aa68668bc 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -194,7 +194,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW8_U_XCHG_I64) WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I32) WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I64) - WASM_LOAD_STORE(LOAD_SPLAT_v8x16) + WASM_LOAD_STORE(LOAD8_SPLAT) WASM_LOAD_STORE(LOAD_LANE_v16i8) WASM_LOAD_STORE(STORE_LANE_v16i8) return 0; @@ -222,7 +222,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW16_U_XCHG_I64) WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I32) WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I64) - WASM_LOAD_STORE(LOAD_SPLAT_v16x8) + WASM_LOAD_STORE(LOAD16_SPLAT) WASM_LOAD_STORE(LOAD_LANE_v8i16) WASM_LOAD_STORE(STORE_LANE_v8i16) return 1; @@ -253,7 +253,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW32_U_CMPXCHG_I64) WASM_LOAD_STORE(MEMORY_ATOMIC_NOTIFY) WASM_LOAD_STORE(MEMORY_ATOMIC_WAIT32) - WASM_LOAD_STORE(LOAD_SPLAT_v32x4) + WASM_LOAD_STORE(LOAD32_SPLAT) WASM_LOAD_STORE(LOAD_ZERO_v4i32) WASM_LOAD_STORE(LOAD_LANE_v4i32) WASM_LOAD_STORE(STORE_LANE_v4i32) @@ -272,7 +272,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW_XCHG_I64) WASM_LOAD_STORE(ATOMIC_RMW_CMPXCHG_I64) WASM_LOAD_STORE(MEMORY_ATOMIC_WAIT64) - WASM_LOAD_STORE(LOAD_SPLAT_v64x2) + WASM_LOAD_STORE(LOAD64_SPLAT) WASM_LOAD_STORE(LOAD_EXTEND_S_v8i16) WASM_LOAD_STORE(LOAD_EXTEND_U_v8i16) WASM_LOAD_STORE(LOAD_EXTEND_S_v4i32) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 191cdea0c0aeee..e48bbaebd47e92 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -64,55 +64,55 @@ defm : LoadPatOffsetOnly; defm : LoadPatGlobalAddrOffOnly; } -// vNxM.load_splat -multiclass SIMDLoadSplat simdop> { +// v128.loadX_splat +multiclass SIMDLoadSplat simdop> { let mayLoad = 1, UseNamedOperandTable = 1 in { - defm LOAD_SPLAT_#vec#_A32 : + defm LOAD#size#_SPLAT_A32 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), [], - vec#".load_splat\t$dst, ${off}(${addr})$p2align", - vec#".load_splat\t$off$p2align", simdop>; - defm LOAD_SPLAT_#vec#_A64 : + "v128.load"#size#"_splat\t$dst, ${off}(${addr})$p2align", + "v128.load"#size#"_splat\t$off$p2align", simdop>; + defm LOAD#size#_SPLAT_A64 : SIMD_I<(outs 
V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr), (outs), (ins P2Align:$p2align, offset64_op:$off), [], - vec#".load_splat\t$dst, ${off}(${addr})$p2align", - vec#".load_splat\t$off$p2align", simdop>; + "v128.load"#size#"_splat\t$dst, ${off}(${addr})$p2align", + "v128.load"#size#"_splat\t$off$p2align", simdop>; } } -defm "" : SIMDLoadSplat<"v8x16", 7>; -defm "" : SIMDLoadSplat<"v16x8", 8>; -defm "" : SIMDLoadSplat<"v32x4", 9>; -defm "" : SIMDLoadSplat<"v64x2", 10>; +defm "" : SIMDLoadSplat<8, 7>; +defm "" : SIMDLoadSplat<16, 8>; +defm "" : SIMDLoadSplat<32, 9>; +defm "" : SIMDLoadSplat<64, 10>; def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>; def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>; -foreach args = [["v16i8", "v8x16"], ["v8i16", "v16x8"], ["v4i32", "v32x4"], - ["v2i64", "v64x2"], ["v4f32", "v32x4"], ["v2f64", "v64x2"]] in { +foreach args = [["v16i8", "8"], ["v8i16", "16"], ["v4i32", "32"], + ["v2i64", "64"], ["v4f32", "32"], ["v2f64", "64"]] in { defm : LoadPatNoOffset(args[0]), load_splat, - "LOAD_SPLAT_"#args[1]>; + "LOAD"#args[1]#"_SPLAT">; defm : LoadPatImmOff(args[0]), load_splat, regPlusImm, - "LOAD_SPLAT_"#args[1]>; + "LOAD"#args[1]#"_SPLAT">; defm : LoadPatImmOff(args[0]), load_splat, or_is_add, - "LOAD_SPLAT_"#args[1]>; + "LOAD"#args[1]#"_SPLAT">; defm : LoadPatOffsetOnly(args[0]), load_splat, - "LOAD_SPLAT_"#args[1]>; + "LOAD"#args[1]#"_SPLAT">; defm : LoadPatGlobalAddrOffOnly(args[0]), load_splat, - "LOAD_SPLAT_"#args[1]>; + "LOAD"#args[1]#"_SPLAT">; } // Load and extend @@ -401,10 +401,10 @@ defm SHUFFLE : vec_i8imm_op:$mC, vec_i8imm_op:$mD, vec_i8imm_op:$mE, vec_i8imm_op:$mF), [], - "v8x16.shuffle\t$dst, $x, $y, "# + "i8x16.shuffle\t$dst, $x, $y, "# "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "# "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF", - "v8x16.shuffle\t"# + "i8x16.shuffle\t"# "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "# "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF", 13>; @@ -433,14 +433,14 @@ def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y), (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>; } -// Swizzle lanes: v8x16.swizzle +// Swizzle lanes: i8x16.swizzle def wasm_swizzle_t : SDTypeProfile<1, 2, []>; def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>; defm SWIZZLE : SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins), [(set (v16i8 V128:$dst), (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))], - "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 14>; + "i8x16.swizzle\t$dst, $src, $mask", "i8x16.swizzle", 14>; def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)), (SWIZZLE V128:$src, V128:$mask)>; diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll index 4f758878737843..1360e0172d3fd2 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll @@ -178,7 +178,7 @@ define <8 x i16> @splat_common_arg_i16x8(i16 %a, i16 %b, i16 %c) { ; CHECK-LABEL: swizzle_one_i8x16: ; CHECK-NEXT: .functype swizzle_one_i8x16 (v128, v128) -> (v128) -; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 +; CHECK-NEXT: i8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 ; CHECK-NEXT: return $pop[[L0]] define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) { %m0 = extractelement <16 x i8> %mask, i32 0 @@ -189,7 +189,7 @@ define <16 x 
i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) { ; CHECK-LABEL: swizzle_all_i8x16: ; CHECK-NEXT: .functype swizzle_all_i8x16 (v128, v128) -> (v128) -; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 +; CHECK-NEXT: i8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 ; CHECK-NEXT: return $pop[[L0]] define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) { %m0 = extractelement <16 x i8> %mask, i32 0 @@ -256,7 +256,7 @@ define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) { ; CHECK-LABEL: mashup_swizzle_i8x16: ; CHECK-NEXT: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128) -; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 +; CHECK-NEXT: i8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 ; CHECK: i8x16.replace_lane ; CHECK: i8x16.replace_lane ; CHECK: i8x16.replace_lane diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll index 23a7bfbde927ae..da7343770de77c 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -13,7 +13,7 @@ target triple = "wasm32-unknown-unknown" ; ============================================================================== ; CHECK-LABEL: swizzle_v16i8: ; SIMD128-NEXT: .functype swizzle_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.swizzle $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: i8x16.swizzle $push[[R:[0-9]+]]=, $0, $1{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} declare <16 x i8> @llvm.wasm.swizzle(<16 x i8>, <16 x i8>) define <16 x i8> @swizzle_v16i8(<16 x i8> %x, <16 x i8> %y) { @@ -164,9 +164,9 @@ define <16 x i8> @narrow_unsigned_v16i8(<8 x i16> %low, <8 x i16> %high) { } ; CHECK-LABEL: shuffle_v16i8: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} declare <16 x i8> @llvm.wasm.shuffle( @@ -180,9 +180,9 @@ define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) { } ; CHECK-LABEL: shuffle_undef_v16i8: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) { diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll b/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll index 4e693c285a3faa..3d08a586edb5a0 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll @@ -9,7 +9,7 @@ target triple = "wasm32-unknown-unknown" ; CHECK-LABEL: load_splat: ; CHECK-NEXT: .functype load_splat (i32, i32) -> (i32) ; CHECK-NEXT: i32.load8_u $[[E:[0-9]+]]=, 0($0){{$}} -; CHECK-NEXT: v8x16.load_splat $push[[V:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load8_splat $push[[V:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: v128.store 0($1), $pop[[V]]{{$}} ; CHECK-NEXT: return $[[E]]{{$}} define i8 @load_splat(i8* %p, <16 x i8>* %out) { diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll b/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll index 8ebeb15ccc9a25..000b7730e3bf2a 100644 --- 
a/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll @@ -89,11 +89,11 @@ define void @store_v16i8_a32(<16 x i8> *%p, <16 x i8> %v) { ret void } -; 1 is the default alignment for v8x16.load_splat so no attribute is needed. +; 1 is the default alignment for v128.load8_splat so no attribute is needed. ; CHECK-LABEL: load_splat_v16i8_a1: ; CHECK-NEXT: .functype load_splat_v16i8_a1 (i32) -> (v128){{$}} -; CHECK-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load8_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_splat_v16i8_a1(i8* %p) { %e = load i8, i8* %p, align 1 @@ -106,7 +106,7 @@ define <16 x i8> @load_splat_v16i8_a1(i8* %p) { ; CHECK-LABEL: load_splat_v16i8_a2: ; CHECK-NEXT: .functype load_splat_v16i8_a2 (i32) -> (v128){{$}} -; CHECK-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load8_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_splat_v16i8_a2(i8* %p) { %e = load i8, i8* %p, align 2 @@ -304,7 +304,7 @@ define <8 x i16> @load_sext_v8i16_a16(<8 x i8>* %p) { ; CHECK-LABEL: load_splat_v8i16_a1: ; CHECK-NEXT: .functype load_splat_v8i16_a1 (i32) -> (v128){{$}} -; CHECK-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} +; CHECK-NEXT: v128.load16_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_splat_v8i16_a1(i16* %p) { %e = load i16, i16* %p, align 1 @@ -313,11 +313,11 @@ define <8 x i16> @load_splat_v8i16_a1(i16* %p) { ret <8 x i16> %v2 } -; 2 is the default alignment for v16x8.load_splat so no attribute is needed. +; 2 is the default alignment for v128.load16_splat so no attribute is needed. 
; CHECK-LABEL: load_splat_v8i16_a2: ; CHECK-NEXT: .functype load_splat_v8i16_a2 (i32) -> (v128){{$}} -; CHECK-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load16_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_splat_v8i16_a2(i16* %p) { %e = load i16, i16* %p, align 2 @@ -330,7 +330,7 @@ define <8 x i16> @load_splat_v8i16_a2(i16* %p) { ; CHECK-LABEL: load_splat_v8i16_a4: ; CHECK-NEXT: .functype load_splat_v8i16_a4 (i32) -> (v128){{$}} -; CHECK-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load16_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_splat_v8i16_a4(i16* %p) { %e = load i16, i16* %p, align 4 @@ -528,7 +528,7 @@ define <4 x i32> @load_sext_v4i32_a16(<4 x i16>* %p) { ; CHECK-LABEL: load_splat_v4i32_a1: ; CHECK-NEXT: .functype load_splat_v4i32_a1 (i32) -> (v128){{$}} -; CHECK-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} +; CHECK-NEXT: v128.load32_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_a1(i32* %addr) { %e = load i32, i32* %addr, align 1 @@ -539,7 +539,7 @@ define <4 x i32> @load_splat_v4i32_a1(i32* %addr) { ; CHECK-LABEL: load_splat_v4i32_a2: ; CHECK-NEXT: .functype load_splat_v4i32_a2 (i32) -> (v128){{$}} -; CHECK-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=1{{$}} +; CHECK-NEXT: v128.load32_splat $push[[R:[0-9]+]]=, 0($0):p2align=1{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_a2(i32* %addr) { %e = load i32, i32* %addr, align 2 @@ -548,11 +548,11 @@ define <4 x i32> @load_splat_v4i32_a2(i32* %addr) { ret <4 x i32> %v2 } -; 4 is the default alignment for v32x4.load_splat so no attribute is needed. +; 4 is the default alignment for v128.load32_splat so no attribute is needed. 
; CHECK-LABEL: load_splat_v4i32_a4: ; CHECK-NEXT: .functype load_splat_v4i32_a4 (i32) -> (v128){{$}} -; CHECK-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load32_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_a4(i32* %addr) { %e = load i32, i32* %addr, align 4 @@ -565,7 +565,7 @@ define <4 x i32> @load_splat_v4i32_a4(i32* %addr) { ; CHECK-LABEL: load_splat_v4i32_a8: ; CHECK-NEXT: .functype load_splat_v4i32_a8 (i32) -> (v128){{$}} -; CHECK-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load32_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_a8(i32* %addr) { %e = load i32, i32* %addr, align 8 @@ -660,7 +660,7 @@ define void @store_v2i64_a32(<2 x i64> *%p, <2 x i64> %v) { ; CHECK-LABEL: load_splat_v2i64_a1: ; CHECK-NEXT: .functype load_splat_v2i64_a1 (i32) -> (v128){{$}} -; CHECK-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} +; CHECK-NEXT: v128.load64_splat $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_a1(i64* %p) { %e = load i64, i64* %p, align 1 @@ -671,7 +671,7 @@ define <2 x i64> @load_splat_v2i64_a1(i64* %p) { ; CHECK-LABEL: load_splat_v2i64_a2: ; CHECK-NEXT: .functype load_splat_v2i64_a2 (i32) -> (v128){{$}} -; CHECK-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=1{{$}} +; CHECK-NEXT: v128.load64_splat $push[[R:[0-9]+]]=, 0($0):p2align=1{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_a2(i64* %p) { %e = load i64, i64* %p, align 2 @@ -682,7 +682,7 @@ define <2 x i64> @load_splat_v2i64_a2(i64* %p) { ; CHECK-LABEL: load_splat_v2i64_a4: ; CHECK-NEXT: .functype load_splat_v2i64_a4 (i32) -> (v128){{$}} -; CHECK-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0):p2align=2{{$}} +; CHECK-NEXT: v128.load64_splat $push[[R:[0-9]+]]=, 0($0):p2align=2{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_a4(i64* %p) { %e = load i64, i64* %p, align 4 @@ -691,11 +691,11 @@ define <2 x i64> @load_splat_v2i64_a4(i64* %p) { ret <2 x i64> %v2 } -; 8 is the default alignment for v64x2.load_splat so no attribute is needed. +; 8 is the default alignment for v128.load64_splat so no attribute is needed. 
; CHECK-LABEL: load_splat_v2i64_a8: ; CHECK-NEXT: .functype load_splat_v2i64_a8 (i32) -> (v128){{$}} -; CHECK-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load64_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_a8(i64* %p) { %e = load i64, i64* %p, align 8 @@ -708,7 +708,7 @@ define <2 x i64> @load_splat_v2i64_a8(i64* %p) { ; CHECK-LABEL: load_splat_v2i64_a16: ; CHECK-NEXT: .functype load_splat_v2i64_a16 (i32) -> (v128){{$}} -; CHECK-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load64_splat $push[[R:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_a16(i64* %p) { %e = load i64, i64* %p, align 16 diff --git a/llvm/test/CodeGen/WebAssembly/simd-nested-shuffles.ll b/llvm/test/CodeGen/WebAssembly/simd-nested-shuffles.ll index 597ab58e879e00..b72086a2d6cb91 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-nested-shuffles.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-nested-shuffles.ll @@ -6,7 +6,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" -; CHECK: v8x16.shuffle +; CHECK: i8x16.shuffle define <4 x i32> @foo(<4 x i32> %x) { %1 = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> diff --git a/llvm/test/CodeGen/WebAssembly/simd-offset.ll b/llvm/test/CodeGen/WebAssembly/simd-offset.ll index b2d32936df1305..fb41653a514a02 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-offset.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-offset.ll @@ -25,7 +25,7 @@ define <16 x i8> @load_splat_v16i8(i8* %p) { ; CHECK: .functype load_splat_v16i8 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v8x16.load_splat 0 +; CHECK-NEXT: v128.load8_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load i8, i8* %p %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 @@ -52,7 +52,7 @@ define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) { ; CHECK: .functype load_splat_v16i8_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v8x16.load_splat 16 +; CHECK-NEXT: v128.load8_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i8* %p to i32 %r = add nuw i32 %q, 16 @@ -80,7 +80,7 @@ define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) { ; CHECK: .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v8x16.load_splat 1 +; CHECK-NEXT: v128.load8_splat 1 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i8, i8* %p, i32 1 %e = load i8, i8* %s @@ -110,7 +110,7 @@ define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -1 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v8x16.load_splat 0 +; CHECK-NEXT: v128.load8_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i8, i8* %p, i32 -1 %e = load i8, i8* %s @@ -142,7 +142,7 @@ define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v8x16.load_splat 0 +; CHECK-NEXT: v128.load8_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i8* %p to i32 %r = add nsw i32 %q, 16 @@ -174,7 +174,7 @@ define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 1 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v8x16.load_splat 0 +; CHECK-NEXT: v128.load8_splat 0 ; CHECK-NEXT: # 
fallthrough-return %s = getelementptr i8, i8* %p, i32 1 %e = load i8, i8* %s @@ -200,7 +200,7 @@ define <16 x i8> @load_splat_v16i8_from_numeric_address() { ; CHECK: .functype load_splat_v16i8_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v8x16.load_splat 32 +; CHECK-NEXT: v128.load8_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to i8* %e = load i8, i8* %s @@ -227,7 +227,7 @@ define <16 x i8> @load_splat_v16i8_from_global_address() { ; CHECK: .functype load_splat_v16i8_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v8x16.load_splat gv_i8 +; CHECK-NEXT: v128.load8_splat gv_i8 ; CHECK-NEXT: # fallthrough-return %e = load i8, i8* @gv_i8 %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 @@ -366,7 +366,7 @@ define <8 x i16> @load_splat_v8i16(i16* %p) { ; CHECK: .functype load_splat_v8i16 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v16x8.load_splat 0 +; CHECK-NEXT: v128.load16_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load i16, i16* %p %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 @@ -428,7 +428,7 @@ define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) { ; CHECK: .functype load_splat_v8i16_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v16x8.load_splat 16 +; CHECK-NEXT: v128.load16_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i16* %p to i32 %r = add nuw i32 %q, 16 @@ -500,7 +500,7 @@ define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) { ; CHECK: .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v16x8.load_splat 2 +; CHECK-NEXT: v128.load16_splat 2 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i16, i16* %p, i32 1 %e = load i16, i16* %s @@ -568,7 +568,7 @@ define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -2 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v16x8.load_splat 0 +; CHECK-NEXT: v128.load16_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i16, i16* %p, i32 -1 %e = load i16, i16* %s @@ -644,7 +644,7 @@ define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v16x8.load_splat 0 +; CHECK-NEXT: v128.load16_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i16* %p to i32 %r = add nsw i32 %q, 16 @@ -726,7 +726,7 @@ define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 2 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v16x8.load_splat 0 +; CHECK-NEXT: v128.load16_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr i16, i16* %p, i32 1 %e = load i16, i16* %s @@ -796,7 +796,7 @@ define <8 x i16> @load_splat_v8i16_from_numeric_address() { ; CHECK: .functype load_splat_v8i16_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v16x8.load_splat 32 +; CHECK-NEXT: v128.load16_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to i16* %e = load i16, i16* %s @@ -861,7 +861,7 @@ define <8 x i16> @load_splat_v8i16_from_global_address() { ; CHECK: .functype load_splat_v8i16_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v16x8.load_splat gv_i16 +; CHECK-NEXT: v128.load16_splat gv_i16 ; 
CHECK-NEXT: # fallthrough-return %e = load i16, i16* @gv_i16 %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 @@ -1197,7 +1197,7 @@ define <4 x i32> @load_splat_v4i32(i32* %addr) { ; CHECK: .functype load_splat_v4i32 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load i32, i32* %addr, align 4 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 @@ -1259,7 +1259,7 @@ define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) { ; CHECK: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 16 +; CHECK-NEXT: v128.load32_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i32* %p to i32 %r = add nuw i32 %q, 16 @@ -1331,7 +1331,7 @@ define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) { ; CHECK: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 4 +; CHECK-NEXT: v128.load32_splat 4 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i32, i32* %p, i32 1 %e = load i32, i32* %s @@ -1399,7 +1399,7 @@ define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -4 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i32, i32* %p, i32 -1 %e = load i32, i32* %s @@ -1475,7 +1475,7 @@ define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i32* %p to i32 %r = add nsw i32 %q, 16 @@ -1557,7 +1557,7 @@ define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 4 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr i32, i32* %p, i32 1 %e = load i32, i32* %s @@ -1627,7 +1627,7 @@ define <4 x i32> @load_splat_v4i32_from_numeric_address() { ; CHECK: .functype load_splat_v4i32_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v32x4.load_splat 32 +; CHECK-NEXT: v128.load32_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to i32* %e = load i32, i32* %s @@ -1692,7 +1692,7 @@ define <4 x i32> @load_splat_v4i32_from_global_address() { ; CHECK: .functype load_splat_v4i32_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v32x4.load_splat gv_i32 +; CHECK-NEXT: v128.load32_splat gv_i32 ; CHECK-NEXT: # fallthrough-return %e = load i32, i32* @gv_i32 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 @@ -2027,7 +2027,7 @@ define <2 x i64> @load_splat_v2i64(i64* %p) { ; CHECK: .functype load_splat_v2i64 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load i64, i64* %p %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 @@ -2089,7 +2089,7 @@ define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) { ; CHECK: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; 
CHECK-NEXT: v64x2.load_splat 16 +; CHECK-NEXT: v128.load64_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i64* %p to i32 %r = add nuw i32 %q, 16 @@ -2161,7 +2161,7 @@ define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) { ; CHECK: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v64x2.load_splat 8 +; CHECK-NEXT: v128.load64_splat 8 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i64, i64* %p, i32 1 %e = load i64, i64* %s @@ -2229,7 +2229,7 @@ define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -8 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i64, i64* %p, i32 -1 %e = load i64, i64* %s @@ -2305,7 +2305,7 @@ define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i64* %p to i32 %r = add nsw i32 %q, 16 @@ -2387,7 +2387,7 @@ define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 8 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr i64, i64* %p, i32 1 %e = load i64, i64* %s @@ -2457,7 +2457,7 @@ define <2 x i64> @load_splat_v2i64_from_numeric_address() { ; CHECK: .functype load_splat_v2i64_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v64x2.load_splat 32 +; CHECK-NEXT: v128.load64_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to i64* %e = load i64, i64* %s @@ -2522,7 +2522,7 @@ define <2 x i64> @load_splat_v2i64_from_global_address() { ; CHECK: .functype load_splat_v2i64_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v64x2.load_splat gv_i64 +; CHECK-NEXT: v128.load64_splat gv_i64 ; CHECK-NEXT: # fallthrough-return %e = load i64, i64* @gv_i64 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 @@ -2697,7 +2697,7 @@ define <4 x float> @load_splat_v4f32(float* %p) { ; CHECK: .functype load_splat_v4f32 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load float, float* %p %v1 = insertelement <4 x float> undef, float %e, i32 0 @@ -2724,7 +2724,7 @@ define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) { ; CHECK: .functype load_splat_v4f32_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 16 +; CHECK-NEXT: v128.load32_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint float* %p to i32 %r = add nuw i32 %q, 16 @@ -2752,7 +2752,7 @@ define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) { ; CHECK: .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v32x4.load_splat 4 +; CHECK-NEXT: v128.load32_splat 4 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds float, float* %p, i32 1 %e = load float, float* %s @@ -2782,7 +2782,7 @@ define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p ; CHECK-NEXT: 
local.get 0 ; CHECK-NEXT: i32.const -4 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds float, float* %p, i32 -1 %e = load float, float* %s @@ -2814,7 +2814,7 @@ define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint float* %p to i32 %r = add nsw i32 %q, 16 @@ -2846,7 +2846,7 @@ define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 4 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: v128.load32_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr float, float* %p, i32 1 %e = load float, float* %s @@ -2872,7 +2872,7 @@ define <4 x float> @load_splat_v4f32_from_numeric_address() { ; CHECK: .functype load_splat_v4f32_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v32x4.load_splat 32 +; CHECK-NEXT: v128.load32_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to float* %e = load float, float* %s @@ -2899,7 +2899,7 @@ define <4 x float> @load_splat_v4f32_from_global_address() { ; CHECK: .functype load_splat_v4f32_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v32x4.load_splat gv_f32 +; CHECK-NEXT: v128.load32_splat gv_f32 ; CHECK-NEXT: # fallthrough-return %e = load float, float* @gv_f32 %v1 = insertelement <4 x float> undef, float %e, i32 0 @@ -3038,7 +3038,7 @@ define <2 x double> @load_splat_v2f64(double* %p) { ; CHECK: .functype load_splat_v2f64 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %e = load double, double* %p %v1 = insertelement <2 x double> undef, double %e, i32 0 @@ -3065,7 +3065,7 @@ define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) { ; CHECK: .functype load_splat_v2f64_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v64x2.load_splat 16 +; CHECK-NEXT: v128.load64_splat 16 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint double* %p to i32 %r = add nuw i32 %q, 16 @@ -3093,7 +3093,7 @@ define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) { ; CHECK: .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v64x2.load_splat 8 +; CHECK-NEXT: v128.load64_splat 8 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds double, double* %p, i32 1 %e = load double, double* %s @@ -3123,7 +3123,7 @@ define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -8 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds double, double* %p, i32 -1 %e = load double, double* %s @@ -3155,7 +3155,7 @@ define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint double* %p to i32 %r = add nsw i32 %q, 16 @@ -3187,7 +3187,7 @@ 
define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 8 ; CHECK-NEXT: i32.add -; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: v128.load64_splat 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr double, double* %p, i32 1 %e = load double, double* %s @@ -3213,7 +3213,7 @@ define <2 x double> @load_splat_v2f64_from_numeric_address() { ; CHECK: .functype load_splat_v2f64_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v64x2.load_splat 32 +; CHECK-NEXT: v128.load64_splat 32 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to double* %e = load double, double* %s @@ -3240,7 +3240,7 @@ define <2 x double> @load_splat_v2f64_from_global_address() { ; CHECK: .functype load_splat_v2f64_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: v64x2.load_splat gv_f64 +; CHECK-NEXT: v128.load64_splat gv_f64 ; CHECK-NEXT: # fallthrough-return %e = load double, double* @gv_f64 %v1 = insertelement <2 x double> undef, double %e, i32 0 diff --git a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll index 2473f0b27b7e8c..4582bc62216a7c 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll @@ -67,7 +67,7 @@ define <16 x i8> @shl_abs(<16 x i8> %v, i8 %a) { ; CHECK-NEXT: i8x16.splat $push1=, $1 ; CHECK-NEXT: i8x16.splat $push0=, $2 ; CHECK-NEXT: i8x16.add $push2=, $pop1, $pop0 -; CHECK-NEXT: v8x16.shuffle $push3=, $pop2, $0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: i8x16.shuffle $push3=, $pop2, $0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK-NEXT: i8x16.abs $push101=, $pop3 ; CHECK-NEXT: local.tee $push100=, $3=, $pop101 ; CHECK-NEXT: i8x16.extract_lane_u $push9=, $pop100, 0 diff --git a/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll b/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll index b3e2db98861d51..3e73b47c5a5b8e 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll @@ -21,7 +21,7 @@ define <4 x i32> @f32x4_splat(float %x) { ; CHECK-LABEL: not_a_vec: ; CHECK-NEXT: .functype not_a_vec (i64, i64) -> (v128){{$}} ; CHECK-NEXT: i64x2.splat $push[[L1:[0-9]+]]=, $0{{$}} -; CHECK-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $pop[[L1]], $2, 0, 1, 2, 3 +; CHECK-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $pop[[L1]], $2, 0, 1, 2, 3 ; CHECK-NEXT: return $pop[[R]] define <4 x i32> @not_a_vec(i128 %x) { %a = bitcast i128 %x to <4 x i32> diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll index 25e647f07230a5..c8053293ebac0f 100644 --- a/llvm/test/CodeGen/WebAssembly/simd.ll +++ b/llvm/test/CodeGen/WebAssembly/simd.ll @@ -202,9 +202,9 @@ define <16 x i8> @replace_zero_v16i8(<16 x i8> %v, i8 %x) { } ; CHECK-LABEL: shuffle_v16i8: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) { @@ -215,9 +215,9 @@ define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) { } ; CHECK-LABEL: shuffle_undef_v16i8: 
-; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) { @@ -472,9 +472,9 @@ define <8 x i16> @replace_zero_v8i16(<8 x i16> %v, i16 %x) { } ; CHECK-LABEL: shuffle_v8i16: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shuffle_v8i16(<8 x i16> %x, <8 x i16> %y) { @@ -484,9 +484,9 @@ define <8 x i16> @shuffle_v8i16(<8 x i16> %x, <8 x i16> %y) { } ; CHECK-LABEL: shuffle_undef_v8i16: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shuffle_undef_v8i16(<8 x i16> %x, <8 x i16> %y) { @@ -634,9 +634,9 @@ define <4 x i32> @replace_zero_v4i32(<4 x i32> %v, i32 %x) { } ; CHECK-LABEL: shuffle_v4i32: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shuffle_v4i32(<4 x i32> %x, <4 x i32> %y) { @@ -646,9 +646,9 @@ define <4 x i32> @shuffle_v4i32(<4 x i32> %x, <4 x i32> %y) { } ; CHECK-LABEL: shuffle_undef_v4i32: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shuffle_undef_v4i32(<4 x i32> %x, <4 x i32> %y) { @@ -785,9 +785,9 @@ define <2 x i64> @replace_zero_v2i64(<2 x i64> %v, i64 %x) { } ; CHECK-LABEL: shuffle_v2i64: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shuffle_v2i64(<2 x i64> %x, <2 x i64> %y) { @@ -796,9 +796,9 @@ define <2 x i64> @shuffle_v2i64(<2 x i64> %x, <2 x i64> %y) { } ; CHECK-LABEL: shuffle_undef_v2i64: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x 
i64> @shuffle_undef_v2i64(<2 x i64> %x, <2 x i64> %y) { @@ -934,9 +934,9 @@ define <4 x float> @replace_zero_v4f32(<4 x float> %v, float %x) { } ; CHECK-LABEL: shuffle_v4f32: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @shuffle_v4f32(<4 x float> %x, <4 x float> %y) { @@ -946,9 +946,9 @@ define <4 x float> @shuffle_v4f32(<4 x float> %x, <4 x float> %y) { } ; CHECK-LABEL: shuffle_undef_v4f32: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @shuffle_undef_v4f32(<4 x float> %x, <4 x float> %y) { @@ -1085,9 +1085,9 @@ define <2 x double> @replace_zero_v2f64(<2 x double> %v, double %x) { } ; CHECK-LABEL: shuffle_v2f64: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, ; SIMD128-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @shuffle_v2f64(<2 x double> %x, <2 x double> %y) { @@ -1097,9 +1097,9 @@ define <2 x double> @shuffle_v2f64(<2 x double> %x, <2 x double> %y) { } ; CHECK-LABEL: shuffle_undef_v2f64: -; NO-SIMD128-NOT: v8x16 +; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shuffle_undef_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, +; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, ; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @shuffle_undef_v2f64(<2 x double> %x, <2 x double> %y) { diff --git a/llvm/test/MC/Disassembler/WebAssembly/wasm.txt b/llvm/test/MC/Disassembler/WebAssembly/wasm.txt index bb50b646ab5493..8201213e54b26c 100644 --- a/llvm/test/MC/Disassembler/WebAssembly/wasm.txt +++ b/llvm/test/MC/Disassembler/WebAssembly/wasm.txt @@ -36,7 +36,7 @@ # CHECK: v128.const 50462976, 117835012, 185207048, 252579084 0xFD 0x0C 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F -# CHECK: v8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +# CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 0xFD 0x0D 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F # Check LEB128 encoding of SIMD instructions diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index 509f4246475f0f..91e1f07fe44c47 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -24,17 +24,17 @@ main: # CHECK: i64x2.load32x2_u 32 # encoding: [0xfd,0x06,0x03,0x20] i64x2.load32x2_u 32 - # CHECK: v8x16.load_splat 48 # encoding: [0xfd,0x07,0x00,0x30] - v8x16.load_splat 48 + # CHECK: v128.load8_splat 48 # encoding: [0xfd,0x07,0x00,0x30] + v128.load8_splat 48 - # CHECK: v16x8.load_splat 48 # encoding: [0xfd,0x08,0x01,0x30] 
- v16x8.load_splat 48 + # CHECK: v128.load16_splat 48 # encoding: [0xfd,0x08,0x01,0x30] + v128.load16_splat 48 - # CHECK: v32x4.load_splat 48 # encoding: [0xfd,0x09,0x02,0x30] - v32x4.load_splat 48 + # CHECK: v128.load32_splat 48 # encoding: [0xfd,0x09,0x02,0x30] + v128.load32_splat 48 - # CHECK: v64x2.load_splat 48 # encoding: [0xfd,0x0a,0x03,0x30] - v64x2.load_splat 48 + # CHECK: v128.load64_splat 48 # encoding: [0xfd,0x0a,0x03,0x30] + v128.load64_splat 48 # CHECK: v128.store 48 # encoding: [0xfd,0x0b,0x04,0x30] v128.store 48 @@ -66,15 +66,15 @@ main: # CHECK-SAME: 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f] v128.const 0x1.60504030201p-911, 0x1.e0d0c0b0a0908p-783 - # CHECK: v8x16.shuffle 0, 17, 2, 19, 4, 21, 6, 23, + # CHECK: i8x16.shuffle 0, 17, 2, 19, 4, 21, 6, 23, # CHECK-SAME: 8, 25, 10, 27, 12, 29, 14, 31 # CHECK-SAME: # encoding: [0xfd,0x0d, # CHECK-SAME: 0x00,0x11,0x02,0x13,0x04,0x15,0x06,0x17, # CHECK-SAME: 0x08,0x19,0x0a,0x1b,0x0c,0x1d,0x0e,0x1f] - v8x16.shuffle 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31 + i8x16.shuffle 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31 - # CHECK: v8x16.swizzle # encoding: [0xfd,0x0e] - v8x16.swizzle + # CHECK: i8x16.swizzle # encoding: [0xfd,0x0e] + i8x16.swizzle # CHECK: i8x16.splat # encoding: [0xfd,0x0f] i8x16.splat From 8de43b926f0e960bbc5b6a53d1b613c46b7c774b Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 22 Dec 2020 11:22:21 +0100 Subject: [PATCH 08/10] [mlir] Remove instance methods from LLVMType LLVMType contains multiple instance methods that were introduced initially for compatibility with the LLVM API. These methods boil down to a `cast` followed by a type-specific call. Arguably, they are mostly used in an LLVM cast-follows-isa anti-pattern. This doesn't connect nicely to the rest of the MLIR infrastructure and actively prevents making the LLVM dialect type system more open, e.g., reusing built-in types when appropriate. Remove these instance methods and replace their uses with appropriate casts and methods on the derived classes. In some cases, the result may look slightly more verbose, but most call sites should actually use a stricter subtype of LLVMType anyway and avoid the isa/cast.
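For illustration, a minimal sketch of the migration pattern (not code taken from this patch; `type` and `elem` are placeholder names):

  // Before: instance methods on the generic LLVMType.
  LLVM::LLVMType elem;
  if (type.isStructTy() && type.getStructNumElements() == 2)
    elem = type.getStructElementType(1);

  // After: cast to the concrete subclass and use its own accessors.
  if (auto structTy = type.dyn_cast<LLVM::LLVMStructType>())
    if (structTy.getBody().size() == 2)
      elem = structTy.getBody()[1];

  // Similarly, elementType.getPointerTo(addrSpace) becomes
  // LLVM::LLVMPointerType::get(elementType, addrSpace).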
Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D93680 --- .../StandardToLLVM/ConvertStandardToLLVM.h | 3 +- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 28 ++- mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h | 75 ++----- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 9 +- .../Conversion/AsyncToLLVM/AsyncToLLVM.cpp | 10 +- .../ConvertLaunchFuncToRuntimeCalls.cpp | 15 +- .../lib/Conversion/GPUCommon/GPUOpsLowering.h | 18 +- .../GPUCommon/OpToFuncCallLowering.h | 9 +- .../ConvertLaunchFuncToVulkanCalls.cpp | 45 ++-- .../SPIRVToLLVM/ConvertSPIRVToLLVM.cpp | 8 +- .../StandardToLLVM/StandardToLLVM.cpp | 159 +++++++------ .../VectorToLLVM/ConvertVectorToLLVM.cpp | 22 +- .../VectorToROCDL/VectorToROCDL.cpp | 6 +- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 211 +++++++++--------- mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp | 170 ++++---------- mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 5 +- mlir/lib/ExecutionEngine/JitRunner.cpp | 19 +- mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp | 45 ++-- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 4 +- mlir/test/Dialect/LLVMIR/invalid.mlir | 8 +- 20 files changed, 402 insertions(+), 467 deletions(-) diff --git a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h index 7c069c9cd5566f..63ff16a84ab876 100644 --- a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h +++ b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h @@ -446,7 +446,8 @@ class UnrankedMemRefDescriptor : public StructBuilder { /// Builds IR extracting the pointer to the first element of the size array. static Value sizeBasePtr(OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, - Value memRefDescPtr, LLVM::LLVMType elemPtrPtrType); + Value memRefDescPtr, + LLVM::LLVMPointerType elemPtrPtrType); /// Builds IR extracting the size[index] from the descriptor. 
static Value size(OpBuilder &builder, Location loc, LLVMTypeConverter typeConverter, Value sizeBasePtr, diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index df022ef47b3317..552fe15e68997f 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -51,7 +51,7 @@ def LLVM_VoidResultTypeOpBuilder : [{ auto llvmType = resultType.dyn_cast(); (void)llvmType; assert(llvmType && "result must be an LLVM type"); - assert(llvmType.isVoidTy() && + assert(llvmType.isa() && "for zero-result operands, only 'void' is accepted as result type"); build($_builder, $_state, operands, attributes); }]>; @@ -288,7 +288,7 @@ def LLVM_LoadOp : LLVM_Op<"load">, MemoryOpWithAlignmentAndAttributes { OpBuilderDAG<(ins "Value":$addr, CArg<"unsigned", "0">:$alignment, CArg<"bool", "false">:$isVolatile, CArg<"bool", "false">:$isNonTemporal), [{ - auto type = addr.getType().cast().getPointerElementTy(); + auto type = addr.getType().cast().getElementType(); build($_builder, $_state, type, addr, alignment, isVolatile, isNonTemporal); }]>, OpBuilderDAG<(ins "Type":$t, "Value":$addr, @@ -443,8 +443,8 @@ def LLVM_CallOp : LLVM_Op<"call"> { OpBuilderDAG<(ins "LLVMFuncOp":$func, "ValueRange":$operands, CArg<"ArrayRef", "{}">:$attributes), [{ - LLVMType resultType = func.getType().getFunctionResultType(); - if (!resultType.isVoidTy()) + LLVMType resultType = func.getType().getReturnType(); + if (!resultType.isa()) $_state.addTypes(resultType); $_state.addAttribute("callee", $_builder.getSymbolRefAttr(func)); $_state.addAttributes(attributes); @@ -515,12 +515,10 @@ def LLVM_ShuffleVectorOp : LLVM_Op<"shufflevector", [NoSideEffect]> { OpBuilderDAG<(ins "Value":$v1, "Value":$v2, "ArrayAttr":$mask, CArg<"ArrayRef", "{}">:$attrs)>]; let verifier = [{ - auto wrappedVectorType1 = v1().getType().cast(); - auto wrappedVectorType2 = v2().getType().cast(); - if (!wrappedVectorType2.isVectorTy()) - return emitOpError("expected LLVM IR Dialect vector type for operand #2"); - if (wrappedVectorType1.getVectorElementType() != - wrappedVectorType2.getVectorElementType()) + auto wrappedVectorType1 = v1().getType().cast(); + auto wrappedVectorType2 = v2().getType().cast(); + if (wrappedVectorType1.getElementType() != + wrappedVectorType2.getElementType()) return emitOpError("expected matching LLVM IR Dialect element types"); return success(); }]; @@ -768,13 +766,13 @@ def LLVM_AddressOfOp : LLVM_Op<"mlir.addressof"> { CArg<"ArrayRef", "{}">:$attrs), [{ build($_builder, $_state, - global.getType().getPointerTo(global.addr_space()), + LLVM::LLVMPointerType::get(global.getType(), global.addr_space()), global.sym_name(), attrs);}]>, OpBuilderDAG<(ins "LLVMFuncOp":$func, CArg<"ArrayRef", "{}">:$attrs), [{ build($_builder, $_state, - func.getType().getPointerTo(), func.getName(), attrs);}]> + LLVM::LLVMPointerType::get(func.getType()), func.getName(), attrs);}]> ]; let extraClassDeclaration = [{ @@ -970,12 +968,12 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", // to match the signature of the function. Block *addEntryBlock(); - LLVMType getType() { + LLVMFunctionType getType() { return (*this)->getAttrOfType(getTypeAttrName()) - .getValue().cast(); + .getValue().cast(); } bool isVarArg() { - return getType().isFunctionVarArg(); + return getType().isVarArg(); } // Hook for OpTrait::FunctionLike, returns the number of function arguments`. 
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h index f92bdf9e3041ac..e1938c12c809e9 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h @@ -80,58 +80,6 @@ class LLVMType : public Type { LLVMDialect &getDialect(); - /// Returns the size of a primitive type (including vectors) in bits, for - /// example, the size of !llvm.i16 is 16 and the size of !llvm.vec<4 x i16> - /// is 64. Returns 0 for non-primitive (aggregates such as struct) or types - /// that don't have a size (such as void). - llvm::TypeSize getPrimitiveSizeInBits(); - - /// Floating-point type utilities. - bool isBFloatTy() { return isa(); } - bool isHalfTy() { return isa(); } - bool isFloatTy() { return isa(); } - bool isDoubleTy() { return isa(); } - bool isFP128Ty() { return isa(); } - bool isX86_FP80Ty() { return isa(); } - bool isFloatingPointTy() { - return isa() || isa() || - isa() || isa() || - isa() || isa(); - } - - /// Array type utilities. - LLVMType getArrayElementType(); - unsigned getArrayNumElements(); - bool isArrayTy(); - - /// Integer type utilities. - bool isIntegerTy() { return isa(); } - bool isIntegerTy(unsigned bitwidth); - unsigned getIntegerBitWidth(); - - /// Vector type utilities. - LLVMType getVectorElementType(); - unsigned getVectorNumElements(); - llvm::ElementCount getVectorElementCount(); - bool isVectorTy(); - - /// Function type utilities. - LLVMType getFunctionParamType(unsigned argIdx); - unsigned getFunctionNumParams(); - LLVMType getFunctionResultType(); - bool isFunctionTy(); - bool isFunctionVarArg(); - - /// Pointer type utilities. - LLVMType getPointerTo(unsigned addrSpace = 0); - LLVMType getPointerElementTy(); - bool isPointerTy(); - - /// Struct type utilities. - LLVMType getStructElementType(unsigned i); - unsigned getStructNumElements(); - bool isStructTy(); - /// Utilities used to generate floating point types. static LLVMType getDoubleTy(MLIRContext *context); static LLVMType getFloatTy(MLIRContext *context); @@ -148,9 +96,7 @@ class LLVMType : public Type { static LLVMType getInt8Ty(MLIRContext *context) { return getIntNTy(context, /*numBits=*/8); } - static LLVMType getInt8PtrTy(MLIRContext *context) { - return getInt8Ty(context).getPointerTo(); - } + static LLVMType getInt8PtrTy(MLIRContext *context); static LLVMType getInt16Ty(MLIRContext *context) { return getIntNTy(context, /*numBits=*/16); } @@ -184,7 +130,6 @@ class LLVMType : public Type { /// Void type utilities. static LLVMType getVoidTy(MLIRContext *context); - bool isVoidTy(); // Creation and setting of LLVM's identified struct types static LLVMType createStructTy(MLIRContext *context, @@ -585,6 +530,24 @@ LLVMType parseType(DialectAsmParser &parser); void printType(LLVMType type, DialectAsmPrinter &printer); } // namespace detail +//===----------------------------------------------------------------------===// +// Utility functions. +//===----------------------------------------------------------------------===// + +/// Returns `true` if the given type is compatible with the LLVM dialect. +inline bool isCompatibleType(Type type) { return type.isa(); } + +inline bool isCompatibleFloatingPointType(Type type) { + return type.isa(); +} + +/// Returns the size of the given primitive LLVM dialect-compatible type +/// (including vectors) in bits, for example, the size of !llvm.i16 is 16 and +/// the size of !llvm.vec<4 x i16> is 64. 
Returns 0 for non-primitive +/// (aggregates such as struct) or types that don't have a size (such as void). +llvm::TypeSize getPrimitiveTypeSizeInBits(Type type); + } // namespace LLVM } // namespace mlir diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 1f9b860eb52eb5..3c73cdf64eb707 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -109,10 +109,11 @@ def NVVM_ShflBflyOp : let verifier = [{ if (!(*this)->getAttrOfType("return_value_and_is_valid")) return success(); - auto type = getType().cast(); - if (!type.isStructTy() || type.getStructNumElements() != 2 || - !type.getStructElementType(1).isIntegerTy( - /*Bitwidth=*/1)) + auto type = getType().dyn_cast(); + auto elementType = (type && type.getBody().size() == 2) + ? type.getBody()[1].dyn_cast() + : nullptr; + if (!elementType || elementType.getBitWidth() != 1) return emitError("expected return type to be a two-element struct with " "i1 as the second element"); return success(); diff --git a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp index 273754fe2480c4..65545d8ab2de1d 100644 --- a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp +++ b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp @@ -79,7 +79,7 @@ struct AsyncAPI { static FunctionType executeFunctionType(MLIRContext *ctx) { auto hdl = LLVM::LLVMType::getInt8PtrTy(ctx); - auto resume = resumeFunctionType(ctx).getPointerTo(); + auto resume = LLVM::LLVMPointerType::get(resumeFunctionType(ctx)); return FunctionType::get(ctx, {hdl, resume}, {}); } @@ -91,13 +91,13 @@ struct AsyncAPI { static FunctionType awaitAndExecuteFunctionType(MLIRContext *ctx) { auto hdl = LLVM::LLVMType::getInt8PtrTy(ctx); - auto resume = resumeFunctionType(ctx).getPointerTo(); + auto resume = LLVM::LLVMPointerType::get(resumeFunctionType(ctx)); return FunctionType::get(ctx, {TokenType::get(ctx), hdl, resume}, {}); } static FunctionType awaitAllAndExecuteFunctionType(MLIRContext *ctx) { auto hdl = LLVM::LLVMType::getInt8PtrTy(ctx); - auto resume = resumeFunctionType(ctx).getPointerTo(); + auto resume = LLVM::LLVMPointerType::get(resumeFunctionType(ctx)); return FunctionType::get(ctx, {GroupType::get(ctx), hdl, resume}, {}); } @@ -507,7 +507,7 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { // A pointer to coroutine resume intrinsic wrapper. auto resumeFnTy = AsyncAPI::resumeFunctionType(ctx); auto resumePtr = builder.create( - loc, resumeFnTy.getPointerTo(), kResume); + loc, LLVM::LLVMPointerType::get(resumeFnTy), kResume); // Save the coroutine state: @llvm.coro.save auto coroSave = builder.create( @@ -750,7 +750,7 @@ class AwaitOpLoweringBase : public ConversionPattern { // A pointer to coroutine resume intrinsic wrapper. 
auto resumeFnTy = AsyncAPI::resumeFunctionType(ctx); auto resumePtr = builder.create( - loc, resumeFnTy.getPointerTo(), kResume); + loc, LLVM::LLVMPointerType::get(resumeFnTy), kResume); // Save the coroutine state: @llvm.coro.save auto coroSave = builder.create( diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp index 41a079c44eea58..bbb2bf1e04ff22 100644 --- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp +++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp @@ -55,14 +55,14 @@ class FunctionCallBuilder { FunctionCallBuilder(StringRef functionName, LLVM::LLVMType returnType, ArrayRef argumentTypes) : functionName(functionName), - functionType(LLVM::LLVMType::getFunctionTy(returnType, argumentTypes, - /*isVarArg=*/false)) {} + functionType(LLVM::LLVMFunctionType::get(returnType, argumentTypes, + /*isVarArg=*/false)) {} LLVM::CallOp create(Location loc, OpBuilder &builder, ArrayRef arguments) const; private: StringRef functionName; - LLVM::LLVMType functionType; + LLVM::LLVMFunctionType functionType; }; template @@ -76,7 +76,8 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern { LLVM::LLVMType llvmVoidType = LLVM::LLVMType::getVoidTy(context); LLVM::LLVMType llvmPointerType = LLVM::LLVMType::getInt8PtrTy(context); - LLVM::LLVMType llvmPointerPointerType = llvmPointerType.getPointerTo(); + LLVM::LLVMType llvmPointerPointerType = + LLVM::LLVMPointerType::get(llvmPointerType); LLVM::LLVMType llvmInt8Type = LLVM::LLVMType::getInt8Ty(context); LLVM::LLVMType llvmInt32Type = LLVM::LLVMType::getInt32Ty(context); LLVM::LLVMType llvmInt64Type = LLVM::LLVMType::getInt64Ty(context); @@ -312,7 +313,7 @@ LLVM::CallOp FunctionCallBuilder::create(Location loc, OpBuilder &builder, .create(loc, functionName, functionType); }(); return builder.create( - loc, const_cast(functionType).getFunctionResultType(), + loc, const_cast(functionType).getReturnType(), builder.getSymbolRefAttr(function), arguments); } @@ -518,7 +519,7 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray( auto one = builder.create(loc, llvmInt32Type, builder.getI32IntegerAttr(1)); auto structPtr = builder.create( - loc, structType.getPointerTo(), one, /*alignment=*/0); + loc, LLVM::LLVMPointerType::get(structType), one, /*alignment=*/0); auto arraySize = builder.create( loc, llvmInt32Type, builder.getI32IntegerAttr(numArguments)); auto arrayPtr = builder.create(loc, llvmPointerPointerType, @@ -529,7 +530,7 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray( auto index = builder.create( loc, llvmInt32Type, builder.getI32IntegerAttr(en.index())); auto fieldPtr = builder.create( - loc, argumentTypes[en.index()].getPointerTo(), structPtr, + loc, LLVM::LLVMPointerType::get(argumentTypes[en.index()]), structPtr, ArrayRef{zero, index.getResult()}); builder.create(loc, en.value(), fieldPtr); auto elementPtr = builder.create(loc, llvmPointerPointerType, diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h index bf17200e594f1f..914b7ee50cf934 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h @@ -51,8 +51,8 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { // Rewrite the original GPU function to an LLVM function. 
auto funcType = typeConverter->convertType(gpuFuncOp.getType()) - .template cast() - .getPointerElementTy(); + .template cast() + .getElementType(); // Remap proper input types. TypeConverter::SignatureConversion signatureConversion( @@ -94,10 +94,11 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { for (auto en : llvm::enumerate(workgroupBuffers)) { LLVM::GlobalOp global = en.value(); Value address = rewriter.create(loc, global); - auto elementType = global.getType().getArrayElementType(); + auto elementType = + global.getType().cast().getElementType(); Value memory = rewriter.create( - loc, elementType.getPointerTo(global.addr_space()), address, - ArrayRef{zero, zero}); + loc, LLVM::LLVMPointerType::get(elementType, global.addr_space()), + address, ArrayRef{zero, zero}); // Build a memref descriptor pointing to the buffer to plug with the // existing memref infrastructure. This may use more registers than @@ -123,9 +124,10 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { // Explicitly drop memory space when lowering private memory // attributions since NVVM models it as `alloca`s in the default // memory space and does not support `alloca`s with addrspace(5). - auto ptrType = typeConverter->convertType(type.getElementType()) - .template cast() - .getPointerTo(AllocaAddrSpace); + auto ptrType = LLVM::LLVMPointerType::get( + typeConverter->convertType(type.getElementType()) + .template cast(), + AllocaAddrSpace); Value numElements = rewriter.create( gpuFuncOp.getLoc(), int64Ty, rewriter.getI64IntegerAttr(type.getNumElements())); diff --git a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h index 9d08aeee190610..b2887aa1d78291 100644 --- a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h +++ b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h @@ -57,7 +57,8 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern { LLVMType resultType = castedOperands.front().getType().cast(); LLVMType funcType = getFunctionType(resultType, castedOperands); - StringRef funcName = getFunctionName(funcType.getFunctionResultType()); + StringRef funcName = getFunctionName( + funcType.cast().getReturnType()); if (funcName.empty()) return failure(); @@ -80,7 +81,7 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern { private: Value maybeCast(Value operand, PatternRewriter &rewriter) const { LLVM::LLVMType type = operand.getType().cast(); - if (!type.isHalfTy()) + if (!type.isa()) return operand; return rewriter.create( @@ -100,9 +101,9 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern { } StringRef getFunctionName(LLVM::LLVMType type) const { - if (type.isFloatTy()) + if (type.isa()) return f32Func; - if (type.isDoubleTy()) + if (type.isa()) return f64Func; return ""; } diff --git a/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp b/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp index 355bced96ae750..c676cd256d66a8 100644 --- a/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp +++ b/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp @@ -75,7 +75,7 @@ class VulkanLaunchFuncToVulkanCallsPass // int64_t sizes[Rank]; // omitted when rank == 0 // int64_t strides[Rank]; // omitted when rank == 0 // }; - auto llvmPtrToElementType = elemenType.getPointerTo(); + auto llvmPtrToElementType = LLVM::LLVMPointerType::get(elemenType); auto llvmArrayRankElementSizeType = LLVM::LLVMType::getArrayTy(getInt64Type(), rank); @@ -131,16 +131,18 @@ class 
VulkanLaunchFuncToVulkanCallsPass /// Returns a string representation from the given `type`. StringRef stringifyType(LLVM::LLVMType type) { - if (type.isFloatTy()) + if (type.isa()) return "Float"; - if (type.isHalfTy()) + if (type.isa()) return "Half"; - if (type.isIntegerTy(32)) - return "Int32"; - if (type.isIntegerTy(16)) - return "Int16"; - if (type.isIntegerTy(8)) - return "Int8"; + if (auto intType = type.dyn_cast()) { + if (intType.getBitWidth() == 32) + return "Int32"; + if (intType.getBitWidth() == 16) + return "Int16"; + if (intType.getBitWidth() == 8) + return "Int8"; + } llvm_unreachable("unsupported type"); } @@ -238,11 +240,11 @@ void VulkanLaunchFuncToVulkanCallsPass::createBindMemRefCalls( llvm::formatv("bindMemRef{0}D{1}", rank, stringifyType(type)).str(); // Special case for fp16 type. Since it is not a supported type in C we use // int16_t and bitcast the descriptor. - if (type.isHalfTy()) { + if (type.isa()) { auto memRefTy = getMemRefType(rank, LLVM::LLVMType::getInt16Ty(&getContext())); ptrToMemRefDescriptor = builder.create( - loc, memRefTy.getPointerTo(), ptrToMemRefDescriptor); + loc, LLVM::LLVMPointerType::get(memRefTy), ptrToMemRefDescriptor); } // Create call to `bindMemRef`. builder.create( @@ -257,11 +259,12 @@ void VulkanLaunchFuncToVulkanCallsPass::createBindMemRefCalls( LogicalResult VulkanLaunchFuncToVulkanCallsPass::deduceMemRefRankAndType( Value ptrToMemRefDescriptor, uint32_t &rank, LLVM::LLVMType &type) { auto llvmPtrDescriptorTy = - ptrToMemRefDescriptor.getType().dyn_cast(); + ptrToMemRefDescriptor.getType().dyn_cast(); if (!llvmPtrDescriptorTy) return failure(); - auto llvmDescriptorTy = llvmPtrDescriptorTy.getPointerElementTy(); + auto llvmDescriptorTy = + llvmPtrDescriptorTy.getElementType().dyn_cast(); // template // struct { // Elem *allocated; @@ -270,15 +273,19 @@ LogicalResult VulkanLaunchFuncToVulkanCallsPass::deduceMemRefRankAndType( // int64_t sizes[Rank]; // omitted when rank == 0 // int64_t strides[Rank]; // omitted when rank == 0 // }; - if (!llvmDescriptorTy || !llvmDescriptorTy.isStructTy()) + if (!llvmDescriptorTy) return failure(); - type = llvmDescriptorTy.getStructElementType(0).getPointerElementTy(); - if (llvmDescriptorTy.getStructNumElements() == 3) { + type = llvmDescriptorTy.getBody()[0] + .cast() + .getElementType(); + if (llvmDescriptorTy.getBody().size() == 3) { rank = 0; return success(); } - rank = llvmDescriptorTy.getStructElementType(3).getArrayNumElements(); + rank = llvmDescriptorTy.getBody()[3] + .cast() + .getNumElements(); return success(); } @@ -326,13 +333,13 @@ void VulkanLaunchFuncToVulkanCallsPass::declareVulkanFunctions(Location loc) { LLVM::LLVMType::getHalfTy(&getContext())}) { std::string fnName = "bindMemRef" + std::to_string(i) + "D" + std::string(stringifyType(type)); - if (type.isHalfTy()) + if (type.isa()) type = LLVM::LLVMType::getInt16Ty(&getContext()); if (!module.lookupSymbol(fnName)) { auto fnType = LLVM::LLVMType::getFunctionTy( getVoidType(), {getPointerType(), getInt32Type(), getInt32Type(), - getMemRefType(i, type).getPointerTo()}, + LLVM::LLVMPointerType::get(getMemRefType(i, type))}, /*isVarArg=*/false); builder.create(loc, fnName, fnType); } diff --git a/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp index cacb4787edd4ef..7da9c47f921992 100644 --- a/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp +++ b/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp @@ -66,8 +66,10 @@ static unsigned getBitWidth(Type type) { 
/// Returns the bit width of LLVMType integer or vector. static unsigned getLLVMTypeBitWidth(LLVM::LLVMType type) { - return type.isVectorTy() ? type.getVectorElementType().getIntegerBitWidth() - : type.getIntegerBitWidth(); + auto vectorType = type.dyn_cast(); + return (vectorType ? vectorType.getElementType() : type) + .cast() + .getBitWidth(); } /// Creates `IntegerAttribute` with all bits set for given type @@ -265,7 +267,7 @@ static Type convertPointerType(spirv::PointerType type, TypeConverter &converter) { auto pointeeType = converter.convertType(type.getPointeeType()).cast(); - return pointeeType.getPointerTo(); + return LLVM::LLVMPointerType::get(pointeeType); } /// Converts SPIR-V runtime array to LLVM array. Since LLVM allows indexing over diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 6fbcc220a86b08..e37e7e2dc0c114 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -215,7 +215,7 @@ Type LLVMTypeConverter::convertFunctionType(FunctionType type) { SignatureConversion conversion(type.getNumInputs()); LLVM::LLVMType converted = convertFunctionSignature(type, /*isVariadic=*/false, conversion); - return converted.getPointerTo(); + return LLVM::LLVMPointerType::get(converted); } @@ -267,7 +267,7 @@ LLVMTypeConverter::convertFunctionTypeCWrapper(FunctionType type) { if (!converted) return {}; if (t.isa()) - converted = converted.getPointerTo(); + converted = LLVM::LLVMPointerType::get(converted); inputs.push_back(converted); } @@ -324,7 +324,7 @@ LLVMTypeConverter::getMemRefDescriptorFields(MemRefType type, LLVM::LLVMType elementType = unwrap(convertType(type.getElementType())); if (!elementType) return {}; - auto ptrTy = elementType.getPointerTo(type.getMemorySpace()); + auto ptrTy = LLVM::LLVMPointerType::get(elementType, type.getMemorySpace()); auto indexTy = getIndexType(); SmallVector results = {ptrTy, ptrTy, indexTy}; @@ -396,7 +396,7 @@ Type LLVMTypeConverter::convertMemRefToBarePtr(BaseMemRefType type) { LLVM::LLVMType elementType = unwrap(convertType(type.getElementType())); if (!elementType) return {}; - return elementType.getPointerTo(type.getMemorySpace()); + return LLVM::LLVMPointerType::get(elementType, type.getMemorySpace()); } // Convert an n-D vector type to an LLVM vector type via (n-1)-D array type when @@ -460,7 +460,7 @@ StructBuilder::StructBuilder(Value v) : value(v) { Value StructBuilder::extractPtr(OpBuilder &builder, Location loc, unsigned pos) { - Type type = structType.cast().getStructElementType(pos); + Type type = structType.cast().getBody()[pos]; return builder.create(loc, type, value, builder.getI64ArrayAttr(pos)); } @@ -507,8 +507,9 @@ Value ComplexStructBuilder::imaginary(OpBuilder &builder, Location loc) { MemRefDescriptor::MemRefDescriptor(Value descriptor) : StructBuilder(descriptor) { assert(value != nullptr && "value cannot be null"); - indexType = value.getType().cast().getStructElementType( - kOffsetPosInMemRefDescriptor); + indexType = value.getType() + .cast() + .getBody()[kOffsetPosInMemRefDescriptor]; } /// Builds IR creating an `undef` value of the descriptor type. 
@@ -618,9 +619,9 @@ Value MemRefDescriptor::size(OpBuilder &builder, Location loc, unsigned pos) { Value MemRefDescriptor::size(OpBuilder &builder, Location loc, Value pos, int64_t rank) { auto indexTy = indexType.cast(); - auto indexPtrTy = indexTy.getPointerTo(); + auto indexPtrTy = LLVM::LLVMPointerType::get(indexTy); auto arrayTy = LLVM::LLVMType::getArrayTy(indexTy, rank); - auto arrayPtrTy = arrayTy.getPointerTo(); + auto arrayPtrTy = LLVM::LLVMPointerType::get(arrayTy); // Copy size values to stack-allocated memory. auto zero = createIndexAttrConstant(builder, loc, indexType, 0); @@ -675,8 +676,8 @@ void MemRefDescriptor::setConstantStride(OpBuilder &builder, Location loc, LLVM::LLVMPointerType MemRefDescriptor::getElementPtrType() { return value.getType() - .cast() - .getStructElementType(kAlignedPtrPosInMemRefDescriptor) + .cast() + .getBody()[kAlignedPtrPosInMemRefDescriptor] .cast(); } @@ -922,7 +923,7 @@ Value UnrankedMemRefDescriptor::offset(OpBuilder &builder, Location loc, Value offsetGep = builder.create( loc, elemPtrPtrType, elementPtrPtr, ValueRange({two})); offsetGep = builder.create( - loc, typeConverter.getIndexType().getPointerTo(), offsetGep); + loc, LLVM::LLVMPointerType::get(typeConverter.getIndexType()), offsetGep); return builder.create(loc, offsetGep); } @@ -939,19 +940,17 @@ void UnrankedMemRefDescriptor::setOffset(OpBuilder &builder, Location loc, Value offsetGep = builder.create( loc, elemPtrPtrType, elementPtrPtr, ValueRange({two})); offsetGep = builder.create( - loc, typeConverter.getIndexType().getPointerTo(), offsetGep); + loc, LLVM::LLVMPointerType::get(typeConverter.getIndexType()), offsetGep); builder.create(loc, offset, offsetGep); } -Value UnrankedMemRefDescriptor::sizeBasePtr(OpBuilder &builder, Location loc, - LLVMTypeConverter &typeConverter, - Value memRefDescPtr, - LLVM::LLVMType elemPtrPtrType) { - LLVM::LLVMType elemPtrTy = elemPtrPtrType.getPointerElementTy(); +Value UnrankedMemRefDescriptor::sizeBasePtr( + OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, + Value memRefDescPtr, LLVM::LLVMPointerType elemPtrPtrType) { + LLVM::LLVMType elemPtrTy = elemPtrPtrType.getElementType(); LLVM::LLVMType indexTy = typeConverter.getIndexType(); - LLVM::LLVMType structPtrTy = - LLVM::LLVMType::getStructTy(elemPtrTy, elemPtrTy, indexTy, indexTy) - .getPointerTo(); + LLVM::LLVMType structPtrTy = LLVM::LLVMPointerType::get( + LLVM::LLVMType::getStructTy(elemPtrTy, elemPtrTy, indexTy, indexTy)); Value structPtr = builder.create(loc, structPtrTy, memRefDescPtr); @@ -961,14 +960,15 @@ Value UnrankedMemRefDescriptor::sizeBasePtr(OpBuilder &builder, Location loc, createIndexAttrConstant(builder, loc, typeConverter.getIndexType(), 0); Value three = builder.create(loc, int32_type, builder.getI32IntegerAttr(3)); - return builder.create(loc, indexTy.getPointerTo(), structPtr, - ValueRange({zero, three})); + return builder.create(loc, LLVM::LLVMPointerType::get(indexTy), + structPtr, ValueRange({zero, three})); } Value UnrankedMemRefDescriptor::size(OpBuilder &builder, Location loc, LLVMTypeConverter typeConverter, Value sizeBasePtr, Value index) { - LLVM::LLVMType indexPtrTy = typeConverter.getIndexType().getPointerTo(); + LLVM::LLVMType indexPtrTy = + LLVM::LLVMPointerType::get(typeConverter.getIndexType()); Value sizeStoreGep = builder.create(loc, indexPtrTy, sizeBasePtr, ValueRange({index})); return builder.create(loc, sizeStoreGep); @@ -978,7 +978,8 @@ void UnrankedMemRefDescriptor::setSize(OpBuilder &builder, Location loc, LLVMTypeConverter 
typeConverter, Value sizeBasePtr, Value index, Value size) { - LLVM::LLVMType indexPtrTy = typeConverter.getIndexType().getPointerTo(); + LLVM::LLVMType indexPtrTy = + LLVM::LLVMPointerType::get(typeConverter.getIndexType()); Value sizeStoreGep = builder.create(loc, indexPtrTy, sizeBasePtr, ValueRange({index})); builder.create(loc, size, sizeStoreGep); @@ -987,7 +988,8 @@ void UnrankedMemRefDescriptor::setSize(OpBuilder &builder, Location loc, Value UnrankedMemRefDescriptor::strideBasePtr(OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, Value sizeBasePtr, Value rank) { - LLVM::LLVMType indexPtrTy = typeConverter.getIndexType().getPointerTo(); + LLVM::LLVMType indexPtrTy = + LLVM::LLVMPointerType::get(typeConverter.getIndexType()); return builder.create(loc, indexPtrTy, sizeBasePtr, ValueRange({rank})); } @@ -996,7 +998,8 @@ Value UnrankedMemRefDescriptor::stride(OpBuilder &builder, Location loc, LLVMTypeConverter typeConverter, Value strideBasePtr, Value index, Value stride) { - LLVM::LLVMType indexPtrTy = typeConverter.getIndexType().getPointerTo(); + LLVM::LLVMType indexPtrTy = + LLVM::LLVMPointerType::get(typeConverter.getIndexType()); Value strideStoreGep = builder.create( loc, indexPtrTy, strideBasePtr, ValueRange({index})); return builder.create(loc, strideStoreGep); @@ -1006,7 +1009,8 @@ void UnrankedMemRefDescriptor::setStride(OpBuilder &builder, Location loc, LLVMTypeConverter typeConverter, Value strideBasePtr, Value index, Value stride) { - LLVM::LLVMType indexPtrTy = typeConverter.getIndexType().getPointerTo(); + LLVM::LLVMType indexPtrTy = + LLVM::LLVMPointerType::get(typeConverter.getIndexType()); Value strideStoreGep = builder.create( loc, indexPtrTy, strideBasePtr, ValueRange({index})); builder.create(loc, stride, strideStoreGep); @@ -1100,7 +1104,7 @@ bool ConvertToLLVMPattern::isSupportedMemRefType(MemRefType type) const { Type ConvertToLLVMPattern::getElementPtrType(MemRefType type) const { auto elementType = type.getElementType(); auto structElementType = unwrap(typeConverter->convertType(elementType)); - return structElementType.getPointerTo(type.getMemorySpace()); + return LLVM::LLVMPointerType::get(structElementType, type.getMemorySpace()); } void ConvertToLLVMPattern::getMemRefDescriptorSizes( @@ -1158,8 +1162,8 @@ Value ConvertToLLVMPattern::getSizeInBytes( // %0 = getelementptr %elementType* null, %indexType 1 // %1 = ptrtoint %elementType* %0 to %indexType // which is a common pattern of getting the size of a type in bytes. 
- auto convertedPtrType = - typeConverter->convertType(type).cast().getPointerTo(); + auto convertedPtrType = LLVM::LLVMPointerType::get( + typeConverter->convertType(type).cast()); auto nullPtr = rewriter.create(loc, convertedPtrType); auto gep = rewriter.create( loc, convertedPtrType, @@ -1315,7 +1319,8 @@ static void wrapExternalFunction(OpBuilder &builder, Location loc, builder, loc, typeConverter, unrankedMemRefType, wrapperArgsRange.take_front(numToDrop)); - auto ptrTy = packed.getType().cast().getPointerTo(); + auto ptrTy = + LLVM::LLVMPointerType::get(packed.getType().cast()); Value one = builder.create( loc, typeConverter.convertType(builder.getIndexType()), builder.getIntegerAttr(builder.getIndexType(), 1)); @@ -1512,11 +1517,12 @@ static NDVectorTypeInfo extractNDVectorTypeInfo(VectorType vectorType, return info; info.arraySizes.reserve(vectorType.getRank() - 1); auto llvmTy = info.llvmArrayTy; - while (llvmTy.isArrayTy()) { - info.arraySizes.push_back(llvmTy.getArrayNumElements()); - llvmTy = llvmTy.getArrayElementType(); + while (llvmTy.isa()) { + info.arraySizes.push_back( + llvmTy.cast().getNumElements()); + llvmTy = llvmTy.cast().getElementType(); } - if (!llvmTy.isVectorTy()) + if (!llvmTy.isa()) return info; info.llvmVectorTy = llvmTy; return info; @@ -1644,7 +1650,7 @@ LogicalResult LLVM::detail::vectorOneToOneRewrite( return failure(); auto llvmArrayTy = operands[0].getType().cast(); - if (!llvmArrayTy.isArrayTy()) + if (!llvmArrayTy.isa()) return oneToOneRewrite(op, targetOp, operands, typeConverter, rewriter); auto callback = [op, targetOp, &rewriter](LLVM::LLVMType llvmVectorTy, @@ -2457,13 +2463,14 @@ struct GetGlobalMemrefOpLowering : public AllocLikeOpLowering { LLVM::LLVMType arrayTy = convertGlobalMemrefTypeToLLVM(type, *getTypeConverter()); auto addressOf = rewriter.create( - loc, arrayTy.getPointerTo(memSpace), getGlobalOp.name()); + loc, LLVM::LLVMPointerType::get(arrayTy, memSpace), getGlobalOp.name()); // Get the address of the first element in the array by creating a GEP with // the address of the GV as the base, and (rank + 1) number of 0 indices. 
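  // Example (illustrative, added for exposition): a memref<2x3xf32> global lowers to an LLVM array '2 x (3 x float)'; its rank is 2, so the GEP described above uses rank + 1 = 3 zero indices to address the first scalar element of the global.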
LLVM::LLVMType elementType = unwrap(typeConverter->convertType(type.getElementType())); - LLVM::LLVMType elementPtrType = elementType.getPointerTo(memSpace); + LLVM::LLVMType elementPtrType = + LLVM::LLVMPointerType::get(elementType, memSpace); SmallVector operands = {addressOf}; operands.insert(operands.end(), type.getRank() + 1, @@ -2504,9 +2511,9 @@ struct RsqrtOpLowering : public ConvertOpToLLVMPattern { auto floatType = getElementTypeOrSelf(resultType).cast(); auto floatOne = rewriter.getFloatAttr(floatType, 1.0); - if (!operandType.isArrayTy()) { + if (!operandType.isa()) { LLVM::ConstantOp one; - if (operandType.isVectorTy()) { + if (operandType.isa()) { one = rewriter.create( loc, operandType, SplatElementsAttr::get(resultType.cast(), floatOne)); @@ -2526,8 +2533,10 @@ struct RsqrtOpLowering : public ConvertOpToLLVMPattern { op.getOperation(), operands, *getTypeConverter(), [&](LLVM::LLVMType llvmVectorTy, ValueRange operands) { auto splatAttr = SplatElementsAttr::get( - mlir::VectorType::get({llvmVectorTy.getVectorNumElements()}, - floatType), + mlir::VectorType::get( + {llvmVectorTy.cast() + .getNumElements()}, + floatType), floatOne); auto one = rewriter.create(loc, llvmVectorTy, splatAttr); @@ -2614,12 +2623,13 @@ struct MemRefCastOpLowering : public ConvertOpToLLVMPattern { // ptr = ExtractValueOp src, 1 auto ptr = memRefDesc.memRefDescPtr(rewriter, loc); // castPtr = BitCastOp i8* to structTy* - auto castPtr = - rewriter - .create( - loc, targetStructType.cast().getPointerTo(), - ptr) - .getResult(); + auto castPtr = rewriter + .create( + loc, + LLVM::LLVMPointerType::get( + targetStructType.cast()), + ptr) + .getResult(); // struct = LoadOp castPtr auto loadOp = rewriter.create(loc, castPtr); rewriter.replaceOp(memRefCastOp, loadOp.getResult()); @@ -2654,8 +2664,8 @@ static void extractPointersAndOffset(Location loc, Type elementType = operandType.cast().getElementType(); LLVM::LLVMType llvmElementType = unwrap(typeConverter.convertType(elementType)); - LLVM::LLVMType elementPtrPtrType = - llvmElementType.getPointerTo(memorySpace).getPointerTo(); + LLVM::LLVMType elementPtrPtrType = LLVM::LLVMPointerType::get( + LLVM::LLVMPointerType::get(llvmElementType, memorySpace)); // Extract pointer to the underlying ranked memref descriptor and cast it to // ElemType**. @@ -2700,8 +2710,8 @@ struct MemRefReinterpretCastOpLowering MemRefType targetMemRefType = castOp.getResult().getType().cast(); auto llvmTargetDescriptorTy = typeConverter->convertType(targetMemRefType) - .dyn_cast_or_null(); - if (!llvmTargetDescriptorTy || !llvmTargetDescriptorTy.isStructTy()) + .dyn_cast_or_null(); + if (!llvmTargetDescriptorTy) return failure(); // Create descriptor. @@ -2804,8 +2814,8 @@ struct MemRefReshapeOpLowering // Set pointers and offset. LLVM::LLVMType llvmElementType = unwrap(typeConverter->convertType(elementType)); - LLVM::LLVMType elementPtrPtrType = - llvmElementType.getPointerTo(addressSpace).getPointerTo(); + auto elementPtrPtrType = LLVM::LLVMPointerType::get( + LLVM::LLVMPointerType::get(llvmElementType, addressSpace)); UnrankedMemRefDescriptor::setAllocatedPtr(rewriter, loc, underlyingDescPtr, elementPtrPtrType, allocatedPtr); UnrankedMemRefDescriptor::setAlignedPtr(rewriter, loc, *getTypeConverter(), @@ -2858,7 +2868,7 @@ struct MemRefReshapeOpLowering rewriter.setInsertionPointToStart(bodyBlock); // Copy size from shape to descriptor. 
- LLVM::LLVMType llvmIndexPtrType = indexType.getPointerTo(); + LLVM::LLVMType llvmIndexPtrType = LLVM::LLVMPointerType::get(indexType); Value sizeLoadGep = rewriter.create( loc, llvmIndexPtrType, shapeOperandPtr, ValueRange{indexArg}); Value size = rewriter.create(loc, sizeLoadGep); @@ -2950,14 +2960,14 @@ struct DimOpLowering : public ConvertOpToLLVMPattern { Value underlyingRankedDesc = unrankedDesc.memRefDescPtr(rewriter, loc); Value scalarMemRefDescPtr = rewriter.create( loc, - typeConverter->convertType(scalarMemRefType) - .cast() - .getPointerTo(addressSpace), + LLVM::LLVMPointerType::get( + typeConverter->convertType(scalarMemRefType).cast(), + addressSpace), underlyingRankedDesc); // Get pointer to offset field of memref descriptor. - Type indexPtrTy = - getTypeConverter()->getIndexType().getPointerTo(addressSpace); + Type indexPtrTy = LLVM::LLVMPointerType::get( + getTypeConverter()->getIndexType(), addressSpace); Value two = rewriter.create( loc, typeConverter->convertType(rewriter.getI32Type()), rewriter.getI32IntegerAttr(2)); @@ -3120,10 +3130,10 @@ struct IndexCastOpLowering : public ConvertOpToLLVMPattern { auto targetType = typeConverter->convertType(indexCastOp.getResult().getType()) - .cast(); - auto sourceType = transformed.in().getType().cast(); - unsigned targetBits = targetType.getIntegerBitWidth(); - unsigned sourceBits = sourceType.getIntegerBitWidth(); + .cast(); + auto sourceType = transformed.in().getType().cast(); + unsigned targetBits = targetType.getBitWidth(); + unsigned sourceBits = sourceType.getBitWidth(); if (targetBits == sourceBits) rewriter.replaceOp(indexCastOp, transformed.in()); @@ -3462,14 +3472,18 @@ struct SubViewOpLowering : public ConvertOpToLLVMPattern { // Copy the buffer pointer from the old descriptor to the new one. Value extracted = sourceMemRef.allocatedPtr(rewriter, loc); Value bitcastPtr = rewriter.create( - loc, targetElementTy.getPointerTo(viewMemRefType.getMemorySpace()), + loc, + LLVM::LLVMPointerType::get(targetElementTy, + viewMemRefType.getMemorySpace()), extracted); targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr); // Copy the buffer pointer from the old descriptor to the new one. 
extracted = sourceMemRef.alignedPtr(rewriter, loc); bitcastPtr = rewriter.create( - loc, targetElementTy.getPointerTo(viewMemRefType.getMemorySpace()), + loc, + LLVM::LLVMPointerType::get(targetElementTy, + viewMemRefType.getMemorySpace()), extracted); targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr); @@ -3662,7 +3676,9 @@ struct ViewOpLowering : public ConvertOpToLLVMPattern { Value allocatedPtr = sourceMemRef.allocatedPtr(rewriter, loc); auto srcMemRefType = viewOp.source().getType().cast(); Value bitcastPtr = rewriter.create( - loc, targetElementTy.getPointerTo(srcMemRefType.getMemorySpace()), + loc, + LLVM::LLVMPointerType::get(targetElementTy, + srcMemRefType.getMemorySpace()), allocatedPtr); targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr); @@ -3671,7 +3687,9 @@ struct ViewOpLowering : public ConvertOpToLLVMPattern { alignedPtr = rewriter.create(loc, alignedPtr.getType(), alignedPtr, adaptor.byte_shift()); bitcastPtr = rewriter.create( - loc, targetElementTy.getPointerTo(srcMemRefType.getMemorySpace()), + loc, + LLVM::LLVMPointerType::get(targetElementTy, + srcMemRefType.getMemorySpace()), alignedPtr); targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr); @@ -4064,7 +4082,8 @@ Value LLVMTypeConverter::promoteOneMemRefDescriptor(Location loc, Value operand, auto indexType = IndexType::get(context); // Alloca with proper alignment. We do not expect optimizations of this // alloca op and so we omit allocating at the entry block. - auto ptrType = operand.getType().cast().getPointerTo(); + auto ptrType = + LLVM::LLVMPointerType::get(operand.getType().cast()); Value one = builder.create(loc, int64Ty, IntegerAttr::get(indexType, 1)); Value allocated = diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index a982b90e0e93bc..bcc91e304e72de 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -193,7 +193,7 @@ static LogicalResult getBasePtr(ConversionPatternRewriter &rewriter, Value base; if (failed(getBase(rewriter, loc, memref, memRefType, base))) return failure(); - auto pType = type.template cast().getPointerTo(); + auto pType = LLVM::LLVMPointerType::get(type.template cast()); base = rewriter.create(loc, pType, base); ptr = rewriter.create(loc, pType, base); return success(); @@ -1100,14 +1100,14 @@ class VectorTypeCastOpConversion return failure(); auto llvmSourceDescriptorTy = - operands[0].getType().dyn_cast(); - if (!llvmSourceDescriptorTy || !llvmSourceDescriptorTy.isStructTy()) + operands[0].getType().dyn_cast(); + if (!llvmSourceDescriptorTy) return failure(); MemRefDescriptor sourceMemRef(operands[0]); auto llvmTargetDescriptorTy = typeConverter->convertType(targetMemRefType) - .dyn_cast_or_null(); - if (!llvmTargetDescriptorTy || !llvmTargetDescriptorTy.isStructTy()) + .dyn_cast_or_null(); + if (!llvmTargetDescriptorTy) return failure(); // Only contiguous source buffers supported atm. @@ -1231,15 +1231,15 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { // TODO: support alignment when possible. 
Value dataPtr = this->getStridedElementPtr( loc, memRefType, adaptor.source(), adaptor.indices(), rewriter); - auto vecTy = - toLLVMTy(xferOp.getVectorType()).template cast(); + auto vecTy = toLLVMTy(xferOp.getVectorType()) + .template cast(); Value vectorDataPtr; if (memRefType.getMemorySpace() == 0) - vectorDataPtr = - rewriter.create(loc, vecTy.getPointerTo(), dataPtr); + vectorDataPtr = rewriter.create( + loc, LLVM::LLVMPointerType::get(vecTy), dataPtr); else vectorDataPtr = rewriter.create( - loc, vecTy.getPointerTo(), dataPtr); + loc, LLVM::LLVMPointerType::get(vecTy), dataPtr); if (!xferOp.isMaskedDim(0)) return replaceTransferOpWithLoadOrStore(rewriter, @@ -1253,7 +1253,7 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { // // TODO: when the leaf transfer rank is k > 1, we need the last `k` // dimensions here. - unsigned vecWidth = vecTy.getVectorNumElements(); + unsigned vecWidth = vecTy.getNumElements(); unsigned lastIndex = llvm::size(xferOp.indices()) - 1; Value off = xferOp.indices()[lastIndex]; Value dim = rewriter.create(loc, xferOp.source(), lastIndex); diff --git a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp index 973b116ef498ca..1335f33e10aa73 100644 --- a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp +++ b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp @@ -78,9 +78,9 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { auto toLLVMTy = [&](Type t) { return this->getTypeConverter()->convertType(t); }; - LLVM::LLVMType vecTy = - toLLVMTy(xferOp.getVectorType()).template cast(); - unsigned vecWidth = vecTy.getVectorNumElements(); + auto vecTy = toLLVMTy(xferOp.getVectorType()) + .template cast(); + unsigned vecWidth = vecTy.getNumElements(); Location loc = xferOp->getLoc(); // The backend result vector scalarization have trouble scalarize diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 7b1300da1783f8..2bdbb877ec84c7 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -105,9 +105,10 @@ static ParseResult parseCmpOp(OpAsmParser &parser, OperationState &result) { auto argType = type.dyn_cast(); if (!argType) return parser.emitError(trailingTypeLoc, "expected LLVM IR dialect type"); - if (argType.isVectorTy()) - resultType = - LLVMType::getVectorTy(resultType, argType.getVectorNumElements()); + if (auto vecArgType = argType.dyn_cast()) + resultType = LLVMType::getVectorTy(resultType, vecArgType.getNumElements()); + assert(!argType.isa() && + "unhandled scalable vector"); result.addTypes({resultType}); return success(); @@ -118,7 +119,7 @@ static ParseResult parseCmpOp(OpAsmParser &parser, OperationState &result) { //===----------------------------------------------------------------------===// static void printAllocaOp(OpAsmPrinter &p, AllocaOp &op) { - auto elemTy = op.getType().cast().getPointerElementTy(); + auto elemTy = op.getType().cast().getElementType(); auto funcTy = FunctionType::get(op.getContext(), {op.arraySize().getType()}, {op.getType()}); @@ -363,14 +364,11 @@ static void printLoadOp(OpAsmPrinter &p, LoadOp &op) { // the resulting type wrapped in MLIR, or nullptr on error. 
static Type getLoadStoreElementType(OpAsmParser &parser, Type type, llvm::SMLoc trailingTypeLoc) { - auto llvmTy = type.dyn_cast(); + auto llvmTy = type.dyn_cast(); if (!llvmTy) - return parser.emitError(trailingTypeLoc, "expected LLVM IR dialect type"), - nullptr; - if (!llvmTy.isPointerTy()) return parser.emitError(trailingTypeLoc, "expected LLVM pointer type"), nullptr; - return llvmTy.getPointerElementTy(); + return llvmTy.getElementType(); } // ::= `llvm.load` `volatile` ssa-use attribute-dict? `:` type @@ -569,7 +567,7 @@ static ParseResult parseInvokeOp(OpAsmParser &parser, OperationState &result) { auto llvmFuncType = LLVM::LLVMType::getFunctionTy(llvmResultType, argTypes, /*isVarArg=*/false); - auto wrappedFuncType = llvmFuncType.getPointerTo(); + auto wrappedFuncType = LLVM::LLVMPointerType::get(llvmFuncType); auto funcArguments = llvm::makeArrayRef(operands).drop_front(); @@ -613,7 +611,7 @@ static LogicalResult verify(LandingpadOp op) { for (unsigned idx = 0, ie = op.getNumOperands(); idx < ie; idx++) { value = op.getOperand(idx); - bool isFilter = value.getType().cast().isArrayTy(); + bool isFilter = value.getType().isa(); if (isFilter) { // FIXME: Verify filter clauses when arrays are appropriately handled } else { @@ -646,7 +644,7 @@ static void printLandingpadOp(OpAsmPrinter &p, LandingpadOp &op) { for (auto value : op.getOperands()) { // Similar to llvm - if clause is an array type then it is filter // clause else catch clause - bool isArrayTy = value.getType().cast().isArrayTy(); + bool isArrayTy = value.getType().isa(); p << '(' << (isArrayTy ? "filter " : "catch ") << value << " : " << value.getType() << ") "; } @@ -728,37 +726,37 @@ static LogicalResult verify(CallOp &op) { fnType = fn.getType(); } - if (!fnType.isFunctionTy()) + + LLVMFunctionType funcType = fnType.dyn_cast(); + if (!funcType) return op.emitOpError("callee does not have a functional type: ") << fnType; // Verify that the operand and result types match the callee. 
- if (!fnType.isFunctionVarArg() && - fnType.getFunctionNumParams() != (op.getNumOperands() - isIndirect)) + if (!funcType.isVarArg() && + funcType.getNumParams() != (op.getNumOperands() - isIndirect)) return op.emitOpError() << "incorrect number of operands (" << (op.getNumOperands() - isIndirect) - << ") for callee (expecting: " << fnType.getFunctionNumParams() - << ")"; + << ") for callee (expecting: " << funcType.getNumParams() << ")"; - if (fnType.getFunctionNumParams() > (op.getNumOperands() - isIndirect)) + if (funcType.getNumParams() > (op.getNumOperands() - isIndirect)) return op.emitOpError() << "incorrect number of operands (" << (op.getNumOperands() - isIndirect) << ") for varargs callee (expecting at least: " - << fnType.getFunctionNumParams() << ")"; + << funcType.getNumParams() << ")"; - for (unsigned i = 0, e = fnType.getFunctionNumParams(); i != e; ++i) - if (op.getOperand(i + isIndirect).getType() != - fnType.getFunctionParamType(i)) + for (unsigned i = 0, e = funcType.getNumParams(); i != e; ++i) + if (op.getOperand(i + isIndirect).getType() != funcType.getParamType(i)) return op.emitOpError() << "operand type mismatch for operand " << i << ": " << op.getOperand(i + isIndirect).getType() - << " != " << fnType.getFunctionParamType(i); + << " != " << funcType.getParamType(i); if (op.getNumResults() && - op.getResult(0).getType() != fnType.getFunctionResultType()) + op.getResult(0).getType() != funcType.getReturnType()) return op.emitOpError() << "result type mismatch: " << op.getResult(0).getType() - << " != " << fnType.getFunctionResultType(); + << " != " << funcType.getReturnType(); return success(); } @@ -848,7 +846,7 @@ static ParseResult parseCallOp(OpAsmParser &parser, OperationState &result) { } auto llvmFuncType = LLVM::LLVMType::getFunctionTy(llvmResultType, argTypes, /*isVarArg=*/false); - auto wrappedFuncType = llvmFuncType.getPointerTo(); + auto wrappedFuncType = LLVM::LLVMPointerType::get(llvmFuncType); auto funcArguments = ArrayRef(operands).drop_front(); @@ -875,8 +873,8 @@ static ParseResult parseCallOp(OpAsmParser &parser, OperationState &result) { void LLVM::ExtractElementOp::build(OpBuilder &b, OperationState &result, Value vector, Value position, ArrayRef attrs) { - auto wrappedVectorType = vector.getType().cast(); - auto llvmType = wrappedVectorType.getVectorElementType(); + auto vectorType = vector.getType().cast(); + auto llvmType = vectorType.getElementType(); build(b, result, llvmType, vector, position); result.addAttributes(attrs); } @@ -903,11 +901,11 @@ static ParseResult parseExtractElementOp(OpAsmParser &parser, parser.resolveOperand(vector, type, result.operands) || parser.resolveOperand(position, positionType, result.operands)) return failure(); - auto wrappedVectorType = type.dyn_cast(); - if (!wrappedVectorType || !wrappedVectorType.isVectorTy()) + auto vectorType = type.dyn_cast(); + if (!vectorType) return parser.emitError( loc, "expected LLVM IR dialect vector type for operand #1"); - result.addTypes(wrappedVectorType.getVectorElementType()); + result.addTypes(vectorType.getElementType()); return success(); } @@ -930,8 +928,8 @@ static LLVM::LLVMType getInsertExtractValueElementType(OpAsmParser &parser, ArrayAttr positionAttr, llvm::SMLoc attributeLoc, llvm::SMLoc typeLoc) { - auto wrappedContainerType = containerType.dyn_cast(); - if (!wrappedContainerType) + auto llvmType = containerType.dyn_cast(); + if (!llvmType) return parser.emitError(typeLoc, "expected LLVM IR Dialect type"), nullptr; // Infer the element type from the structure 
type: iteratively step inside the @@ -945,26 +943,24 @@ static LLVM::LLVMType getInsertExtractValueElementType(OpAsmParser &parser, "expected an array of integer literals"), nullptr; int position = positionElementAttr.getInt(); - if (wrappedContainerType.isArrayTy()) { - if (position < 0 || static_cast(position) >= - wrappedContainerType.getArrayNumElements()) + if (auto arrayType = llvmType.dyn_cast()) { + if (position < 0 || + static_cast(position) >= arrayType.getNumElements()) return parser.emitError(attributeLoc, "position out of bounds"), nullptr; - wrappedContainerType = wrappedContainerType.getArrayElementType(); - } else if (wrappedContainerType.isStructTy()) { - if (position < 0 || static_cast(position) >= - wrappedContainerType.getStructNumElements()) + llvmType = arrayType.getElementType(); + } else if (auto structType = llvmType.dyn_cast()) { + if (position < 0 || + static_cast(position) >= structType.getBody().size()) return parser.emitError(attributeLoc, "position out of bounds"), nullptr; - wrappedContainerType = - wrappedContainerType.getStructElementType(position); + llvmType = structType.getBody()[position]; } else { - return parser.emitError(typeLoc, - "expected wrapped LLVM IR structure/array type"), + return parser.emitError(typeLoc, "expected LLVM IR structure/array type"), nullptr; } } - return wrappedContainerType; + return llvmType; } // ::= `llvm.extractvalue` ssa-use @@ -1021,11 +1017,11 @@ static ParseResult parseInsertElementOp(OpAsmParser &parser, parser.parseColonType(vectorType)) return failure(); - auto wrappedVectorType = vectorType.dyn_cast(); - if (!wrappedVectorType || !wrappedVectorType.isVectorTy()) + auto llvmVectorType = vectorType.dyn_cast(); + if (!llvmVectorType) return parser.emitError( loc, "expected LLVM IR dialect vector type for operand #1"); - auto valueType = wrappedVectorType.getVectorElementType(); + Type valueType = llvmVectorType.getElementType(); if (!valueType) return failure(); @@ -1145,12 +1141,14 @@ static LogicalResult verify(AddressOfOp op) { return op.emitOpError( "must reference a global defined by 'llvm.mlir.global' or 'llvm.func'"); - if (global && global.getType().getPointerTo(global.addr_space()) != - op.getResult().getType()) + if (global && + LLVM::LLVMPointerType::get(global.getType(), global.addr_space()) != + op.getResult().getType()) return op.emitOpError( "the type must be a pointer to the type of the referenced global"); - if (function && function.getType().getPointerTo() != op.getResult().getType()) + if (function && LLVM::LLVMPointerType::get(function.getType()) != + op.getResult().getType()) return op.emitOpError( "the type must be a pointer to the type of the referenced function"); @@ -1276,11 +1274,11 @@ static LogicalResult verifyCast(DialectCastOp op, LLVMType llvmType, if (vectorType.getRank() != 1) return op->emitOpError("only 1-d vector is allowed"); - auto llvmVector = llvmType.dyn_cast(); - if (llvmVector.isa()) + auto llvmVector = llvmType.dyn_cast(); + if (!llvmVector) return op->emitOpError("only fixed-sized vector is allowed"); - if (vectorType.getDimSize(0) != llvmVector.getVectorNumElements()) + if (vectorType.getDimSize(0) != llvmVector.getNumElements()) return op->emitOpError( "invalid cast between vectors with mismatching sizes"); @@ -1375,7 +1373,10 @@ static LogicalResult verifyCast(DialectCastOp op, LLVMType llvmType, "be an index-compatible integer"); auto ptrType = structType.getBody()[1].dyn_cast(); - if (!ptrType || !ptrType.getPointerElementTy().isIntegerTy(8)) + auto ptrElementType = 
+ ptrType ? ptrType.getElementType().dyn_cast() + : nullptr; + if (!ptrElementType || ptrElementType.getBitWidth() != 8) return op->emitOpError("expected second element of a memref descriptor " "to be an !llvm.ptr"); @@ -1503,9 +1504,11 @@ static LogicalResult verify(GlobalOp op) { return op.emitOpError("must appear at the module level"); if (auto strAttr = op.getValueOrNull().dyn_cast_or_null()) { - auto type = op.getType(); - if (!type.isArrayTy() || !type.getArrayElementType().isIntegerTy(8) || - type.getArrayNumElements() != strAttr.getValue().size()) + auto type = op.getType().dyn_cast(); + LLVMIntegerType elementType = + type ? type.getElementType().dyn_cast() : nullptr; + if (!elementType || elementType.getBitWidth() != 8 || + type.getNumElements() != strAttr.getValue().size()) return op.emitOpError( "requires an i8 array type of the length equal to that of the string " "attribute"); @@ -1534,9 +1537,9 @@ static LogicalResult verify(GlobalOp op) { void LLVM::ShuffleVectorOp::build(OpBuilder &b, OperationState &result, Value v1, Value v2, ArrayAttr mask, ArrayRef attrs) { - auto wrappedContainerType1 = v1.getType().cast(); - auto vType = LLVMType::getVectorTy( - wrappedContainerType1.getVectorElementType(), mask.size()); + auto containerType = v1.getType().cast(); + auto vType = + LLVMType::getVectorTy(containerType.getElementType(), mask.size()); build(b, result, vType, v1, v2, mask); result.addAttributes(attrs); } @@ -1566,12 +1569,12 @@ static ParseResult parseShuffleVectorOp(OpAsmParser &parser, parser.resolveOperand(v1, typeV1, result.operands) || parser.resolveOperand(v2, typeV2, result.operands)) return failure(); - auto wrappedContainerType1 = typeV1.dyn_cast(); - if (!wrappedContainerType1 || !wrappedContainerType1.isVectorTy()) + auto containerType = typeV1.dyn_cast(); + if (!containerType) return parser.emitError( loc, "expected LLVM IR dialect vector type for operand #1"); - auto vType = LLVMType::getVectorTy( - wrappedContainerType1.getVectorElementType(), maskAttr.size()); + auto vType = + LLVMType::getVectorTy(containerType.getElementType(), maskAttr.size()); result.addTypes(vType); return success(); } @@ -1588,9 +1591,9 @@ Block *LLVMFuncOp::addEntryBlock() { auto *entry = new Block; push_back(entry); - LLVMType type = getType(); - for (unsigned i = 0, e = type.getFunctionNumParams(); i < e; ++i) - entry->addArgument(type.getFunctionParamType(i)); + LLVMFunctionType type = getType(); + for (unsigned i = 0, e = type.getNumParams(); i < e; ++i) + entry->addArgument(type.getParamType(i)); return entry; } @@ -1608,7 +1611,7 @@ void LLVMFuncOp::build(OpBuilder &builder, OperationState &result, if (argAttrs.empty()) return; - unsigned numInputs = type.getFunctionNumParams(); + unsigned numInputs = type.cast().getNumParams(); assert(numInputs == argAttrs.size() && "expected as many argument attribute lists as arguments"); SmallString<8> argAttrName; @@ -1711,15 +1714,15 @@ static void printLLVMFuncOp(OpAsmPrinter &p, LLVMFuncOp op) { p << stringifyLinkage(op.linkage()) << ' '; p.printSymbolName(op.getName()); - LLVMType fnType = op.getType(); + LLVMFunctionType fnType = op.getType(); SmallVector argTypes; SmallVector resTypes; - argTypes.reserve(fnType.getFunctionNumParams()); - for (unsigned i = 0, e = fnType.getFunctionNumParams(); i < e; ++i) - argTypes.push_back(fnType.getFunctionParamType(i)); + argTypes.reserve(fnType.getNumParams()); + for (unsigned i = 0, e = fnType.getNumParams(); i < e; ++i) + argTypes.push_back(fnType.getParamType(i)); - LLVMType returnType = 
fnType.getFunctionResultType(); - if (!returnType.isVoidTy()) + LLVMType returnType = fnType.getReturnType(); + if (!returnType.isa()) resTypes.push_back(returnType); impl::printFunctionSignature(p, op, argTypes, op.isVarArg(), resTypes); @@ -1737,8 +1740,8 @@ static void printLLVMFuncOp(OpAsmPrinter &p, LLVMFuncOp op) { // attribute is present. This can check for preconditions of the // getNumArguments hook not failing. LogicalResult LLVMFuncOp::verifyType() { - auto llvmType = getTypeAttr().getValue().dyn_cast_or_null(); - if (!llvmType || !llvmType.isFunctionTy()) + auto llvmType = getTypeAttr().getValue().dyn_cast_or_null(); + if (!llvmType) return emitOpError("requires '" + getTypeAttrName() + "' attribute of wrapped LLVM function type"); @@ -1747,9 +1750,7 @@ LogicalResult LLVMFuncOp::verifyType() { // Hook for OpTrait::FunctionLike, returns the number of function arguments. // Depends on the type attribute being correct as checked by verifyType -unsigned LLVMFuncOp::getNumFuncArguments() { - return getType().getFunctionNumParams(); -} +unsigned LLVMFuncOp::getNumFuncArguments() { return getType().getNumParams(); } // Hook for OpTrait::FunctionLike, returns the number of function results. // Depends on the type attribute being correct as checked by verifyType @@ -1759,7 +1760,7 @@ unsigned LLVMFuncOp::getNumFuncResults() { // If we modeled a void return as one result, then it would be possible to // attach an MLIR result attribute to it, and it isn't clear what semantics we // would assign to that. - if (getType().getFunctionResultType().isVoidTy()) + if (getType().getReturnType().isa()) return 0; return 1; } @@ -1788,7 +1789,7 @@ static LogicalResult verify(LLVMFuncOp op) { if (op.isVarArg()) return op.emitOpError("only external functions can be variadic"); - unsigned numArguments = op.getType().getFunctionNumParams(); + unsigned numArguments = op.getType().getNumParams(); Block &entryBlock = op.front(); for (unsigned i = 0; i < numArguments; ++i) { Type argType = entryBlock.getArgument(i).getType(); @@ -1796,7 +1797,7 @@ static LogicalResult verify(LLVMFuncOp op) { if (!argLLVMType) return op.emitOpError("entry block argument #") << i << " is not of LLVM type"; - if (op.getType().getFunctionParamType(i) != argLLVMType) + if (op.getType().getParamType(i) != argLLVMType) return op.emitOpError("the type of entry block argument #") << i << " does not match the function signature"; } @@ -1896,7 +1897,8 @@ static ParseResult parseAtomicRMWOp(OpAsmParser &parser, parseAtomicOrdering(parser, result, "ordering") || parser.parseOptionalAttrDict(result.attributes) || parser.parseColonType(type) || - parser.resolveOperand(ptr, type.getPointerTo(), result.operands) || + parser.resolveOperand(ptr, LLVM::LLVMPointerType::get(type), + result.operands) || parser.resolveOperand(val, type, result.operands)) return failure(); @@ -1905,9 +1907,9 @@ static ParseResult parseAtomicRMWOp(OpAsmParser &parser, } static LogicalResult verify(AtomicRMWOp op) { - auto ptrType = op.ptr().getType().cast(); + auto ptrType = op.ptr().getType().cast(); auto valType = op.val().getType().cast(); - if (valType != ptrType.getPointerElementTy()) + if (valType != ptrType.getElementType()) return op.emitOpError("expected LLVM IR element type for operand #0 to " "match type for operand #1"); auto resType = op.res().getType().cast(); @@ -1915,17 +1917,21 @@ static LogicalResult verify(AtomicRMWOp op) { return op.emitOpError( "expected LLVM IR result type to match type for operand #1"); if (op.bin_op() == AtomicBinOp::fadd || 
op.bin_op() == AtomicBinOp::fsub) { - if (!valType.isFloatingPointTy()) + if (!mlir::LLVM::isCompatibleFloatingPointType(valType)) return op.emitOpError("expected LLVM IR floating point type"); } else if (op.bin_op() == AtomicBinOp::xchg) { - if (!valType.isIntegerTy(8) && !valType.isIntegerTy(16) && - !valType.isIntegerTy(32) && !valType.isIntegerTy(64) && - !valType.isBFloatTy() && !valType.isHalfTy() && !valType.isFloatTy() && - !valType.isDoubleTy()) + auto intType = valType.dyn_cast(); + unsigned intBitWidth = intType ? intType.getBitWidth() : 0; + if (intBitWidth != 8 && intBitWidth != 16 && intBitWidth != 32 && + intBitWidth != 64 && !valType.isa() && + !valType.isa() && !valType.isa() && + !valType.isa()) return op.emitOpError("unexpected LLVM IR type for 'xchg' bin_op"); } else { - if (!valType.isIntegerTy(8) && !valType.isIntegerTy(16) && - !valType.isIntegerTy(32) && !valType.isIntegerTy(64)) + auto intType = valType.dyn_cast(); + unsigned intBitWidth = intType ? intType.getBitWidth() : 0; + if (intBitWidth != 8 && intBitWidth != 16 && intBitWidth != 32 && + intBitWidth != 64) return op.emitOpError("expected LLVM IR integer type"); } return success(); @@ -1958,7 +1964,8 @@ static ParseResult parseAtomicCmpXchgOp(OpAsmParser &parser, parseAtomicOrdering(parser, result, "failure_ordering") || parser.parseOptionalAttrDict(result.attributes) || parser.parseColonType(type) || - parser.resolveOperand(ptr, type.getPointerTo(), result.operands) || + parser.resolveOperand(ptr, LLVM::LLVMPointerType::get(type), + result.operands) || parser.resolveOperand(cmp, type, result.operands) || parser.resolveOperand(val, type, result.operands)) return failure(); @@ -1971,18 +1978,20 @@ static ParseResult parseAtomicCmpXchgOp(OpAsmParser &parser, } static LogicalResult verify(AtomicCmpXchgOp op) { - auto ptrType = op.ptr().getType().cast(); - if (!ptrType.isPointerTy()) + auto ptrType = op.ptr().getType().cast(); + if (!ptrType) return op.emitOpError("expected LLVM IR pointer type for operand #0"); auto cmpType = op.cmp().getType().cast(); auto valType = op.val().getType().cast(); - if (cmpType != ptrType.getPointerElementTy() || cmpType != valType) + if (cmpType != ptrType.getElementType() || cmpType != valType) return op.emitOpError("expected LLVM IR element type for operand #0 to " "match type for all other operands"); - if (!valType.isPointerTy() && !valType.isIntegerTy(8) && - !valType.isIntegerTy(16) && !valType.isIntegerTy(32) && - !valType.isIntegerTy(64) && !valType.isBFloatTy() && - !valType.isHalfTy() && !valType.isFloatTy() && !valType.isDoubleTy()) + auto intType = valType.dyn_cast(); + unsigned intBitWidth = intType ? intType.getBitWidth() : 0; + if (!valType.isa() && intBitWidth != 8 && + intBitWidth != 16 && intBitWidth != 32 && intBitWidth != 64 && + !valType.isa() && !valType.isa() && + !valType.isa() && !valType.isa()) return op.emitOpError("unexpected LLVM IR type"); if (op.success_ordering() < AtomicOrdering::monotonic || op.failure_ordering() < AtomicOrdering::monotonic) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp index a89287b764e5d7..0616efb7ef3f99 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp @@ -36,129 +36,6 @@ LLVMDialect &LLVMType::getDialect() { return static_cast(Type::getDialect()); } -//----------------------------------------------------------------------------// -// Misc type utilities. 
- -llvm::TypeSize LLVMType::getPrimitiveSizeInBits() { - return llvm::TypeSwitch(*this) - .Case( - [](LLVMType) { return llvm::TypeSize::Fixed(16); }) - .Case([](LLVMType) { return llvm::TypeSize::Fixed(32); }) - .Case( - [](LLVMType) { return llvm::TypeSize::Fixed(64); }) - .Case([](LLVMIntegerType intTy) { - return llvm::TypeSize::Fixed(intTy.getBitWidth()); - }) - .Case([](LLVMType) { return llvm::TypeSize::Fixed(80); }) - .Case( - [](LLVMType) { return llvm::TypeSize::Fixed(128); }) - .Case([](LLVMVectorType t) { - llvm::TypeSize elementSize = - t.getElementType().getPrimitiveSizeInBits(); - llvm::ElementCount elementCount = t.getElementCount(); - assert(!elementSize.isScalable() && - "vector type should have fixed-width elements"); - return llvm::TypeSize(elementSize.getFixedSize() * - elementCount.getKnownMinValue(), - elementCount.isScalable()); - }) - .Default([](LLVMType ty) { - assert((ty.isa()) && - "unexpected missing support for primitive type"); - return llvm::TypeSize::Fixed(0); - }); -} - -//----------------------------------------------------------------------------// -// Integer type utilities. - -bool LLVMType::isIntegerTy(unsigned bitwidth) { - if (auto intType = dyn_cast()) - return intType.getBitWidth() == bitwidth; - return false; -} -unsigned LLVMType::getIntegerBitWidth() { - return cast().getBitWidth(); -} - -LLVMType LLVMType::getArrayElementType() { - return cast().getElementType(); -} - -//----------------------------------------------------------------------------// -// Array type utilities. - -unsigned LLVMType::getArrayNumElements() { - return cast().getNumElements(); -} - -bool LLVMType::isArrayTy() { return isa(); } - -//----------------------------------------------------------------------------// -// Vector type utilities. - -LLVMType LLVMType::getVectorElementType() { - return cast().getElementType(); -} - -unsigned LLVMType::getVectorNumElements() { - return cast().getNumElements(); -} -llvm::ElementCount LLVMType::getVectorElementCount() { - return cast().getElementCount(); -} - -bool LLVMType::isVectorTy() { return isa(); } - -//----------------------------------------------------------------------------// -// Function type utilities. - -LLVMType LLVMType::getFunctionParamType(unsigned argIdx) { - return cast().getParamType(argIdx); -} - -unsigned LLVMType::getFunctionNumParams() { - return cast().getNumParams(); -} - -LLVMType LLVMType::getFunctionResultType() { - return cast().getReturnType(); -} - -bool LLVMType::isFunctionTy() { return isa(); } - -bool LLVMType::isFunctionVarArg() { - return cast().isVarArg(); -} - -//----------------------------------------------------------------------------// -// Pointer type utilities. - -LLVMType LLVMType::getPointerTo(unsigned addrSpace) { - return LLVMPointerType::get(*this, addrSpace); -} - -LLVMType LLVMType::getPointerElementTy() { - return cast().getElementType(); -} - -bool LLVMType::isPointerTy() { return isa(); } - -//----------------------------------------------------------------------------// -// Struct type utilities. - -LLVMType LLVMType::getStructElementType(unsigned i) { - return cast().getBody()[i]; -} - -unsigned LLVMType::getStructNumElements() { - return cast().getBody().size(); -} - -bool LLVMType::isStructTy() { return isa(); } - //----------------------------------------------------------------------------// // Utilities used to generate floating point types. 
@@ -193,6 +70,10 @@ LLVMType LLVMType::getIntNTy(MLIRContext *context, unsigned numBits) { return LLVMIntegerType::get(context, numBits); } +LLVMType LLVMType::getInt8PtrTy(MLIRContext *context) { + return LLVMPointerType::get(LLVMIntegerType::get(context, 8)); +} + //----------------------------------------------------------------------------// // Utilities used to generate other miscellaneous types. @@ -221,8 +102,6 @@ LLVMType LLVMType::getVoidTy(MLIRContext *context) { return LLVMVoidType::get(context); } -bool LLVMType::isVoidTy() { return isa(); } - //----------------------------------------------------------------------------// // Creation and setting of LLVM's identified struct types @@ -470,7 +349,7 @@ LLVMStructType::verifyConstructionInvariants(Location loc, bool LLVMVectorType::isValidElementType(LLVMType type) { return type.isa() || - type.isFloatingPointTy(); + mlir::LLVM::isCompatibleFloatingPointType(type); } /// Support type casting functionality. @@ -536,3 +415,42 @@ LLVMScalableVectorType::getChecked(Location loc, LLVMType elementType, unsigned LLVMScalableVectorType::getMinNumElements() { return getImpl()->numElements; } + +//===----------------------------------------------------------------------===// +// Utility functions. +//===----------------------------------------------------------------------===// + +llvm::TypeSize mlir::LLVM::getPrimitiveTypeSizeInBits(Type type) { + assert(isCompatibleType(type) && + "expected a type compatible with the LLVM dialect"); + + return llvm::TypeSwitch(type) + .Case( + [](LLVMType) { return llvm::TypeSize::Fixed(16); }) + .Case([](LLVMType) { return llvm::TypeSize::Fixed(32); }) + .Case( + [](LLVMType) { return llvm::TypeSize::Fixed(64); }) + .Case([](LLVMIntegerType intTy) { + return llvm::TypeSize::Fixed(intTy.getBitWidth()); + }) + .Case([](LLVMType) { return llvm::TypeSize::Fixed(80); }) + .Case( + [](LLVMType) { return llvm::TypeSize::Fixed(128); }) + .Case([](LLVMVectorType t) { + llvm::TypeSize elementSize = + getPrimitiveTypeSizeInBits(t.getElementType()); + llvm::ElementCount elementCount = t.getElementCount(); + assert(!elementSize.isScalable() && + "vector type should have fixed-width elements"); + return llvm::TypeSize(elementSize.getFixedSize() * + elementCount.getKnownMinValue(), + elementCount.isScalable()); + }) + .Default([](Type ty) { + assert((ty.isa()) && + "unexpected missing support for primitive type"); + return llvm::TypeSize::Fixed(0); + }); +} diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index 707ff7c1b089ba..c202075fa2066b 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -57,8 +57,9 @@ static ParseResult parseNVVMShflSyncBflyOp(OpAsmParser &parser, for (auto &attr : result.attributes) { if (attr.first != "return_value_and_is_valid") continue; - if (type.isStructTy() && type.getStructNumElements() > 0) - type = type.getStructElementType(0); + auto structType = type.dyn_cast(); + if (structType && !structType.getBody().empty()) + type = structType.getBody()[0]; break; } diff --git a/mlir/lib/ExecutionEngine/JitRunner.cpp b/mlir/lib/ExecutionEngine/JitRunner.cpp index a323e68170c1b7..bfdae2b4588d59 100644 --- a/mlir/lib/ExecutionEngine/JitRunner.cpp +++ b/mlir/lib/ExecutionEngine/JitRunner.cpp @@ -196,19 +196,30 @@ template Error checkCompatibleReturnType(LLVM::LLVMFuncOp mainFunction); template <> Error checkCompatibleReturnType(LLVM::LLVMFuncOp mainFunction) { - if 
(!mainFunction.getType().getFunctionResultType().isIntegerTy(32)) + auto resultType = mainFunction.getType() + .cast() + .getReturnType() + .dyn_cast(); + if (!resultType || resultType.getBitWidth() != 32) return make_string_error("only single llvm.i32 function result supported"); return Error::success(); } template <> Error checkCompatibleReturnType(LLVM::LLVMFuncOp mainFunction) { - if (!mainFunction.getType().getFunctionResultType().isIntegerTy(64)) + auto resultType = mainFunction.getType() + .cast() + .getReturnType() + .dyn_cast(); + if (!resultType || resultType.getBitWidth() != 64) return make_string_error("only single llvm.i64 function result supported"); return Error::success(); } template <> Error checkCompatibleReturnType(LLVM::LLVMFuncOp mainFunction) { - if (!mainFunction.getType().getFunctionResultType().isFloatTy()) + if (!mainFunction.getType() + .cast() + .getReturnType() + .isa()) return make_string_error("only single llvm.f32 function result supported"); return Error::success(); } @@ -220,7 +231,7 @@ Error compileAndExecuteSingleReturnFunction(Options &options, ModuleOp module, if (!mainFunction || mainFunction.isExternal()) return make_string_error("entry point not found"); - if (mainFunction.getType().getFunctionNumParams() != 0) + if (mainFunction.getType().cast().getNumParams() != 0) return make_string_error("function inputs not supported"); if (Error error = checkCompatibleReturnType(mainFunction)) diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp index 7f89a41de5db72..9786751ef4b0db 100644 --- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp @@ -172,57 +172,57 @@ Type Importer::getStdTypeForAttr(LLVMType type) { if (!type) return nullptr; - if (type.isIntegerTy()) - return b.getIntegerType(type.getIntegerBitWidth()); + if (auto intType = type.dyn_cast()) + return b.getIntegerType(intType.getBitWidth()); - if (type.isFloatTy()) + if (type.isa()) return b.getF32Type(); - if (type.isDoubleTy()) + if (type.isa()) return b.getF64Type(); // LLVM vectors can only contain scalars. - if (type.isVectorTy()) { - auto numElements = type.getVectorElementCount(); + if (auto vectorType = type.dyn_cast()) { + auto numElements = vectorType.getElementCount(); if (numElements.isScalable()) { emitError(unknownLoc) << "scalable vectors not supported"; return nullptr; } - Type elementType = getStdTypeForAttr(type.getVectorElementType()); + Type elementType = getStdTypeForAttr(vectorType.getElementType()); if (!elementType) return nullptr; return VectorType::get(numElements.getKnownMinValue(), elementType); } // LLVM arrays can contain other arrays or vectors. - if (type.isArrayTy()) { + if (auto arrayType = type.dyn_cast()) { // Recover the nested array shape. SmallVector shape; - shape.push_back(type.getArrayNumElements()); - while (type.getArrayElementType().isArrayTy()) { - type = type.getArrayElementType(); - shape.push_back(type.getArrayNumElements()); + shape.push_back(arrayType.getNumElements()); + while (arrayType.getElementType().isa()) { + arrayType = arrayType.getElementType().cast(); + shape.push_back(arrayType.getNumElements()); } // If the innermost type is a vector, use the multi-dimensional vector as // attribute type. 
- if (type.getArrayElementType().isVectorTy()) { - LLVMType vectorType = type.getArrayElementType(); - auto numElements = vectorType.getVectorElementCount(); + if (auto vectorType = + arrayType.getElementType().dyn_cast()) { + auto numElements = vectorType.getElementCount(); if (numElements.isScalable()) { emitError(unknownLoc) << "scalable vectors not supported"; return nullptr; } shape.push_back(numElements.getKnownMinValue()); - Type elementType = getStdTypeForAttr(vectorType.getVectorElementType()); + Type elementType = getStdTypeForAttr(vectorType.getElementType()); if (!elementType) return nullptr; return VectorType::get(shape, elementType); } // Otherwise use a tensor. - Type elementType = getStdTypeForAttr(type.getArrayElementType()); + Type elementType = getStdTypeForAttr(arrayType.getElementType()); if (!elementType) return nullptr; return RankedTensorType::get(shape, elementType); @@ -261,7 +261,7 @@ Attribute Importer::getConstantAsAttr(llvm::Constant *value) { if (!attrType) return nullptr; - if (type.isIntegerTy()) { + if (type.isa()) { SmallVector values; values.reserve(cd->getNumElements()); for (unsigned i = 0, e = cd->getNumElements(); i < e; ++i) @@ -269,7 +269,7 @@ Attribute Importer::getConstantAsAttr(llvm::Constant *value) { return DenseElementsAttr::get(attrType, values); } - if (type.isFloatTy() || type.isDoubleTy()) { + if (type.isa() || type.isa()) { SmallVector values; values.reserve(cd->getNumElements()); for (unsigned i = 0, e = cd->getNumElements(); i < e; ++i) @@ -777,7 +777,8 @@ LogicalResult Importer::processFunction(llvm::Function *f) { instMap.clear(); unknownInstMap.clear(); - LLVMType functionType = processType(f->getFunctionType()); + auto functionType = + processType(f->getFunctionType()).dyn_cast(); if (!functionType) return failure(); @@ -805,8 +806,8 @@ LogicalResult Importer::processFunction(llvm::Function *f) { // Add function arguments to the entry block. for (auto kv : llvm::enumerate(f->args())) - instMap[&kv.value()] = blockList[0]->addArgument( - functionType.getFunctionParamType(kv.index())); + instMap[&kv.value()] = + blockList[0]->addArgument(functionType.getParamType(kv.index())); for (auto bbs : llvm::zip(*f, blockList)) { if (failed(processBasicBlock(&std::get<0>(bbs), std::get<1>(bbs)))) diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 8c650506e2d74d..ae0745b0be28f4 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -969,7 +969,7 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) { // NB: Attribute already verified to be boolean, so check if we can indeed // attach the attribute to this argument, based on its type. auto argTy = mlirArg.getType().dyn_cast(); - if (!argTy.isPointerTy()) + if (!argTy.isa()) return func.emitError( "llvm.noalias attribute attached to LLVM non-pointer argument"); if (attr.getValue()) @@ -981,7 +981,7 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) { // NB: Attribute already verified to be int, so check if we can indeed // attach the attribute to this argument, based on its type. 
auto argTy = mlirArg.getType().dyn_cast(); - if (!argTy.isPointerTy()) + if (!argTy.isa()) return func.emitError( "llvm.align attribute attached to LLVM non-pointer argument"); llvmArg.addAttrs( diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index 9461ebbd9ede92..d02c252c0bf360 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -98,7 +98,7 @@ func @gep_non_function_type(%pos : !llvm.i64, %base : !llvm.ptr) { // ----- func @load_non_llvm_type(%foo : memref) { - // expected-error@+1 {{expected LLVM IR dialect type}} + // expected-error@+1 {{expected LLVM pointer type}} llvm.load %foo : memref } @@ -112,7 +112,7 @@ func @load_non_ptr_type(%foo : !llvm.float) { // ----- func @store_non_llvm_type(%foo : memref, %bar : !llvm.float) { - // expected-error@+1 {{expected LLVM IR dialect type}} + // expected-error@+1 {{expected LLVM pointer type}} llvm.store %bar, %foo : memref } @@ -267,7 +267,7 @@ func @insertvalue_array_out_of_bounds() { // ----- func @insertvalue_wrong_nesting() { - // expected-error@+1 {{expected wrapped LLVM IR structure/array type}} + // expected-error@+1 {{expected LLVM IR structure/array type}} llvm.insertvalue %a, %b[0,0] : !llvm.struct<(i32)> } @@ -311,7 +311,7 @@ func @extractvalue_array_out_of_bounds() { // ----- func @extractvalue_wrong_nesting() { - // expected-error@+1 {{expected wrapped LLVM IR structure/array type}} + // expected-error@+1 {{expected LLVM IR structure/array type}} llvm.extractvalue %b[0,0] : !llvm.struct<(i32)> } From 1c19804ebf4c97666a5c7de86ca7432c6b020205 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 22 Dec 2020 15:14:30 -0500 Subject: [PATCH 09/10] [OpenMP] Add OpenMP Documentation for Libomptarget environment variables Add support to the OpenMP web pages for environment variables supported by Libomptarget and their usage. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D93723 --- openmp/docs/design/Runtimes.rst | 82 +++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst index 61491060ea0475..39ed256c48569f 100644 --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -16,6 +16,88 @@ the LLVM/OpenMP host runtime, aka. `libomp.so`, is available as a `pdf LLVM/OpenMP Target Host Runtime (``libomptarget``) -------------------------------------------------- +Environment Variables +^^^^^^^^^^^^^^^^^^^^^ + +``libomptarget`` uses environment variables to control different features of the +library at runtime. This allows the user to obtain useful runtime information as +well as enable or disable certain features. A full list of supported environment +variables is defined below. + + * ``LIBOMPTARGET_DEBUG=`` + * ``LIBOMPTARGET_PROFILE=`` + * ``LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD=`` + * ``LIBOMPTARGET_INFO=`` + +LIBOMPTARGET_DEBUG +"""""""""""""""""" + +``LIBOMPTARGET_DEBUG`` controls whether or not debugging information will be +displayed. This feature is only availible if ``libomptarget`` was built with +``-DOMPTARGET_DEBUG``. The debugging output provided is intended for use by +``libomptarget`` developers. More user-friendly output is presented when using +``LIBOMPTARGET_INFO``. + +LIBOMPTARGET_PROFILE +"""""""""""""""""""" +``LIBOMPTARGET_PROFILE`` allows ``libomptarget`` to generate time profile output +similar to Clang's ``-ftime-trace`` option. 
This generates a JSON file based on +`Chrome Tracing`_ that can be viewed with ``chrome://tracing`` or the +`Speedscope App`_. Building this feature depends on the `LLVM Support Library`_ +for time trace output. Using this library is enabled by default when building +using the CMake option ``OPENMP_ENABLE_LIBOMPTARGET_PROFILING``. The output will +be saved to the filename specified by the environment variable. + +.. _`Chrome Tracing`: https://www.chromium.org/developers/how-tos/trace-event-profiling-tool + +.. _`Speedscope App`: https://www.speedscope.app/ + +.. _`LLVM Support Library`: https://llvm.org/docs/SupportLibrary.html + +LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD +""""""""""""""""""""""""""""""""""""" + +``LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD`` sets the threshold size for which the +``libomptarget`` memory manager will handle the allocation. Any allocations +larger than this threshold will not use the memory manager and be freed after +the device kernel exits. The default threshold value is ``8Kb``. If +``LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD`` is set to ``0`` the memory manager +will be completely disabled. + +LIBOMPTARGET_INFO +""""""""""""""""" + +``LIBOMPTARGET_INFO`` allows the user to request different types of runtime +information from ``libomptarget``. ``LIBOMPTARGET_INFO`` uses a 32-bit field to +enable or disable different types of information. This includes information +about data-mappings and kernel execution. It is recommended to build your +application with debugging information enabled; this will enable filenames and +variable declarations in the information messages. OpenMP Debugging information +is enabled at any level of debugging so a full debug runtime is not required. +For minimal debugging information compile with `-gline-tables-only`, or compile +with `-g` for full debug information. A full list of flags supported by +``LIBOMPTARGET_INFO`` is given below. + + * Print all data arguments upon entering an OpenMP device kernel: ``0x01`` + * Indicate when a mapped address already exists in the device mapping table: + ``0x02`` + * Dump the contents of the device pointer map at kernel exit: ``0x04`` + * Print OpenMP kernel information from device plugins: ``0x10`` + +Any combination of these flags can be used by setting the appropriate bits. For +example, to enable printing all data active in an OpenMP target region along +with ``CUDA`` information, run the following ``bash`` command. + +.. code-block:: console + + $ env LIBOMPTARGET_INFO=$((0x1 | 0x10)) ./your-application + +Or, to enable every flag, run with every bit set. + +.. code-block:: console + + $ env LIBOMPTARGET_INFO=-1 ./your-application + LLVM/OpenMP Target Host Runtime Plugins (``libomptarget.rtl.XXXX``) ------------------------------------------------------------------- From 75a3f326c3d874853031d8bedd1d00127c835103 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Tue, 22 Dec 2020 10:35:15 -0800 Subject: [PATCH 10/10] [IR] Add an ImplicitLocOpBuilder helper class for building IR with the same loc. One common situation is to create a lot of IR at a well known location, e.g. when doing a big rewrite from one dialect to another where you're expanding ops out into lots of other ops. For these sorts of situations, it is annoying to pass the location into every create call. As we discussed in a few threads on the forum, a way to help with this is to produce a new sort of builder that holds a location and provides it to each of the create<> calls automatically.
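As a rough usage sketch (the helper function and the particular ops chosen here are illustrative only, not part of the patch; the builder API is the one introduced below), the pattern looks like this:

  // Sketch: the location is handed to the builder once, and every create<>
  // call reuses it instead of taking a Location argument.
  static void emitReturnZero(Location loc, Block *block) {
    auto b = ImplicitLocOpBuilder::atBlockEnd(loc, block);
    auto i32 = LLVM::LLVMType::getInt32Ty(b.getContext());
    Value zero = b.create<LLVM::ConstantOp>(i32, b.getI32IntegerAttr(0));
    b.create<LLVM::ReturnOp>(ValueRange({zero}));
  }

Compared with a plain OpBuilder, the only change for callers is supplying the Location up front when constructing the builder.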
This patch implements an ImplicitLocOpBuilder class that does this. We've had good experience with this in the CIRCT project, and it makes sense to upstream to MLIR. I picked a random pass to adopt it to show the impact, but I don't think there is any particular need to force adopt it in the codebase. Differential Revision: https://reviews.llvm.org/D93717 --- mlir/include/mlir/IR/ImplicitLocOpBuilder.h | 123 ++++++++++++++++++ .../Conversion/AsyncToLLVM/AsyncToLLVM.cpp | 96 +++++++------- 2 files changed, 170 insertions(+), 49 deletions(-) create mode 100644 mlir/include/mlir/IR/ImplicitLocOpBuilder.h diff --git a/mlir/include/mlir/IR/ImplicitLocOpBuilder.h b/mlir/include/mlir/IR/ImplicitLocOpBuilder.h new file mode 100644 index 00000000000000..2dc7c34f4e8553 --- /dev/null +++ b/mlir/include/mlir/IR/ImplicitLocOpBuilder.h @@ -0,0 +1,123 @@ +//===- ImplicitLocOpBuilder.h - Convenience OpBuilder -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Helper class to create ops with a modally set location. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_IR_IMPLICITLOCOPBUILDER_H +#define MLIR_IR_IMPLICITLOCOPBUILDER_H + +#include "mlir/IR/Builders.h" + +namespace mlir { + +/// ImplictLocOpBuilder maintains a 'current location', allowing use of the +/// create<> method without specifying the location. It is otherwise the same +/// as OpBuilder. +class ImplicitLocOpBuilder : public mlir::OpBuilder { +public: + /// Create an ImplicitLocOpBuilder using the insertion point and listener from + /// an existing OpBuilder. + ImplicitLocOpBuilder(Location loc, const OpBuilder &builder) + : OpBuilder(builder), curLoc(loc) {} + + /// OpBuilder has a bunch of convenience constructors - we support them all + /// with the additional Location. + template + ImplicitLocOpBuilder(Location loc, T &&operand, Listener *listener = nullptr) + : OpBuilder(std::forward(operand), listener), curLoc(loc) {} + + ImplicitLocOpBuilder(Location loc, Block *block, Block::iterator insertPoint, + Listener *listener = nullptr) + : OpBuilder(block, insertPoint, listener), curLoc(loc) {} + + /// Create a builder and set the insertion point to before the first operation + /// in the block but still inside the block. + static ImplicitLocOpBuilder atBlockBegin(Location loc, Block *block, + Listener *listener = nullptr) { + return ImplicitLocOpBuilder(loc, block, block->begin(), listener); + } + + /// Create a builder and set the insertion point to after the last operation + /// in the block but still inside the block. + static ImplicitLocOpBuilder atBlockEnd(Location loc, Block *block, + Listener *listener = nullptr) { + return ImplicitLocOpBuilder(loc, block, block->end(), listener); + } + + /// Create a builder and set the insertion point to before the block + /// terminator. + static ImplicitLocOpBuilder atBlockTerminator(Location loc, Block *block, + Listener *listener = nullptr) { + auto *terminator = block->getTerminator(); + assert(terminator != nullptr && "the block has no terminator"); + return ImplicitLocOpBuilder(loc, block, Block::iterator(terminator), + listener); + } + + /// Accessors for the implied location. 
+ Location getLoc() const { return curLoc; } + void setLoc(Location loc) { curLoc = loc; } + + // We allow clients to use the explicit-loc version of create as well. + using OpBuilder::create; + using OpBuilder::createOrFold; + + /// Create an operation of specific op type at the current insertion point and + /// location. + template + OpTy create(Args &&... args) { + return OpBuilder::create(curLoc, std::forward(args)...); + } + + /// Create an operation of specific op type at the current insertion point, + /// and immediately try to fold it. This functions populates 'results' with + /// the results after folding the operation. + template + void createOrFold(llvm::SmallVectorImpl &results, Args &&... args) { + OpBuilder::createOrFold(results, curLoc, std::forward(args)...); + } + + /// Overload to create or fold a single result operation. + template + typename std::enable_if(), + Value>::type + createOrFold(Args &&... args) { + return OpBuilder::createOrFold(curLoc, std::forward(args)...); + } + + /// Overload to create or fold a zero result operation. + template + typename std::enable_if(), + OpTy>::type + createOrFold(Args &&... args) { + return OpBuilder::createOrFold(curLoc, std::forward(args)...); + } + + /// This builder can also be used to emit diagnostics to the current location. + mlir::InFlightDiagnostic + emitError(const llvm::Twine &message = llvm::Twine()) { + return mlir::emitError(curLoc, message); + } + mlir::InFlightDiagnostic + emitWarning(const llvm::Twine &message = llvm::Twine()) { + return mlir::emitWarning(curLoc, message); + } + mlir::InFlightDiagnostic + emitRemark(const llvm::Twine &message = llvm::Twine()) { + return mlir::emitRemark(curLoc, message); + } + +private: + Location curLoc; +}; + +} // namespace mlir + +#endif // MLIR_IR_IMPLICITLOCOPBUILDER_H \ No newline at end of file diff --git a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp index 65545d8ab2de1d..2415924557db7b 100644 --- a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp +++ b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp @@ -13,7 +13,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/BlockAndValueMapping.h" -#include "mlir/IR/Builders.h" +#include "mlir/IR/ImplicitLocOpBuilder.h" #include "mlir/IR/TypeUtilities.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" @@ -112,12 +112,13 @@ struct AsyncAPI { // Adds Async Runtime C API declarations to the module. static void addAsyncRuntimeApiDeclarations(ModuleOp module) { - auto builder = OpBuilder::atBlockTerminator(module.getBody()); + auto builder = ImplicitLocOpBuilder::atBlockTerminator(module.getLoc(), + module.getBody()); auto addFuncDecl = [&](StringRef name, FunctionType type) { if (module.lookupSymbol(name)) return; - builder.create(module.getLoc(), name, type).setPrivate(); + builder.create(name, type).setPrivate(); }; MLIRContext *ctx = module.getContext(); @@ -149,13 +150,13 @@ static constexpr const char *kCoroFree = "llvm.coro.free"; static constexpr const char *kCoroResume = "llvm.coro.resume"; /// Adds an LLVM function declaration to a module. 
-static void addLLVMFuncDecl(ModuleOp module, OpBuilder &builder, StringRef name, - LLVM::LLVMType ret, +static void addLLVMFuncDecl(ModuleOp module, ImplicitLocOpBuilder &builder, + StringRef name, LLVM::LLVMType ret, ArrayRef params) { if (module.lookupSymbol(name)) return; LLVM::LLVMType type = LLVM::LLVMType::getFunctionTy(ret, params, false); - builder.create(module.getLoc(), name, type); + builder.create(name, type); } /// Adds coroutine intrinsics declarations to the module. @@ -163,7 +164,8 @@ static void addCoroutineIntrinsicsDeclarations(ModuleOp module) { using namespace mlir::LLVM; MLIRContext *ctx = module.getContext(); - OpBuilder builder(module.getBody()->getTerminator()); + ImplicitLocOpBuilder builder(module.getLoc(), + module.getBody()->getTerminator()); auto token = LLVMTokenType::get(ctx); auto voidTy = LLVMType::getVoidTy(ctx); @@ -196,7 +198,8 @@ static void addCRuntimeDeclarations(ModuleOp module) { using namespace mlir::LLVM; MLIRContext *ctx = module.getContext(); - OpBuilder builder(module.getBody()->getTerminator()); + ImplicitLocOpBuilder builder(module.getLoc(), + module.getBody()->getTerminator()); auto voidTy = LLVMType::getVoidTy(ctx); auto i64 = LLVMType::getInt64Ty(ctx); @@ -232,13 +235,13 @@ static void addResumeFunction(ModuleOp module) { resumeOp.setPrivate(); auto *block = resumeOp.addEntryBlock(); - OpBuilder blockBuilder = OpBuilder::atBlockEnd(block); + auto blockBuilder = ImplicitLocOpBuilder::atBlockEnd(loc, block); - blockBuilder.create(loc, TypeRange(), + blockBuilder.create(TypeRange(), blockBuilder.getSymbolRefAttr(kCoroResume), resumeOp.getArgument(0)); - blockBuilder.create(loc, ValueRange()); + blockBuilder.create(ValueRange()); } //===----------------------------------------------------------------------===// @@ -302,13 +305,12 @@ static CoroMachinery setupCoroMachinery(FuncOp func) { Block *entryBlock = func.addEntryBlock(); Location loc = func.getBody().getLoc(); - OpBuilder builder = OpBuilder::atBlockBegin(entryBlock); + auto builder = ImplicitLocOpBuilder::atBlockBegin(loc, entryBlock); // ------------------------------------------------------------------------ // // Allocate async tokens/values that we will return from a ramp function. // ------------------------------------------------------------------------ // - auto createToken = - builder.create(loc, kCreateToken, TokenType::get(ctx)); + auto createToken = builder.create(kCreateToken, TokenType::get(ctx)); // ------------------------------------------------------------------------ // // Initialize coroutine: allocate frame, get coroutine handle. @@ -316,28 +318,28 @@ static CoroMachinery setupCoroMachinery(FuncOp func) { // Constants for initializing coroutine frame. auto constZero = - builder.create(loc, i32, builder.getI32IntegerAttr(0)); + builder.create(i32, builder.getI32IntegerAttr(0)); auto constFalse = - builder.create(loc, i1, builder.getBoolAttr(false)); - auto nullPtr = builder.create(loc, i8Ptr); + builder.create(i1, builder.getBoolAttr(false)); + auto nullPtr = builder.create(i8Ptr); // Get coroutine id: @llvm.coro.id auto coroId = builder.create( - loc, token, builder.getSymbolRefAttr(kCoroId), + token, builder.getSymbolRefAttr(kCoroId), ValueRange({constZero, nullPtr, nullPtr, nullPtr})); // Get coroutine frame size: @llvm.coro.size.i64 auto coroSize = builder.create( - loc, i64, builder.getSymbolRefAttr(kCoroSizeI64), ValueRange()); + i64, builder.getSymbolRefAttr(kCoroSizeI64), ValueRange()); // Allocate memory for coroutine frame. 
- auto coroAlloc = builder.create( - loc, i8Ptr, builder.getSymbolRefAttr(kMalloc), - ValueRange(coroSize.getResult(0))); + auto coroAlloc = + builder.create(i8Ptr, builder.getSymbolRefAttr(kMalloc), + ValueRange(coroSize.getResult(0))); // Begin a coroutine: @llvm.coro.begin auto coroHdl = builder.create( - loc, i8Ptr, builder.getSymbolRefAttr(kCoroBegin), + i8Ptr, builder.getSymbolRefAttr(kCoroBegin), ValueRange({coroId.getResult(0), coroAlloc.getResult(0)})); Block *cleanupBlock = func.addBlock(); @@ -350,15 +352,14 @@ static CoroMachinery setupCoroMachinery(FuncOp func) { // Get a pointer to the coroutine frame memory: @llvm.coro.free. auto coroMem = builder.create( - loc, i8Ptr, builder.getSymbolRefAttr(kCoroFree), + i8Ptr, builder.getSymbolRefAttr(kCoroFree), ValueRange({coroId.getResult(0), coroHdl.getResult(0)})); // Free the memory. - builder.create(loc, TypeRange(), - builder.getSymbolRefAttr(kFree), + builder.create(TypeRange(), builder.getSymbolRefAttr(kFree), ValueRange(coroMem.getResult(0))); // Branch into the suspend block. - builder.create(loc, suspendBlock); + builder.create(suspendBlock); // ------------------------------------------------------------------------ // // Coroutine suspend block: mark the end of a coroutine and return allocated @@ -367,17 +368,17 @@ static CoroMachinery setupCoroMachinery(FuncOp func) { builder.setInsertionPointToStart(suspendBlock); // Mark the end of a coroutine: @llvm.coro.end. - builder.create(loc, i1, builder.getSymbolRefAttr(kCoroEnd), + builder.create(i1, builder.getSymbolRefAttr(kCoroEnd), ValueRange({coroHdl.getResult(0), constFalse})); // Return created `async.token` from the suspend block. This will be the // return value of a coroutine ramp function. - builder.create(loc, createToken.getResult(0)); + builder.create(createToken.getResult(0)); // Branch from the entry block to the cleanup block to create a valid CFG. builder.setInsertionPointToEnd(entryBlock); - builder.create(loc, cleanupBlock); + builder.create(cleanupBlock); // `async.await` op lowering will create resume blocks for async // continuations, and will conditionally branch to cleanup or suspend blocks. @@ -471,8 +472,6 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { MLIRContext *ctx = module.getContext(); Location loc = execute.getLoc(); - OpBuilder moduleBuilder(module.getBody()->getTerminator()); - // Collect all outlined function inputs. llvm::SetVector functionInputs(execute.dependencies().begin(), execute.dependencies().end()); @@ -484,13 +483,13 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { SmallVector inputTypes(typesRange.begin(), typesRange.end()); auto outputTypes = execute.getResultTypes(); - auto funcType = moduleBuilder.getFunctionType(inputTypes, outputTypes); + auto funcType = FunctionType::get(ctx, inputTypes, outputTypes); auto funcAttrs = ArrayRef(); // TODO: Derive outlined function name from the parent FuncOp (support // multiple nested async.execute operations). FuncOp func = FuncOp::create(loc, kAsyncFnPrefix, funcType, funcAttrs); - symbolTable.insert(func, moduleBuilder.getInsertionPoint()); + symbolTable.insert(func, Block::iterator(module.getBody()->getTerminator())); SymbolTable::setSymbolVisibility(func, SymbolTable::Visibility::Private); @@ -502,21 +501,21 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { // Async execute API (execution will be resumed in a thread managed by the // async runtime). 
Block *entryBlock = &func.getBlocks().front(); - OpBuilder builder = OpBuilder::atBlockTerminator(entryBlock); + auto builder = ImplicitLocOpBuilder::atBlockTerminator(loc, entryBlock); // A pointer to coroutine resume intrinsic wrapper. auto resumeFnTy = AsyncAPI::resumeFunctionType(ctx); auto resumePtr = builder.create( - loc, LLVM::LLVMPointerType::get(resumeFnTy), kResume); + LLVM::LLVMPointerType::get(resumeFnTy), kResume); // Save the coroutine state: @llvm.coro.save auto coroSave = builder.create( - loc, LLVM::LLVMTokenType::get(ctx), builder.getSymbolRefAttr(kCoroSave), + LLVM::LLVMTokenType::get(ctx), builder.getSymbolRefAttr(kCoroSave), ValueRange({coro.coroHandle})); // Call async runtime API to execute a coroutine in the managed thread. SmallVector executeArgs = {coro.coroHandle, resumePtr.res()}; - builder.create(loc, TypeRange(), kExecute, executeArgs); + builder.create(TypeRange(), kExecute, executeArgs); // Split the entry block before the terminator. auto *terminatorOp = entryBlock->getTerminator(); @@ -528,7 +527,7 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { // Await on all dependencies before starting to execute the body region. builder.setInsertionPointToStart(resume); for (size_t i = 0; i < execute.dependencies().size(); ++i) - builder.create(loc, func.getArgument(i)); + builder.create(func.getArgument(i)); // Map from function inputs defined above the execute op to the function // arguments. @@ -540,17 +539,16 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { // to async runtime to emplace the result token. for (Operation &op : execute.body().getOps()) { if (isa(op)) { - builder.create(loc, kEmplaceToken, TypeRange(), coro.asyncToken); + builder.create(kEmplaceToken, TypeRange(), coro.asyncToken); continue; } builder.clone(op, valueMapping); } // Replace the original `async.execute` with a call to outlined function. - OpBuilder callBuilder(execute); - auto callOutlinedFunc = - callBuilder.create(loc, func.getName(), execute.getResultTypes(), - functionInputs.getArrayRef()); + ImplicitLocOpBuilder callBuilder(loc, execute); + auto callOutlinedFunc = callBuilder.create( + func.getName(), execute.getResultTypes(), functionInputs.getArrayRef()); execute.replaceAllUsesWith(callOutlinedFunc.getResults()); execute.erase(); @@ -744,24 +742,24 @@ class AwaitOpLoweringBase : public ConversionPattern { if (isInCoroutine) { const CoroMachinery &coro = outlined->getSecond(); - OpBuilder builder(op, rewriter.getListener()); + ImplicitLocOpBuilder builder(loc, op, rewriter.getListener()); MLIRContext *ctx = op->getContext(); // A pointer to coroutine resume intrinsic wrapper. auto resumeFnTy = AsyncAPI::resumeFunctionType(ctx); auto resumePtr = builder.create( - loc, LLVM::LLVMPointerType::get(resumeFnTy), kResume); + LLVM::LLVMPointerType::get(resumeFnTy), kResume); // Save the coroutine state: @llvm.coro.save auto coroSave = builder.create( - loc, LLVM::LLVMTokenType::get(ctx), - builder.getSymbolRefAttr(kCoroSave), ValueRange(coro.coroHandle)); + LLVM::LLVMTokenType::get(ctx), builder.getSymbolRefAttr(kCoroSave), + ValueRange(coro.coroHandle)); // Call async runtime API to resume a coroutine in the managed thread when // the async await argument becomes ready. SmallVector awaitAndExecuteArgs = {operands[0], coro.coroHandle, resumePtr.res()}; - builder.create(loc, TypeRange(), coroAwaitFuncName, + builder.create(TypeRange(), coroAwaitFuncName, awaitAndExecuteArgs); Block *suspended = op->getBlock();