From f8b7a65395a07073feff367145965214d95ba99a Mon Sep 17 00:00:00 2001
From: Petr Kurapov
Date: Thu, 10 Oct 2024 14:04:52 +0200
Subject: [PATCH] [MLIR][GPU-LLVM] Add in-pass signature update for opencl kernels (#105664)

Default to Global address space for memrefs that do not have an explicit
address space set in the IR.

---------

Co-authored-by: Victor Perez
Co-authored-by: Jakub Kuderski
Co-authored-by: Victor Perez
---
 .../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp  | 49 ++++++++++++++++++
 .../GPUToLLVMSPV/gpu-to-llvm-spv.mlir         | 50 ++++++++++++++++---
 2 files changed, 93 insertions(+), 6 deletions(-)

diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 544f1f4a4f6a7..bb6a38c0e76ed 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -34,6 +34,8 @@
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/FormatVariadic.h"
 
+#define DEBUG_TYPE "gpu-to-llvm-spv"
+
 using namespace mlir;
 
 namespace mlir {
@@ -316,6 +318,38 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
   }
 };
 
+class MemorySpaceToOpenCLMemorySpaceConverter final : public TypeConverter {
+public:
+  MemorySpaceToOpenCLMemorySpaceConverter(MLIRContext *ctx) {
+    addConversion([](Type t) { return t; });
+    addConversion([ctx](BaseMemRefType memRefType) -> std::optional<Type> {
+      // Attach global addr space attribute to memrefs with no addr space attr
+      Attribute memSpaceAttr = memRefType.getMemorySpace();
+      if (memSpaceAttr)
+        return std::nullopt;
+
+      unsigned globalAddrspace = storageClassToAddressSpace(
+          spirv::ClientAPI::OpenCL, spirv::StorageClass::CrossWorkgroup);
+      Attribute addrSpaceAttr =
+          IntegerAttr::get(IntegerType::get(ctx, 64), globalAddrspace);
+      if (auto rankedType = dyn_cast<MemRefType>(memRefType)) {
+        return MemRefType::get(memRefType.getShape(),
+                               memRefType.getElementType(),
+                               rankedType.getLayout(), addrSpaceAttr);
+      }
+      return UnrankedMemRefType::get(memRefType.getElementType(),
+                                     addrSpaceAttr);
+    });
+    addConversion([this](FunctionType type) {
+      auto inputs = llvm::map_to_vector(
+          type.getInputs(), [this](Type ty) { return convertType(ty); });
+      auto results = llvm::map_to_vector(
+          type.getResults(), [this](Type ty) { return convertType(ty); });
+      return FunctionType::get(type.getContext(), inputs, results);
+    });
+  }
+};
+
 //===----------------------------------------------------------------------===//
 // Subgroup query ops.
 //===----------------------------------------------------------------------===//
 
@@ -382,6 +416,21 @@ struct GPUToLLVMSPVConversionPass final
     LLVMTypeConverter converter(context, options);
     LLVMConversionTarget target(*context);
 
+    // Force OpenCL address spaces when they are not present
+    {
+      MemorySpaceToOpenCLMemorySpaceConverter converter(context);
+      AttrTypeReplacer replacer;
+      replacer.addReplacement([&converter](BaseMemRefType origType)
+                                  -> std::optional<BaseMemRefType> {
+        return converter.convertType<BaseMemRefType>(origType);
+      });
+
+      replacer.recursivelyReplaceElementsIn(getOperation(),
+                                            /*replaceAttrs=*/true,
+                                            /*replaceLocs=*/false,
+                                            /*replaceTypes=*/true);
+    }
+
     target.addIllegalOp<

diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
-  // CHECK-64: llvm.func spir_kernelcc @kernel_with_conv_args(%{{.*}}: i64, %{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i64) attributes {gpu.kernel} {
-  // CHECK-32: llvm.func spir_kernelcc @kernel_with_conv_args(%{{.*}}: i32, %{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i32) attributes {gpu.kernel} {
+  // CHECK-64: llvm.func spir_kernelcc @kernel_with_conv_args(%{{.*}}: i64, %{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i64) attributes {gpu.kernel} {
+  // CHECK-32: llvm.func spir_kernelcc @kernel_with_conv_args(%{{.*}}: i32, %{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i32) attributes {gpu.kernel} {
   gpu.func @kernel_with_conv_args(%arg0: index, %arg1: memref<f32>) kernel {
     gpu.return
   }
 
-  // CHECK-64: llvm.func spir_kernelcc @kernel_with_sized_memref(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64) attributes {gpu.kernel} {
-  // CHECK-32: llvm.func spir_kernelcc @kernel_with_sized_memref(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32) attributes {gpu.kernel} {
+  // CHECK-64: llvm.func spir_kernelcc @kernel_with_sized_memref(%{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64) attributes {gpu.kernel} {
+  // CHECK-32: llvm.func spir_kernelcc @kernel_with_sized_memref(%{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32) attributes {gpu.kernel} {
   gpu.func @kernel_with_sized_memref(%arg0: memref<1xindex>) kernel {
     gpu.return
   }
 
-  // CHECK-64: llvm.func spir_kernelcc @kernel_with_ND_memref(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64) attributes {gpu.kernel} {
-  // CHECK-32: llvm.func spir_kernelcc @kernel_with_ND_memref(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32) attributes {gpu.kernel} {
+  // CHECK-64: llvm.func spir_kernelcc @kernel_with_ND_memref(%{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64) attributes {gpu.kernel} {
+  // CHECK-32: llvm.func spir_kernelcc @kernel_with_ND_memref(%{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32) attributes {gpu.kernel} {
   gpu.func @kernel_with_ND_memref(%arg0: memref<128x128x128xindex>) kernel {
     gpu.return
   }
@@ -566,6 +566,44 @@ gpu.module @kernels {
 
 // -----
 
+gpu.module @kernels {
+// CHECK: llvm.func spir_funccc @_Z12get_group_idj(i32)
+// CHECK-LABEL: llvm.func spir_funccc @no_address_spaces(
+// CHECK-SAME:    %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-SAME:    %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-SAME:    %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+  gpu.func @no_address_spaces(%arg0: memref<f32>, %arg1: memref<f32, #gpu.address_space<global>>, %arg2: memref<f32>) {
+    gpu.return
+  }
+
+// CHECK-LABEL: llvm.func spir_kernelcc @no_address_spaces_complex(
+// CHECK-SAME:    %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-SAME:    %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK:         func.call @no_address_spaces_callee(%{{[0-9]+}}, %{{[0-9]+}})
+// CHECK-SAME:      : (memref<2x2xf32, 1>, memref<4xf32, 1>)
+  gpu.func @no_address_spaces_complex(%arg0: memref<2x2xf32>, %arg1: memref<4xf32>) kernel {
+    func.call @no_address_spaces_callee(%arg0, %arg1) : (memref<2x2xf32>, memref<4xf32>) -> ()
+    gpu.return
+  }
+// CHECK-LABEL: func.func @no_address_spaces_callee(
+// CHECK-SAME:    [[ARG0:%.*]]: memref<2x2xf32, 1>
+// CHECK-SAME:    [[ARG1:%.*]]: memref<4xf32, 1>
+// CHECK:         [[C0:%.*]] = llvm.mlir.constant(0 : i32) : i32
+// CHECK:         [[I0:%.*]] = llvm.call spir_funccc @_Z12get_group_idj([[C0]]) {
+// CHECK-32:      [[I1:%.*]] = builtin.unrealized_conversion_cast [[I0]] : i32 to index
+// CHECK-64:      [[I1:%.*]] = builtin.unrealized_conversion_cast [[I0]] : i64 to index
+// CHECK:         [[LD:%.*]] = memref.load [[ARG0]]{{\[}}[[I1]], [[I1]]{{\]}} : memref<2x2xf32, 1>
+// CHECK:         memref.store [[LD]], [[ARG1]]{{\[}}[[I1]]{{\]}} : memref<4xf32, 1>
+  func.func @no_address_spaces_callee(%arg0: memref<2x2xf32>, %arg1: memref<4xf32>) {
+    %block_id = gpu.block_id x
+    %0 = memref.load %arg0[%block_id, %block_id] : memref<2x2xf32>
+    memref.store %0, %arg1[%block_id] : memref<4xf32>
+    func.return
+  }
+}
+
+// -----
+
 // Lowering of subgroup query operations
 
 // CHECK-DAG: llvm.func spir_funccc @_Z18get_sub_group_size() -> i32 attributes {no_unwind, will_return}
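
Note (not part of the patch): a minimal, hypothetical MLIR input illustrating the case this change targets; the module and function names below are made up for illustration.

  // A kernel whose memref argument carries no explicit memory space.
  gpu.module @example {
    gpu.func @scale(%arg0: memref<16xf32>) kernel {
      gpu.return
    }
  }

With the in-pass signature update, such memrefs default to the OpenCL global (CrossWorkgroup) address space before lowering, so on a 64-bit target the kernel's argument list becomes (!llvm.ptr<1>, !llvm.ptr<1>, i64, i64, i64) rather than the plain !llvm.ptr arguments produced previously (assuming the pass is invoked as convert-gpu-to-llvm-spv inside a gpu.module pass pipeline, as in the CHECK lines above).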