From 0161b327b78a0bc8dbdd3396d1375ad861934cea Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Fri, 20 Aug 2021 16:11:13 +0300 Subject: [PATCH] Fix production of OpPtrCastToGeneric instruction (#1163) In LLVM, it is valid for the result pointer of an address space cast to differ from the source pointer not only in address space but also in element type. However, the SPIR-V spec states that for instructions that change Storage Class, Result Type and Pointer must point to the same type. So this patch adds a regularization step that inserts an additional bitcast when an address space cast also changes the pointer element type, so that the result can be translated to valid SPIR-V that is accepted by spirv-val. --- lib/SPIRV/SPIRVRegularizeLLVM.cpp | 19 +++++++++++++++++++ test/transcoding/enqueue_kernel.cl | 10 +++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/lib/SPIRV/SPIRVRegularizeLLVM.cpp b/lib/SPIRV/SPIRVRegularizeLLVM.cpp index 59a2f868f1..27b88e8932 100644 --- a/lib/SPIRV/SPIRVRegularizeLLVM.cpp +++ b/lib/SPIRV/SPIRVRegularizeLLVM.cpp @@ -351,6 +351,25 @@ bool SPIRVRegularizeLLVMBase::regularize() { II.setMetadata(MDName, nullptr); } } + // Add an additional bitcast in case address space cast also changes + // pointer element type. 
+ if (auto *ASCast = dyn_cast(&II)) { + Type *DestTy = ASCast->getDestTy(); + Type *SrcTy = ASCast->getSrcTy(); + if (DestTy->getPointerElementType() != + SrcTy->getPointerElementType()) { + PointerType *InterTy = + PointerType::get(DestTy->getPointerElementType(), + SrcTy->getPointerAddressSpace()); + BitCastInst *NewBCast = new BitCastInst( + ASCast->getPointerOperand(), InterTy, /*NameStr=*/"", ASCast); + AddrSpaceCastInst *NewASCast = + new AddrSpaceCastInst(NewBCast, DestTy, /*NameStr=*/"", ASCast); + ToErase.push_back(ASCast); + ASCast->dropAllReferences(); + ASCast->replaceAllUsesWith(NewASCast); + } + } if (auto Cmpxchg = dyn_cast(&II)) { // Transform: // %1 = cmpxchg i32* %ptr, i32 %comparator, i32 %0 seq_cst acquire diff --git a/test/transcoding/enqueue_kernel.cl b/test/transcoding/enqueue_kernel.cl index 3e810807d7..685d35259b 100644 --- a/test/transcoding/enqueue_kernel.cl +++ b/test/transcoding/enqueue_kernel.cl @@ -2,6 +2,7 @@ // RUN: llvm-spirv %t.bc -spirv-text -o %t.spv.txt // RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV // RUN: llvm-spirv %t.bc -o %t.spv +// RUN: spirv-val %t.spv // RUN: llvm-spirv -r %t.spv -o %t.rev.bc // RUN: llvm-dis %t.rev.bc // RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-LLVM @@ -60,7 +61,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i, char c0) { // [[BlockKer1]] [[BlockLit1]] [[ConstInt17]] [[ConstInt8]] // CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to %struct.__opencl_block_literal_generic* - // CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)* + // CHECK-LLVM: [[InterCast2:%[0-9]+]] = bitcast %struct.__opencl_block_literal_generic* [[Block2]] to i8 + // CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast i8* [[InterCast2]] to i8 addrspace(4)* // CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)* // 
CHECK-LLVM: call i32 @__enqueue_kernel_basic(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]]) enqueue_kernel(default_queue, flags, ndrange, @@ -79,7 +81,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i, char c0) { // [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]] // CHECK-LLVM: [[Block3:%[0-9]+]] = bitcast [[BlockTy3]]* %block4 to %struct.__opencl_block_literal_generic* - // CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block3]] to i8 addrspace(4) + // CHECK-LLVM: [[InterCast3:%[0-9]+]] = bitcast %struct.__opencl_block_literal_generic* [[Block3]] to i8 + // CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = addrspacecast i8* [[InterCast3]] to i8 addrspace(4) // CHECK-LLVM: [[BlockInv3:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8 addrspace(4)* // CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3Ptr]]) enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event, @@ -140,7 +143,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i, char c0) { // [[BlockKer5]] [[BlockLit5]] [[ConstInt20]] [[ConstInt8]] // CHECK-LLVM: [[Block5:%[0-9]+]] = bitcast [[BlockTy3]]* %block15 to %struct.__opencl_block_literal_generic* - // CHECK-LLVM: [[Block5Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block5]] to i8 addrspace(4) + // CHECK-LLVM: [[InterCast5:%[0-9]+]] = bitcast %struct.__opencl_block_literal_generic* [[Block5]] to i8 + // CHECK-LLVM: [[Block5Ptr:%[0-9]+]] = addrspacecast i8* [[InterCast5]] to i8 addrspace(4) // CHECK-LLVM: [[BlockInv5:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* 
@__device_side_enqueue_block_invoke_5_kernel to i8 addrspace(4)* // CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv5]], i8 addrspace(4)* [[Block5Ptr]]) enqueue_kernel(default_queue, flags, ndrange, 0, NULL, &clk_event,