forked from GPUOpen-Drivers/llpc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
PatchInvalidImageDescriptor.lgc
99 lines (79 loc) · 6.91 KB
/
PatchInvalidImageDescriptor.lgc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
; Test that invalid image descriptor patching is applied where required.
; RUN: lgc -mcpu=gfx900 -print-after=lgc-patch-workarounds -o - - <%s 2>&1 | FileCheck --check-prefixes=CHECK,GFX900 %s
; RUN: lgc -mcpu=gfx1010 -print-after=lgc-patch-workarounds -o - - <%s 2>&1 | FileCheck --check-prefixes=CHECK,GFX1010 %s
; CHECK-LABEL: IR Dump After Patch LLVM for workarounds
; GFX900: extractelement <8 x i32> %.desc, i64 7
; GFX900: call i32 @llvm.amdgcn.readfirstlane(i32 %{{[0-9]+}})
; GFX900: insertelement <8 x i32> %{{[0-9]+}}, i32 %{{[0-9]+}}, i64 7
; GFX900: %.load = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 1, i32 0, <8 x i32> %{{[0-9]+}}, i32 0, i32 0)
; GFX1010: extractelement <8 x i32> %{{[0-9]+}}, i64 3
; GFX1010-NEXT: icmp sge i32
; GFX1010-NEXT: and i32
; GFX1010-NEXT: select i1
; GFX1010-NEXT: [[PATCHED_DESC0:%[.a-zA-Z0-9]+]] = insertelement <8 x i32> %{{[0-9]+}}
; GFX1010: %.load = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 1, <8 x i32> [[PATCHED_DESC0]], i32 0, i32 0)
; GFX900: call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> zeroinitializer, i32 15, i32 0, i32 0, <8 x i32> %{{[0-9]+}}, i32 0, i32 0)
; GFX1010: call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> zeroinitializer, i32 15, i32 0, i32 0, <8 x i32> %{{[0-9]+}}, i32 0, i32 0)
; GFX900: %.sample = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> %{{[0-9]+}}, <4 x i32> %.sampler, i1 false, i32 0, i32 0)
; GFX1010: %.sample = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> %{{[0-9]+}}, <4 x i32> %.sampler, i1 false, i32 0, i32 0)
; GFX900: %.gather = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, <8 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, i1 false, i32 0, i32 0)
; GFX1010: %.gather = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, <8 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, i1 false, i32 0, i32 0)
; GFX900: %.atomic = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 1, i32 0, i32 0, <8 x i32> %{{[0-9]+}}, i32 0, i32 0)
; GFX1010: %.atomic = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 1, i32 0, <8 x i32> %{{[0-9]+}}, i32 0, i32 0)
; GFX900: %.lod = call <2 x float> @llvm.amdgcn.image.getlod.2d.v2f32.f32(i32 3, float 0.000000e+00, float 0.000000e+00, <8 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, i1 false, i32 0, i32 0)
; GFX1010: %.lod = call <2 x float> @llvm.amdgcn.image.getlod.2d.v2f32.f32(i32 3, float 0.000000e+00, float 0.000000e+00, <8 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, i1 false, i32 0, i32 0)
; CHECK: [[WFDESC:%[0-9]+]] = call <8 x i32> @llvm.amdgcn.waterfall.readfirstlane
; GFX900: [[WFDESC1:%[0-9]+]] = call <8 x i32> @llvm.amdgcn.waterfall.last.use.v8i32(i32 %{{[0-9]+}}, <8 x i32> [[WFDESC]])
; GFX900: call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> zeroinitializer, i32 15, i32 0, i32 0, <8 x i32> [[WFDESC1]], i32 0, i32 0)
; GFX1010: extractelement <8 x i32> [[WFDESC]], i64 3
; GFX1010-NEXT: icmp sge i32
; GFX1010-NEXT: and i32
; GFX1010-NEXT: select i1
; GFX1010-NEXT: [[PATCHED_DESC1:%[.a-zA-Z0-9]+]] = insertelement <8 x i32> [[WFDESC]]
; GFX1010: [[WFDESC1:%[0-9]+]] = call <8 x i32> @llvm.amdgcn.waterfall.last.use.v8i32(i32 %{{[0-9]+}}, <8 x i32> [[PATCHED_DESC1]])
; GFX1010: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> zeroinitializer, i32 15, i32 0, <8 x i32> [[WFDESC1]], i32 0, i32 0)
; Module-level header for the test input.
; ModuleID = 'lgcPipeline'
source_filename = "lgcPipeline"
; Data layout and triple are fixed strings for the AMDGPU (amdgcn) target with the
; "amdpal" OS component; the processor itself is chosen per run via -mcpu on the
; lgc command lines at the top of this file.
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
target triple = "amdgcn--amdpal"
; Function Attrs: nounwind
; Test entry point. The function loads two image descriptors and one sampler
; descriptor, then issues one call of each lgc.create.image.* operation declared
; in this module so that the pass output can be matched by the check lines at the
; top of this file.
;
; The value names %.desc, %.sampler, %.load, %.sample, %.gather, %.atomic and %.lod
; are matched literally by those check lines, so they must not be renamed.
;
; NOTE(review): in the image calls the first i32 operand after any texel value looks
; like the image dimension (0 yields a .1d intrinsic on gfx1010 and .2d on gfx900,
; 1 yields .2d on both) and the following i32 (128 or 8) looks like a flags word.
; Confirm both against the LGC builder interface.
define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !lgc.shaderstage !0 {
.entry:
; First image descriptor: obtained via get.desc.ptr with operands (1, 3, 3).
; The bitcast / getelementptr-by-0 / bitcast chain leaves the address unchanged.
%.desc.ptr2 = call <8 x i32> addrspace(4)* (...) @lgc.create.get.desc.ptr.p4v8i32(i32 1, i32 3, i32 3)
%.desc.ptr1 = bitcast <8 x i32> addrspace(4)* %.desc.ptr2 to i8 addrspace(4)*
%.desc.ptr0 = getelementptr i8, i8 addrspace(4)* %.desc.ptr1, i64 0
%.desc.ptr = bitcast i8 addrspace(4)* %.desc.ptr0 to <8 x i32> addrspace(4)*
%.desc = load <8 x i32>, <8 x i32> addrspace(4)* %.desc.ptr, align 32
; Sampler descriptor: obtained via get.desc.ptr with operands (2, 0, 13).
%.sampler.ptr = call <4 x i32> addrspace(4)* (...) @lgc.create.get.desc.ptr.p4v4i32(i32 2, i32 0, i32 13)
%.sampler = load <4 x i32>, <4 x i32> addrspace(4)* %.sampler.ptr, align 16
; Load with a single i32 coordinate of 1. The check lines expect image.load.2d on
; gfx900 (coordinates 1, 0) and image.load.1d on gfx1010 using a patched descriptor.
%.load = call <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 128, <8 x i32> %.desc, i32 1)
; Store of a zero texel at a zero <2 x i32> coordinate; expected as image.store.2d on both targets.
call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 1, i32 128, <8 x i32> %.desc, <2 x i32> zeroinitializer)
; Sample and gather use both the image descriptor and the sampler descriptor.
%.sample = call <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 128, <8 x i32> %.desc, <4 x i32> %.sampler, i32 1, <2 x float> zeroinitializer)
%.gather = call <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 128, <8 x i32> %.desc, <4 x i32> %.sampler, i32 37, <2 x float> zeroinitializer, i32 0, float 0.000000e+00)
; Atomic: the check lines expect image.atomic.add (2d on gfx900, 1d on gfx1010) with
; input value 1 at coordinate 0.
%.atomic = call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 0, i32 128, i32 0, <8 x i32> %.desc, i32 0, i32 1) #0
%.lod = call <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 1, i32 128, <8 x i32> %.desc, <4 x i32> %.sampler, <2 x float> zeroinitializer)
; NOTE(review): none of the check lines at the top of this file reference the two
; query results below; presumably they only need to pass through the pipeline - confirm.
%.query.size = call <2 x i32> (...) @lgc.create.image.query.size.v2i32(i32 1, i32 128, <8 x i32> %.desc, i32 0)
%.query.levels = call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 128, <8 x i32> %.desc)
; Use a waterfall loop with last.use to test that it is also handled correctly.
; Second image descriptor: same getter, but with last operand 4 instead of 3.
%.desc2.ptr2 = call <8 x i32> addrspace(4)* (...) @lgc.create.get.desc.ptr.p4v8i32(i32 1, i32 3, i32 4)
%.desc2.ptr1 = bitcast <8 x i32> addrspace(4)* %.desc2.ptr2 to i8 addrspace(4)*
%.desc2.ptr0 = getelementptr i8, i8 addrspace(4)* %.desc2.ptr1, i64 0
%.desc2.ptr = bitcast i8 addrspace(4)* %.desc2.ptr0 to <8 x i32> addrspace(4)*
%.desc2 = load <8 x i32>, <8 x i32> addrspace(4)* %.desc2.ptr, align 32
; This store passes 8 (not 128) as the second i32 operand; the check lines expect the
; descriptor to go through waterfall.readfirstlane / waterfall.last.use before the store.
; "i32 zeroinitializer" is simply the i32 constant 0 used as the single coordinate.
call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 0, i32 8, <8 x i32> %.desc2, i32 zeroinitializer)
ret void
}
; Declarations of the lgc.create.* calls used by the test function. Every one is
; declared variadic; the call sites spell the callee type as "(...)" and pass the
; concrete operands.

; Descriptor-pointer getters: return an address-space-4 pointer to an <8 x i32>
; or <4 x i32> descriptor.
declare <8 x i32> addrspace(4)* @lgc.create.get.desc.ptr.p4v8i32(...) #1
declare <4 x i32> addrspace(4)* @lgc.create.get.desc.ptr.p4v4i32(...) #1
; Image operations. The load, sample and gather declarations use attribute group #1
; and the store uses #2.
declare <4 x float> @lgc.create.image.load.v4f32(...) #1
declare void @lgc.create.image.store(...) #2
declare <4 x float> @lgc.create.image.sample.v4f32(...) #1
declare <4 x float> @lgc.create.image.gather.v4f32(...) #1
; The remaining operations use attribute group #0 only.
; NOTE(review): get.lod and the two queries are not marked with #1 like the other
; non-writing operations; presumably intentional - confirm.
declare i32 @lgc.create.image.atomic.i32(...) #0
declare <2 x float> @lgc.create.image.get.lod.v2f32(...) #0
declare <2 x i32> @lgc.create.image.query.size.v2i32(...) #0
declare i32 @lgc.create.image.query.levels.i32(...) #0
; Attribute groups referenced by the function definition, the atomic call site and
; the declarations in this module.
; #0: nounwind only (the test function, the atomic call, and the atomic/get.lod/query declarations).
attributes #0 = { nounwind }
; #1: nounwind + readonly (descriptor getters, image load/sample/gather).
attributes #1 = { nounwind readonly }
; #2: nounwind + writeonly (image store).
attributes #2 = { nounwind writeonly }
; Metadata node attached to the test function as !lgc.shaderstage.
; NOTE(review): the value 1 presumably selects the vertex stage, matching the "VS"
; in the function name - confirm against the LGC shader-stage enumeration.
!0 = !{i32 1}