forked from GPUOpen-Drivers/llpc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
TaskShaderOps.lgc
111 lines (100 loc) · 6.6 KB
/
TaskShaderOps.lgc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
; Test that the operations of a task shader are handled as expected.
; RUN: lgc -mcpu=gfx1030 --emit-llvm -v -o=- - <%s | FileCheck --check-prefixes=CHECK %s
; In this test case, we check that the operations of a task shader are correctly handled. The three operations
; are ReadTaskPayload, WriteTaskPayload, and EmitMeshTasks.
;
; CHECK-LABEL: lgc.shader.TASK.main
; CHECK: call float @lgc.mesh.task.read.task.payload.f32.i32(i32 %{{[0-9]*}})
; CHECK: call void @lgc.mesh.task.write.task.payload.i32.f32(i32 %{{[0-9]*}}, float %{{[0-9]*}})
; CHECK: call void @lgc.mesh.task.emit.mesh.tasks(i32 3, i32 1, i32 1)
;
; CHECK-LABEL: _amdgpu_cs_main
; CHECK: .entry:
; CHECK-DAG: [[groupIdZ:%[0-9]*]] = extractelement <3 x i32> %WorkgroupId, i64 2
; CHECK-DAG: [[dimY:%[0-9]*]] = extractelement <3 x i32> %meshTaskDispatchDims, i64 1
; CHECK-NEXT: [[tempResult0:%[0-9]*]] = mul i32 [[groupIdZ]], [[dimY]]
; CHECK-NEXT: [[groupIdY:%[0-9]*]] = extractelement <3 x i32> %WorkgroupId, i64 1
; CHECK-NEXT: [[tempResult1:%[0-9]*]] = add i32 [[tempResult0]], [[groupIdY]]
; CHECK-NEXT: [[dimX:%[0-9]*]] = extractelement <3 x i32> %meshTaskDispatchDims, i64 0
; CHECK-NEXT: [[tempResult2:%[0-9]*]] = mul i32 [[tempResult1]], [[dimX]]
; CHECK-NEXT: [[groupIdX:%[0-9]*]] = extractelement <3 x i32> %WorkgroupId, i64 0
; CHECK-NEXT: [[flattenId:%[0-9]*]] = add i32 [[tempResult2]], [[groupIdX]]
; CHECK-NEXT: [[entryIndex:%[0-9]*]] = add i32 %meshTaskRingIndex, [[flattenId]]
; CHECK: [[payloadRingDescPtr:%[0-9]*]] = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %{{[0-9]*}}, i32 13
; CHECK-NEXT: [[payloadRingDesc:%[0-9]*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[payloadRingDescPtr]], align 16
; CHECK: [[drawDataRingDescPtr:%[0-9]*]] = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %{{[0-9]*}}, i32 14
; CHECK-NEXT: [[drawDataRingDesc:%[0-9]*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[drawDataRingDescPtr]], align 16
; CHECK: [[meshPipeStatsBufAddr2x32:%[0-9]*]] = insertelement <2 x i32> %{{[0-9]*}}, i32 %meshPipeStatsBuf, i32 0
; CHECK-NEXT: [[meshPipeStatsBufAddr64:%[0-9]*]] = bitcast <2 x i32> [[meshPipeStatsBufAddr2x32]] to i64
; CHECK-NEXT: [[meshPipeStatsBufAddr:%[0-9]*]] = inttoptr i64 [[meshPipeStatsBufAddr64]] to i8 addrspace(1)*
; CHECK: [[ringSize:%[0-9]*]] = extractelement <4 x i32> [[payloadRingDesc]], i64 2
; CHECK-NEXT: [[numEntries:%[0-9]*]] = udiv i32 [[ringSize]], 16384
; CHECK-NEXT: [[wrapMask:%[0-9]*]] = sub i32 [[numEntries]], 1
; CHECK-NEXT: [[wrappedEntryIndex:%[0-9]*]] = and i32 [[entryIndex]], [[wrapMask]]
; CHECK-NEXT: [[entryOffset:%[0-9]*]] = mul i32 [[wrappedEntryIndex]], 16384
; CHECK: %{{[0-9]*}} = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[payloadRingDesc]], i32 %{{[0-9]*}}, i32 [[entryOffset]], i32 5)
; CHECK: call void @llvm.amdgcn.raw.buffer.store.f32(float %{{[0-9]*}}, <4 x i32> [[payloadRingDesc]], i32 %{{[0-9]*}}, i32 [[entryOffset]], i32 1)
; CHECK: br i1 %{{[0-9]*}}, label %.emitMeshs, label %.endEmitMeshs
;
; CHECK: .emitMeshs:
; CHECK-NEXT: [[numTaskThreadsPtr8:%[0-9]*]] = getelementptr i8, i8 addrspace(1)* [[meshPipeStatsBufAddr]], i32 16
; CHECK-NEXT: [[numTaskThreadsPtr:%[0-9]*]] = bitcast i8 addrspace(1)* [[numTaskThreadsPtr8]] to i64 addrspace(1)*
; CHECK: %{{[0-9]*}} = atomicrmw add i64 addrspace(1)* [[numTaskThreadsPtr]], i64 %{{[0-9]*}} monotonic, align 8
; CHECK: [[ringSize:%[0-9]*]] = extractelement <4 x i32> [[drawDataRingDesc]], i64 2
; CHECK-NEXT: [[numEntries:%[0-9]*]] = udiv i32 [[ringSize]], 16
; CHECK-NEXT: [[wrapMask:%[0-9]*]] = sub i32 [[numEntries]], 1
; CHECK-NEXT: [[wrappedEntryIndex:%[0-9]*]] = and i32 [[entryIndex]], [[wrapMask]]
; CHECK-NEXT: [[entryOffset:%[0-9]*]] = mul i32 [[wrappedEntryIndex]], 16
; CHECK: [[ringSize:%[0-9]*]] = extractelement <4 x i32> [[drawDataRingDesc]], i64 2
; CHECK-NEXT: [[numEntries:%[0-9]*]] = udiv i32 [[ringSize]], 16
; CHECK-NEXT: [[checkReadyBit:%[0-9]*]] = and i32 [[entryIndex]], [[numEntries]]
; CHECK-NEXT: [[readyBit:%[0-9]*]] = icmp ne i32 [[checkReadyBit]], 0
; CHECK-NEXT: [[readyBit32:%[0-9]*]] = zext i1 [[readyBit]] to i32
; CHECK-NEXT: [[drawData:%[0-9]*]] = insertelement <4 x i32> %{{[0-9]*}}, i32 [[readyBit32]], i64 3
; CHECK: call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> [[drawData]], <4 x i32> [[drawDataRingDesc]], i32 0, i32 [[entryOffset]], i32 0)
; CHECK-NEXT: br label %.endEmitMeshs
;
; CHECK: .endEmitMeshs:
; CHECK-NEXT: ret void
; ModuleID = 'lgcPipeline'
source_filename = "llpctask1"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn--amdpal"
; Function Attrs: nounwind
; Task shader entry point that serves as the input of this test.
; It reads a built-in input, derives two offsets from its first component, reads one
; float from the task payload at the first offset, writes that float back to the task
; payload at the second offset, and finally emits mesh tasks with the constants (3, 1, 1).
define dllexport spir_func void @lgc.shader.TASK.main() local_unnamed_addr #0 !lgc.shaderstage !7 {
.entry:
; Built-in input 26; the name of the value extracted below identifies it as gl_WorkGroupID.
%0 = call <3 x i32> (...) @lgc.create.read.builtin.input.v3i32(i32 26, i32 0, i32 undef, i32 undef)
; Element 0 of the vector, i.e. its first component.
%__llpc_input_proxy_gl_WorkGroupID.0.vec.extract = extractelement <3 x i32> %0, i32 0
; base = first component * 8 (shift left by 3).
%1 = shl i32 %__llpc_input_proxy_gl_WorkGroupID.0.vec.extract, 3
; The low three bits of base are zero after the shift, so this "or" yields base + 4.
%2 = or i32 %1, 4
; NOTE(review): the operand is presumably a byte offset into the task payload - confirm.
%3 = call float (...) @lgc.create.read.task.payload.f32(i32 %2)
; Second offset: base + 8.
%4 = add i32 %1, 8
; Write the float that was just read, passing the second offset as the other operand.
call void (...) @lgc.create.write.task.payload(float %3, i32 %4)
; NOTE(review): the operands are presumably the mesh task group counts in x, y, z - confirm.
call void (...) @lgc.create.emit.mesh.tasks(i32 3, i32 1, i32 1)
ret void
}
; Variadic declarations of the lgc.create.* functions called by @lgc.shader.TASK.main.
; The two read functions use attribute group #1 (nounwind readonly willreturn);
; the write and emit functions use attribute group #0 (nounwind).
; Function Attrs: nounwind
declare void @lgc.create.emit.mesh.tasks(...) local_unnamed_addr #0
; Function Attrs: nounwind readonly willreturn
declare <3 x i32> @lgc.create.read.builtin.input.v3i32(...) local_unnamed_addr #1
; Function Attrs: nounwind readonly willreturn
declare float @lgc.create.read.task.payload.f32(...) local_unnamed_addr #1
; Function Attrs: nounwind
declare void @lgc.create.write.task.payload(...) local_unnamed_addr #0
; Attribute groups referenced by the definition and declarations in this file.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly willreturn }
; Named metadata describing the pipeline state that accompanies the shader.
; Each entry points at one of the numbered nodes defined further down.
!llpc.compute.mode = !{!0}
!lgc.client = !{!1}
!lgc.unlinked = !{!2}
!lgc.options = !{!3}
!lgc.options.TASK = !{!4}
!lgc.input.assembly.state = !{!5}
!amdgpu.pal.metadata.msgpack = !{!6}
; NOTE(review): the three values are presumably the workgroup size in x, y, z - confirm.
!0 = !{i32 32, i32 1, i32 1}
; Client name string.
!1 = !{!"Vulkan"}
!2 = !{i32 1}
; Encoded option values for !lgc.options and !lgc.options.TASK; the field layout is not visible in this file.
!3 = !{i32 1931310406, i32 1408637002, i32 -1184926787, i32 1622008494, i32 1, i32 0, i32 0, i32 552, i32 0, i32 0, i32 1, i32 256, i32 256, i32 2}
!4 = !{i32 -2080943554, i32 763869174, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800}
!5 = !{i32 0, i32 3}
; Binary blob referenced by !amdgpu.pal.metadata.msgpack; must stay byte-identical.
!6 = !{!"\82\B0amdpal.pipelines\91\84\AA.registers\80\B0.spill_threshold\CE\FF\FF\FF\FF\B0.user_data_limit\00\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\D2\9D\9C\FE\\\B1\0A\1C\CF\F6\0B\90\C8\8D\1Bu\BC\AD.llpc_version\A452.2\AEamdpal.version\92\02\03"}
; Attached to @lgc.shader.TASK.main as its !lgc.shaderstage metadata.
!7 = !{i32 0}