forked from GPUOpen-Drivers/llpc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
PackUnlinkVsGs.lgc
106 lines (92 loc) · 6.32 KB
/
PackUnlinkVsGs.lgc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
; Test that outputs from the VS are removed when they are not used by the GS, even for unlinked shaders.
; RUN: lgc -mcpu=gfx900 -o - - <%s | FileCheck --check-prefixes=CHECK %s
; In this simple case, the vertex shader is the first basic block in _amdgpu_gs_main, so we find that block and
; make sure that its only ds_write instructions are the ones for the VS outputs that are used by the GS.
;
; CHECK-LABEL: _amdgpu_gs_main:
; CHECK-NOT: {{^[L\.]*}}BB
; CHECK-NOT: ds_write2_b32
; CHECK: ds_write2_b32 v{{[0-9]*}}, v{{[0-9]*}}, v{{[0-9]*}} offset1:1
; CHECK-NOT: ds_write2_b32
; CHECK: ds_write2_b32 v{{[0-9]*}}, v{{[0-9]*}}, v{{[0-9]*}} offset0:2 offset1:3
; CHECK-NOT: ds_write2_b32
; CHECK: {{^[L\.]*}}BB
; ModuleID = 'lgcPipeline'
source_filename = "lgcPipeline"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn--amdpal"
; Vertex shader input for the test (tagged with !lgc.shaderstage !2).
; It writes three builtin outputs (ids 0, 1 and 3) and two generic outputs (first operand 9 and 10).
; The geometry shader in this file only reads builtin input 0, so the remaining writes here are the
; "unused" outputs that the test expects to be dropped from the generated code.
; NOTE(review): builtin ids 0/1/3 presumably follow the SPIR-V BuiltIn numbering
; (Position / PointSize / ClipDistance) - confirm against the lgc Builder interface.
; Function Attrs: nounwind
define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !lgc.shaderstage !2 {
.entry:
; Fetch a buffer descriptor pointer (addrspace 7) and mark the buffer contents as invariant.
; NOTE(review): the leading (1, 1) operands are presumably descriptor set / binding - confirm.
%0 = call i8 addrspace(7)* (...) @lgc.create.load.buffer.desc.p7i8(i32 1, i32 1, i32 0, i32 0)
%1 = call {}* @llvm.invariant.start.p7i8(i64 -1, i8 addrspace(7)* %0)
; Load a <4 x float> from byte offset 352 of that buffer.
%2 = getelementptr inbounds i8, i8 addrspace(7)* %0, i64 352
%3 = bitcast i8 addrspace(7)* %2 to <4 x float> addrspace(7)*
%4 = load <4 x float>, <4 x float> addrspace(7)* %3, align 16
; Builtin output 0 receives the loaded vector; this is the only output the GS below reads back.
call void (...) @lgc.create.write.builtin.output(<4 x float> %4, i32 0, i32 0, i32 undef, i32 undef)
; Builtin outputs 1 and 3 are written with undef values and are never read by the GS in this file.
call void (...) @lgc.create.write.builtin.output(float undef, i32 1, i32 0, i32 undef, i32 undef)
call void (...) @lgc.create.write.builtin.output([1 x float] undef, i32 3, i32 4096, i32 undef, i32 undef)
; Generic outputs 9 and 10 are written with zero vectors and are never read by the GS in this file.
call void (...) @lgc.create.write.generic.output(<3 x float> zeroinitializer, i32 9, i32 0, i32 0, i32 0, i32 0, i32 undef)
call void (...) @lgc.create.write.generic.output(<3 x float> zeroinitializer, i32 10, i32 0, i32 0, i32 0, i32 0, i32 undef)
ret void
}
; Function Attrs: nounwind readonly willreturn
declare i8 addrspace(7)* @lgc.create.load.buffer.desc.p7i8(...) local_unnamed_addr #1
; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare {}* @llvm.invariant.start.p7i8(i64 immarg, i8 addrspace(7)* nocapture) #2
; Function Attrs: nounwind
declare void @lgc.create.write.builtin.output(...) local_unnamed_addr #0
; Function Attrs: nounwind
declare void @lgc.create.write.generic.output(...) local_unnamed_addr #0
; Geometry shader input for the test (tagged with !lgc.shaderstage !9).
; It reads builtin input 0 three times, with the third operand set to 0, 1 and 2, and emits three
; vertices. No other builtin or generic input is read, which is what makes every other VS output unused.
; NOTE(review): the third operand of lgc.create.read.builtin.input is presumably the input vertex
; index - confirm against the lgc Builder interface.
; Function Attrs: nounwind
define dllexport spir_func void @lgc.shader.GS.main() local_unnamed_addr #0 !lgc.shaderstage !9 {
.entry:
; Read builtin input 0 for each of the three index values up front.
%0 = call <4 x float> (...) @lgc.create.read.builtin.input.v4f32(i32 0, i32 0, i32 0, i32 undef)
%1 = call <4 x float> (...) @lgc.create.read.builtin.input.v4f32(i32 0, i32 0, i32 1, i32 undef)
%2 = call <4 x float> (...) @lgc.create.read.builtin.input.v4f32(i32 0, i32 0, i32 2, i32 undef)
; First emitted vertex: value %0 with element 1 replaced by 1.0.
%__llpc_output_proxy_1.sroa.0.4.vec.insert = insertelement <4 x float> %0, float 1.000000e+00, i32 1
call void (...) @lgc.create.write.generic.output(<2 x float> zeroinitializer, i32 5, i32 0, i32 0, i32 0, i32 1024, i32 undef)
call void (...) @lgc.create.write.builtin.output(<4 x float> %__llpc_output_proxy_1.sroa.0.4.vec.insert, i32 0, i32 1024, i32 undef, i32 undef)
call void (...) @lgc.create.write.builtin.output(float undef, i32 1, i32 1024, i32 undef, i32 undef)
call void (...) @lgc.create.write.builtin.output([1 x float] undef, i32 3, i32 5120, i32 undef, i32 undef)
call void (...) @lgc.create.emit.vertex(i32 0)
; Second emitted vertex: value %1 with element 1 replaced by 0.0; the other writes repeat unchanged.
%__llpc_output_proxy_1.sroa.0.4.vec.insert25 = insertelement <4 x float> %1, float 0.000000e+00, i32 1
call void (...) @lgc.create.write.generic.output(<2 x float> zeroinitializer, i32 5, i32 0, i32 0, i32 0, i32 1024, i32 undef)
call void (...) @lgc.create.write.builtin.output(<4 x float> %__llpc_output_proxy_1.sroa.0.4.vec.insert25, i32 0, i32 1024, i32 undef, i32 undef)
call void (...) @lgc.create.write.builtin.output(float undef, i32 1, i32 1024, i32 undef, i32 undef)
call void (...) @lgc.create.write.builtin.output([1 x float] undef, i32 3, i32 5120, i32 undef, i32 undef)
call void (...) @lgc.create.emit.vertex(i32 0)
; Third emitted vertex: value %2 with element 1 replaced by 1.0; the other writes repeat unchanged.
%__llpc_output_proxy_1.sroa.0.4.vec.insert27 = insertelement <4 x float> %2, float 1.000000e+00, i32 1
call void (...) @lgc.create.write.generic.output(<2 x float> zeroinitializer, i32 5, i32 0, i32 0, i32 0, i32 1024, i32 undef)
call void (...) @lgc.create.write.builtin.output(<4 x float> %__llpc_output_proxy_1.sroa.0.4.vec.insert27, i32 0, i32 1024, i32 undef, i32 undef)
call void (...) @lgc.create.write.builtin.output(float undef, i32 1, i32 1024, i32 undef, i32 undef)
call void (...) @lgc.create.write.builtin.output([1 x float] undef, i32 3, i32 5120, i32 undef, i32 undef)
call void (...) @lgc.create.emit.vertex(i32 0)
ret void
}
; Function Attrs: nounwind readonly willreturn
declare <4 x float> @lgc.create.read.builtin.input.v4f32(...) local_unnamed_addr #1
; Function Attrs: nounwind
declare void @lgc.create.emit.vertex(...) local_unnamed_addr #0
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly willreturn }
attributes #2 = { argmemonly nofree nosync nounwind willreturn }
; Named metadata consumed by lgc. !lgc.unlinked is what marks this module as an unlinked compile,
; which is the scenario this test exercises.
!llpc.geometry.mode = !{!0}
!lgc.client = !{!1}
!lgc.unlinked = !{!2}
!lgc.options = !{!3}
!lgc.options.VS = !{!4}
!lgc.options.GS = !{!5}
!lgc.input.assembly.state = !{!6}
!lgc.rasterizer.state = !{!7}
!amdgpu.pal.metadata.msgpack = !{!8}
; Metadata nodes. The integer tuples in !0 and !3-!7 are opaque state/option encodings.
; NOTE(review): their field meanings are not derivable from this file - consult the lgc pipeline
; state definitions before editing them.
!0 = !{i32 3, i32 2, i32 1, i32 3}
!1 = !{!"Vulkan"}
; !2 is shared: it is both the operand of !lgc.unlinked and the !lgc.shaderstage of @lgc.shader.VS.main.
!2 = !{i32 1}
!3 = !{i32 1647551108, i32 1333815706, i32 473866860, i32 -486104810, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2}
!4 = !{i32 65905242, i32 -234096046, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800}
!5 = !{i32 1488666502, i32 274213642, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800}
!6 = !{i32 2, i32 3}
!7 = !{i32 0, i32 0, i32 0, i32 1}
; !8 is the operand of !amdgpu.pal.metadata.msgpack: a binary string whose readable keys include
; "amdpal.pipelines" and ".llpc_version". Do not reformat it; the escapes are part of the payload.
!8 = !{!"\82\B0amdpal.pipelines\91\84\AA.registers\80\B0.spill_threshold\CE\FF\FF\FF\FF\B0.user_data_limit\00\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\DD\BF\E3\A1\D6\C4\0F<\CF\EB\95#\AB\F5\17\DC\B4\AD.llpc_version\A449.0\AEamdpal.version\92\02\03"}
; !9 is the !lgc.shaderstage of @lgc.shader.GS.main.
!9 = !{i32 4}