Skip to content

Commit

Permalink
AMDGPU: Regenerate baseline mir tests
Browse files Browse the repository at this point in the history
  • Loading branch information
arsenm committed Feb 27, 2024
1 parent b2a4f64 commit e7900e6
Show file tree
Hide file tree
Showing 109 changed files with 4,054 additions and 2,171 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32
; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
; GFX908_GFX11-NEXT: S_ENDPGM 0
;
; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn
; GFX90A_GFX940: bb.1 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
Expand All @@ -32,6 +33,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32
; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
; GFX90A_GFX940-NEXT: S_ENDPGM 0
;
; GFX12-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn
; GFX12: bb.1 (%ir-block.0):
; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
Expand Down Expand Up @@ -64,6 +66,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offen_no_rtn(float %val, <4 x i32>
; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
; GFX908_GFX11-NEXT: S_ENDPGM 0
;
; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn
; GFX90A_GFX940: bb.1 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
Expand All @@ -78,6 +81,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offen_no_rtn(float %val, <4 x i32>
; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
; GFX90A_GFX940-NEXT: S_ENDPGM 0
;
; GFX12-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn
; GFX12: bb.1 (%ir-block.0):
; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
Expand Down Expand Up @@ -111,6 +115,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_idxen_no_rtn(float %val, <4 x i32>
; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
; GFX908_GFX11-NEXT: S_ENDPGM 0
;
; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn
; GFX90A_GFX940: bb.1 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
Expand All @@ -125,6 +130,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_idxen_no_rtn(float %val, <4 x i32>
; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
; GFX90A_GFX940-NEXT: S_ENDPGM 0
;
; GFX12-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn
; GFX12: bb.1 (%ir-block.0):
; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
Expand Down Expand Up @@ -160,6 +166,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_bothen_no_rtn(float %val, <4 x i32
; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
; GFX908_GFX11-NEXT: S_ENDPGM 0
;
; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn
; GFX90A_GFX940: bb.1 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
Expand All @@ -176,6 +183,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_bothen_no_rtn(float %val, <4 x i32
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
; GFX90A_GFX940-NEXT: S_ENDPGM 0
;
; GFX12-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn
; GFX12: bb.1 (%ir-block.0):
; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
Expand Down Expand Up @@ -210,6 +218,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offset_no_rtn(float %val, ptr
; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908_GFX11-NEXT: S_ENDPGM 0
;
; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn
; GFX90A_GFX940: bb.1 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
Expand All @@ -223,6 +232,20 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offset_no_rtn(float %val, ptr
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX90A_GFX940-NEXT: S_ENDPGM 0
;
; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn
; GFX12: bb.1 (%ir-block.0):
; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX12-NEXT: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
ret void
}
Expand All @@ -242,6 +265,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offen_no_rtn(float %val, ptr a
; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908_GFX11-NEXT: S_ENDPGM 0
;
; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn
; GFX90A_GFX940: bb.1 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
Expand All @@ -256,6 +280,21 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offen_no_rtn(float %val, ptr a
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX90A_GFX940-NEXT: S_ENDPGM 0
;
; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn
; GFX12: bb.1 (%ir-block.0):
; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX12-NEXT: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
Expand All @@ -275,6 +314,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_idxen_no_rtn(float %val, ptr a
; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908_GFX11-NEXT: S_ENDPGM 0
;
; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn
; GFX90A_GFX940: bb.1 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
Expand All @@ -289,6 +329,21 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_idxen_no_rtn(float %val, ptr a
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX90A_GFX940-NEXT: S_ENDPGM 0
;
; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn
; GFX12: bb.1 (%ir-block.0):
; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX12-NEXT: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret void
}
Expand All @@ -310,6 +365,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_bothen_no_rtn(float %val, ptr
; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908_GFX11-NEXT: S_ENDPGM 0
;
; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn
; GFX90A_GFX940: bb.1 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
Expand All @@ -326,6 +382,23 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_bothen_no_rtn(float %val, ptr
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX90A_GFX940-NEXT: S_ENDPGM 0
;
; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn
; GFX12: bb.1 (%ir-block.0):
; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX12-NEXT: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
ret void
}
Expand Down
Loading

0 comments on commit e7900e6

Please sign in to comment.