Skip to content

Commit

Permalink
[AMDGPU] Add llvm.amdgcn.raw.atomic.buffer.load intrinsic to support
Browse files Browse the repository at this point in the history
OpAtomicLoad lowering

This adds the llvm.amdgcn.raw.atomic.buffer.load intrinsic to support
OpAtomicLoad lowering on AMDGPU. Previously, OpAtomicLoad was lowered to
llvm.amdgcn.raw.buffer.load, which in some cases caused the load to be
marked as invariant and hoisted out of the loop by LICM.

Change-Id: I7d3989d98ab02508287223f24c3f7f81d312f0e6
  • Loading branch information
rtayl committed Mar 10, 2020
1 parent ab553d6 commit 0a6ed48
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 1 deletion.
13 changes: 13 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -949,6 +949,19 @@ class AMDGPURawBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;

// Raw buffer load intended for lowering atomic loads (e.g. OpAtomicLoad).
// Takes a buffer resource, a VGPR/imm offset, an SGPR/imm soffset and an
// immediate auxiliary-data word, and returns a value of type data_ty.
// The property list below contains only IntrArgMemOnly and ImmArg<3>; no
// read-only memory property is listed. The intent, per the change that
// introduced this class, is that the load is not marked invariant and
// hoisted by LICM the way a plain raw buffer load could be.
class AMDGPURawAtomicBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
[data_ty],
[llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
// bit 1 = slc,
// bit 2 = dlc on gfx10+),
// swizzled buffer (bit 3 = swz))
[IntrArgMemOnly, ImmArg<3>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
// Instantiated with the default data_ty (llvm_any_ty), so the intrinsic is
// overloaded on its result type.
def int_amdgcn_raw_atomic_buffer_load : AMDGPURawAtomicBufferLoad;

class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
[data_ty],
[llvm_v4i32_ty, // rsrc(SGPR)
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3776,6 +3776,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
case Intrinsic::amdgcn_struct_tbuffer_store:
return legalizeBufferStore(MI, MRI, B, true, true);
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_atomic_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load:
return legalizeBufferLoad(MI, MRI, B, false, false);
case Intrinsic::amdgcn_raw_buffer_load_format:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3921,6 +3921,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_atomic_buffer_load:
case Intrinsic::amdgcn_raw_tbuffer_load: {
// FIXME: Should make intrinsic ID the last operand of the instruction,
// then this would be the same as store
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -928,7 +928,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
}

Info.flags = MachineMemOperand::MODereferenceable;
if (Attr.hasFnAttribute(Attribute::ReadOnly)) {
if (Attr.hasFnAttribute(Attribute::ReadOnly) ||
IntrID == Intrinsic::amdgcn_raw_atomic_buffer_load) {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// TODO: Account for dmask reducing loaded size.
Info.memVT = memVTFromImageReturn(CI.getType());
Expand Down Expand Up @@ -6214,6 +6215,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
M->getMemOperand(), DAG);
}
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_atomic_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format: {
const bool IsFormat = IntrID == Intrinsic::amdgcn_raw_buffer_load_format;

Expand Down
56 changes: 56 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.atomic.buffer.load.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=SI
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=SI
;RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=GFX10

;CHECK-LABEL: {{^}}raw_atomic_buffer_load
;CHECK-LABEL: BB0_1: ; %bb1
;CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
;CHECK-NEXT: s_waitcnt lgkmcnt(0)
;CHECK-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 glc
;CHECK-NEXT: s_waitcnt vmcnt(0)
;SI-NEXT: v_cmp_ne_u32_e32 vcc, v1, v0
;GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v0
;SI-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
;GFX10-NEXT: s_or_b32 s4, vcc_lo, s4
;SI-NEXT: s_andn2_b64 exec, exec, s[4:5]
;GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
;CHECK-NEXT: s_cbranch_execnz BB0_1
; Spin loop around the atomic buffer load: each iteration of %bb1 loads an
; i32 from %addr at offset 4 with aux = 1 (glc bit set) and keeps looping
; while the loaded value equals the workitem id. The expectations above
; require the buffer_load_dword to appear inside the loop block (BB0_1),
; i.e. the load must not be moved out of the loop.
define amdgpu_kernel void @raw_atomic_buffer_load(<4 x i32> %addr) {
bb:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
br label %bb1
bb1:
; Re-issued on every iteration; this is the load under test.
%0 = call i32 @llvm.amdgcn.raw.atomic.buffer.load.i32(<4 x i32> %addr, i32 4, i32 0, i32 1)
%1 = icmp eq i32 %0, %tmp0
br i1 %1, label %bb1, label %bb2
bb2:
ret void
}

;CHECK-LABEL: {{^}}raw_nonatomic_buffer_load
;CHECK: ; =>This Inner Loop Header: Depth=1
;SI-NEXT: s_and_b64 s[2:3], exec, vcc
;GFX10-NEXT: s_and_b32 s1, exec_lo, vcc_lo
;SI-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
;GFX10-NEXT: s_or_b32 s0, s1, s0
;SI-NEXT: s_andn2_b64 exec, exec, s[0:1]
;GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
;CHECK-NEXT: s_cbranch_execnz BB1_1
; Same loop shape as the atomic test, but using the plain
; llvm.amdgcn.raw.buffer.load intrinsic. The expectations above list no
; buffer load between the loop header and the back-edge branch, which
; matches the load having been moved out of the loop body.
define amdgpu_kernel void @raw_nonatomic_buffer_load(<4 x i32> %addr) {
bb:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
br label %bb1
bb1:
; Non-atomic counterpart of the load under test (same operands, aux = 1).
%0 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %addr, i32 4, i32 0, i32 1)
%1 = icmp eq i32 %0, %tmp0
br i1 %1, label %bb1, label %bb2
bb2:
ret void
}

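; Intrinsic declarations (attributes come from the intrinsic definitions; note
; that the atomic buffer load is not declared readonly).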
declare i32 @llvm.amdgcn.raw.atomic.buffer.load.i32(<4 x i32>, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.workitem.id.x()

0 comments on commit 0a6ed48

Please sign in to comment.