Skip to content

Commit

Permalink
[AMDGPU] New ttracedata intrinsics (#70235)
Browse files Browse the repository at this point in the history
Add llvm.amdgcn.s.ttracedata and llvm.amdgcn.s.ttracedata.imm which map
directly to the corresponding instructions s_ttracedata and
s_ttracedata_imm. These are inherently whole-wave operations, so any
non-uniform inputs are made uniform with v_readfirstlane_b32.
  • Loading branch information
jayfoad authored Nov 2, 2023
1 parent 3a223f4 commit b90cfe4
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 2 deletions.
7 changes: 7 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1697,6 +1697,13 @@ def int_amdgcn_s_setprio :
DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
IntrHasSideEffects]>;

// llvm.amdgcn.s.ttracedata: takes a single i32 operand and returns nothing.
// Maps directly to the s_ttracedata instruction. This is a whole-wave
// operation, so a non-uniform operand is readfirstlaned during selection.
// Marked IntrHasSideEffects in addition to IntrNoMem.
def int_amdgcn_s_ttracedata :
DefaultAttrsIntrinsic<[], [llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects]>;
// llvm.amdgcn.s.ttracedata.imm: takes a single i16 operand and returns
// nothing. Maps directly to the s_ttracedata_imm instruction. The operand
// must be an immediate (ImmArg on argument 0).
def int_amdgcn_s_ttracedata_imm :
DefaultAttrsIntrinsic<[], [llvm_i16_ty],
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;

// This is IntrHasSideEffects so it can be used to read cycle counters.
def int_amdgcn_s_getreg :
ClangBuiltin<"__builtin_amdgcn_s_getreg">,
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3066,6 +3066,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(B, MI, 2);
return;
}
case Intrinsic::amdgcn_s_ttracedata:
// Operand 1 is the trace data, which the instruction reads from M0.
// s_ttracedata is a whole-wave operation, so a non-uniform (VGPR) input is
// constrained through a readfirstlane here.
constrainOpWithReadfirstlane(B, MI, 1); // M0
return;
case Intrinsic::amdgcn_raw_buffer_load_lds:
case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
applyDefaultMapping(OpdMapper);
Expand Down Expand Up @@ -4670,6 +4673,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
break;
}
case Intrinsic::amdgcn_s_ttracedata: {
// Operand 1 is the 32-bit trace data value.
// This must be an SGPR, but accept a VGPR. A VGPR input is legalized later
// in applyMappingImpl, which constrains this operand with a readfirstlane.
// NOTE(review): the SGPRRegBankID argument is presumably the default bank
// used when the register has none assigned yet -- confirm against
// getRegBankID.
unsigned Bank =
getRegBankID(MI.getOperand(1).getReg(), MRI, AMDGPU::SGPRRegBankID);
OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
break;
}
case Intrinsic::amdgcn_end_cf: {
unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
Expand Down
9 changes: 7 additions & 2 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1503,7 +1503,10 @@ def S_INCPERFLEVEL : SOPP_Pseudo <"s_incperflevel", (ins i32imm:$simm16), "$simm
def S_DECPERFLEVEL : SOPP_Pseudo <"s_decperflevel", (ins i32imm:$simm16), "$simm16",
[(int_amdgcn_s_decperflevel timm:$simm16)]> {
}
def S_TTRACEDATA : SOPP_Pseudo <"s_ttracedata", (ins)> {

// s_ttracedata has no explicit operands ((ins) is empty) and an empty asm
// operand string. The selection pattern matches the argument of
// int_amdgcn_s_ttracedata as M0, and Uses = [M0] records that the
// instruction reads that register.
let Uses = [M0] in
def S_TTRACEDATA : SOPP_Pseudo <"s_ttracedata", (ins), "",
[(int_amdgcn_s_ttracedata M0)]> {
// The simm16 field is pinned to 0.
// NOTE(review): fixed_imm presumably tells SOPP_Pseudo that the immediate is
// not an assembler-visible operand -- confirm in the SOPP_Pseudo class.
let simm16 = 0;
let fixed_imm = 1;
}
Expand Down Expand Up @@ -1547,8 +1550,10 @@ let SubtargetPredicate = isGFX10Plus in {
[(SIdenorm_mode (i32 timm:$simm16))]>;
}

// s_ttracedata_imm: the trace data is the 16-bit immediate $simm16. The
// pattern selects int_amdgcn_s_ttracedata_imm with a target-immediate (timm)
// operand, and the definition is explicitly marked as having side effects.
// NOTE(review): this diff view shows both the pre-change SOPP_Pseudo line
// (no pattern) and its replacement (with the pattern) back to back.
let hasSideEffects = 1 in
def S_TTRACEDATA_IMM :
SOPP_Pseudo<"s_ttracedata_imm", (ins s16imm:$simm16), "$simm16">;
SOPP_Pseudo<"s_ttracedata_imm", (ins s16imm:$simm16), "$simm16",
[(int_amdgcn_s_ttracedata_imm timm:$simm16)]>;
} // End SubtargetPredicate = isGFX10Plus

let SubtargetPredicate = isGFX11Plus in {
Expand Down
53 changes: 53 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.ttracedata.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s

declare void @llvm.amdgcn.s.ttracedata(i32)
declare void @llvm.amdgcn.s.ttracedata.imm(i16)

; Constant operand: the literal 1000000 (0xf4240) is expected to be moved into
; m0 immediately before s_ttracedata, for both SelectionDAG and GlobalISel.
define amdgpu_cs void @ttracedata_c() {
; GFX11-LABEL: ttracedata_c:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_mov_b32 m0, 0xf4240
; GFX11-NEXT: s_ttracedata
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.s.ttracedata(i32 1000000)
ret void
}

; Uniform operand: %val is passed inreg, so it arrives in s0 and is expected
; to be copied into m0 with a plain s_mov_b32 (no readfirstlane needed).
define amdgpu_cs void @ttracedata_s(i32 inreg %val) {
; GFX11-LABEL: ttracedata_s:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_mov_b32 m0, s0
; GFX11-NEXT: s_ttracedata
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.s.ttracedata(i32 %val)
ret void
}

; Non-uniform operand: %val is not inreg, so it arrives in v0 and must be
; readfirstlaned. SelectionDAG is expected to readfirstlane into s0 and then
; copy to m0; GlobalISel is expected to readfirstlane directly into m0.
define amdgpu_cs void @ttracedata_v(i32 %val) {
; GFX11-SDAG-LABEL: ttracedata_v:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: s_mov_b32 m0, s0
; GFX11-SDAG-NEXT: s_ttracedata
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: ttracedata_v:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: v_readfirstlane_b32 m0, v0
; GFX11-GISEL-NEXT: s_ttracedata
; GFX11-GISEL-NEXT: s_endpgm
call void @llvm.amdgcn.s.ttracedata(i32 %val)
ret void
}

; Immediate form: the constant 1000 (0x3e8) is expected to be encoded directly
; as the operand of s_ttracedata_imm, with no write to m0.
define amdgpu_cs void @ttracedata_imm() {
; GFX11-LABEL: ttracedata_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_ttracedata_imm 0x3e8
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.s.ttracedata.imm(i16 1000)
ret void
}

0 comments on commit b90cfe4

Please sign in to comment.