Skip to content

Commit

Permalink
[AMDGPU] Fix regclass for operand subregs in SIInsertWaterfall
Browse files Browse the repository at this point in the history
If an operand is a register with a subregister index, getRegClass returns the
larger class of the full register. We need to check whether a subregister is used
instead of the full register and use only that subregister in any later instructions.

Change-Id: I892f51f7668c250d33c811d705ea9a0e28b48478
  • Loading branch information
mbrkusanin committed Jul 21, 2020
1 parent 3dbeb7a commit 44408c8
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
9 changes: 6 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInsertWaterfall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ static void readFirstLaneReg(MachineBasicBlock &MBB, MachineRegisterInfo *MRI,

if (RegSize == 1)
BuildMI(MBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RFLReg)
.addReg(RFLSrcReg, getUndefRegState(RFLSrcOp.isUndef()));
.addReg(RFLSrcReg, getUndefRegState(RFLSrcOp.isUndef()),
RFLSrcOp.getSubReg());
else {
SmallVector<unsigned, 8> TRegs;
for (unsigned i = 0; i < RegSize; ++i) {
Expand Down Expand Up @@ -213,7 +214,8 @@ static unsigned compareIdx(MachineBasicBlock &MBB, MachineRegisterInfo *MRI,
// Iterate over the index in dword chunks and'ing the result with the
// CondReg
unsigned IndexReg = IndexOp.getReg();
auto IndexRC = MRI->getRegClass(IndexReg);
auto IndexRC = RI->getSubRegClass(MRI->getRegClass(IndexOp.getReg()),
IndexOp.getSubReg());
unsigned AndOpc =
IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
const auto *BoolXExecRC = RI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
Expand Down Expand Up @@ -485,7 +487,8 @@ bool SIInsertWaterfall::processWaterfall(MachineBasicBlock &MBB) {
// Loop is ended after the last SI_WATERFALL_END and these instructions are
// removed with the src replacing all dst uses
auto Index = TII->getNamedOperand(*(Item.Begin), AMDGPU::OpName::idx);
auto IndexRC = MRI->getRegClass(Index->getReg());
auto IndexRC = RI->getSubRegClass(MRI->getRegClass(Index->getReg()),
Index->getSubReg());

if (!RI->hasVGPRs(IndexRC)) {
// Waterfall loop index is uniform! Loop can be removed
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.waterfall.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
; GCN-32: v_cmp_eq_u32_e64 [[EXEC:s[0-9]+]], [[VAL1]], [[VAL2]]
; GCN-64: s_and_saveexec_b64 [[EXEC]], [[EXEC]]
; GCN-32: s_and_saveexec_b32 [[EXEC]], [[EXEC]]
; GCN: v_readlane_b32 [[RLVAL:s[0-9]+]], v1, [[VAL1]]
; GCN: v_readlane_b32 [[RLVAL:s[0-9]+]], v{{[0-9]+}}, [[RLVAL]]
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[RLVAL]]
; GCN: v_or_b32_e32 [[ACCUM:v[0-9]+]], [[ACCUM]], [[VVAL]]
; GCN-64: s_xor_b64 exec, exec, [[EXEC]]
Expand All @@ -20,14 +20,14 @@
; GCN-32: s_mov_b32 exec_lo, s{{[0-9]+}}
; VI: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[ACCUM]]
; GFX9_UP: global_store_dword v[{{[0-9]+:[0-9]+}}], [[ACCUM]], off
define amdgpu_ps void @test_waterfall_readlane(i32 addrspace(1)* inreg %out, <2 x i32> addrspace(1)* inreg %in, i32 %tid, i32 %val) #1 {
define amdgpu_ps void @test_waterfall_readlane(i32 addrspace(1)* inreg %out, <2 x i32> addrspace(1)* inreg %in, i32 %tid) #1 {
%gep.in = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 %tid
%args = load <2 x i32>, <2 x i32> addrspace(1)* %gep.in
%value = extractelement <2 x i32> %args, i32 0
%lane = extractelement <2 x i32> %args, i32 1
%wf_token = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 %lane)
%readlane = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %wf_token, i32 %lane)
%readlane1 = call i32 @llvm.amdgcn.readlane(i32 %val, i32 %readlane)
%readlane1 = call i32 @llvm.amdgcn.readlane(i32 %value, i32 %readlane)
%readlane2 = call i32 @llvm.amdgcn.waterfall.end.i32(i32 %wf_token, i32 %readlane1)
; This store instruction should be outside the waterfall loop and the value
; being stored generated incrementally in the loop itself
Expand Down

0 comments on commit 44408c8

Please sign in to comment.