Skip to content

Commit

Permalink
[AMDGPU] Improve waterfall uniform detection
Browse files Browse the repository at this point in the history
Detect uniform waterfall readfirstlane intrinsics during instruction
selection. This gives better access to the divergence analysis, which can be
relied on more safely than attempting to identify uniformity later.

Change-Id: I919c218f0ca05985f1cfe42b79ee42ab7fd319df
  • Loading branch information
dstutt committed Sep 18, 2020
1 parent 04a8829 commit d63df0c
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 6 deletions.
9 changes: 8 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7366,6 +7366,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
DAG.setNodeMemRefs(NewNode, {MemRef});
return SDValue(NewNode, 0);
}
case Intrinsic::amdgcn_waterfall_readfirstlane: {
if (!Op->getOperand(3)->isDivergent()) {
// If waterfall_readfirstlane is uniform, it can be removed
DAG.ReplaceAllUsesWith(Op.getNode(), Op->getOperand(3).getNode());
return SDValue();
}
return Op;
}
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrID))
Expand Down Expand Up @@ -7690,7 +7698,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
case Intrinsic::amdgcn_end_cf:
return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other,
Op->getOperand(2), Chain), 0);

default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
Expand Down
15 changes: 10 additions & 5 deletions llvm/lib/Target/AMDGPU/SIInsertWaterfall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,9 @@ bool SIInsertWaterfall::removeRedundantWaterfall(WaterfallWorkitem &Item) {
// If all the readfirstlane intrinsics are actually for uniform values and
// the token used in the begin/end isn't used in anything else the waterfall
// can be removed.
// Alternatively, prior passes may have removed the readfirstlane intrinsics
// altogether; in this case the begin/end intrinsics are now redundant and can
// also be removed.
// The readfirstlane intrinsics are replaced with the uniform source value,
// the loop is removed and the defs in the end intrinsics are just replaced with
// the input operands
Expand Down Expand Up @@ -399,6 +402,8 @@ bool SIInsertWaterfall::removeRedundantWaterfall(WaterfallWorkitem &Item) {
}
}

// Note: this test is also true when there are NO RFL intrinsics, i.e. the
// case where a prior pass has removed all of them and the loop is now redundant.
if (Removed == Item.RFLList.size()) {
// Removed all of the RFLs
// We can remove the waterfall loop entirely
Expand Down Expand Up @@ -468,16 +473,16 @@ bool SIInsertWaterfall::processWaterfall(MachineBasicBlock &MBB) {
"Linked WATERFALL pseudo ops found in different BBs");
});

assert(Item.RFLList.size() &&
(Item.EndList.size() || Item.LastUseList.size()) &&
"SI_WATERFALL* pseudo instruction group must have at least 1 of "
"each type");

if (removeRedundantWaterfall(Item)) {
Changed = true;
continue;
}

assert(Item.RFLList.size() &&
(Item.EndList.size() || Item.LastUseList.size()) &&
"SI_WATERFALL* pseudo instruction group must have at least 1 of "
"each type");

// Insert the waterfall loop code around the identified region of
// instructions
// Loop starts at the SI_WATERFALL_BEGIN
Expand Down

0 comments on commit d63df0c

Please sign in to comment.