diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index fc0f1d91f80059..3ad0a66ace92a7 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7366,6 +7366,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, DAG.setNodeMemRefs(NewNode, {MemRef}); return SDValue(NewNode, 0); } + case Intrinsic::amdgcn_waterfall_readfirstlane: { + // A uniform source needs no readfirstlane: hand the source value and the + // incoming chain to the users of the intrinsic's value and chain results. + if (!Op->getOperand(3)->isDivergent()) + return DAG.getMergeValues({Op.getOperand(3), Op.getOperand(0)}, + SDLoc(Op)); + return Op; + } default: if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrID)) @@ -7690,7 +7698,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, case Intrinsic::amdgcn_end_cf: return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other, Op->getOperand(2), Chain), 0); - default: { if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaterfall.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaterfall.cpp index 58ec0c292fecbc..072d52f2faefeb 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaterfall.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaterfall.cpp @@ -370,6 +370,9 @@ bool SIInsertWaterfall::removeRedundantWaterfall(WaterfallWorkitem &Item) { // If all the readfirstlane intrinsics are actually for uniform values and // the token used in the begin/end isn't used in anything else the waterfall // can be removed. + // Alternatively, prior passes may have removed the readfirstlane intrinsics + // altogether; in that case the begin/end intrinsics are now redundant and can + // also be removed.
// The readfirstlane intrinsics are replaced with the uniform source value, // the loop is removed and the defs in the end intrinsics are just replaced with // the input operands @@ -399,6 +402,8 @@ bool SIInsertWaterfall::removeRedundantWaterfall(WaterfallWorkitem &Item) { } } + // Note: this comparison also holds when there are no RFL intrinsics at all, + // i.e. a prior pass already removed them and the loop is now redundant. if (Removed == Item.RFLList.size()) { // Removed all of the RFLs // We can remove the waterfall loop entirely @@ -468,16 +473,16 @@ bool SIInsertWaterfall::processWaterfall(MachineBasicBlock &MBB) { "Linked WATERFALL pseudo ops found in different BBs"); }); - assert(Item.RFLList.size() && - (Item.EndList.size() || Item.LastUseList.size()) && - "SI_WATERFALL* pseudo instruction group must have at least 1 of " - "each type"); - if (removeRedundantWaterfall(Item)) { Changed = true; continue; } + assert(Item.RFLList.size() && + (Item.EndList.size() || Item.LastUseList.size()) && + "SI_WATERFALL* pseudo instruction group must have at least 1 of " + "each type"); + // Insert the waterfall loop code around the identified region of // instructions // Loop starts at the SI_WATERFALL_BEGIN