-
Notifications
You must be signed in to change notification settings - Fork 12.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Properly check op_sel in GCNDPPCombine #79122
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Mirko Brkušanin (mbrkusanin) ChangesFull diff: https://github.com/llvm/llvm-project/pull/79122.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index a75082268c7739f..94d28dc0a2c74a1 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -274,8 +274,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
break;
}
- if (auto *Mod0 = TII->getNamedOperand(OrigMI,
- AMDGPU::OpName::src0_modifiers)) {
+ auto *Mod0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0_modifiers);
+ if (Mod0) {
assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
AMDGPU::OpName::src0_modifiers));
assert(HasVOP3DPP ||
@@ -298,8 +298,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
DPPInst->getOperand(NumOperands).setIsKill(false);
++NumOperands;
- if (auto *Mod1 = TII->getNamedOperand(OrigMI,
- AMDGPU::OpName::src1_modifiers)) {
+ auto *Mod1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1_modifiers);
+ if (Mod1) {
assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
AMDGPU::OpName::src1_modifiers));
assert(HasVOP3DPP ||
@@ -330,8 +330,9 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
DPPInst.add(*Src1);
++NumOperands;
}
- if (auto *Mod2 =
- TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2_modifiers)) {
+
+ auto *Mod2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2_modifiers);
+ if (Mod2) {
assert(NumOperands ==
AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src2_modifiers));
assert(HasVOP3DPP ||
@@ -350,6 +351,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
DPPInst.add(*Src2);
++NumOperands;
}
+
if (HasVOP3DPP) {
auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp);
if (ClampOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::clamp)) {
@@ -368,7 +370,13 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
// all 1.
if (auto *OpSelOpr =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel)) {
- auto OpSel = OpSelOpr->getImm();
+ int64_t OpSel = 0;
+ OpSel |= (Mod0 ? (!!(Mod0->getImm() & SISrcMods::OP_SEL_0) << 0) : 0);
+ OpSel |= (Mod1 ? (!!(Mod1->getImm() & SISrcMods::OP_SEL_0) << 1) : 0);
+ OpSel |= (Mod2 ? (!!(Mod2->getImm() & SISrcMods::OP_SEL_0) << 2) : 0);
+ if (Mod0 && TII->isVOP3(OrigMI) && !TII->isVOP3P(OrigMI))
+ OpSel |= !!(Mod0->getImm() & SISrcMods::DST_OP_SEL) << 3;
+
if (OpSel != 0) {
LLVM_DEBUG(dbgs() << " failed: op_sel must be zero\n");
Fail = true;
@@ -379,7 +387,11 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
}
if (auto *OpSelHiOpr =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel_hi)) {
- auto OpSelHi = OpSelHiOpr->getImm();
+ int64_t OpSelHi = 0;
+ OpSelHi |= (Mod0 ? (!!(Mod0->getImm() & SISrcMods::OP_SEL_1) << 0) : 0);
+ OpSelHi |= (Mod1 ? (!!(Mod1->getImm() & SISrcMods::OP_SEL_1) << 1) : 0);
+ OpSelHi |= (Mod2 ? (!!(Mod2->getImm() & SISrcMods::OP_SEL_1) << 2) : 0);
+
// Only vop3p has op_sel_hi, and all vop3p have 3 operands, so check
// the bitmask for 3 op_sel_hi bits set
assert(Src2 && "Expected vop3p with 3 operands");
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index 89ea3ed12aea3b2..3df087256ad6e76 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -83,7 +83,8 @@ body: |
# Regression test for src_modifiers on base u16 opcode
# GCN-label: name: vop3_u16
# GCN: %5:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 0, %1, 0, %3, 0, 0, 1, 15, 15, 1, implicit $exec
-# GCN: %7:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 4, %5, 8, %5, 0, 0, 1, 15, 15, 1, implicit $exec
+# GCN: %7:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 1, %5, 2, %5, 0, 0, 1, 15, 15, 1, implicit $exec
+# GCN: %9:vgpr_32 = V_ADD_NC_U16_e64 4, %8, 8, %7, 0, 0, implicit $exec
name: vop3_u16
tracksRegLiveness: true
body: |
@@ -97,7 +98,9 @@ body: |
%4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%5:vgpr_32 = V_ADD_NC_U16_e64 0, %4, 0, %3, 0, 0, implicit $exec
%6:vgpr_32 = V_MOV_B32_dpp %3, %5, 1, 15, 15, 1, implicit $exec
- %7:vgpr_32 = V_ADD_NC_U16_e64 4, %6, 8, %5, 0, 0, implicit $exec
+ %7:vgpr_32 = V_ADD_NC_U16_e64 1, %6, 2, %5, 0, 0, implicit $exec
+ %8:vgpr_32 = V_MOV_B32_dpp %3, %7, 1, 15, 15, 1, implicit $exec
+ %9:vgpr_32 = V_ADD_NC_U16_e64 4, %8, 8, %7, 0, 0, implicit $exec
...
name: vop3p
@@ -116,7 +119,7 @@ body: |
; GCN: [[V_DOT2_F32_F16_:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp]], 0, [[COPY]], 0, [[COPY2]], 0, 5, 0, 0, 0, implicit $mode, implicit $exec
; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN: [[V_DOT2_F32_F16_1:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 4, 0, 0, implicit $mode, implicit $exec
- ; GCN: [[V_DOT2_F32_F16_dpp:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16_dpp [[DEF]], 10, [[COPY1]], 8, [[COPY]], 13, [[COPY2]], 1, 0, 7, 4, 5, 1, 15, 15, 1, implicit $mode, implicit $exec
+ ; GCN: [[V_DOT2_F32_F16_dpp:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16_dpp [[DEF]], 10, [[COPY1]], 8, [[COPY]], 9, [[COPY2]], 1, 0, 7, 4, 5, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN: [[V_FMA_MIX_F32_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIX_F32_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN: [[V_FMA_MIXLO_F16_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIXLO_F16_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 0, [[COPY2]], 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN: [[V_FMA_MIXHI_F16_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIXHI_F16_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, [[COPY]], 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -134,7 +137,7 @@ body: |
%7:vgpr_32 = V_DOT2_F32_F16 0, %6, 0, %0, 0, %2, 0, 0, 4, 0, 0, implicit $mode, implicit $exec
%8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- %9:vgpr_32 = V_DOT2_F32_F16 10, %8, 8, %0, 13, %2, 1, 0, 7, 4, 5, implicit $mode, implicit $exec
+ %9:vgpr_32 = V_DOT2_F32_F16 10, %8, 8, %0, 9, %2, 1, 0, 7, 4, 5, implicit $mode, implicit $exec
%10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%11:vgpr_32 = V_FMA_MIX_F32 8, %10, 8, %0, 8, %2, 1, 0, 7, implicit $mode, implicit $exec
@@ -871,3 +874,71 @@ body: |
%5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
%6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
...
+
+# Check op_sel is all 0s when combining
+# GCN-LABEL: name: opsel_vop3
+# GCN: %4:vgpr_32 = V_ADD_I16_e64_dpp %2, 0, %0, 0, %1, 0, 0, 1, 15, 15, 1, implicit $exec
+# GCN: %6:vgpr_32 = V_ADD_I16_e64 4, %5, 0, %1, 0, 0, implicit $exec
+# GCN: %8:vgpr_32 = V_ADD_I16_e64 0, %7, 4, %1, 0, 0, implicit $exec
+# GCN: %10:vgpr_32 = V_ADD_I16_e64 4, %9, 4, %1, 0, 0, implicit $exec
+name: opsel_vop3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = IMPLICIT_DEF
+
+ ; Combine for op_sel:[0,0,0]
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_ADD_I16_e64 0, %3, 0, %1, 0, 0, implicit $exec
+
+ ; Do not combine for op_sel:[1,0,0]
+ %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_ADD_I16_e64 4, %5, 0, %1, 0, 0, implicit $exec
+
+ ; Do not combine for op_sel:[0,1,0]
+ %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %8:vgpr_32 = V_ADD_I16_e64 0, %7, 4, %1, 0, 0, implicit $exec
+
+ ; Do not combine for op_sel:[1,1,0]
+ %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %10:vgpr_32 = V_ADD_I16_e64 4, %9, 4, %1, 0, 0, implicit $exec
+...
+
+# Check op_sel is all 0s and op_sel_hi is all 1s when combining
+# GCN-LABEL: name: opsel_vop3p
+# GCN: %5:vgpr_32 = V_FMA_MIX_F32 0, %4, 0, %1, 0, %2, 0, 0, 0, implicit $mode, implicit $exec
+# GCN: %7:vgpr_32 = V_FMA_MIX_F32 4, %6, 4, %1, 4, %2, 0, 0, 0, implicit $mode, implicit $exec
+# GCN: %9:vgpr_32 = V_FMA_MIX_F32_dpp %3, 8, %0, 8, %1, 8, %2, 0, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %11:vgpr_32 = V_FMA_MIX_F32 12, %10, 12, %1, 12, %2, 0, 0, 0, implicit $mode, implicit $exec
+
+name: opsel_vop3p
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = COPY $vgpr2
+ %3:vgpr_32 = IMPLICIT_DEF
+
+ ; Do not combine for op_sel:[0,0,0] op_sel_hi:[0,0,0]
+ %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_FMA_MIX_F32 0, %4, 0, %1, 0, %2, 0, 0, 0, implicit $mode, implicit $exec
+
+ ; Do not combine for op_sel:[1,1,1] op_sel_hi:[0,0,0]
+ %6:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
+ %7:vgpr_32 = V_FMA_MIX_F32 4, %6, 4, %1, 4, %2, 0, 0, 0, implicit $mode, implicit $exec
+
+ ; Combine for op_sel:[0,0,0] op_sel_hi:[1,1,1]
+ %8:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
+ %9:vgpr_32 = V_FMA_MIX_F32 8, %8, 8, %1, 8, %2, 0, 0, 0, implicit $mode, implicit $exec
+
+ ; Do not combine for op_sel:[1,1,1] op_sel_hi:[1,1,1]
+ %10:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_FMA_MIX_F32 12, %10, 12, %1, 12, %2, 0, 0, 0, implicit $mode, implicit $exec
+...
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall looks great, thanks for fixing it so fast. I think it needs a test to reject formation when DST_OP_SEL is 1, but otherwise LGTM.
I verified the change, we will have tests in these PR #78414 |
4f120cb
to
48935fd
Compare
Rebased and added test that has only DST_OP_SEL set to 1. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
No description provided.