Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Correctly insert s_nops for dst forwarding hazard #100276

Merged
merged 17 commits into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 104 additions & 17 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -875,13 +875,55 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
return DataIdx >= 0 &&
TRI->regsOverlap(MI.getOperand(DataIdx).getReg(), Reg);
};

int WaitStatesNeededForDef =
VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);

return WaitStatesNeeded;
}

/// Dest sel forwarding issue occurs if additional logic is needed to swizzle /
/// pack the computed value into correct bit position of the dest register. This
/// occurs if we have SDWA with dst_sel != DWORD or if we have op_sel with
/// dst_sel that is not aligned to the register. This function analayzes the \p
/// MI and \returns an operand with dst setl forwarding issue, or nullptr if
jrbyrnes marked this conversation as resolved.
Show resolved Hide resolved
/// none exists.
static const MachineOperand *
getDstSelForwardingOperand(const MachineInstr &MI, const GCNSubtarget &ST) {
if (!SIInstrInfo::isVALU(MI))
jrbyrnes marked this conversation as resolved.
Show resolved Hide resolved
return nullptr;

const SIInstrInfo *TII = ST.getInstrInfo();

unsigned Opcode = MI.getOpcode();

// There are three different types of instructions
// which produce forwarded dest: 1. SDWA with dst_sel != DWORD, 2. VOP3
// which write hi bits (e.g. op_sel[3] == 1), and 3. CVR_SR_FP8_F32 and
// CVT_SR_BF8_F32 with op_sel[3:2]
jrbyrnes marked this conversation as resolved.
Show resolved Hide resolved
// != 0
if (SIInstrInfo::isSDWA(MI)) {
// Type 1: SDWA with dst_sel != DWORD
if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel))
if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
return nullptr;
} else {
// Type 2 && Type 3: (VOP3 which write the hi bits) || (CVT_SR_FP8_F32 and
// CVT_SR_BF8_F32 with op_sel[3:2] != 0)
if (!AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel) ||
!(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() &
SISrcMods::DST_OP_SEL ||
((Opcode == AMDGPU::V_CVT_SR_BF8_F32_e64 ||
jrbyrnes marked this conversation as resolved.
Show resolved Hide resolved
Opcode == AMDGPU::V_CVT_SR_FP8_F32_e64) &&
(TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() &
SISrcMods::OP_SEL_0))))
return nullptr;
}

return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
}

int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
int WaitStatesNeeded = 0;

Expand Down Expand Up @@ -913,30 +955,44 @@ int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
const int Shift16DefWaitstates = 1;

auto IsShift16BitDefFn = [this, VALU](const MachineInstr &MI) {
if (!SIInstrInfo::isVALU(MI))
return false;
const SIInstrInfo *TII = ST.getInstrInfo();
if (SIInstrInfo::isSDWA(MI)) {
if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel))
if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
return false;
} else {
if (!AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::op_sel) ||
!(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)
->getImm() &
SISrcMods::DST_OP_SEL))
return false;
}
const SIRegisterInfo *TRI = ST.getRegisterInfo();
if (auto *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
SmallVector<const MachineOperand *, 4> Dsts;
jrbyrnes marked this conversation as resolved.
Show resolved Hide resolved
const MachineOperand *ForwardedDst = getDstSelForwardingOperand(MI, ST);
if (ForwardedDst) {
Dsts.push_back(ForwardedDst);
} else if (MI.isInlineAsm()) {
jrbyrnes marked this conversation as resolved.
Show resolved Hide resolved
// Assume inline asm has dst forwarding hazard
for (auto &Op :
drop_begin(MI.operands(), InlineAsm::MIOp_FirstOperand)) {
if (Op.isReg() && Op.isDef()) {
Dsts.push_back(&Op);
}
}
}

for (auto Dst : Dsts) {
Register Def = Dst->getReg();

for (const MachineOperand &Use : VALU->explicit_uses()) {
// We must consider implicit reads of the VALU. SDWA with dst_sel and
// UNUSED_PRESERVE will implicitly read the result from forwarded dest,
// and we must account for that hazard.
for (const MachineOperand &Use : VALU->all_uses()) {
if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg()))
jrbyrnes marked this conversation as resolved.
Show resolved Hide resolved
return true;
}
}

if (!TII->isVOP3(*VALU))
return false;

// We also must account for WAW hazards. In particular, WAW hazards with
// dest op_sel and dest preserve semantics will read the forwarded dest
// for parity check for ECC. Without accounting for this hazard, the ECC
// will be wrong.
if (auto *ThisDst = TII->getNamedOperand(*VALU, AMDGPU::OpName::vdst)) {
return TRI->regsOverlap(Def, ThisDst->getReg());
}
}
return false;
};

Expand Down Expand Up @@ -1029,7 +1085,7 @@ int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
// problematic thus far.

// see checkVALUHazards()
if (!ST.has12DWordStoreHazard())
if (!ST.has12DWordStoreHazard() && !ST.hasDstSelForwardingHazard())
return 0;

const MachineRegisterInfo &MRI = MF.getRegInfo();
Expand All @@ -1040,6 +1096,37 @@ int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
if (Op.isReg() && Op.isDef()) {
WaitStatesNeeded =
std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));

if (!TRI.isVectorRegister(MRI, Op.getReg()))
continue;

if (ST.hasDstSelForwardingHazard()) {
jrbyrnes marked this conversation as resolved.
Show resolved Hide resolved
const int Shift16DefWaitstates = 1;

auto IsShift16BitDefFn = [this, &IA](const MachineInstr &MI) {
const MachineOperand *Dst = getDstSelForwardingOperand(MI, ST);
// Assume inline asm reads the dst
if (Dst)
return true;

if (MI.isInlineAsm()) {
jrbyrnes marked this conversation as resolved.
Show resolved Hide resolved
// Assume other inline asm has dst forwarding hazard
for (auto &Op :
drop_begin(MI.operands(), InlineAsm::MIOp_FirstOperand)) {
if (Op.isReg() && IA->modifiesRegister(Op.getReg(), &TRI)) {
return true;
}
}
}

return false;
};

int WaitStatesNeededForDef =
Shift16DefWaitstates -
getWaitStatesSince(IsShift16BitDefFn, Shift16DefWaitstates);
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
}
}
}

Expand Down
Loading
Loading