Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU][GFX12] VOP encoding and codegen - add support for v_cvt fp8/… #78414

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1495,14 +1495,16 @@ def FeatureISAVersion12 : FeatureSet<
FeatureFlatAtomicFaddF32Inst,
FeatureImageInsts,
FeatureExtendedImageInsts,
FeatureFP8Insts,
mariusz-sikora-at-amd marked this conversation as resolved.
Show resolved Hide resolved
FeaturePackedTID,
FeatureVcmpxPermlaneHazard,
FeatureSALUFloatInsts,
FeaturePseudoScalarTrans,
FeatureHasRestrictedSOffset,
FeatureVGPRSingleUseHintInsts,
FeatureMADIntraFwdBug,
FeatureScalarDwordx3Loads]>;
FeatureScalarDwordx3Loads,
FeatureDPPSrc1SGPR]>;

//===----------------------------------------------------------------------===//

Expand Down
68 changes: 66 additions & 2 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3500,6 +3500,9 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
return !isInlineConstant(Inst, OpIdx);
} else if (MO.isReg()) {
auto Reg = MO.getReg();
if (!Reg) {
return false;
}
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
auto PReg = mc2PseudoReg(Reg);
return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
Expand Down Expand Up @@ -8273,6 +8276,16 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
}

if (isVOP1Cvt_F32_Fp8_Bf8_e64(Opc) &&
Opc != AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 &&
Opc != AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I++]);
Op.addRegOrImmWithFPInputModsOperands(Inst, 1); // src0
// Add dummy src1
Inst.addOperand(MCOperand::createImm(0));
Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI())));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably no need to call getMCReg() for NoRegister?

Are these dummy operands really necessary? By having them, we just seem to give ourselves more work handling them with custom code. NoRegister register operands look a bit weird.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I think we can remove getMCReg(). I will change it in the other places as well.

We will need these dummy operands when doing cvtVOP3P here:

OpSel = Inst.getOperand(OpSelIdx).getImm();
Without them we will crash on an out-of-bounds access.

}

for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Expand Down Expand Up @@ -8321,12 +8334,20 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 ||
Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) {
Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
Inst.addOperand(Inst.getOperand(0));
}

if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) {
// Adding vdst_in operand is already covered for these DPP instructions in
// cvtVOP3DPP.
if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
!(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) {
assert(!IsPacked);
Inst.addOperand(Inst.getOperand(0));
}
Expand Down Expand Up @@ -8765,6 +8786,11 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
int Src2ModIdx =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 ||
Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12;
mariusz-sikora-at-amd marked this conversation as resolved.
Show resolved Hide resolved
bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;

Expand All @@ -8788,6 +8814,20 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
}
}

if (VdstInIdx != -1) {
int NumOperands = Inst.getNumOperands();
if (VdstInIdx == NumOperands)
mariusz-sikora-at-amd marked this conversation as resolved.
Show resolved Hide resolved
Inst.addOperand(Inst.getOperand(0));
}

if (IsVOP3CvtSrDpp) {
int NumOperands = Inst.getNumOperands();
if (Src2ModIdx == NumOperands) {
Inst.addOperand(MCOperand::createImm(0));
Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI())));
}
}

auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
MCOI::TIED_TO);
if (TiedTo != -1) {
Expand All @@ -8801,6 +8841,13 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
Fi = Op.getImm();
} else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
if (isVOP1Cvt_F32_Fp8_Bf8_e64(Inst.getOpcode()) &&
Inst.getOpcode() != AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 &&
Inst.getOpcode() != AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12) {
// Add dummy src1
Inst.addOperand(MCOperand::createImm(0));
Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI())));
}
} else if (Op.isReg()) {
Op.addRegOperands(Inst, 1);
} else if (Op.isImm() &&
Expand Down Expand Up @@ -8847,6 +8894,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
OptionalImmIndexMap OptionalIdx;

unsigned I = 1;
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
Expand Down Expand Up @@ -8874,6 +8922,14 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
Op.addImmOperands(Inst, 1);
} else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegWithFPInputModsOperands(Inst, 2);
if (Opc == AMDGPU::V_CVT_F32_BF8_dpp_gfx12 ||
Opc == AMDGPU::V_CVT_F32_FP8_dpp_gfx12 ||
Opc == AMDGPU::V_CVT_F32_BF8_dpp8_gfx12 ||
Opc == AMDGPU::V_CVT_F32_FP8_dpp8_gfx12) {
// Add dummy src1
Inst.addOperand(MCOperand::createImm(0));
Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI())));
}
} else if (Op.isDppFI()) {
Fi = Op.getImm();
} else if (Op.isReg()) {
Expand All @@ -8884,6 +8940,14 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
} else {
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegWithFPInputModsOperands(Inst, 2);
if (Opc == AMDGPU::V_CVT_F32_BF8_dpp_gfx12 ||
Opc == AMDGPU::V_CVT_F32_FP8_dpp_gfx12 ||
Opc == AMDGPU::V_CVT_F32_BF8_dpp8_gfx12 ||
Opc == AMDGPU::V_CVT_F32_FP8_dpp8_gfx12) {
// Add dummy src1
Inst.addOperand(MCOperand::createImm(0));
Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI())));
}
} else if (Op.isReg()) {
Op.addRegOperands(Inst, 1);
} else if (Op.isDPPCtrl()) {
Expand Down
50 changes: 49 additions & 1 deletion llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,15 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
convertVOPCDPPInst(MI); // Special VOP3 case
} else {
assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3);

if (AMDGPU::isVOP1Cvt_F32_Fp8_Bf8_e64(MI.getOpcode())) {
mariusz-sikora-at-amd marked this conversation as resolved.
Show resolved Hide resolved
// Add omod and clamp modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0),
AMDGPU::OpName::omod);
insertNamedMCOperand(MI, MCOperand::createImm(0),
AMDGPU::OpName::clamp);
}

convertVOP3DPPInst(MI); // Regular VOP3 case
}
};
Expand Down Expand Up @@ -691,8 +700,15 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,

Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
Address, CS);
if (Res)
if (Res) {
if (AMDGPU::isVOP1Cvt_F32_Fp8_Bf8_e64(MI.getOpcode())) {
// Add omod and clamp modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
insertNamedMCOperand(MI, MCOperand::createImm(0),
AMDGPU::OpName::clamp);
}
break;
}

Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
Address, CS);
Expand All @@ -708,6 +724,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
AMDGPU::OpName::src2_modifiers);
}

if (Res && (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp)) {
// Insert dummy unused src2_modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0),
AMDGPU::OpName::src2_modifiers);
}

if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
!AMDGPU::hasGDS(STI)) {
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
Expand Down Expand Up @@ -938,6 +961,13 @@ void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
// first add optional MI operands to check FI
DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
unsigned Opc = MI.getOpcode();

if (AMDGPU::isVOP1Cvt_F32_Fp8_Bf8_e64(Opc)) {
// Add omod and clamp modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
}

if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) {
convertVOP3PDPPInst(MI);
} else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
Expand All @@ -947,6 +977,15 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
if (isMacDPP(MI))
convertMacDPPInst(MI);

int VDstInIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
if (VDstInIdx != -1)
insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12)
insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);

unsigned DescNumOps = MCII->get(Opc).getNumOperands();
if (MI.getNumOperands() < DescNumOps &&
AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
Expand All @@ -973,6 +1012,15 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
if (isMacDPP(MI))
convertMacDPPInst(MI);

int VDstInIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
if (VDstInIdx != -1)
insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12)
insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);

unsigned Opc = MI.getOpcode();
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
if (MI.getNumOperands() < DescNumOps &&
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1300,7 +1300,9 @@ void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Opc = MI->getOpcode();
if (isPermlane16(Opc)) {
if (isPermlane16(Opc) || (isVOP1Cvt_F32_Fp8_Bf8_e64(Opc) &&
Opc != AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 &&
Opc != AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12)) {
auto FIN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
auto BCN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
unsigned FI = !!(MI->getOperand(FIN).getImm() & SISrcMods::OP_SEL_0);
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,17 @@ bool isPermlane16(unsigned Opc) {
Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}

/// Returns true when \p Opc is one of the GFX12 e64 opcodes
/// V_CVT_F32_{BF8,FP8} (plain, DPP or DPP8 form) or V_CVT_PK_F32_{BF8,FP8};
/// returns false for every other opcode.
bool isVOP1Cvt_F32_Fp8_Bf8_e64(unsigned Opc) {
  switch (Opc) {
  // Non-packed conversions: plain e64, then the DPP and DPP8 variants.
  case AMDGPU::V_CVT_F32_BF8_e64_gfx12:
  case AMDGPU::V_CVT_F32_FP8_e64_gfx12:
  case AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12:
  case AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12:
  case AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12:
  case AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12:
  // Packed conversions: only the plain e64 opcodes are part of this set.
  case AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12:
  case AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12:
    return true;
  default:
    return false;
  }
}

bool isGenericAtomic(unsigned Opc) {
return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,9 @@ bool isPermlane16(unsigned Opc);
LLVM_READNONE
bool isGenericAtomic(unsigned Opc);

/// \returns true if \p Opc is one of the GFX12 e64 opcodes
/// V_CVT_F32_{BF8,FP8} (plain, DPP or DPP8 form) or V_CVT_PK_F32_{BF8,FP8}.
LLVM_READNONE
bool isVOP1Cvt_F32_Fp8_Bf8_e64(unsigned Opc);

namespace VOPD {

enum Component : unsigned {
Expand Down
Loading