llvm · mariusz-sikora-at-amd · Jan 24, 2024 · Jan 15, 2024 · Jan 17, 2024 · Jan 18, 2024
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1495,14 +1495,16 @@ def FeatureISAVersion12 : FeatureSet<
    FeatureFlatAtomicFaddF32Inst,
    FeatureImageInsts,
    FeatureExtendedImageInsts,
+   FeatureFP8Insts,
    FeaturePackedTID,
    FeatureVcmpxPermlaneHazard,
    FeatureSALUFloatInsts,
    FeaturePseudoScalarTrans,
    FeatureHasRestrictedSOffset,
    FeatureVGPRSingleUseHintInsts,
    FeatureMADIntraFwdBug,
-   FeatureScalarDwordx3Loads]>;
+   FeatureScalarDwordx3Loads,
+   FeatureDPPSrc1SGPR]>;
 
 //===----------------------------------------------------------------------===//
 

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -3500,6 +3500,9 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
     return !isInlineConstant(Inst, OpIdx);
   } else if (MO.isReg()) {
     auto Reg = MO.getReg();
+    if (!Reg) {
+      return false;
+    }
     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
     auto PReg = mc2PseudoReg(Reg);
     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
@@ -8273,6 +8276,16 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
   }
 
+  if (isVOP1Cvt_F32_Fp8_Bf8_e64(Opc) &&
+      Opc != AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 &&
+      Opc != AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12) {
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I++]);
+    Op.addRegOrImmWithFPInputModsOperands(Inst, 1); // src0
+    // Add dummy src1
+    Inst.addOperand(MCOperand::createImm(0));
+    Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI())));
 OpSel = Inst.getOperand(OpSelIdx).getImm(); 
 OpSel = Inst.getOperand(OpSelIdx).getImm(); 
+  }
+
   for (unsigned E = Operands.size(); I != E; ++I) {
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
@@ -8321,12 +8334,20 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
 
   if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
-      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
+      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
+      Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 ||
+      Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) {
     Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
     Inst.addOperand(Inst.getOperand(0));
   }
 
-  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) {
+  // Adding vdst_in operand is already covered for these DPP instructions in
+  // cvtVOP3DPP.
+  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
+      !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
+        Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
+        Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
+        Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) {
     assert(!IsPacked);
     Inst.addOperand(Inst.getOperand(0));
   }
@@ -8765,6 +8786,11 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
   int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
   int Src2ModIdx =
       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
+  int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
+  bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
+                        Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 ||
+                        Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
+                        Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12;
   bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
                Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
 
@@ -8788,6 +8814,20 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
       }
     }
 
+    if (VdstInIdx != -1) {
+      int NumOperands = Inst.getNumOperands();
+      if (VdstInIdx == NumOperands)
+        Inst.addOperand(Inst.getOperand(0));
+    }
+
+    if (IsVOP3CvtSrDpp) {
+      int NumOperands = Inst.getNumOperands();
+      if (Src2ModIdx == NumOperands) {
+        Inst.addOperand(MCOperand::createImm(0));
+        Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI())));
+      }
+    }
+
     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                             MCOI::TIED_TO);
     if (TiedTo != -1) {
@@ -8801,6 +8841,13 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
       Fi = Op.getImm();
     } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
+      if (isVOP1Cvt_F32_Fp8_Bf8_e64(Inst.getOpcode()) &&
+          Inst.getOpcode() != AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 &&
+          Inst.getOpcode() != AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12) {
+        // Add dummy src1
+        Inst.addOperand(MCOperand::createImm(0));
+        Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI())));
+      }
     } else if (Op.isReg()) {
       Op.addRegOperands(Inst, 1);
     } else if (Op.isImm() &&
@@ -8847,6 +8894,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
   OptionalImmIndexMap OptionalIdx;
 
   unsigned I = 1;
+  const unsigned Opc = Inst.getOpcode();
   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
@@ -8874,6 +8922,14 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
         Op.addImmOperands(Inst, 1);
       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
         Op.addRegWithFPInputModsOperands(Inst, 2);
+        if (Opc == AMDGPU::V_CVT_F32_BF8_dpp_gfx12 ||
+            Opc == AMDGPU::V_CVT_F32_FP8_dpp_gfx12 ||
+            Opc == AMDGPU::V_CVT_F32_BF8_dpp8_gfx12 ||
+            Opc == AMDGPU::V_CVT_F32_FP8_dpp8_gfx12) {
+          // Add dummy src1
+          Inst.addOperand(MCOperand::createImm(0));
+          Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI())));
+        }
       } else if (Op.isDppFI()) {
         Fi = Op.getImm();
       } else if (Op.isReg()) {
@@ -8884,6 +8940,14 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
     } else {
       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
         Op.addRegWithFPInputModsOperands(Inst, 2);
+        if (Opc == AMDGPU::V_CVT_F32_BF8_dpp_gfx12 ||
+            Opc == AMDGPU::V_CVT_F32_FP8_dpp_gfx12 ||
+            Opc == AMDGPU::V_CVT_F32_BF8_dpp8_gfx12 ||
+            Opc == AMDGPU::V_CVT_F32_FP8_dpp8_gfx12) {
+          // Add dummy src1
+          Inst.addOperand(MCOperand::createImm(0));
+          Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI())));
+        }
       } else if (Op.isReg()) {
         Op.addRegOperands(Inst, 1);
       } else if (Op.isDPPCtrl()) {

diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -522,6 +522,15 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
           convertVOPCDPPInst(MI); // Special VOP3 case
         } else {
           assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3);
+
+          if (AMDGPU::isVOP1Cvt_F32_Fp8_Bf8_e64(MI.getOpcode())) {
+            // Add omod and clamp modifiers.
+            insertNamedMCOperand(MI, MCOperand::createImm(0),
+                                 AMDGPU::OpName::omod);
+            insertNamedMCOperand(MI, MCOperand::createImm(0),
+                                 AMDGPU::OpName::clamp);
+          }
+
           convertVOP3DPPInst(MI); // Regular VOP3 case
         }
       };
@@ -691,8 +700,15 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
 
     Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                         Address, CS);
-    if (Res)
+    if (Res) {
+      if (AMDGPU::isVOP1Cvt_F32_Fp8_Bf8_e64(MI.getOpcode())) {
+        // Add omod and clamp modifiers.
+        insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
+        insertNamedMCOperand(MI, MCOperand::createImm(0),
+                             AMDGPU::OpName::clamp);
+      }
       break;
+    }
 
     Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                         Address, CS);
@@ -708,6 +724,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                          AMDGPU::OpName::src2_modifiers);
   }
 
+  if (Res && (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
+              MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp)) {
+    // Insert dummy unused src2_modifiers.
+    insertNamedMCOperand(MI, MCOperand::createImm(0),
+                         AMDGPU::OpName::src2_modifiers);
+  }
+
   if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
       !AMDGPU::hasGDS(STI)) {
     insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
@@ -938,6 +961,13 @@ void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
 // first add optional MI operands to check FI
 DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
   unsigned Opc = MI.getOpcode();
+
+  if (AMDGPU::isVOP1Cvt_F32_Fp8_Bf8_e64(Opc)) {
+    // Add omod and clamp modifiers.
+    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
+    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
+  }
+
   if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) {
     convertVOP3PDPPInst(MI);
   } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
@@ -947,6 +977,15 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
     if (isMacDPP(MI))
       convertMacDPPInst(MI);
 
+    int VDstInIdx =
+        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
+    if (VDstInIdx != -1)
+      insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
+
+    if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
+        MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12)
+      insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);
+
     unsigned DescNumOps = MCII->get(Opc).getNumOperands();
     if (MI.getNumOperands() < DescNumOps &&
         AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
@@ -973,6 +1012,15 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
   if (isMacDPP(MI))
     convertMacDPPInst(MI);
 
+  int VDstInIdx =
+      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
+  if (VDstInIdx != -1)
+    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
+
+  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
+      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12)
+    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);
+
   unsigned Opc = MI.getOpcode();
   unsigned DescNumOps = MCII->get(Opc).getNumOperands();
   if (MI.getNumOperands() < DescNumOps &&

diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -1300,7 +1300,9 @@ void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
                                    const MCSubtargetInfo &STI,
                                    raw_ostream &O) {
   unsigned Opc = MI->getOpcode();
-  if (isPermlane16(Opc)) {
+  if (isPermlane16(Opc) || (isVOP1Cvt_F32_Fp8_Bf8_e64(Opc) &&
+                            Opc != AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 &&
+                            Opc != AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12)) {
     auto FIN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
     auto BCN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
     unsigned FI = !!(MI->getOperand(FIN).getImm() & SISrcMods::OP_SEL_0);

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -503,6 +503,17 @@ bool isPermlane16(unsigned Opc) {
          Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
 }
 
+bool isVOP1Cvt_F32_Fp8_Bf8_e64(unsigned Opc) {
+  return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
+         Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
+         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
+         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
+         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
+         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
+         Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 ||
+         Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12;
+}
+
 bool isGenericAtomic(unsigned Opc) {
   return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
          Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -542,6 +542,9 @@ bool isPermlane16(unsigned Opc);
 LLVM_READNONE
 bool isGenericAtomic(unsigned Opc);
 
+LLVM_READNONE
+bool isVOP1Cvt_F32_Fp8_Bf8_e64(unsigned Opc);
+
 namespace VOPD {
 
 enum Component : unsigned {