[AMDGPU][True16][MC] VOP3 profile in True16 format #109031

broxigarchen · 2024-09-17T19:07:45Z

Modify VOP3 profile and pesudo, and add encoding info for VOP3 True16 including DPP and DPP8 in true16 and fake16 format.

This patch applies true16/fake16 changes and asm/dasm changes to
V_ADD_NC_U16
V_ADD_NC_I16
V_SUB_NC_U16
V_SUB_NC_I16

github-actions · 2024-09-18T15:12:49Z

✅ With the latest revision this PR passed the C/C++ code formatter.

llvmbot · 2024-10-02T17:18:54Z

@llvm/pr-subscribers-mc
@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-amdgpu

Author: Brox Chen (broxigarchen)

Changes

Patch is 447.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/109031.diff

21 Files Affected:

(modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+4-2)
(modified) llvm/lib/Target/AMDGPU/VOP2Instructions.td (+2-2)
(modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+59-31)
(modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+268-40)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir (+39-1)
(modified) llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir (+16-16)
(modified) llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll (+3-3)
(added) llvm/test/MC/AMDGPU/gfx11_asm_opsel.s (+48)
(modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3.s (+148-112)
(modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s (+120-120)
(modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s (+104-68)
(added) llvm/test/MC/AMDGPU/gfx12_asm_opsel.s (+48)
(modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3.s (+96-60)
(modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s (+188-140)
(modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s (+112-64)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt (+348-96)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt (+320-56)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt (+176-20)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt (+324-60)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt (+432-56)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt (+288-20)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 087ca1f954464d..be42d3595bea61 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2237,8 +2237,9 @@ class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
   string clamp = !if(HasClamp, "$clamp", "");
   string omod = !if(HasOMod, "$omod", "");
 
-  string ret = dst#!if(!gt(NumSrcArgs,0),", "#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod, "");
-
+  string ret = dst#!if(!eq(NumSrcArgs,0),
+                       "",
+                       !if(HasDst,", ", "")#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod);
 }
 
 class getAsmVOP3DPP<string base> {
@@ -2733,6 +2734,7 @@ def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
 def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
 def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
 def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
+def VOP_I32_I32_I32_I16 : VOPProfile <[i32, i32, i32, i16]>;
 def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
 def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
 def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index cdc32149249610..f411ea277bbe23 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1661,8 +1661,8 @@ multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> :
   VOP3Only_Realtriple<GFX11Gen, op>, VOP3Only_Realtriple<GFX12Gen, op>;
 
 multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> :
-  VOP3Only_Realtriple_t16<GFX11Gen, op, asmName, OpName>,
-  VOP3Only_Realtriple_t16<GFX12Gen, op, asmName, OpName>;
+  VOP3_Realtriple_t16_gfx11<op, asmName, OpName, "", 1>,
+  VOP3_Realtriple_t16_gfx12<op, asmName, OpName, "", 1>;
 
 multiclass VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> {
   defm OpName#"_t16": VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_t16">;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 78ca7a2f258cb3..fe37b56bcbf862 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -569,16 +569,10 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
                             getAsmVOP3OpSel<3, HasClamp, HasOMod,
                                             HasSrc0FloatMods, HasSrc1FloatMods,
                                             HasSrc2FloatMods>.ret);
-  let AsmVOP3DPP16 = !subst(", $src2_modifiers", "",
-                            getAsmVOP3DPP16<getAsmVOP3Base<3, 1, HasClamp, 1,
-                                            HasOMod, 0, 1, HasSrc0FloatMods,
-                                            HasSrc1FloatMods,
-                                            HasSrc2FloatMods>.ret>.ret);
-  let AsmVOP3DPP8 = !subst(", $src2_modifiers", "",
-                           getAsmVOP3DPP8<getAsmVOP3Base<3, 1, HasClamp, 1,
-                                          HasOMod, 0, 1, HasSrc0FloatMods,
-                                          HasSrc1FloatMods,
-                                          HasSrc2FloatMods>.ret>.ret);
+  let AsmVOP3Base = !subst(", $src2_modifiers", "",
+     getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
+     HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, 0/*Src1Mods*/,
+     HasModifiers, DstVT>.ret);
 }
 
 class VOP3_CVT_SR_F8_ByteSel_Profile<ValueType SrcVT> :
@@ -636,8 +630,8 @@ let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
   defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>;
 } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
 
-defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
-defm V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
+defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
+defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
 
 defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
 defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
@@ -752,6 +746,8 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2),
                               (i32 (EXTRACT_SUBREG $src1, sub1)),
                               (i32 (EXTRACT_SUBREG $src2, sub1))), sub1)>;
 
+} // End SubtargetPredicate = isGFX9Plus
+
 // FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
 class OpSelBinOpClampPat<SDPatternOperator node,
                          Instruction inst> : GCNPat<
@@ -760,9 +756,14 @@ class OpSelBinOpClampPat<SDPatternOperator node,
   (inst $src0_modifiers, $src0, $src1_modifiers, $src1, DSTCLAMP.ENABLE, 0)
 >;
 
-def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
-def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
-} // End SubtargetPredicate = isGFX9Plus
+let OtherPredicates = [isGFX9Plus], True16Predicate = NotHasTrue16BitInsts in {
+  def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
+  def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
+} // End OtherPredicates = [isGFX9Plus], True16Predicate = NotHasTrue16BitInsts
+let True16Predicate = UseFakeTrue16Insts in {
+  def : OpSelBinOpClampPat<saddsat, V_ADD_I16_fake16_e64>;
+  def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_fake16_e64>;
+} // End True16Predicate = UseFakeTrue16Insts
 
 multiclass IMAD32_Pats <VOP3_Pseudo inst> {
   def : GCNPat <
@@ -871,21 +872,31 @@ let SubtargetPredicate = isGFX10Plus in {
     def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
   }
 
-  defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
-  defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
-
-  def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
-  def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
-
-  // Undo sub x, c -> add x, -c canonicalization since c is more likely
-  // an inline immediate than -c.
-  def : GCNPat<
-    (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
-    (V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
-  >;
+  defm V_ADD_NC_U16 : VOP3Inst_t16 <"v_add_nc_u16", VOP_I16_I16_I16, add>;
+  defm V_SUB_NC_U16 : VOP3Inst_t16 <"v_sub_nc_u16", VOP_I16_I16_I16, sub>;
 
 } // End SubtargetPredicate = isGFX10Plus
 
+let True16Predicate = NotHasTrue16BitInsts, OtherPredicates = [isGFX10Plus] in {
+   def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
+   def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
+   // Undo sub x, c -> add x, -c canonicalization since c is more likely
+   // an inline immediate than -c.
+   def : GCNPat<
+     (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
+     (V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
+   >;
+} // End True16Predicate = NotHasTrue16BitInsts, OtherPredicates = [isGFX10Plus]
+
+let True16Predicate = UseFakeTrue16Insts in {
+   def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_fake16_e64>;
+   def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_fake16_e64>;
+   def : GCNPat<
+     (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
+     (V_SUB_NC_U16_fake16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
+   >;
+} // End True16Predicate = UseFakeTrue16Insts
+
 let SubtargetPredicate = isGFX12Plus in {
   let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
     defm V_PERMLANE16_VAR_B32  : VOP3Inst<"v_permlane16_var_b32",  VOP3_PERMLANE_VAR_Profile>;
@@ -1101,9 +1112,26 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
   VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
   VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
 
+multiclass VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string opName,
+                                                 string asmName> {
+  defm opName#"_t16": VOP3_Realtriple_with_name_gfx11_gfx12<op, opName#"_t16", asmName>;
+  defm opName#"_fake16": VOP3_Realtriple_with_name_gfx11_gfx12<op, opName#"_fake16", asmName>;
+}
+
 multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
   VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;
 
+multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
+                                     string pseudo_mnemonic = "", bit isSingle = 0> :
+  VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>,
+  VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
+
+multiclass VOP3_Realtriple_t16_and_f16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
+                                     string pseudo_mnemonic = "", bit isSingle = 0> {
+  defm opName#"_t16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
+  defm opName#"_fake16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
+}
+
 multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> :
   VOP3be_Real<GFX11Gen, op, opName, asmName>,
   VOP3be_Real<GFX12Gen, op, opName, asmName>;
@@ -1189,8 +1217,8 @@ defm V_DIV_SCALE_F32       : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "
 defm V_DIV_SCALE_F64       : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
 defm V_MAD_U64_U32_gfx11   : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
 defm V_MAD_I64_I32_gfx11   : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">;
-defm V_ADD_NC_U16          : VOP3Only_Realtriple_gfx11_gfx12<0x303>;
-defm V_SUB_NC_U16          : VOP3Only_Realtriple_gfx11_gfx12<0x304>;
+defm V_ADD_NC_U16          : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x303, "v_add_nc_u16">;
+defm V_SUB_NC_U16          : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x304, "v_sub_nc_u16">;
 defm V_MUL_LO_U16          : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x305, "v_mul_lo_u16">;
 defm V_CVT_PK_I16_F32      : VOP3_Realtriple_gfx11_gfx12<0x306>;
 defm V_CVT_PK_U16_F32      : VOP3_Realtriple_gfx11_gfx12<0x307>;
@@ -1198,8 +1226,8 @@ defm V_MAX_U16             : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30
 defm V_MAX_I16             : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30a, "v_max_i16">;
 defm V_MIN_U16             : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30b, "v_min_u16">;
 defm V_MIN_I16             : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30c, "v_min_i16">;
-defm V_ADD_NC_I16          : VOP3_Realtriple_with_name_gfx11_gfx12<0x30d, "V_ADD_I16", "v_add_nc_i16">;
-defm V_SUB_NC_I16          : VOP3_Realtriple_with_name_gfx11_gfx12<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
+defm V_ADD_NC_I16          : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "V_ADD_I16", "v_add_nc_i16">;
+defm V_SUB_NC_I16          : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
 defm V_PACK_B32_F16        : VOP3_Realtriple_gfx11_gfx12<0x311>;
 defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >;
 defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 05a7d907d237ae..0c21ef29cc3c74 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -111,7 +111,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
 
   bit HasFP8DstByteSel = P.HasFP8DstByteSel;
 
-  let AsmOperands = !if(isVop3OpSel,
+  let AsmOperands = !if(!and(!not(P.IsTrue16), isVop3OpSel),
                         P.AsmVOP3OpSel,
                         !if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64));
 
@@ -178,6 +178,7 @@ class VOP3_Real <VOP_Pseudo ps, int EncodingFamily, string asm_name = ps.Mnemoni
   let SubtargetPredicate = ps.SubtargetPredicate;
   let WaveSizePredicate  = ps.WaveSizePredicate;
   let OtherPredicates    = ps.OtherPredicates;
+  let True16Predicate    = ps.True16Predicate;
   let AsmMatchConverter  = ps.AsmMatchConverter;
   let AsmVariantName     = ps.AsmVariantName;
   let Constraints        = ps.Constraints;
@@ -242,6 +243,41 @@ class VOP3a<VOPProfile P> : Enc64 {
   let Inst{63}    = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
 }
 
+// To avoid having different version of every type of operand depending on if
+// they are part of a True16 instruction or not, the operand encoding should be
+// the same for SGPR, imm, and VGPR_32 whether the instruction is True16 or not.
+class VOP3a_t16<VOPProfile P> : Enc64 {
+  bits<11> vdst;
+  bits<4> src0_modifiers;
+  bits<11> src0;
+  bits<3> src1_modifiers;
+  bits<11> src1;
+  bits<3> src2_modifiers;
+  bits<11> src2;
+  bits<1> clamp;
+  bits<2> omod;
+
+  let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
+  let Inst{8}     = !if(P.HasSrc0Mods, src0_modifiers{1}, 0);
+  let Inst{9}     = !if(P.HasSrc1Mods, src1_modifiers{1}, 0);
+  let Inst{10}    = !if(P.HasSrc2Mods, src2_modifiers{1}, 0);
+  // 16-bit select fields which can be interpreted as OpSel or hi/lo suffix
+  let Inst{11} = !if(P.HasSrc0Mods, src0_modifiers{2}, 0);
+  let Inst{12} = !if(P.HasSrc1Mods, src1_modifiers{2}, 0);
+  let Inst{13} = !if(P.HasSrc2Mods, src2_modifiers{2}, 0);
+  let Inst{14} = !if(!and(P.HasDst, P.HasSrc0Mods), src0_modifiers{3}, 0);
+  let Inst{15}    = !if(P.HasClamp, clamp{0}, 0);
+
+  let Inst{31-26} = 0x35;
+  let Inst{40-32} = !if(P.HasSrc0, src0{8-0}, 0);
+  let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0);
+  let Inst{58-50} = !if(P.HasSrc2, src2{8-0}, 0);
+  let Inst{60-59} = !if(P.HasOMod, omod, 0);
+  let Inst{61}    = !if(P.HasSrc0Mods, src0_modifiers{0}, 0);
+  let Inst{62}    = !if(P.HasSrc1Mods, src1_modifiers{0}, 0);
+  let Inst{63}    = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
+}
+
 class VOP3a_gfx6_gfx7<bits<9> op, VOPProfile p> : VOP3a<p> {
   let Inst{11}    = !if(p.HasClamp, clamp{0}, 0);
   let Inst{25-17} = op;
@@ -272,6 +308,10 @@ class VOP3e_gfx10<bits<10> op, VOPProfile p> : VOP3a_gfx10<op, p> {
 
 class VOP3e_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p>;
 
+class VOP3e_t16_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3a_t16<p> {
+  let Inst{25-16} = op;
+}
+
 class VOP3e_vi <bits<10> op, VOPProfile P> : VOP3a_vi <op, P> {
   bits<8> vdst;
   let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
@@ -736,7 +776,12 @@ class VOP3_DPPe_Fields : VOP3_DPPe_Fields_Base {
   bits<8> src0;
 }
 
+class VOP3_DPPe_Fields_t16 : VOP3_DPPe_Fields_Base {
+  bits<11> src0;
+}
+
 // Common refers to common between DPP and DPP8
+// Base refers to a shared base between T16 and regular instructions
 class VOP3_DPPe_Common_Base<bits<10> op, VOPProfile P> : Enc96 {
   bits<4> src0_modifiers;
   bits<3> src1_modifiers;
@@ -748,7 +793,7 @@ class VOP3_DPPe_Common_Base<bits<10> op, VOPProfile P> : Enc96 {
   let Inst{8}     = !if(P.HasSrc0Mods, src0_modifiers{1}, 0);
   let Inst{9}     = !if(P.HasSrc1Mods, src1_modifiers{1}, 0);
   let Inst{10}    = !if(P.HasSrc2Mods, src2_modifiers{1}, 0);
-  // OPSEL must be set such that the low result only uses low inputs, and the high result only uses high inputs.
+  // 16-bit select fields which can be interpreted as OpSel or hi/lo suffix
   let Inst{11} = !if(P.HasOpSel, !if(P.HasSrc0Mods, src0_modifiers{2}, 0),
                                  !if(P.IsFP8SrcByteSel, byte_sel{1}, ?));
   let Inst{12} = !if(P.HasOpSel, !if(P.HasSrc1Mods, src1_modifiers{2}, 0),
@@ -777,6 +822,16 @@ class VOP3_DPPe_Common<bits<10> op, VOPProfile P> : VOP3_DPPe_Common_Base<op, P>
   let Inst{58-50} = !if(P.HasSrc2, src2, 0);
 }
 
+class VOP3_DPPe_Common_t16<bits<10> op, VOPProfile P> : VOP3_DPPe_Common_Base<op, P> {
+  bits<11> vdst;
+  bits<11> src1;
+  bits<11> src2;
+
+  let Inst{7-0}   = !if(P.EmitDst, vdst{7-0}, 0);
+  let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0);
+  let Inst{58-50} = !if(P.HasSrc2, src2{8-0}, 0);
+}
+
 class VOP3P_DPPe_Common_Base<bits<7> op, VOPProfile P> : Enc96 {
   bits<4> src0_modifiers;
   bits<4> src1_modifiers;
@@ -786,6 +841,7 @@ class VOP3P_DPPe_Common_Base<bits<7> op, VOPProfile P> : Enc96 {
   let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
   let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
   let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2
+  // OPSEL must be set such that the low result only uses low inputs, and the high result only uses high inputs.
   let Inst{11} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{2}, 0); // op_sel(0)
   let Inst{12} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{2}, 0); // op_sel(1)
   let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2}, 0); // op_sel(2)
@@ -810,6 +866,16 @@ class VOP3P_DPPe_Common<bits<7> op, VOPProfile P> : VOP3P_DPPe_Common_Base<op, P
   let Inst{58-50} = !if(P.HasSrc2, src2, 0);
 }
 
+class VOP3P_DPPe_Common_t16<bits<7> op, VOPProfile P> : VOP3P_DPPe_Common_Base<op, P> {
+  bits<11> vdst;
+  bits<11> src1;
+  bits<11> src2;
+
+  let Inst{7-0} = vdst{7-0};
+  let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0);
+  let Inst{58-50} = !if(P.HasSrc2, src2{8-0}, 0);
+}
+
 class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[],
   dag Ins = P.InsDPP, string asmOps = P.AsmDPP> :
   VOP_Pseudo<OpName, "_dpp", P, P.OutsDPP, Ins, asmOps, pattern> {
@@ -870,6 +936,7 @@ class VOP_DPP_Real <VOP_DPP_Pseudo ps, int EncodingFamily> :
   // Copy relevant pseudo op flags
   let isConvergent         = ps.isConvergent;
   let SubtargetPredicate   = ps.SubtargetPredicate;
+  let True16Predicate      = ps.True16Predicate;
   let AssemblerPredicate   = ps.AssemblerPredicate;
   let OtherPredicates      = ps.OtherPredicates;
   let AsmMatchConverter    = ps.AsmMatchConverter;
@@ -928,11 +995,29 @@ class VOP3_DPP_Base <string OpName, VOPProfile P, bit IsDPP16,
   let Size = 12;
 }
 
+class VOP3_DPP_Enc <bits<10> op, VOPProfile P, bit IsDPP16> :
+  VOP3_DPPe_Common<op, P>,
+  VOP3_DPPe_Fields {
+
+  let Inst{40-32} = 0xfa;
+  let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
+  let Inst{80-72} = dpp_ctrl;
+  let Inst{82}    = !if(IsDPP16, fi, ?);
+  let Inst{83}    = bound_ctrl;
+
+  // Inst{87-84} ignored by hw
+  let Inst{91-88} = bank_mask;
+  let Inst{95-92} = row_mask;
+}
+
 class VOP3_DPP <bits<10> op, string OpName, VOPProfile P, bit IsDPP16,
                dag InsDPP = !if(IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP),
                string AsmDPP = !if(IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)> :
-  VOP3_DPP_Base<OpName, P, IsDPP16, InsDPP, AsmDPP>, VOP3_DPPe_Common<op, P>,
-  VOP3_DPPe_Fields {
+  VOP3_DPP_Base<OpName, P, IsDPP16, InsDPP, AsmDPP>, VOP3_DPP_Enc<op, P, IsDPP16>;
+
+class VOP3_DPP_Enc_t16<bits<10> op, VOPProfile P, bit IsDPP16 >
+    : VOP3_DPPe_Common_t16<op, P>,
+      VOP3_DPPe_Fields_t16 {
 
   let Inst{40-32} = 0xfa;
   let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
@@ -945,6 +1030,13 @@ class VOP3_DPP <bits<10> op, string OpName, VOPProfile P, bit IsDPP16,
   let Inst{95-92} = row_mask;
 }
 
+class VOP3_DPP_t16<bits<10> op, string OpName, VOPProfile P, bit IsDPP16,
+                   dag InsDPP = !if (IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP),
+                   string AsmDPP = !if (IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)>
+    : VOP3_DPP_Base<OpName, P, IsDPP16, InsDPP, AsmDPP>,
+      VOP3_DPP_Enc_t16<op, P, IsDPP16> {
+}
+
 class VOP3P_DPP <bits<7> op, string OpName, VOPProfile P, bit IsDPP16,
                dag InsDPP = !if(IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP),
                string AsmDPP = !if(IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)> :
@@ -979,6 +1071,12 @@ class VOP3_DPP8e_Fields {
   bits<9> fi;
 }
 
+class VOP3_DPP8e_Fields_t16 {
+  bits<11> src0;
+  bits<24> dpp8;
+  bits<9> fi;
+}
+
 class VOP_DPP8_Base<string OpName, VOPProfile P, dag InsDPP8 = P.InsDPP8, string AsmDPP8 = P.AsmDPP8> :
   InstSI<P.OutsDPP8, InsDPP8, OpName#AsmDPP8, []> {
 
@@ -1011,16 +1109,28 @@ class VOP3_DPP8_Base<string OpName, VOPProfile P> :
   let Size = 12;
 }
 ...
[truncated]

llvm/lib/Target/AMDGPU/VOP3Instructions.td

llvm/lib/Target/AMDGPU/VOP2Instructions.td

llvm/lib/Target/AMDGPU/VOP3Instructions.td

llvm/lib/Target/AMDGPU/VOPInstructions.td

Sisyph · 2024-10-07T14:11:44Z

llvm/lib/Target/AMDGPU/VOPInstructions.td

@@ -1705,4 +1933,4 @@ def VOPTrue16Table : GenericTable {

  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getTrue16OpcodeHelper";
-}


Remove whitespace change

Hmmm I don't think I change anything here. Not sure why there is a diff

Perhaps clang-format? It seems to have deleted the blank line. I don't see a clear answer on whether a EoF blank line is required for tablegen, but would prefer to keep it.

llvm/test/MC/AMDGPU/gfx12_asm_opsel.s

Sisyph

The commit message should also say it adds encoding info for VOP3 True16 including DPP and DPP8.

llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Sisyph

LGTM, but please wait a day for others to have a look.

Sisyph · 2024-10-08T14:31:27Z

llvm/lib/Target/AMDGPU/VOPInstructions.td

@@ -1705,4 +1933,4 @@ def VOPTrue16Table : GenericTable {

  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getTrue16OpcodeHelper";
-}


Perhaps clang-format? It seems to have deleted the blank line. I don't see a clear answer on whether a EoF blank line is required for tablegen, but would prefer to keep it.

kosarev

This feel a lot for a single patch. Could this be a series of smaller changes?

llvm/lib/Target/AMDGPU/VOP3Instructions.td

llvm/lib/Target/AMDGPU/SIInstrInfo.td

llvm/lib/Target/AMDGPU/VOP3Instructions.td

llvm/lib/Target/AMDGPU/VOPInstructions.td

kosarev · 2024-10-09T12:37:15Z

llvm/lib/Target/AMDGPU/VOPInstructions.td

-                     string opName = ps.OpName> :
-  VOP3_DPP16 <op, ps, Gen.Subtarget, opName> {
+                     string opName = ps.OpName>
+    : VOP3_DPP16 <op, ps, Gen.Subtarget, opName> {


Whitespace change? Doesn't seem to match what we have downstream either.

It should be matching the downstream I suppose

It seems it wasn't matching due the space after <. I think making whitespace changes upstream just because we have it differently downstream doesn't make much sense; we can always fix the downstream to match upstream instead.

kosarev · 2024-10-09T12:44:32Z

llvm/lib/Target/AMDGPU/VOPInstructions.td

+
+multiclass VOP3Only_Realtriple_t16_gfx11<bits<10> op, string asmName,
+                                     string opName = NAME, string pseudo_mnemonic = "">
+  : VOP3_Realtriple_t16_gfx11<op, asmName, opName, pseudo_mnemonic, 1>;


Why was this moved here?

I think it's to match downstream

Again, please move them downstream to match the upstream instead.

llvm/lib/Target/AMDGPU/VOPInstructions.td

llvm/test/MC/AMDGPU/gfx11_asm_vop3.s

llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s

kosarev · 2024-10-09T13:32:54Z

llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s

@@ -4475,30 +4523,6 @@ v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
 v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]

-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]


Please move the updated instructions back here so we can see none have been lost. Reshuffling tests also makes updating downstream branches a pain.

Ok. In this case I will put gfx11_asm_opsel.s back to the vop3.s. I'll also put up a patch in the downstream

I understand these op_sel cases are already in gfx11_asm_vop3_dpp16-fake16.s? Can you confirm they are all actually there, please?

Yes they are

kosarev

LGTM with a few minor things to refine. If you expect this to not land smoothly downstream, especially the test changes, please make sure to prepare a downstream version of the patch before submitting here.

kosarev · 2024-10-10T14:13:19Z

llvm/lib/Target/AMDGPU/VOPInstructions.td

-                     string opName = ps.OpName> :
-  VOP3_DPP16 <op, ps, Gen.Subtarget, opName> {
+                     string opName = ps.OpName>
+    : VOP3_DPP16 <op, ps, Gen.Subtarget, opName> {


It seems it wasn't matching due the space after <. I think making whitespace changes upstream just because we have it differently downstream doesn't make much sense; we can always fix the downstream to match upstream instead.

kosarev · 2024-10-10T14:17:38Z

llvm/lib/Target/AMDGPU/VOPInstructions.td

+
+multiclass VOP3Only_Realtriple_t16_gfx11<bits<10> op, string asmName,
+                                     string opName = NAME, string pseudo_mnemonic = "">
+  : VOP3_Realtriple_t16_gfx11<op, asmName, opName, pseudo_mnemonic, 1>;


Again, please move them downstream to match the upstream instead.

llvm/test/MC/AMDGPU/gfx11_asm_vop3.s

llvm/lib/Target/AMDGPU/VOP3Instructions.td

llvm/lib/Target/AMDGPU/VOPInstructions.td

kosarev · 2024-10-10T14:56:56Z

llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s

@@ -4475,30 +4523,6 @@ v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
 v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]

-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]


I understand these op_sel cases are already in gfx11_asm_vop3_dpp16-fake16.s? Can you confirm they are all actually there, please?

llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s

llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt

Modify VOP3 profile and pesudo, and add encoding info for VOP3 True16 including DPP and DPP8 in true16 and fake16 format. This patch applies true16/fake16 changes and asm/dasm changes to V_ADD_NC_U16 V_ADD_NC_I16 V_SUB_NC_U16 V_SUB_NC_I16

broxigarchen force-pushed the main-merge-true16-vop3-mc branch 3 times, most recently from 5a8aa16 to cce56ef Compare September 18, 2024 15:09

broxigarchen force-pushed the main-merge-true16-vop3-mc branch 5 times, most recently from 0d1d66b to ef0e27d Compare September 20, 2024 22:58

broxigarchen force-pushed the main-merge-true16-vop3-mc branch 3 times, most recently from ecf3043 to 91af38a Compare October 2, 2024 17:17

broxigarchen marked this pull request as ready for review October 2, 2024 17:18

llvmbot added backend:AMDGPU mc Machine (object) code llvm:globalisel labels Oct 2, 2024

arsenm requested review from kosarev and Sisyph October 2, 2024 17:20

cdevadas reviewed Oct 7, 2024

View reviewed changes

llvm/lib/Target/AMDGPU/VOP3Instructions.td Outdated Show resolved Hide resolved

llvm/lib/Target/AMDGPU/VOP3Instructions.td Outdated Show resolved Hide resolved

Sisyph reviewed Oct 7, 2024

View reviewed changes

Sisyph requested review from jayfoad and rampitec October 7, 2024 20:58

Sisyph reviewed Oct 7, 2024

View reviewed changes

llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt Outdated Show resolved Hide resolved

cdevadas reviewed Oct 8, 2024

View reviewed changes

llvm/lib/Target/AMDGPU/VOP3Instructions.td Outdated Show resolved Hide resolved

cdevadas reviewed Oct 8, 2024

View reviewed changes

llvm/lib/Target/AMDGPU/VOP3Instructions.td Outdated Show resolved Hide resolved

broxigarchen force-pushed the main-merge-true16-vop3-mc branch from 36be9b7 to 685062d Compare October 8, 2024 14:08

Sisyph approved these changes Oct 8, 2024

View reviewed changes

kosarev reviewed Oct 9, 2024

View reviewed changes

broxigarchen force-pushed the main-merge-true16-vop3-mc branch from 39f3ec9 to 33123f8 Compare October 9, 2024 16:10

broxigarchen requested a review from kosarev October 10, 2024 14:42

kosarev approved these changes Oct 10, 2024

View reviewed changes

[AMDGPU][True16][MC] VOP3 profile in True16 format

430b2ef

broxigarchen force-pushed the main-merge-true16-vop3-mc branch from 33123f8 to 430b2ef Compare October 15, 2024 13:37

broxigarchen merged commit 7b4c8b3 into llvm:main Oct 16, 2024
8 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AMDGPU][True16][MC] VOP3 profile in True16 format #109031

[AMDGPU][True16][MC] VOP3 profile in True16 format #109031

broxigarchen commented Sep 17, 2024 •

edited

Loading

github-actions bot commented Sep 18, 2024 •

edited

Loading

llvmbot commented Oct 2, 2024 •

edited

Loading

Sisyph Oct 7, 2024

broxigarchen Oct 7, 2024

Sisyph Oct 8, 2024

Sisyph left a comment

Sisyph left a comment

Sisyph Oct 8, 2024

kosarev left a comment

kosarev Oct 9, 2024

broxigarchen Oct 9, 2024

kosarev Oct 10, 2024

kosarev Oct 9, 2024

broxigarchen Oct 9, 2024

kosarev Oct 10, 2024

kosarev Oct 9, 2024

broxigarchen Oct 9, 2024

kosarev Oct 10, 2024

broxigarchen Oct 15, 2024

kosarev left a comment

kosarev Oct 10, 2024

kosarev Oct 10, 2024

kosarev Oct 10, 2024

[AMDGPU][True16][MC] VOP3 profile in True16 format #109031

[AMDGPU][True16][MC] VOP3 profile in True16 format #109031

Conversation

broxigarchen commented Sep 17, 2024 • edited Loading

github-actions bot commented Sep 18, 2024 • edited Loading

llvmbot commented Oct 2, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Sisyph left a comment

Choose a reason for hiding this comment

Sisyph left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

kosarev left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

kosarev left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

broxigarchen commented Sep 17, 2024 •

edited

Loading

github-actions bot commented Sep 18, 2024 •

edited

Loading

llvmbot commented Oct 2, 2024 •

edited

Loading