Skip to content

Commit

Permalink
[AMDGPU][True16][MC] VOP3 profile in True16 format (llvm#109031)
Browse files Browse the repository at this point in the history
Modify VOP3 profile and pesudo, and add encoding info for VOP3 True16
including DPP and DPP8 in true16 and fake16 format.

This patch applies true16/fake16 changes and asm/dasm changes to
V_ADD_NC_U16
V_ADD_NC_I16
V_SUB_NC_U16
V_SUB_NC_I16
  • Loading branch information
broxigarchen authored and EricWF committed Oct 22, 2024
1 parent ba82142 commit 576c186
Show file tree
Hide file tree
Showing 19 changed files with 2,896 additions and 926 deletions.
8 changes: 6 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2149,6 +2149,8 @@ class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
string ret = dst#", "#src0#src1#src2#opsel#mods#clamp;
}

// FIXME-TRUE16 AsmVOP3OpSel will be deprecated after all
// VOP3 16 bit instructions are replaced to true16 format
class getAsmVOP3OpSel <int NumSrcArgs,
bit HasClamp,
bit HasOMod,
Expand Down Expand Up @@ -2237,8 +2239,9 @@ class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
string clamp = !if(HasClamp, "$clamp", "");
string omod = !if(HasOMod, "$omod", "");

string ret = dst#!if(!gt(NumSrcArgs,0),", "#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod, "");

string ret = dst#!if(!eq(NumSrcArgs,0),
"",
!if(HasDst,", ", "")#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod);
}

class getAsmVOP3DPP<string base> {
Expand Down Expand Up @@ -2733,6 +2736,7 @@ def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I32_I32_I32_I16 : VOPProfile <[i32, i32, i32, i16]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/VOP2Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1664,8 +1664,8 @@ multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> :
VOP3Only_Realtriple<GFX11Gen, op>, VOP3Only_Realtriple<GFX12Gen, op>;

multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> :
VOP3Only_Realtriple_t16<GFX11Gen, op, asmName, OpName>,
VOP3Only_Realtriple_t16<GFX12Gen, op, asmName, OpName>;
VOP3_Realtriple_t16_gfx11<op, asmName, OpName, "", /*IsSingle*/1>,
VOP3_Realtriple_t16_gfx12<op, asmName, OpName, "", /*IsSingle*/1>;

multiclass VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> {
defm OpName#"_t16": VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_t16">;
Expand Down
84 changes: 53 additions & 31 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -569,16 +569,10 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
getAsmVOP3OpSel<3, HasClamp, HasOMod,
HasSrc0FloatMods, HasSrc1FloatMods,
HasSrc2FloatMods>.ret);
let AsmVOP3DPP16 = !subst(", $src2_modifiers", "",
getAsmVOP3DPP16<getAsmVOP3Base<3, 1, HasClamp, 1,
HasOMod, 0, 1, HasSrc0FloatMods,
HasSrc1FloatMods,
HasSrc2FloatMods>.ret>.ret);
let AsmVOP3DPP8 = !subst(", $src2_modifiers", "",
getAsmVOP3DPP8<getAsmVOP3Base<3, 1, HasClamp, 1,
HasOMod, 0, 1, HasSrc0FloatMods,
HasSrc1FloatMods,
HasSrc2FloatMods>.ret>.ret);
let AsmVOP3Base = !subst(", $src2_modifiers", "",
getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, 0/*Src1Mods*/,
HasModifiers, DstVT>.ret);
}

class VOP3_CVT_SR_F8_ByteSel_Profile<ValueType SrcVT> :
Expand Down Expand Up @@ -636,8 +630,8 @@ let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0

defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
defm V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;

defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
Expand Down Expand Up @@ -752,6 +746,8 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2),
(i32 (EXTRACT_SUBREG $src1, sub1)),
(i32 (EXTRACT_SUBREG $src2, sub1))), sub1)>;

} // End SubtargetPredicate = isGFX9Plus

// FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
class OpSelBinOpClampPat<SDPatternOperator node,
Instruction inst> : GCNPat<
Expand All @@ -760,9 +756,14 @@ class OpSelBinOpClampPat<SDPatternOperator node,
(inst $src0_modifiers, $src0, $src1_modifiers, $src1, DSTCLAMP.ENABLE, 0)
>;

def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
} // End SubtargetPredicate = isGFX9Plus
let SubtargetPredicate = isGFX9Plus, True16Predicate = NotHasTrue16BitInsts in {
def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
} // End SubtargetPredicate = isGFX9Plus, True16Predicate = NotHasTrue16BitInsts
let True16Predicate = UseFakeTrue16Insts in {
def : OpSelBinOpClampPat<saddsat, V_ADD_I16_fake16_e64>;
def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_fake16_e64>;
} // End True16Predicate = UseFakeTrue16Insts

multiclass IMAD32_Pats <VOP3_Pseudo inst> {
def : GCNPat <
Expand Down Expand Up @@ -871,21 +872,31 @@ let SubtargetPredicate = isGFX10Plus in {
def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
}

defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;

def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;

// Undo sub x, c -> add x, -c canonicalization since c is more likely
// an inline immediate than -c.
def : GCNPat<
(add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
(V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
>;
defm V_ADD_NC_U16 : VOP3Inst_t16 <"v_add_nc_u16", VOP_I16_I16_I16, add>;
defm V_SUB_NC_U16 : VOP3Inst_t16 <"v_sub_nc_u16", VOP_I16_I16_I16, sub>;

} // End SubtargetPredicate = isGFX10Plus

let True16Predicate = NotHasTrue16BitInsts, SubtargetPredicate = isGFX10Plus in {
def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
// Undo sub x, c -> add x, -c canonicalization since c is more likely
// an inline immediate than -c.
def : GCNPat<
(add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
(V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
>;
} // End True16Predicate = NotHasTrue16BitInsts, SubtargetPredicate = isGFX10Plus

let True16Predicate = UseFakeTrue16Insts in {
def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_fake16_e64>;
def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_fake16_e64>;
def : GCNPat<
(add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
(V_SUB_NC_U16_fake16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
>;
} // End True16Predicate = UseFakeTrue16Insts

let SubtargetPredicate = isGFX12Plus in {
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
defm V_PERMLANE16_VAR_B32 : VOP3Inst<"v_permlane16_var_b32", VOP3_PERMLANE_VAR_Profile>;
Expand Down Expand Up @@ -1104,6 +1115,17 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;

multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
string pseudo_mnemonic = "", bit isSingle = 0> :
VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>,
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;

multiclass VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
string pseudo_mnemonic = "", bit isSingle = 0> {
defm opName#"_t16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
defm opName#"_fake16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
}

multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> :
VOP3be_Real<GFX11Gen, op, opName, asmName>,
VOP3be_Real<GFX12Gen, op, opName, asmName>;
Expand Down Expand Up @@ -1189,17 +1211,17 @@ defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "
defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
defm V_MAD_I64_I32_gfx11 : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">;
defm V_ADD_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12<0x303>;
defm V_SUB_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12<0x304>;
defm V_ADD_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x303, "v_add_nc_u16">;
defm V_SUB_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x304, "v_sub_nc_u16">;
defm V_MUL_LO_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x305, "v_mul_lo_u16">;
defm V_CVT_PK_I16_F32 : VOP3_Realtriple_gfx11_gfx12<0x306>;
defm V_CVT_PK_U16_F32 : VOP3_Realtriple_gfx11_gfx12<0x307>;
defm V_MAX_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x309, "v_max_u16">;
defm V_MAX_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30a, "v_max_i16">;
defm V_MIN_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30b, "v_min_u16">;
defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30c, "v_min_i16">;
defm V_ADD_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x30d, "V_ADD_I16", "v_add_nc_i16">;
defm V_SUB_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "v_add_nc_i16", "V_ADD_I16">;
defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "v_sub_nc_i16", "V_SUB_I16">;
defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11_gfx12<0x311>;
defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >;
defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >;
Expand Down
Loading

0 comments on commit 576c186

Please sign in to comment.