Skip to content

Commit

Permalink
[X86][AVX10.2] Support AVX10.2-COMEF new instructions. (llvm#108063)
Browse files Browse the repository at this point in the history
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965

Chapter 8  AVX10 COMPARE SCALAR FP WITH ENHANCED EFLAGS INSTRUCTIONS

---------

Co-authored-by: mattarde <mattarde@intel.com>
  • Loading branch information
mahesh-attarde and mattarde authored Sep 18, 2024
1 parent 707169a commit f5ad9e1
Show file tree
Hide file tree
Showing 12 changed files with 1,405 additions and 88 deletions.
31 changes: 27 additions & 4 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26159,22 +26159,43 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (CC == ISD::SETLT || CC == ISD::SETLE)
std::swap(LHS, RHS);

SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
// With AVX10.2, EQ and NE comparisons can use the COMX/UCOMX compare nodes.
bool HasAVX10_2_COMX =
Subtarget.hasAVX10_2() && (CC == ISD::SETEQ || CC == ISD::SETNE);

// The AVX10.2 COMX/UCOMX compares only cover v2f64, v4f32 and v8f16.
// For v8bf16 operands we keep the original intrinsic opcode as a fallback.
bool HasAVX10_2_COMX_Ty = (LHS.getSimpleValueType() != MVT::v8bf16);

auto ComiOpCode = IntrData->Opc0;
auto isUnordered = (ComiOpCode == X86ISD::UCOMI);

if (HasAVX10_2_COMX && HasAVX10_2_COMX_Ty)
ComiOpCode = isUnordered ? X86ISD::UCOMX : X86ISD::COMX;

SDValue Comi = DAG.getNode(ComiOpCode, dl, MVT::i32, LHS, RHS);

SDValue SetCC;
switch (CC) {
case ISD::SETEQ: { // (ZF = 0 and PF = 0)
case ISD::SETEQ: {
SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
if (HasAVX10_2_COMX & HasAVX10_2_COMX_Ty) // ZF == 1
break;
// (ZF = 1 and PF = 0)
SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
break;
}
case ISD::SETNE: { // (ZF = 1 or PF = 1)
case ISD::SETNE: {
SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
if (HasAVX10_2_COMX & HasAVX10_2_COMX_Ty) // ZF == 0
break;
// (ZF = 0 or PF = 1)
SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG);
SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
break;
}
case ISD::SETGT: // (CF = 0 and ZF = 0)
case ISD::SETGT: // (CF = 0 and ZF = 0)
case ISD::SETLT: { // Condition opposite to GT. Operands swapped above.
SetCC = getSETCC(X86::COND_A, Comi, dl, DAG);
break;
Expand Down Expand Up @@ -34083,6 +34104,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(STRICT_FCMPS)
NODE_NAME_CASE(COMI)
NODE_NAME_CASE(UCOMI)
NODE_NAME_CASE(COMX)
NODE_NAME_CASE(UCOMX)
NODE_NAME_CASE(CMPM)
NODE_NAME_CASE(CMPMM)
NODE_NAME_CASE(STRICT_CMPM)
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ namespace llvm {
COMI,
UCOMI,

// X86 AVX10.2 scalar compares (COMX/UCOMX); selected in place of COMI/UCOMI
// when lowering EQ/NE comparison intrinsics.
COMX,
UCOMX,

/// X86 bit-test instructions.
BT,

Expand Down
46 changes: 46 additions & 0 deletions llvm/lib/Target/X86/X86InstrAVX10.td
Original file line number Diff line number Diff line change
Expand Up @@ -1537,3 +1537,49 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_
defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
X86Fnmsub, SchedWriteFMA>;
}

//-------------------------------------------------
// AVX10 COMEF instructions
//-------------------------------------------------
// Emits the three EFLAGS-setting compare records (rr_Int, rm_Int, rrb_Int)
// shared by every AVX10 COMEF instruction instantiated from this multiclass.
//
//   Opc       - opcode byte forwarded to the AVX512 instruction class.
//   _         - X86VectorVTInfo supplying the register class (RC), value type
//               (VT), scalar memory operand (ScalarMemOp) and load fragment
//               (LdFrag) used below.
//   OpNode    - SDNode matched by the rr_Int and rm_Int selection patterns.
//   OpcodeStr - assembly mnemonic.
//   d         - execution domain applied to all three records.
//   sched     - scheduling class for all three records (default WriteFComX).
//
// None of the records has an explicit output operand ((outs) is empty); the
// selection patterns write EFLAGS.
multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
string OpcodeStr,
Domain d,
X86FoldableSchedWrite sched = WriteFComX> {
let ExeDomain = d, mayRaiseFPException = 1 in {
// Register-register form: EFLAGS is set from OpNode applied to $src1, $src2.
def rr_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2))]>,
EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
// Register-memory form: $src2 is a scalar memory operand loaded through
// _.LdFrag, hence mayLoad = 1.
let mayLoad = 1 in {
def rm_Int : AVX512<Opc, MRMSrcMem, (outs), (ins _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (_.VT _.RC:$src1), (_.LdFrag addr:$src2)))]>,
EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
}
// Register-register form tagged EVEX_B whose assembly string carries the
// {sae} modifier. Its pattern list is empty, so no selection pattern is
// attached to this record here.
def rrb_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
[]>,
EVEX, EVEX_V128, EVEX_B, Sched<[sched]>, SIMD_EXC;
}
}

// Instantiates avx10_com_ef_int for the six COMEF compares. Every record in
// this scope lists EFLAGS as an implicit def and MXCSR as an implicit use, and
// is predicated on HasAVX10_2.
//
//   vcomxsd / vcomxsh / vcomxss    - opcode 0x2f, matched via X86comi512.
//   vucomxsd / vucomxsh / vucomxss - opcode 0x2e, matched via X86ucomi512.
//
// The SD variants use v2f64x_info with 64-bit CD8 scaling and REX_W, the SS
// variants use v4f32x_info with 32-bit CD8 scaling, and the SH variants use
// v8f16x_info with 16-bit CD8 scaling in T_MAP5. The SD and SS variants use
// TB and add VEX_LIG; the SH variants list neither.
let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in {
defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512,
"vcomxsd", SSEPackedDouble>,
TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCOMXSHZ : avx10_com_ef_int<0x2f, v8f16x_info, X86comi512,
"vcomxsh", SSEPackedSingle>,
T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
defm VCOMXSSZ : avx10_com_ef_int<0x2f, v4f32x_info, X86comi512,
"vcomxss", SSEPackedSingle>,
TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VUCOMXSDZ : avx10_com_ef_int<0x2e, v2f64x_info, X86ucomi512,
"vucomxsd", SSEPackedDouble>,
TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
defm VUCOMXSHZ : avx10_com_ef_int<0x2e, v8f16x_info, X86ucomi512,
"vucomxsh", SSEPackedSingle>,
T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
defm VUCOMXSSZ : avx10_com_ef_int<0x2e, v4f32x_info, X86ucomi512,
"vucomxss", SSEPackedSingle>,
TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86FCmp>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86FCmp>;

// Pattern-side handles for the X86ISD::COMX / X86ISD::UCOMX compare nodes.
// Despite the `512` suffix these bind to COMX/UCOMX and share the SDTX86FCmp
// type profile with X86comi / X86ucomi.
def X86comi512 : SDNode<"X86ISD::COMX", SDTX86FCmp>;
def X86ucomi512 : SDNode<"X86ISD::UCOMX", SDTX86FCmp>;
def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
Expand Down
Loading

0 comments on commit f5ad9e1

Please sign in to comment.