Skip to content

Commit

Permalink
Fix some handling in the JIT related to embedded broadcast/masking
Browse files Browse the repository at this point in the history
  • Loading branch information
tannergooding committed May 6, 2024
1 parent 57efa0a commit 5e6e1af
Show file tree
Hide file tree
Showing 11 changed files with 262 additions and 42 deletions.
10 changes: 10 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -9480,6 +9480,16 @@ class Compiler
#endif
}

//------------------------------------------------------------------------
// canUseEmbeddedBroadcast: Reports the state of the EnableEmbeddedBroadcast
//    JIT config switch.
//
// Return Value:
//    The value of JitConfig.EnableEmbeddedBroadcast() converted to bool.
//
bool canUseEmbeddedBroadcast() const
{
    const bool isBroadcastEnabled = JitConfig.EnableEmbeddedBroadcast();
    return isBroadcastEnabled;
}

//------------------------------------------------------------------------
// canUseEmbeddedMasking: Reports the state of the EnableEmbeddedMasking
//    JIT config switch.
//
// Return Value:
//    The value of JitConfig.EnableEmbeddedMasking() converted to bool.
//
bool canUseEmbeddedMasking() const
{
    const bool isMaskingEnabled = JitConfig.EnableEmbeddedMasking();
    return isMaskingEnabled;
}

#ifdef TARGET_XARCH
public:
bool canUseVexEncoding() const
Expand Down
37 changes: 23 additions & 14 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -1653,21 +1653,30 @@ class emitter
{
assert(!idIsEvexbContextSet());

if (instOptions == INS_OPTS_EVEX_eb_er_rd)
switch (instOptions & INS_OPTS_EVEX_b_MASK)
{
_idEvexbContext = 1;
}
else if (instOptions == INS_OPTS_EVEX_er_ru)
{
_idEvexbContext = 2;
}
else if (instOptions == INS_OPTS_EVEX_er_rz)
{
_idEvexbContext = 3;
}
else
{
unreached();
case INS_OPTS_EVEX_eb_er_rd:
{
_idEvexbContext = 1;
break;
}

case INS_OPTS_EVEX_er_ru:
{
_idEvexbContext = 2;
break;
}

case INS_OPTS_EVEX_er_rz:
{
_idEvexbContext = 3;
break;
}

default:
{
unreached();
}
}
}

Expand Down
1 change: 0 additions & 1 deletion src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12480,7 +12480,6 @@ void emitter::emitDispIns(

case IF_NONE:
{
assert(!IsAvx512OrPriorInstruction(id->idIns()));
#if FEATURE_LOOP_ALIGN
if (ins == INS_align)
{
Expand Down
5 changes: 1 addition & 4 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19894,10 +19894,7 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp)
//
bool GenTree::isEvexCompatibleHWIntrinsic() const
{
// TODO-XARCH-AVX512 remove the ReturnsPerElementMask check once K registers have been properly
// implemented in the register allocator
return OperIsHWIntrinsic() && HWIntrinsicInfo::HasEvexSemantics(AsHWIntrinsic()->GetHWIntrinsicId()) &&
!HWIntrinsicInfo::ReturnsPerElementMask(AsHWIntrinsic()->GetHWIntrinsicId());
return OperIsHWIntrinsic() && HWIntrinsicInfo::HasEvexSemantics(AsHWIntrinsic()->GetHWIntrinsicId());
}

//------------------------------------------------------------------------
Expand Down
7 changes: 5 additions & 2 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
// We don't need to genProduceReg(node) since that will be handled by processing op2
// likewise, processing op2 will ensure its own registers are consumed

// Make sure we consume the registers that are getting specially handled
genConsumeReg(op1);
if (!mergeWithZero)
{
// Make sure we consume the registers that are getting specially handled
genConsumeReg(op1);
}
embMaskOp = op3;
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,7 @@ HARDWARE_INTRINSIC(AVX512F, BlendVariable,
HARDWARE_INTRINSIC(AVX512F, BroadcastScalarToVector512, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX512F, BroadcastVector128ToVector512, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti128, INS_vbroadcasti128, INS_invalid, INS_invalid, INS_vbroadcastf128, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeMemoryLoad)
HARDWARE_INTRINSIC(AVX512F, BroadcastVector256ToVector512, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti64x4, INS_vbroadcasti64x4, INS_invalid, INS_vbroadcastf64x4}, HW_Category_SimpleSIMD, HW_Flag_MaybeMemoryLoad)
HARDWARE_INTRINSIC(AVX512F, Compare, 64, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(AVX512F, CompareEqual, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(AVX512F, CompareGreaterThan, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(AVX512F, CompareGreaterThanOrEqual, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId)
Expand Down Expand Up @@ -1349,6 +1350,7 @@ HARDWARE_INTRINSIC(AVX512F, AddMask,
HARDWARE_INTRINSIC(AVX512F, AndMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(AVX512F, AndNotMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(AVX512F, BlendVariableMask, -1, 3, true, {INS_vpblendmb, INS_vpblendmb, INS_vpblendmw, INS_vpblendmw, INS_vpblendmd, INS_vpblendmd, INS_vpblendmq, INS_vpblendmq, INS_vblendmps, INS_vblendmpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible)
HARDWARE_INTRINSIC(AVX512F, CompareMask, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
HARDWARE_INTRINSIC(AVX512F, CompareEqualMask, -1, 2, true, {INS_vpcmpeqb, INS_vpcmpeqb, INS_vpcmpeqw, INS_vpcmpeqw, INS_vpcmpeqd, INS_vpcmpeqd, INS_vpcmpeqq, INS_vpcmpeqq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(AVX512F, CompareGreaterThanMask, -1, 2, true, {INS_vpcmpgtb, INS_vpcmpub, INS_vpcmpgtw, INS_vpcmpuw, INS_vpcmpgtd, INS_vpcmpud, INS_vpcmpgtq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
HARDWARE_INTRINSIC(AVX512F, CompareGreaterThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
Expand Down
30 changes: 27 additions & 3 deletions src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ int HWIntrinsicInfo::lookupImmUpperBound(NamedIntrinsic id)
{
case NI_AVX_Compare:
case NI_AVX_CompareScalar:
case NI_AVX512F_Compare:
{
assert(!HWIntrinsicInfo::HasFullRangeImm(id));
return 31; // enum FloatComparisonMode has 32 values
Expand Down Expand Up @@ -2027,9 +2028,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
if (IsBaselineVector512IsaSupportedOpportunistically())
{
op1 = impSIMDPopStack();
op1 =
gtNewSimdHWIntrinsicNode(TYP_MASK, op1, NI_AVX512F_ConvertVectorToMask, simdBaseJitType, simdSize);

if (!varTypeIsMask(op1))
{
op1 = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, NI_AVX512F_ConvertVectorToMask, simdBaseJitType,
simdSize);
}
retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_AVX512F_MoveMask, simdBaseJitType, simdSize);
}
break;
Expand Down Expand Up @@ -4093,12 +4097,32 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
op2 = impSIMDPopStack();
op1 = impSIMDPopStack();

op3 = gtNewSimdHWIntrinsicNode(TYP_MASK, op3, NI_AVX512F_ConvertVectorToMask, simdBaseJitType, simdSize);
if (!varTypeIsMask(op3))
{
op3 =
gtNewSimdHWIntrinsicNode(TYP_MASK, op3, NI_AVX512F_ConvertVectorToMask, simdBaseJitType, simdSize);
}
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, NI_AVX512F_BlendVariableMask, simdBaseJitType,
simdSize);
break;
}

case NI_AVX512F_Compare:
{
assert(sig->numArgs == 3);

op3 = impPopStack().val;
op2 = impSIMDPopStack();
op1 = impSIMDPopStack();

retNode =
gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, op3, NI_AVX512F_CompareMask, simdBaseJitType, simdSize);
retNode =
gtNewSimdHWIntrinsicNode(retType, retNode, NI_AVX512F_ConvertMaskToVector, simdBaseJitType, simdSize);

break;
}

case NI_AVX512F_CompareEqual:
case NI_AVX512BW_CompareEqual:
{
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/instr.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ enum insOpts: unsigned

INS_OPTS_EVEX_er_rz = 3, // Round towards zero

// Two-bits: 0b0001_1100
// Three-bits: 0b0001_1100
INS_OPTS_EVEX_aaa_MASK = 0x1C, // mask for EVEX.aaa related features

INS_OPTS_EVEX_em_k1 = 1 << 2, // Embedded mask uses K1
Expand All @@ -254,7 +254,7 @@ enum insOpts: unsigned
// One-bit: 0b0010_0000
INS_OPTS_EVEX_z_MASK = 0x20, // mask for EVEX.z related features

INS_OPTS_EVEX_em_zero, // Embedded mask merges with zero
INS_OPTS_EVEX_em_zero = 1 << 5, // Embedded mask merges with zero
};

#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
Expand Down
9 changes: 7 additions & 2 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -360,10 +360,12 @@ RELEASE_CONFIG_INTEGER(EnableMultiRegLocals, W("EnableMultiRegLocals"), 1)
// Disables inlining of all methods
RELEASE_CONFIG_INTEGER(JitNoInline, W("JitNoInline"), 0)

// clang-format off

#if defined(TARGET_AMD64) || defined(TARGET_X86)
// Enable EVEX encoding for SIMD instructions when AVX-512VL is available.
CONFIG_INTEGER(JitStressEvexEncoding, W("JitStressEvexEncoding"), 0)

// clang-format off
#endif

RELEASE_CONFIG_INTEGER(PreferredVectorBitWidth, W("PreferredVectorBitWidth"), 0) // The preferred decimal width, in bits, to use for any implicit vectorization emitted. A value less than 128 is treated as the system default.

Expand Down Expand Up @@ -420,6 +422,9 @@ RELEASE_CONFIG_INTEGER(EnableArm64Sha256, W("EnableArm64Sha256"),
RELEASE_CONFIG_INTEGER(EnableArm64Sve, W("EnableArm64Sve"), 1) // Allows Arm64 Sve+ hardware intrinsics to be disabled
#endif

RELEASE_CONFIG_INTEGER(EnableEmbeddedBroadcast, W("EnableEmbeddedBroadcast"), 1) // Allows embedded broadcasts to be disabled
RELEASE_CONFIG_INTEGER(EnableEmbeddedMasking, W("EnableEmbeddedMasking"), 1) // Allows embedded masking to be disabled

// clang-format on

#ifdef FEATURE_SIMD
Expand Down
Loading

0 comments on commit 5e6e1af

Please sign in to comment.