Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for Vector512 bitwise operations: And, AndNot, Or, OnesComplement, and Xor #83354

Merged
merged 7 commits into from
Mar 16, 2023
23 changes: 22 additions & 1 deletion src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2287,7 +2287,28 @@ void Compiler::compSetProcessor()
instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW) &&
instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ))
{
if (!DoJitStressEvexEncoding())
// Using JitStressEVEXEncoding flag will force instructions which would
// otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding
// This requires AVX512VL support. JitForceEVEXEncoding forces this encoding, thus
// causing failure if not running on compatible hardware.

// We can't use !DoJitStressEvexEncoding() yet because opts.compSupportsISA hasn't
// been set yet as that's what we're trying to set here

bool enableAvx512 = false;

#if defined(DEBUG)
if (JitConfig.JitForceEVEXEncoding())
{
enableAvx512 = true;
}
else if (JitConfig.JitStressEvexEncoding() && instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL))
{
enableAvx512 = true;
}
#endif // DEBUG

if (!enableAvx512)
{
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F);
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL);
Expand Down
118 changes: 96 additions & 22 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,12 +287,12 @@ bool emitter::IsEvexEncodedInstruction(instruction ins) const
// Since we are not using k registers yet, this will have no impact on correctness but will affect things
// once
// k registers are used (as that is the point of the "break out operand type" of these instructions)
// case INS_movdqa: // INS_movdqa32, INS_movdqa64.
// case INS_movdqu: // INS_movdqu8, INS_movdqu16, INS_movdqu32, INS_movdqu64.
// case INS_pand: // INS_pandd, INS_pandq.
// case INS_pandn: // INS_pandnd, INS_pandnq.
// case INS_por: // INS_pord, INS_porq.
// case INS_pxor: // INS_pxord, INS_pxorq
// case INS_movdqa: // INS_vmovdqa32, INS_vmovdqa64.
// case INS_movdqu: // INS_movdqu8, INS_movdqu16, INS_vmovdqu32, INS_vmovdqu64.
// case INS_pand: // INS_vpandd, INS_vpandq.
// case INS_pandn: // INS_vpandnd, INS_vpandnq.
// case INS_por: // INS_vpord, INS_vporq.
// case INS_pxor: // INS_vpxord, INS_vpxorq
// case INS_vextractf128: // INS_vextractf32x4, INS_vextractf64x2.
// case INS_vextracti128: // INS_vextracti32x4, INS_vextracti64x2.
// case INS_vinsertf128: // INS_vinsertf32x4, INS_vinsertf64x2.
Expand Down Expand Up @@ -492,6 +492,72 @@ bool emitter::IsFlagsAlwaysModified(instrDesc* id)
return true;
}

//------------------------------------------------------------------------
// IsRexW0Instruction: check if the instruction always encodes REX.W as 0
//
// Arguments:
// id - instruction to test
//
// Return Value:
// true if the instruction always encodes REX.W as 0; othwerwise, false
//
bool emitter::IsRexW0Instruction(instruction ins)
{
insFlags flags = CodeGenInterface::instInfo[ins];

if ((flags & REX_W0) != 0)
{
assert((flags & (REX_W1 | REX_WX)) == 0);
return true;
}

return false;
}

//------------------------------------------------------------------------
// IsRexW1Instruction: check if the instruction always encodes REX.W as 1
//
// Arguments:
// id - instruction to test
//
// Return Value:
// true if the instruction always encodes REX.W as 1; othwerwise, false
//
bool emitter::IsRexW1Instruction(instruction ins)
{
insFlags flags = CodeGenInterface::instInfo[ins];

if ((flags & REX_W1) != 0)
{
assert((flags & (REX_W0 | REX_WX)) == 0);
return true;
}

return false;
}

//------------------------------------------------------------------------
// IsRexWXInstruction: check if the instruction requires special REX.W encoding
//
// Arguments:
// id - instruction to test
//
// Return Value:
// true if the instruction requires special REX.W encoding; othwerwise, false
//
bool emitter::IsRexWXInstruction(instruction ins)
{
insFlags flags = CodeGenInterface::instInfo[ins];

if ((flags & REX_WX) != 0)
{
assert((flags & (REX_W0 | REX_W1)) == 0);
return true;
}

return false;
}

#ifdef TARGET_64BIT
//------------------------------------------------------------------------
// AreUpper32BitsZero: check if some previously emitted
Expand Down Expand Up @@ -5868,13 +5934,13 @@ bool emitter::IsMovInstruction(instruction ins)
case INS_movaps:
case INS_movd:
case INS_movdqa:
case INS_movdqa32:
case INS_movdqa64:
case INS_vmovdqa32:
case INS_vmovdqa64:
case INS_movdqu:
case INS_movdqu8:
case INS_movdqu16:
case INS_movdqu32:
case INS_movdqu64:
case INS_vmovdqu32:
case INS_vmovdqu64:
case INS_movsdsse2:
case INS_movss:
case INS_movsx:
Expand Down Expand Up @@ -6017,12 +6083,12 @@ bool emitter::HasSideEffect(instruction ins, emitAttr size)
break;
}

case INS_movdqa32:
case INS_movdqa64:
case INS_vmovdqa32:
case INS_vmovdqa64:
case INS_movdqu8:
case INS_movdqu16:
case INS_movdqu32:
case INS_movdqu64:
case INS_vmovdqu32:
case INS_vmovdqu64:
{
// These EVEX instructions merges/masks based on k-register
// TODO-XArch-AVX512 : Handle merge/masks scenarios once k-mask support is added for these.
Expand Down Expand Up @@ -6233,13 +6299,13 @@ void emitter::emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regN
case INS_movapd:
case INS_movaps:
case INS_movdqa:
case INS_movdqa32:
case INS_movdqa64:
case INS_vmovdqa32:
case INS_vmovdqa64:
case INS_movdqu:
case INS_movdqu8:
case INS_movdqu16:
case INS_movdqu32:
case INS_movdqu64:
case INS_vmovdqu32:
case INS_vmovdqu64:
case INS_movsdsse2:
case INS_movss:
case INS_movupd:
Expand Down Expand Up @@ -17472,13 +17538,13 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
break;

case INS_movdqa:
case INS_movdqa32:
case INS_movdqa64:
case INS_vmovdqa32:
case INS_vmovdqa64:
case INS_movdqu:
case INS_movdqu8:
case INS_movdqu16:
case INS_movdqu32:
case INS_movdqu64:
case INS_vmovdqu32:
case INS_vmovdqu64:
case INS_movaps:
case INS_movups:
case INS_movapd:
Expand Down Expand Up @@ -17691,9 +17757,17 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_paddusw:
case INS_psubusw:
case INS_pand:
case INS_vpandd:
case INS_vpandq:
case INS_pandn:
case INS_vpandnd:
case INS_vpandnq:
case INS_por:
case INS_vpord:
case INS_vporq:
case INS_pxor:
case INS_vpxord:
case INS_vpxorq:
case INS_andpd:
case INS_andps:
case INS_andnpd:
Expand Down
22 changes: 18 additions & 4 deletions src/coreclr/jit/emitxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,21 @@ bool IsWEvexOpcodeExtension(const instrDesc* id)

instruction ins = id->idIns();

if (IsRexW0Instruction(ins))
{
return false;
}
else if (IsRexW1Instruction(ins))
{
return true;
}

if (IsRexWXInstruction(ins))
{
// TODO: Make this a simple assert once all instructions are annotated
unreached();
}

switch (ins)
{
case INS_movq:
Expand Down Expand Up @@ -291,9 +306,7 @@ bool IsWEvexOpcodeExtension(const instrDesc* id)
case INS_vfnmsub231sd:
case INS_unpcklpd:
case INS_vpermilpdvar:
case INS_movdqa64:
case INS_movdqu16:
case INS_movdqu64:
case INS_vinsertf64x4:
case INS_vinserti64x4:
{
Expand Down Expand Up @@ -409,9 +422,7 @@ bool IsWEvexOpcodeExtension(const instrDesc* id)
case INS_vpdpbusds:
case INS_vpdpwssds:
case INS_vpermilpsvar:
case INS_movdqa32:
case INS_movdqu8:
case INS_movdqu32:
case INS_vinsertf32x8:
case INS_vinserti32x8:
{
Expand Down Expand Up @@ -648,6 +659,9 @@ static bool DoesWriteZeroFlag(instruction ins);
bool DoesWriteSignFlag(instruction ins);
bool DoesResetOverflowAndCarryFlags(instruction ins);
bool IsFlagsAlwaysModified(instrDesc* id);
static bool IsRexW0Instruction(instruction ins);
static bool IsRexW1Instruction(instruction ins);
static bool IsRexWXInstruction(instruction ins);

bool IsThreeOperandAVXInstruction(instruction ins)
{
Expand Down
38 changes: 33 additions & 5 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19593,7 +19593,12 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,

case GT_AND:
{
if (simdSize == 32)
if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_And;
}
else if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));

Expand Down Expand Up @@ -19627,7 +19632,12 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,

case GT_AND_NOT:
{
if (simdSize == 32)
if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_AndNot;
}
else if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));

Expand Down Expand Up @@ -19892,7 +19902,12 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,

case GT_OR:
{
if (simdSize == 32)
if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_Or;
}
else if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));

Expand Down Expand Up @@ -19953,7 +19968,12 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,

case GT_XOR:
{
if (simdSize == 32)
if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_Xor;
}
else if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));

Expand Down Expand Up @@ -23446,7 +23466,15 @@ GenTree* Compiler::gtNewSimdUnOpNode(genTreeOps op,

case GT_NOT:
{
assert((simdSize != 32) || compIsaSupportedDebugOnly(InstructionSet_AVX));
if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
}
else if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
}

op2 = gtNewAllBitsSetConNode(type);
return gtNewSimdBinOpNode(GT_XOR, type, op1, op2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
}
Expand Down
22 changes: 17 additions & 5 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,7 @@ static bool isSupportedBaseType(NamedIntrinsic intrinsic, CorInfoType baseJitTyp
#ifdef DEBUG
CORINFO_InstructionSet isa = HWIntrinsicInfo::lookupIsa(intrinsic);
#ifdef TARGET_XARCH
assert((isa == InstructionSet_Vector256) || (isa == InstructionSet_Vector128));
assert((isa == InstructionSet_Vector512) || (isa == InstructionSet_Vector256) || (isa == InstructionSet_Vector128));
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This assert was missing and so we weren't actually testing any InstructionSet_Vector512 paths yet.

#endif // TARGET_XARCH
#ifdef TARGET_ARM64
assert((isa == InstructionSet_Vector64) || (isa == InstructionSet_Vector128));
Expand Down Expand Up @@ -976,11 +976,23 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

assert(numArgs >= 0);

if (!isScalar && ((HWIntrinsicInfo::lookupIns(intrinsic, simdBaseType) == INS_invalid) ||
((simdSize != 8) && (simdSize != 16) && (simdSize != 32))))
if (!isScalar)
{
assert(!"Unexpected HW Intrinsic");
return nullptr;
if (HWIntrinsicInfo::lookupIns(intrinsic, simdBaseType) == INS_invalid)
{
assert(!"Unexpected HW intrinsic");
return nullptr;
}

#if defined(TARGET_ARM64)
if ((simdSize != 8) && (simdSize != 16))
#elif defined(TARGET_XARCH)
if ((simdSize != 16) && (simdSize != 32) && (simdSize != 64))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same for this check against simdSize != 64

#endif // TARGET_*
{
assert(!"Unexpected SIMD size");
return nullptr;
}
}

GenTree* op1 = nullptr;
Expand Down
Loading