diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 6202210221d9d..1fad850f623e0 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -157,6 +157,7 @@ regNumber emitter::getSseShiftRegNumber(instruction ins)
case INS_psrad:
case INS_psraw:
+ case INS_vpsraq:
{
return (regNumber)4;
}
@@ -17916,15 +17917,20 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_pminsw:
case INS_pminud:
case INS_pminsd:
+ case INS_vpminuq:
+ case INS_vpminsq:
case INS_pmaxub:
case INS_pmaxsb:
case INS_pmaxuw:
case INS_pmaxsw:
case INS_pmaxsd:
case INS_pmaxud:
+ case INS_vpmaxsq:
+ case INS_vpmaxuq:
case INS_pabsb:
case INS_pabsw:
case INS_pabsd:
+ case INS_vpabsq:
case INS_psignb:
case INS_psignw:
case INS_psignd:
@@ -17949,6 +17955,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_psrlq:
case INS_psrad:
case INS_psraw:
+ case INS_vpsraq:
if (insFmt == IF_RWR_CNS)
{
result.insLatency = PERFSCORE_LATENCY_1C;
@@ -18193,6 +18200,11 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insLatency += PERFSCORE_LATENCY_10C;
break;
+ case INS_vpmullq:
+ result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+ result.insLatency += PERFSCORE_LATENCY_15C;
+ break;
+
case INS_vpbroadcastb:
case INS_vpbroadcastw:
case INS_vpbroadcastd:
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 8e1308f2bc451..aefec70c0e574 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -18966,6 +18966,12 @@ bool GenTree::isCommutativeHWIntrinsic() const
{
return false;
}
+
+ case NI_AVX512F_Max:
+ case NI_AVX512F_Min:
+ {
+ return !varTypeIsFloating(node->GetSimdBaseType());
+ }
#endif // TARGET_XARCH
default:
@@ -19269,11 +19275,27 @@ GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType si
return gtNewSimdBinOpNode(GT_AND_NOT, type, op1, bitMask, simdBaseJitType, simdSize);
}
- assert((simdSize != 32) || compIsaSupportedDebugOnly(InstructionSet_AVX2));
+ NamedIntrinsic intrinsic = NI_Illegal;
+
+ if (simdBaseType == TYP_LONG)
+ {
+ if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL))
+ {
+ intrinsic = NI_AVX512F_VL_Abs;
+ }
+ }
+ else if (simdSize == 32)
+ {
+ assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
+ intrinsic = NI_AVX2_Abs;
+ }
+ else if (compOpportunisticallyDependsOn(InstructionSet_SSSE3))
+ {
+ intrinsic = NI_SSSE3_Abs;
+ }
- if ((simdBaseType != TYP_LONG) && ((simdSize == 32) || compOpportunisticallyDependsOn(InstructionSet_SSSE3)))
+ if (intrinsic != NI_Illegal)
{
- NamedIntrinsic intrinsic = (simdSize == 32) ? NI_AVX2_Abs : NI_SSSE3_Abs;
return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
}
else
@@ -19390,6 +19412,23 @@ GenTree* Compiler::gtNewSimdBinOpNode(
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_And;
+
+ if (varTypeIsIntegral(simdBaseType))
+ {
+ intrinsic = NI_AVX512F_And;
+ }
+ else if (compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
+ {
+ intrinsic = NI_AVX512DQ_And;
+ }
+ else
+ {
+ // Since this is a bitwise operation, we can still support it by lying
+ // about the type and doing the operation using a supported instruction
+
+ intrinsic = NI_AVX512F_And;
+ simdBaseJitType = (simdBaseType == TYP_DOUBLE) ? CORINFO_TYPE_LONG : CORINFO_TYPE_INT;
+ }
}
else if (simdSize == 32)
{
@@ -19409,7 +19448,7 @@ GenTree* Compiler::gtNewSimdBinOpNode(
// about the type and doing the operation using a supported instruction
intrinsic = NI_AVX_And;
- simdBaseJitType = CORINFO_TYPE_FLOAT;
+ simdBaseJitType = varTypeIsLong(simdBaseType) ? CORINFO_TYPE_DOUBLE : CORINFO_TYPE_FLOAT;
}
}
else if (simdBaseType == TYP_FLOAT)
@@ -19429,6 +19468,23 @@ GenTree* Compiler::gtNewSimdBinOpNode(
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_AndNot;
+
+ if (varTypeIsIntegral(simdBaseType))
+ {
+ intrinsic = NI_AVX512F_AndNot;
+ }
+ else if (compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
+ {
+ intrinsic = NI_AVX512DQ_AndNot;
+ }
+ else
+ {
+ // Since this is a bitwise operation, we can still support it by lying
+ // about the type and doing the operation using a supported instruction
+
+ intrinsic = NI_AVX512F_AndNot;
+ simdBaseJitType = (simdBaseType == TYP_DOUBLE) ? CORINFO_TYPE_LONG : CORINFO_TYPE_INT;
+ }
}
else if (simdSize == 32)
{
@@ -19448,7 +19504,7 @@ GenTree* Compiler::gtNewSimdBinOpNode(
// about the type and doing the operation using a supported instruction
intrinsic = NI_AVX_AndNot;
- simdBaseJitType = CORINFO_TYPE_FLOAT;
+ simdBaseJitType = varTypeIsLong(simdBaseType) ? CORINFO_TYPE_DOUBLE : CORINFO_TYPE_FLOAT;
}
}
else if (simdBaseType == TYP_FLOAT)
@@ -19510,7 +19566,6 @@ GenTree* Compiler::gtNewSimdBinOpNode(
}
assert(!varTypeIsByte(simdBaseType));
- assert((op != GT_RSH) || (!varTypeIsUnsigned(simdBaseType) && !varTypeIsLong(simdBaseType)));
// "over shifting" is platform specific behavior. We will match the C# behavior
// this requires we mask with (sizeof(T) * 8) - 1 which ensures the shift cannot
@@ -19541,7 +19596,16 @@ GenTree* Compiler::gtNewSimdBinOpNode(
}
else if (op == GT_RSH)
{
- intrinsic = NI_AVX2_ShiftRightArithmetic;
+ if (varTypeIsLong(simdBaseType) || (simdBaseType == TYP_DOUBLE))
+ {
+ assert(varTypeIsSigned(simdBaseType));
+ assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F_VL));
+ intrinsic = NI_AVX512F_VL_ShiftRightArithmetic;
+ }
+ else
+ {
+ intrinsic = NI_AVX2_ShiftRightArithmetic;
+ }
}
else
{
@@ -19555,7 +19619,16 @@ GenTree* Compiler::gtNewSimdBinOpNode(
}
else if (op == GT_RSH)
{
- intrinsic = NI_SSE2_ShiftRightArithmetic;
+ if (varTypeIsLong(simdBaseType) || (simdBaseType == TYP_DOUBLE))
+ {
+ assert(varTypeIsSigned(simdBaseType));
+ assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F_VL));
+ intrinsic = NI_AVX512F_VL_ShiftRightArithmetic;
+ }
+ else
+ {
+ intrinsic = NI_SSE2_ShiftRightArithmetic;
+ }
}
else
{
@@ -19654,6 +19727,16 @@ GenTree* Compiler::gtNewSimdBinOpNode(
break;
}
+ case TYP_LONG:
+ case TYP_ULONG:
+ {
+ assert((simdSize == 16) || (simdSize == 32));
+ assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ_VL));
+
+ intrinsic = NI_AVX512DQ_VL_MultiplyLow;
+ break;
+ }
+
case TYP_FLOAT:
{
if (simdSize == 32)
@@ -19696,6 +19779,23 @@ GenTree* Compiler::gtNewSimdBinOpNode(
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_Or;
+
+ if (varTypeIsIntegral(simdBaseType))
+ {
+ intrinsic = NI_AVX512F_Or;
+ }
+ else if (compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
+ {
+ intrinsic = NI_AVX512DQ_Or;
+ }
+ else
+ {
+ // Since this is a bitwise operation, we can still support it by lying
+ // about the type and doing the operation using a supported instruction
+
+ intrinsic = NI_AVX512F_Or;
+ simdBaseJitType = (simdBaseType == TYP_DOUBLE) ? CORINFO_TYPE_LONG : CORINFO_TYPE_INT;
+ }
}
else if (simdSize == 32)
{
@@ -19715,7 +19815,7 @@ GenTree* Compiler::gtNewSimdBinOpNode(
// about the type and doing the operation using a supported instruction
intrinsic = NI_AVX_Or;
- simdBaseJitType = CORINFO_TYPE_FLOAT;
+ simdBaseJitType = varTypeIsLong(simdBaseType) ? CORINFO_TYPE_DOUBLE : CORINFO_TYPE_FLOAT;
}
}
else if (simdBaseType == TYP_FLOAT)
@@ -19775,6 +19875,23 @@ GenTree* Compiler::gtNewSimdBinOpNode(
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_Xor;
+
+ if (varTypeIsIntegral(simdBaseType))
+ {
+ intrinsic = NI_AVX512F_Xor;
+ }
+ else if (compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
+ {
+ intrinsic = NI_AVX512DQ_Xor;
+ }
+ else
+ {
+ // Since this is a bitwise operation, we can still support it by lying
+ // about the type and doing the operation using a supported instruction
+
+ intrinsic = NI_AVX512F_Xor;
+ simdBaseJitType = (simdBaseType == TYP_DOUBLE) ? CORINFO_TYPE_LONG : CORINFO_TYPE_INT;
+ }
}
else if (simdSize == 32)
{
@@ -19794,7 +19911,7 @@ GenTree* Compiler::gtNewSimdBinOpNode(
// about the type and doing the operation using a supported instruction
intrinsic = NI_AVX_Xor;
- simdBaseJitType = CORINFO_TYPE_FLOAT;
+ simdBaseJitType = varTypeIsLong(simdBaseType) ? CORINFO_TYPE_DOUBLE : CORINFO_TYPE_FLOAT;
}
}
else if (simdBaseType == TYP_FLOAT)
@@ -21885,6 +22002,10 @@ GenTree* Compiler::gtNewSimdMaxNode(
{
intrinsic = NI_AVX2_Max;
}
+ else if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL))
+ {
+ intrinsic = NI_AVX512F_VL_Max;
+ }
}
}
else
@@ -21974,7 +22095,6 @@ GenTree* Compiler::gtNewSimdMaxNode(
if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
intrinsic = NI_SSE41_Max;
- break;
}
break;
}
@@ -21982,6 +22102,10 @@ GenTree* Compiler::gtNewSimdMaxNode(
case TYP_LONG:
case TYP_ULONG:
{
+ if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL))
+ {
+ intrinsic = NI_AVX512F_VL_Max;
+ }
break;
}
@@ -22072,6 +22196,10 @@ GenTree* Compiler::gtNewSimdMinNode(
{
intrinsic = NI_AVX2_Min;
}
+ else if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL))
+ {
+ intrinsic = NI_AVX512F_VL_Min;
+ }
}
}
else
@@ -22157,7 +22285,6 @@ GenTree* Compiler::gtNewSimdMinNode(
if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
intrinsic = NI_SSE41_Min;
- break;
}
break;
}
@@ -22165,6 +22292,10 @@ GenTree* Compiler::gtNewSimdMinNode(
case TYP_LONG:
case TYP_ULONG:
{
+ if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL))
+ {
+ intrinsic = NI_AVX512F_VL_Min;
+ }
break;
}
@@ -22496,7 +22627,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(
//
// var tmp1 = Avx.ConvertToVector128Single(op1).ToVector256Unsafe();
// var tmp2 = Avx.ConvertToVector128Single(op2);
- // return Avx.InsertVector128(tmp1, tmp2, 1);
+ // return tmp1.WithUpper(tmp2);
CorInfoType opBaseJitType = CORINFO_TYPE_DOUBLE;
diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
index 274cd7d79df70..ae0879e4ba473 100644
--- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
@@ -1799,13 +1799,20 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node)
case NI_AVX512BW_ConvertToVector256Byte:
case NI_AVX512BW_ConvertToVector256SByte:
{
- // These instructions are RM_R and so we need to ensure the targetReg
- // is passed in as the RM register and op1 is passed as the R register
-
- op1Reg = op1->GetRegNum();
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
- emit->emitIns_R_R(ins, attr, op1Reg, targetReg);
+ if (varTypeIsFloating(baseType))
+ {
+ genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1);
+ }
+ else
+ {
+ // These instructions are RM_R and so we need to ensure the targetReg
+ // is passed in as the RM register and op1 is passed as the R register
+
+ op1Reg = op1->GetRegNum();
+ emit->emitIns_R_R(ins, attr, op1Reg, targetReg);
+ }
break;
}
diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h
index ff7348f40bbb0..3e81d3f3da3f3 100644
--- a/src/coreclr/jit/hwintrinsiclistxarch.h
+++ b/src/coreclr/jit/hwintrinsiclistxarch.h
@@ -790,6 +790,7 @@ HARDWARE_INTRINSIC(AVX2, Xor,
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// AVX512F Intrinsics
+HARDWARE_INTRINSIC(AVX512F, Abs, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pabsd, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512F, Add, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX512F, And, 64, 2, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_vpandq, INS_vpandq, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX512F, AndNot, 64, 2, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_vpandnq, INS_vpandnq, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
@@ -799,15 +800,21 @@ HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Int32,
HARDWARE_INTRINSIC(AVX512F, ConvertToVector128UInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector128UInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Single, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Double, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int32, 64, 1, {INS_invalid, INS_invalid, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt32, 64, 1, {INS_invalid, INS_invalid, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int32, 64, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int64, 64, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Single, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt32, 64, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt64, 64, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F, Divide, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512F, DuplicateEvenIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX512F, DuplicateOddIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX512F, ExtractVector128, 64, 2, {INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextractf128, INS_vextractf128}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX512F, ExtractVector256, 64, 2, {INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextractf64x4, INS_vextractf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX512F, InsertVector128, 64, 3, {INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinsertf128, INS_vinsertf128}, HW_Category_IMM, HW_Flag_FullRangeIMM)
@@ -815,19 +822,44 @@ HARDWARE_INTRINSIC(AVX512F, InsertVector256,
HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512, 64, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512NonTemporal, 64, 1, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX512F, LoadVector512, 64, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(AVX512F, Max, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmaxsd, INS_pmaxud, INS_vpmaxsq, INS_vpmaxuq, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative)
+HARDWARE_INTRINSIC(AVX512F, Min, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pminsd, INS_pminud, INS_vpminsq, INS_vpminuq, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative)
+HARDWARE_INTRINSIC(AVX512F, Multiply, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX512F, MultiplyLow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX512F, Or, 64, 2, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_vporq, INS_vporq, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX512F, ShiftLeftLogical, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX512F, ShiftRightArithmetic, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrad, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX512F, ShiftRightLogical, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX512F, Shuffle, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_NoRMWSemantics|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX512F, Sqrt, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX512F, Store, 64, 2, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(AVX512F, StoreAligned, 64, 2, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(AVX512F, StoreAlignedNonTemporal, 64, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(AVX512F, Subtract, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512F, UnpackHigh, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512F, UnpackLow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX512F, Xor, 64, 2, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_vpxorq, INS_vpxorq, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-// AVX512F Intrinsics
-HARDWARE_INTRINSIC(AVX512BW, Add, 64, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+// AVX512F.VL Intrinsics
+HARDWARE_INTRINSIC(AVX512F_VL, Abs, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F_VL, Max, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaxsq, INS_vpmaxuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX512F_VL, Min, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpminsq, INS_vpminuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX512F_VL, ShiftRightArithmetic, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+// ISA Function name SIMD size NumArg Instructions Category Flags
+// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+// AVX512BW Intrinsics
+HARDWARE_INTRINSIC(AVX512BW, Abs, 64, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512BW, Add, 64, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX512BW, AddSaturate, 64, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX512BW, AlignRight, 64, 3, {INS_palignr, INS_palignr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX512BW, Average, 64, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX512BW, BroadcastScalarToVector512, 64, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad)
HARDWARE_INTRINSIC(AVX512BW, ConvertToVector128Byte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512BW, ConvertToVector128SByte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
@@ -835,7 +867,47 @@ HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256Byte,
HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256SByte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512BW, ConvertToVector512Int16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512BW, ConvertToVector512UInt16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX512BW, Subtract, 64, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512BW, LoadVector512, 64, 1, {INS_vmovdqu8, INS_vmovdqu8, INS_vmovdqu16, INS_vmovdqu16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(AVX512BW, Max, 64, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX512BW, Min, 64, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX512BW, MultiplyAddAdjacent, 64, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512BW, MultiplyHigh, 64, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX512BW, MultiplyHighRoundScale, 64, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512BW, MultiplyLow, 64, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX512BW, PackSignedSaturate, 64, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512BW, PackUnsignedSaturate, 64, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512BW, ShiftLeftLogical, 64, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX512BW, ShiftLeftLogical128BitLane, 64, 2, {INS_pslldq, INS_pslldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX512BW, ShiftRightArithmetic, 64, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX512BW, ShiftRightLogical, 64, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX512BW, ShiftRightLogical128BitLane, 64, 2, {INS_psrldq, INS_psrldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX512BW, Shuffle, 64, 2, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512BW, ShuffleHigh, 64, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX512BW, ShuffleLow, 64, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX512BW, Store, 64, 2, {INS_vmovdqu8, INS_vmovdqu8, INS_vmovdqu16, INS_vmovdqu16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(AVX512BW, Subtract, 64, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512BW, SubtractSaturate, 64, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512BW, SumAbsoluteDifferences, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512BW, UnpackHigh, 64, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512BW, UnpackLow, 64, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+// ISA Function name SIMD size NumArg Instructions Category Flags
+// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+// AVX512DQ Intrinsics
+HARDWARE_INTRINSIC(AVX512DQ, And, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX512DQ, AndNot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512DQ, MultiplyLow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX512DQ, Or, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX512DQ, Xor, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+// ISA Function name SIMD size NumArg Instructions Category Flags
+// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+// AVX512DQ.VL Intrinsics
+HARDWARE_INTRINSIC(AVX512DQ_VL, MultiplyLow, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp
index eec393b257dcd..e32befa5c637c 100644
--- a/src/coreclr/jit/hwintrinsicxarch.cpp
+++ b/src/coreclr/jit/hwintrinsicxarch.cpp
@@ -548,6 +548,13 @@ GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdT
case NI_AVX2_ShiftLeftLogical:
case NI_AVX2_ShiftRightArithmetic:
case NI_AVX2_ShiftRightLogical:
+ case NI_AVX512F_ShiftLeftLogical:
+ case NI_AVX512F_ShiftRightArithmetic:
+ case NI_AVX512F_ShiftRightLogical:
+ case NI_AVX512F_VL_ShiftRightArithmetic:
+ case NI_AVX512BW_ShiftLeftLogical:
+ case NI_AVX512BW_ShiftRightArithmetic:
+ case NI_AVX512BW_ShiftRightLogical:
{
impSpillSideEffect(true,
verCurrentState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic"));
@@ -1839,6 +1846,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
case NI_SSE2_LoadVector128:
case NI_AVX_LoadVector256:
case NI_AVX512F_LoadVector512:
+ case NI_AVX512BW_LoadVector512:
case NI_Vector128_Load:
case NI_Vector256_Load:
case NI_Vector512_Load:
@@ -1961,8 +1969,18 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
if (varTypeIsLong(simdBaseType))
{
- // TODO-XARCH-CQ: We should support long/ulong multiplication
+ assert((simdSize == 16) || (simdSize == 32));
+
+ if (!compOpportunisticallyDependsOn(InstructionSet_AVX512DQ_VL))
+ {
+ // TODO-XARCH-CQ: We should support long/ulong multiplication
+ break;
+ }
+
+#if defined(TARGET_X86)
+ // TODO-XARCH-CQ: We need to support 64-bit CreateBroadcast
break;
+#endif // TARGET_X86
}
CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
@@ -2117,12 +2135,21 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
{
assert(sig->numArgs == 2);
- if (varTypeIsByte(simdBaseType) || varTypeIsLong(simdBaseType) || (simdBaseType == TYP_DOUBLE))
+ if (varTypeIsByte(simdBaseType))
{
- // byte, sbyte, long, and ulong would require more work to support
+ // byte and sbyte would require more work to support
break;
}
+ if (varTypeIsLong(simdBaseType) || (simdBaseType == TYP_DOUBLE))
+ {
+ if (!compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL))
+ {
+ // long, ulong, and double would require more work to support
+ break;
+ }
+ }
+
if ((simdSize != 32) || compExactlyDependsOn(InstructionSet_AVX2))
{
genTreeOps op = varTypeIsUnsigned(simdBaseType) ? GT_RSZ : GT_RSH;
@@ -2256,6 +2283,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
case NI_SSE2_Store:
case NI_AVX_Store:
case NI_AVX512F_Store:
+ case NI_AVX512BW_Store:
{
assert(retType == TYP_VOID);
assert(sig->numArgs == 2);
diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h
index c16d16b466907..e4cf99d9b17f0 100644
--- a/src/coreclr/jit/instrsxarch.h
+++ b/src/coreclr/jit/instrsxarch.h
@@ -615,13 +615,19 @@ INST3(vextractf64x4, "extractf64x4", IUM_WR, SSE3A(0x1B), BAD_
INST3(vextracti64x4, "extracti64x4", IUM_WR, SSE3A(0x3B), BAD_CODE, BAD_CODE, INS_TT_TUPLE4, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Extract 256-bit packed quadword integer values
INST3(vinsertf64x4, "insertf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE4, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values
INST3(vinserti64x4, "inserti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE4, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values
-INST3(vmovdqa64, "movdqa64", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_FLAGS_None)
-INST3(vmovdqu64, "movdqu64", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_FLAGS_None)
+INST3(vmovdqa64, "movdqa64", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, Input_64Bit | REX_W1_EVEX | Encoding_EVEX)
+INST3(vmovdqu64, "movdqu64", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_64Bit | REX_W1_EVEX | Encoding_EVEX)
+INST3(vpabsq, "pabsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1F), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX) // Packed absolute value of 64-bit integers
INST3(vpandq, "pandq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs
INST3(vpandnq, "pandnq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs
+INST3(vpmaxsq, "pmaxsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 64-bit signed integers
+INST3(vpmaxuq, "pmaxuq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 64-bit unsigned integers
+INST3(vpminsq, "pminsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 64-bit signed integers
+INST3(vpminuq, "pminuq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 64-bit unsigned integers
INST3(vpmovdw, "pmovdw", IUM_WR, PSSE38(0xF3, 0x33), BAD_CODE, PSSE38(0xF3, 0x33), INS_TT_HALF_MEM, Input_32Bit | REX_W0_EVEX | Encoding_EVEX)
INST3(vpmovqd, "pmovqd", IUM_WR, PSSE38(0xF3, 0x35), BAD_CODE, PSSE38(0xF3, 0x35), INS_TT_HALF_MEM, Input_64Bit | REX_W0_EVEX | Encoding_EVEX)
INST3(vporq, "porq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs
+INST3(vpsraq, "psraq", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), INS_TT_FULL | INS_TT_MEM128, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 64-bit integers
INST3(vpternlogd, "pternlogd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x25), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction)
INST3(vpxorq, "pxorq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise XOR of two xmm regs
@@ -648,8 +654,8 @@ INST3(vpmovwb, "pmovwb", IUM_WR, PSSE38(0xF3, 0x30), BAD_
INST3(kortestb, "kortestb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x98), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction)
INST3(kmovb_gpr, "kmovb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction)
INST3(kmovb_msk, "kmovb", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction)
-INST3(vextractf32x8, "extractf32x8", IUM_WR, SSE3A(0x1B), BAD_CODE, BAD_CODE, INS_TT_TUPLE8, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) // Extract 256-bit packed double-precision floating point values
-INST3(vextracti32x8, "extracti32x8", IUM_WR, SSE3A(0x3B), BAD_CODE, BAD_CODE, INS_TT_TUPLE8, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) // Extract 256-bit packed quadword integer values
+INST3(vextractf32x8, "extractf32x8", IUM_WR, SSE3A(0x1B), BAD_CODE, BAD_CODE, INS_TT_TUPLE8, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) // Extract 256-bit packed single-precision floating point values
+INST3(vextracti32x8, "extracti32x8", IUM_WR, SSE3A(0x3B), BAD_CODE, BAD_CODE, INS_TT_TUPLE8, Input_32Bit | REX_W0_EVEX | Encoding_EVEX) // Extract 256-bit packed doubleword integer values
INST3(vinsertf32x8, "insertf32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE8, Input_32Bit | REX_W0_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values
INST3(vinserti32x8, "inserti32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE8, Input_32Bit | REX_W0_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values
INST3(vpcmpd, "pcmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1F), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask)
@@ -660,6 +666,8 @@ INST3(vpmovd2m, "pmovd2m", IUM_WR, BAD_CODE, BAD_
INST3(vpmovm2d, "pmovm2d", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x38), INS_TT_NONE, Input_32Bit | REX_W0_EVEX | Encoding_EVEX)
INST3(vpmovm2q, "pmovm2q", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x38), INS_TT_NONE, Input_64Bit | REX_W1_EVEX | Encoding_EVEX)
INST3(vpmovq2m, "pmovq2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x39), INS_TT_NONE, Input_64Bit | REX_W1_EVEX | Encoding_EVEX)
+INST3(vpmullq, "pmullq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply of 64-bit signed integers and store the lower 64 bits of each result
+
INST3(LAST_AVX512_INSTRUCTION, "LAST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 2abdb461ba7f5..cde2cee13abd5 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -6835,6 +6835,8 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
case NI_AVX_DotProduct:
case NI_AVX_Permute:
case NI_AVX_Permute2x128:
+ case NI_AVX_Shuffle:
+ case NI_AVX2_AlignRight:
case NI_AVX2_Blend:
case NI_AVX2_MultipleSumAbsoluteDifferences:
case NI_AVX2_Permute2x128:
@@ -6842,8 +6844,20 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
case NI_AVX2_ShiftLeftLogical:
case NI_AVX2_ShiftRightArithmetic:
case NI_AVX2_ShiftRightLogical:
+ case NI_AVX2_Shuffle:
case NI_AVX2_ShuffleHigh:
case NI_AVX2_ShuffleLow:
+ case NI_AVX512F_ShiftLeftLogical:
+ case NI_AVX512F_ShiftRightArithmetic:
+ case NI_AVX512F_ShiftRightLogical:
+ case NI_AVX512F_Shuffle:
+ case NI_AVX512F_VL_ShiftRightArithmetic:
+ case NI_AVX512BW_AlignRight:
+ case NI_AVX512BW_ShiftLeftLogical:
+ case NI_AVX512BW_ShiftRightArithmetic:
+ case NI_AVX512BW_ShiftRightLogical:
+ case NI_AVX512BW_ShuffleHigh:
+ case NI_AVX512BW_ShuffleLow:
{
assert(!supportsSIMDScalarLoads);
@@ -7509,6 +7523,13 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_AVX2_ShiftLeftLogical:
case NI_AVX2_ShiftRightArithmetic:
case NI_AVX2_ShiftRightLogical:
+ case NI_AVX512F_ShiftLeftLogical:
+ case NI_AVX512F_ShiftRightArithmetic:
+ case NI_AVX512F_ShiftRightLogical:
+ case NI_AVX512F_VL_ShiftRightArithmetic:
+ case NI_AVX512BW_ShiftLeftLogical:
+ case NI_AVX512BW_ShiftRightArithmetic:
+ case NI_AVX512BW_ShiftRightLogical:
{
// These intrinsics can have op2 be imm or reg/mem
@@ -7526,13 +7547,37 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}
+ case NI_AVX2_Shuffle:
+ {
+ if (varTypeIsByte(simdBaseType))
+ {
+ // byte and sbyte are: pshufb ymm1, ymm2, ymm3/m256
+ assert(!isCommutative);
+
+ bool supportsRegOptional = false;
+
+ if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional))
+ {
+ MakeSrcContained(node, op2);
+ }
+ else if (supportsRegOptional)
+ {
+ MakeSrcRegOptional(node, op2);
+ }
+ break;
+ }
+ FALLTHROUGH;
+ }
+
case NI_SSE2_Shuffle:
case NI_SSE2_ShuffleHigh:
case NI_SSE2_ShuffleLow:
case NI_AVX2_Permute4x64:
- case NI_AVX2_Shuffle:
case NI_AVX2_ShuffleHigh:
case NI_AVX2_ShuffleLow:
+ case NI_AVX512F_Shuffle:
+ case NI_AVX512BW_ShuffleHigh:
+ case NI_AVX512BW_ShuffleLow:
{
// These intrinsics have op2 as an imm and op1 as a reg/mem
@@ -7601,6 +7646,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_SSE2_ShiftRightLogical128BitLane:
case NI_AVX2_ShiftLeftLogical128BitLane:
case NI_AVX2_ShiftRightLogical128BitLane:
+ case NI_AVX512BW_ShiftLeftLogical128BitLane:
+ case NI_AVX512BW_ShiftRightLogical128BitLane:
{
#if DEBUG
// These intrinsics should have been marked contained by the general-purpose handling
@@ -7852,6 +7899,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_AVX2_MultipleSumAbsoluteDifferences:
case NI_AVX2_Permute2x128:
case NI_AVX512F_InsertVector256:
+ case NI_AVX512F_Shuffle:
+ case NI_AVX512BW_AlignRight:
case NI_PCLMULQDQ_CarrylessMultiply:
{
if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional))
diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp
index 45f6c02fc0929..9cd7240d81e7b 100644
--- a/src/coreclr/jit/simdashwintrinsic.cpp
+++ b/src/coreclr/jit/simdashwintrinsic.cpp
@@ -772,6 +772,47 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
}
#endif // TARGET_XARCH
+#if defined(TARGET_XARCH)
+ case NI_VectorT128_Multiply:
+ case NI_VectorT128_op_Multiply:
+ case NI_VectorT256_Multiply:
+ case NI_VectorT256_op_Multiply:
+ {
+ if (varTypeIsLong(simdBaseType))
+ {
+ if (!compOpportunisticallyDependsOn(InstructionSet_AVX512DQ_VL))
+ {
+ // TODO-XARCH-CQ: We should support long/ulong multiplication
+ return nullptr;
+ }
+
+#if defined(TARGET_X86)
+ // TODO-XARCH-CQ: We need to support 64-bit CreateBroadcast
+ return nullptr;
+#endif // TARGET_X86
+ }
+ break;
+ }
+#endif // TARGET_XARCH
+
+#if defined(TARGET_XARCH)
+ case NI_VectorT128_ShiftRightArithmetic:
+ case NI_VectorT128_op_RightShift:
+ case NI_VectorT256_ShiftRightArithmetic:
+ case NI_VectorT256_op_RightShift:
+ {
+ if (varTypeIsLong(simdBaseType) || (simdBaseType == TYP_DOUBLE))
+ {
+ if (!compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL))
+ {
+ // TODO-XARCH-CQ: We should support long/ulong arithmetic shift
+ return nullptr;
+ }
+ }
+ break;
+ }
+#endif // TARGET_XARCH
+
default:
{
// Most intrinsics have some path that works even if only SSE2/AdvSimd is available
diff --git a/src/coreclr/jit/simdashwintrinsiclistxarch.h b/src/coreclr/jit/simdashwintrinsiclistxarch.h
index 397483a98c12b..5756db4798648 100644
--- a/src/coreclr/jit/simdashwintrinsiclistxarch.h
+++ b/src/coreclr/jit/simdashwintrinsiclistxarch.h
@@ -251,7 +251,7 @@ SIMD_AS_HWINTRINSIC_ID(VectorT128, LoadUnsafe,
SIMD_AS_HWINTRINSIC_NM(VectorT128, LoadUnsafeIndex, "LoadUnsafe", 2, {NI_VectorT128_LoadUnsafeIndex, NI_VectorT128_LoadUnsafeIndex, NI_VectorT128_LoadUnsafeIndex, NI_VectorT128_LoadUnsafeIndex, NI_VectorT128_LoadUnsafeIndex, NI_VectorT128_LoadUnsafeIndex, NI_VectorT128_LoadUnsafeIndex, NI_VectorT128_LoadUnsafeIndex, NI_VectorT128_LoadUnsafeIndex, NI_VectorT128_LoadUnsafeIndex}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet)
SIMD_AS_HWINTRINSIC_ID(VectorT128, Max, 2, {NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, Min, 2, {NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT128, Multiply, 2, {NI_Illegal, NI_Illegal, NI_VectorT128_Multiply, NI_VectorT128_Multiply, NI_VectorT128_Multiply, NI_VectorT128_Multiply, NI_Illegal, NI_Illegal, NI_VectorT128_Multiply, NI_VectorT128_Multiply}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128, Multiply, 2, {NI_Illegal, NI_Illegal, NI_VectorT128_Multiply, NI_VectorT128_Multiply, NI_VectorT128_Multiply, NI_VectorT128_Multiply, NI_VectorT128_Multiply, NI_VectorT128_Multiply, NI_VectorT128_Multiply, NI_VectorT128_Multiply}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, Narrow, 2, {NI_VectorT128_Narrow, NI_VectorT128_Narrow, NI_VectorT128_Narrow, NI_VectorT128_Narrow, NI_VectorT128_Narrow, NI_VectorT128_Narrow, NI_VectorT128_Narrow, NI_VectorT128_Narrow, NI_VectorT128_Narrow, NI_VectorT128_Narrow}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet)
SIMD_AS_HWINTRINSIC_ID(VectorT128, Negate, 1, {NI_VectorT128_Negate, NI_VectorT128_Negate, NI_VectorT128_Negate, NI_VectorT128_Negate, NI_VectorT128_Negate, NI_VectorT128_Negate, NI_VectorT128_Negate, NI_VectorT128_Negate, NI_VectorT128_Negate, NI_VectorT128_Negate}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, OnesComplement, 1, {NI_VectorT128_OnesComplement, NI_VectorT128_OnesComplement, NI_VectorT128_OnesComplement, NI_VectorT128_OnesComplement, NI_VectorT128_OnesComplement, NI_VectorT128_OnesComplement, NI_VectorT128_OnesComplement, NI_VectorT128_OnesComplement, NI_VectorT128_OnesComplement, NI_VectorT128_OnesComplement}, SimdAsHWIntrinsicFlag::None)
@@ -266,13 +266,13 @@ SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Inequality,
SIMD_AS_HWINTRINSIC_ID(VectorT128, op_LeftShift, 2, {NI_Illegal, NI_Illegal, NI_VectorT128_op_LeftShift, NI_VectorT128_op_LeftShift, NI_VectorT128_op_LeftShift, NI_VectorT128_op_LeftShift, NI_VectorT128_op_LeftShift, NI_VectorT128_op_LeftShift, NI_VectorT128_op_LeftShift, NI_VectorT128_op_LeftShift}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_VectorT128_op_Multiply, NI_VectorT128_op_Multiply, NI_VectorT128_op_Multiply, NI_VectorT128_op_Multiply, NI_Illegal, NI_Illegal, NI_VectorT128_op_Multiply, NI_VectorT128_op_Multiply}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, op_OnesComplement, 2, {NI_VectorT128_op_OnesComplement, NI_VectorT128_op_OnesComplement, NI_VectorT128_op_OnesComplement, NI_VectorT128_op_OnesComplement, NI_VectorT128_op_OnesComplement, NI_VectorT128_op_OnesComplement, NI_VectorT128_op_OnesComplement, NI_VectorT128_op_OnesComplement, NI_VectorT128_op_OnesComplement, NI_VectorT128_op_OnesComplement}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT128, op_RightShift, 2, {NI_Illegal, NI_Illegal, NI_VectorT128_op_RightShift, NI_VectorT128_op_RightShift, NI_VectorT128_op_RightShift, NI_VectorT128_op_RightShift, NI_Illegal, NI_Illegal, NI_VectorT128_op_RightShift, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128, op_RightShift, 2, {NI_Illegal, NI_Illegal, NI_VectorT128_op_RightShift, NI_VectorT128_op_RightShift, NI_VectorT128_op_RightShift, NI_VectorT128_op_RightShift, NI_VectorT128_op_RightShift, NI_VectorT128_op_RightShift, NI_VectorT128_op_RightShift, NI_VectorT128_op_RightShift}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Subtraction, 2, {NI_VectorT128_op_Subtraction, NI_VectorT128_op_Subtraction, NI_VectorT128_op_Subtraction, NI_VectorT128_op_Subtraction, NI_VectorT128_op_Subtraction, NI_VectorT128_op_Subtraction, NI_VectorT128_op_Subtraction, NI_VectorT128_op_Subtraction, NI_VectorT128_op_Subtraction, NI_VectorT128_op_Subtraction}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, op_UnaryNegation, 1, {NI_VectorT128_op_UnaryNegation, NI_VectorT128_op_UnaryNegation, NI_VectorT128_op_UnaryNegation, NI_VectorT128_op_UnaryNegation, NI_VectorT128_op_UnaryNegation, NI_VectorT128_op_UnaryNegation, NI_VectorT128_op_UnaryNegation, NI_VectorT128_op_UnaryNegation, NI_VectorT128_op_UnaryNegation, NI_VectorT128_op_UnaryNegation}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, op_UnaryPlus, 1, {NI_VectorT128_op_UnaryPlus, NI_VectorT128_op_UnaryPlus, NI_VectorT128_op_UnaryPlus, NI_VectorT128_op_UnaryPlus, NI_VectorT128_op_UnaryPlus, NI_VectorT128_op_UnaryPlus, NI_VectorT128_op_UnaryPlus, NI_VectorT128_op_UnaryPlus, NI_VectorT128_op_UnaryPlus, NI_VectorT128_op_UnaryPlus}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, op_UnsignedRightShift, 2, {NI_Illegal, NI_Illegal, NI_VectorT128_op_UnsignedRightShift, NI_VectorT128_op_UnsignedRightShift, NI_VectorT128_op_UnsignedRightShift, NI_VectorT128_op_UnsignedRightShift, NI_VectorT128_op_UnsignedRightShift, NI_VectorT128_op_UnsignedRightShift, NI_VectorT128_op_UnsignedRightShift, NI_VectorT128_op_UnsignedRightShift}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, ShiftLeft, 2, {NI_Illegal, NI_Illegal, NI_VectorT128_ShiftLeft, NI_VectorT128_ShiftLeft, NI_VectorT128_ShiftLeft, NI_VectorT128_ShiftLeft, NI_VectorT128_ShiftLeft, NI_VectorT128_ShiftLeft, NI_Illegal, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT128, ShiftRightArithmetic, 2, {NI_Illegal, NI_Illegal, NI_VectorT128_ShiftRightArithmetic, NI_Illegal, NI_VectorT128_ShiftRightArithmetic, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128, ShiftRightArithmetic, 2, {NI_Illegal, NI_Illegal, NI_VectorT128_ShiftRightArithmetic, NI_Illegal, NI_VectorT128_ShiftRightArithmetic, NI_Illegal, NI_VectorT128_ShiftRightArithmetic, NI_Illegal, NI_Illegal, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, ShiftRightLogical, 2, {NI_Illegal, NI_Illegal, NI_VectorT128_ShiftRightLogical, NI_VectorT128_ShiftRightLogical, NI_VectorT128_ShiftRightLogical, NI_VectorT128_ShiftRightLogical, NI_VectorT128_ShiftRightLogical, NI_VectorT128_ShiftRightLogical, NI_Illegal, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_NM(VectorT128, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT128_Sqrt, NI_VectorT128_Sqrt}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, Store, 2, {NI_VectorT128_Store, NI_VectorT128_Store, NI_VectorT128_Store, NI_VectorT128_Store, NI_VectorT128_Store, NI_VectorT128_Store, NI_VectorT128_Store, NI_VectorT128_Store, NI_VectorT128_Store, NI_VectorT128_Store}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1)
@@ -350,7 +350,7 @@ SIMD_AS_HWINTRINSIC_ID(VectorT256, LoadUnsafe,
SIMD_AS_HWINTRINSIC_NM(VectorT256, LoadUnsafeIndex, "LoadUnsafe", 2, {NI_VectorT256_LoadUnsafeIndex, NI_VectorT256_LoadUnsafeIndex, NI_VectorT256_LoadUnsafeIndex, NI_VectorT256_LoadUnsafeIndex, NI_VectorT256_LoadUnsafeIndex, NI_VectorT256_LoadUnsafeIndex, NI_VectorT256_LoadUnsafeIndex, NI_VectorT256_LoadUnsafeIndex, NI_VectorT256_LoadUnsafeIndex, NI_VectorT256_LoadUnsafeIndex}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet)
SIMD_AS_HWINTRINSIC_ID(VectorT256, Min, 2, {NI_VectorT256_Min, NI_VectorT256_Min, NI_VectorT256_Min, NI_VectorT256_Min, NI_VectorT256_Min, NI_VectorT256_Min, NI_VectorT256_Min, NI_VectorT256_Min, NI_VectorT256_Min, NI_VectorT256_Min}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, Max, 2, {NI_VectorT256_Max, NI_VectorT256_Max, NI_VectorT256_Max, NI_VectorT256_Max, NI_VectorT256_Max, NI_VectorT256_Max, NI_VectorT256_Max, NI_VectorT256_Max, NI_VectorT256_Max, NI_VectorT256_Max}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT256, Multiply, 2, {NI_Illegal, NI_Illegal, NI_VectorT256_Multiply, NI_VectorT256_Multiply, NI_VectorT256_Multiply, NI_VectorT256_Multiply, NI_Illegal, NI_Illegal, NI_VectorT256_Multiply, NI_VectorT256_Multiply}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT256, Multiply, 2, {NI_Illegal, NI_Illegal, NI_VectorT256_Multiply, NI_VectorT256_Multiply, NI_VectorT256_Multiply, NI_VectorT256_Multiply, NI_VectorT256_Multiply, NI_VectorT256_Multiply, NI_VectorT256_Multiply, NI_VectorT256_Multiply}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, Narrow, 2, {NI_VectorT256_Narrow, NI_VectorT256_Narrow, NI_VectorT256_Narrow, NI_VectorT256_Narrow, NI_VectorT256_Narrow, NI_VectorT256_Narrow, NI_VectorT256_Narrow, NI_VectorT256_Narrow, NI_VectorT256_Narrow, NI_VectorT256_Narrow}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet)
SIMD_AS_HWINTRINSIC_ID(VectorT256, Negate, 1, {NI_VectorT256_Negate, NI_VectorT256_Negate, NI_VectorT256_Negate, NI_VectorT256_Negate, NI_VectorT256_Negate, NI_VectorT256_Negate, NI_VectorT256_Negate, NI_VectorT256_Negate, NI_VectorT256_Negate, NI_VectorT256_Negate}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, OnesComplement, 2, {NI_VectorT256_OnesComplement, NI_VectorT256_OnesComplement, NI_VectorT256_OnesComplement, NI_VectorT256_OnesComplement, NI_VectorT256_OnesComplement, NI_VectorT256_OnesComplement, NI_VectorT256_OnesComplement, NI_VectorT256_OnesComplement, NI_VectorT256_OnesComplement, NI_VectorT256_OnesComplement}, SimdAsHWIntrinsicFlag::None)
@@ -365,13 +365,13 @@ SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Inequality,
SIMD_AS_HWINTRINSIC_ID(VectorT256, op_LeftShift, 2, {NI_Illegal, NI_Illegal, NI_VectorT256_op_LeftShift, NI_VectorT256_op_LeftShift, NI_VectorT256_op_LeftShift, NI_VectorT256_op_LeftShift, NI_VectorT256_op_LeftShift, NI_VectorT256_op_LeftShift, NI_VectorT256_op_LeftShift, NI_VectorT256_op_LeftShift}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_VectorT256_op_Multiply, NI_VectorT256_op_Multiply, NI_VectorT256_op_Multiply, NI_VectorT256_op_Multiply, NI_Illegal, NI_Illegal, NI_VectorT256_op_Multiply, NI_VectorT256_op_Multiply}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, op_OnesComplement, 1, {NI_VectorT256_op_OnesComplement, NI_VectorT256_op_OnesComplement, NI_VectorT256_op_OnesComplement, NI_VectorT256_op_OnesComplement, NI_VectorT256_op_OnesComplement, NI_VectorT256_op_OnesComplement, NI_VectorT256_op_OnesComplement, NI_VectorT256_op_OnesComplement, NI_VectorT256_op_OnesComplement, NI_VectorT256_op_OnesComplement}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT256, op_RightShift, 2, {NI_Illegal, NI_Illegal, NI_VectorT256_op_RightShift, NI_VectorT256_op_RightShift, NI_VectorT256_op_RightShift, NI_VectorT256_op_RightShift, NI_Illegal, NI_Illegal, NI_VectorT256_op_RightShift, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT256, op_RightShift, 2, {NI_Illegal, NI_Illegal, NI_VectorT256_op_RightShift, NI_VectorT256_op_RightShift, NI_VectorT256_op_RightShift, NI_VectorT256_op_RightShift, NI_VectorT256_op_RightShift, NI_VectorT256_op_RightShift, NI_VectorT256_op_RightShift, NI_VectorT256_op_RightShift}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Subtraction, 2, {NI_VectorT256_op_Subtraction, NI_VectorT256_op_Subtraction, NI_VectorT256_op_Subtraction, NI_VectorT256_op_Subtraction, NI_VectorT256_op_Subtraction, NI_VectorT256_op_Subtraction, NI_VectorT256_op_Subtraction, NI_VectorT256_op_Subtraction, NI_VectorT256_op_Subtraction, NI_VectorT256_op_Subtraction}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, op_UnaryNegation, 1, {NI_VectorT256_op_UnaryNegation, NI_VectorT256_op_UnaryNegation, NI_VectorT256_op_UnaryNegation, NI_VectorT256_op_UnaryNegation, NI_VectorT256_op_UnaryNegation, NI_VectorT256_op_UnaryNegation, NI_VectorT256_op_UnaryNegation, NI_VectorT256_op_UnaryNegation, NI_VectorT256_op_UnaryNegation, NI_VectorT256_op_UnaryNegation}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, op_UnaryPlus, 1, {NI_VectorT256_op_UnaryPlus, NI_VectorT256_op_UnaryPlus, NI_VectorT256_op_UnaryPlus, NI_VectorT256_op_UnaryPlus, NI_VectorT256_op_UnaryPlus, NI_VectorT256_op_UnaryPlus, NI_VectorT256_op_UnaryPlus, NI_VectorT256_op_UnaryPlus, NI_VectorT256_op_UnaryPlus, NI_VectorT256_op_UnaryPlus}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, op_UnsignedRightShift, 2, {NI_Illegal, NI_Illegal, NI_VectorT256_op_UnsignedRightShift, NI_VectorT256_op_UnsignedRightShift, NI_VectorT256_op_UnsignedRightShift, NI_VectorT256_op_UnsignedRightShift, NI_VectorT256_op_UnsignedRightShift, NI_VectorT256_op_UnsignedRightShift, NI_VectorT256_op_UnsignedRightShift, NI_VectorT256_op_UnsignedRightShift}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, ShiftLeft, 2, {NI_Illegal, NI_Illegal, NI_VectorT256_ShiftLeft, NI_VectorT256_ShiftLeft, NI_VectorT256_ShiftLeft, NI_VectorT256_ShiftLeft, NI_VectorT256_ShiftLeft, NI_VectorT256_ShiftLeft, NI_Illegal, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT256, ShiftRightArithmetic, 2, {NI_Illegal, NI_Illegal, NI_VectorT256_ShiftRightArithmetic, NI_Illegal, NI_VectorT256_ShiftRightArithmetic, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT256, ShiftRightArithmetic, 2, {NI_Illegal, NI_Illegal, NI_VectorT256_ShiftRightArithmetic, NI_Illegal, NI_VectorT256_ShiftRightArithmetic, NI_Illegal, NI_VectorT256_ShiftRightArithmetic, NI_Illegal, NI_Illegal, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, ShiftRightLogical, 2, {NI_Illegal, NI_Illegal, NI_VectorT256_ShiftRightLogical, NI_VectorT256_ShiftRightLogical, NI_VectorT256_ShiftRightLogical, NI_VectorT256_ShiftRightLogical, NI_VectorT256_ShiftRightLogical, NI_VectorT256_ShiftRightLogical, NI_Illegal, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_NM(VectorT256, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT256_Sqrt, NI_VectorT256_Sqrt}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, Store, 2, {NI_VectorT256_Store, NI_VectorT256_Store, NI_VectorT256_Store, NI_VectorT256_Store, NI_VectorT256_Store, NI_VectorT256_Store, NI_VectorT256_Store, NI_VectorT256_Store, NI_VectorT256_Store, NI_VectorT256_Store}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1)
diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp
index 263c2accb11f0..18c4d946c2736 100644
--- a/src/coreclr/jit/valuenum.cpp
+++ b/src/coreclr/jit/valuenum.cpp
@@ -7049,6 +7049,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type,
case NI_SSE2_Add:
case NI_AVX_Add:
case NI_AVX2_Add:
+ case NI_AVX512F_Add:
+ case NI_AVX512BW_Add:
#endif
{
return EvaluateBinarySimd(this, GT_ADD, /* scalar */ false, type, baseType, arg0VN, arg1VN);
@@ -7098,6 +7100,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type,
case NI_SSE_Divide:
case NI_SSE2_Divide:
case NI_AVX_Divide:
+ case NI_AVX512F_Divide:
#endif
{
return EvaluateBinarySimd(this, GT_DIV, /* scalar */ false, type, baseType, arg0VN, arg1VN);
@@ -7179,6 +7182,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type,
#else
case NI_SSE2_ShiftLeftLogical:
case NI_AVX2_ShiftLeftLogical:
+ case NI_AVX512F_ShiftLeftLogical:
+ case NI_AVX512BW_ShiftLeftLogical:
#endif
{
return EvaluateBinarySimd(this, GT_LSH, /* scalar */ false, type, baseType, arg0VN, arg1VN);
@@ -7189,6 +7194,9 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type,
#else
case NI_SSE2_ShiftRightArithmetic:
case NI_AVX2_ShiftRightArithmetic:
+ case NI_AVX512F_ShiftRightArithmetic:
+ case NI_AVX512F_VL_ShiftRightArithmetic:
+ case NI_AVX512BW_ShiftRightArithmetic:
#endif
{
return EvaluateBinarySimd(this, GT_RSH, /* scalar */ false, type, baseType, arg0VN, arg1VN);
@@ -7199,6 +7207,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type,
#else
case NI_SSE2_ShiftRightLogical:
case NI_AVX2_ShiftRightLogical:
+ case NI_AVX512F_ShiftRightLogical:
+ case NI_AVX512BW_ShiftRightLogical:
#endif
{
return EvaluateBinarySimd(this, GT_RSZ, /* scalar */ false, type, baseType, arg0VN, arg1VN);
@@ -7229,6 +7239,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type,
case NI_SSE2_Subtract:
case NI_AVX_Subtract:
case NI_AVX2_Subtract:
+ case NI_AVX512F_Subtract:
+ case NI_AVX512BW_Subtract:
#endif
{
return EvaluateBinarySimd(this, GT_SUB, /* scalar */ false, type, baseType, arg0VN, arg1VN);
@@ -7272,6 +7284,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type,
case NI_SSE2_Add:
case NI_AVX_Add:
case NI_AVX2_Add:
+ case NI_AVX512F_Add:
+ case NI_AVX512BW_Add:
#endif
{
if (varTypeIsFloating(baseType))
@@ -7364,6 +7378,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type,
case NI_SSE_Divide:
case NI_SSE2_Divide:
case NI_AVX_Divide:
+ case NI_AVX512F_Divide:
#endif
{
// Handle `x / 1 == x`.
@@ -7398,6 +7413,11 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type,
case NI_SSE41_MultiplyLow:
case NI_AVX_Multiply:
case NI_AVX2_MultiplyLow:
+ case NI_AVX512F_Multiply:
+ case NI_AVX512F_MultiplyLow:
+ case NI_AVX512BW_MultiplyLow:
+ case NI_AVX512DQ_MultiplyLow:
+ case NI_AVX512DQ_VL_MultiplyLow:
#endif
{
if (!varTypeIsFloating(baseType))
@@ -7470,6 +7490,13 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type,
case NI_AVX2_ShiftLeftLogical:
case NI_AVX2_ShiftRightArithmetic:
case NI_AVX2_ShiftRightLogical:
+ case NI_AVX512F_ShiftLeftLogical:
+ case NI_AVX512F_ShiftRightArithmetic:
+ case NI_AVX512F_ShiftRightLogical:
+ case NI_AVX512F_VL_ShiftRightArithmetic:
+ case NI_AVX512BW_ShiftLeftLogical:
+ case NI_AVX512BW_ShiftRightArithmetic:
+ case NI_AVX512BW_ShiftRightLogical:
#endif
{
// Handle `x << 0 == x` and `0 << x == 0`
@@ -7492,6 +7519,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type,
case NI_SSE2_Subtract:
case NI_AVX_Subtract:
case NI_AVX2_Subtract:
+ case NI_AVX512F_Subtract:
+ case NI_AVX512BW_Subtract:
#endif
{
if (varTypeIsFloating(baseType))
@@ -7584,6 +7613,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type,
case NI_SSE2_Subtract:
case NI_AVX_Subtract:
case NI_AVX2_Subtract:
+ case NI_AVX512F_Subtract:
+ case NI_AVX512BW_Subtract:
#endif
{
if (varTypeIsFloating(baseType))
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.PlatformNotSupported.cs
index 31ae3725d46fb..0d93c5f5d6c4f 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.PlatformNotSupported.cs
@@ -29,5 +29,447 @@ internal X64() { }
public static new bool IsSupported { [Intrinsic] get { return false; } }
}
+
+ ///
+ /// __m512i _mm512_abs_epi8 (__m512i a)
+ /// VPABSB zmm1 {k1}{z}, zmm2/m512
+ ///
+ public static Vector512 Abs(Vector512 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_abs_epi16 (__m512i a)
+ /// VPABSW zmm1 {k1}{z}, zmm2/m512
+ ///
+ public static Vector512 Abs(Vector512 value) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_add_epi8 (__m512i a, __m512i b)
+ /// VPADDB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Add(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_add_epi8 (__m512i a, __m512i b)
+ /// VPADDB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Add(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_add_epi16 (__m512i a, __m512i b)
+ /// VPADDW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Add(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_add_epi16 (__m512i a, __m512i b)
+ /// VPADDW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Add(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_adds_epi8 (__m512i a, __m512i b)
+ /// VPADDSB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 AddSaturate(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_adds_epu8 (__m512i a, __m512i b)
+ /// VPADDUSB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 AddSaturate(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_adds_epi16 (__m512i a, __m512i b)
+ /// VPADDSW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 AddSaturate(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_adds_epu16 (__m512i a, __m512i b)
+ /// VPADDUSW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 AddSaturate(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_alignr_epi8 (__m512i a, __m512i b, const int count)
+ /// VPALIGNR zmm1 {k1}{z}, zmm2, zmm3/m512, imm8
+ ///
+ public static Vector512 AlignRight(Vector512 left, Vector512 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_alignr_epi8 (__m512i a, __m512i b, const int count)
+ /// VPALIGNR zmm1 {k1}{z}, zmm2, zmm3/m512, imm8
+ ///
+ public static Vector512 AlignRight(Vector512 left, Vector512 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_avg_epu8 (__m512i a, __m512i b)
+ /// VPAVGB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Average(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_avg_epu16 (__m512i a, __m512i b)
+ /// VPAVGW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Average(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_cvtepi8_epi16 (__m128i a)
+ /// VPMOVSXBW zmm1 {k1}{z}, ymm2/m256
+ ///
+ public static Vector512 ConvertToVector512Int16(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_cvtepu8_epi16 (__m128i a)
+ /// VPMOVZXBW zmm1 {k1}{z}, ymm2/m256
+ ///
+ public static Vector512 ConvertToVector512Int16(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_cvtepi8_epi16 (__m128i a)
+ /// VPMOVSXBW zmm1 {k1}{z}, ymm2/m256
+ ///
+ public static Vector512 ConvertToVector512UInt16(Vector256 value) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_cvtepu8_epi16 (__m128i a)
+ /// VPMOVZXBW zmm1 {k1}{z}, ymm2/m256
+ ///
+ public static Vector512 ConvertToVector512UInt16(Vector256 value) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_loadu_epi8 (__m512i const * mem_addr)
+ /// VMOVDQU8 zmm1 {k1}{z}, m512
+ ///
+ public static new unsafe Vector512 LoadVector512(sbyte* address) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_loadu_epi8 (__m512i const * mem_addr)
+ /// VMOVDQU8 zmm1 {k1}{z}, m512
+ ///
+ public static new unsafe Vector512 LoadVector512(byte* address) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_loadu_epi16 (__m512i const * mem_addr)
+ /// VMOVDQU16 zmm1 {k1}{z}, m512
+ ///
+ public static new unsafe Vector512 LoadVector512(short* address) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_loadu_epi16 (__m512i const * mem_addr)
+ /// VMOVDQU16 zmm1 {k1}{z}, m512
+ ///
+ public static new unsafe Vector512 LoadVector512(ushort* address) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_max_epi8 (__m512i a, __m512i b)
+ /// VPMAXSB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Max(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_max_epu8 (__m512i a, __m512i b)
+ /// VPMAXUB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Max(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_max_epi16 (__m512i a, __m512i b)
+ /// VPMAXSW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Max(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_max_epu16 (__m512i a, __m512i b)
+ /// VPMAXUW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Max(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_min_epi8 (__m512i a, __m512i b)
+ /// VPMINSB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Min(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_min_epu8 (__m512i a, __m512i b)
+ /// VPMINUB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Min(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_min_epi16 (__m512i a, __m512i b)
+ /// VPMINSW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Min(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_min_epu16 (__m512i a, __m512i b)
+ /// VPMINUW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Min(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_madd_epi16 (__m512i a, __m512i b)
+ /// VPMADDWD zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 MultiplyAddAdjacent(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_maddubs_epi16 (__m512i a, __m512i b)
+ /// VPMADDUBSW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 MultiplyAddAdjacent(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_mulhi_epi16 (__m512i a, __m512i b)
+ /// VPMULHW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 MultiplyHigh(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_mulhi_epu16 (__m512i a, __m512i b)
+ /// VPMULHUW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 MultiplyHigh(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_mulhrs_epi16 (__m512i a, __m512i b)
+ /// VPMULHRSW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 MultiplyHighRoundScale(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_mullo_epi16 (__m512i a, __m512i b)
+ /// VPMULLW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 MultiplyLow(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_mullo_epi16 (__m512i a, __m512i b)
+ /// VPMULLW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 MultiplyLow(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_packs_epi16 (__m512i a, __m512i b)
+ /// VPACKSSWB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 PackSignedSaturate(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_packs_epi32 (__m512i a, __m512i b)
+ /// VPACKSSDW zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst
+ ///
+ public static Vector512 PackSignedSaturate(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_packus_epi16 (__m512i a, __m512i b)
+ /// VPACKUSWB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 PackUnsignedSaturate(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_packus_epi32 (__m512i a, __m512i b)
+ /// VPACKUSDW zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst
+ ///
+ public static Vector512 PackUnsignedSaturate(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_sll_epi16 (__m512i a, __m128i count)
+ /// VPSLLW zmm1 {k1}{z}, zmm2, xmm3/m128
+ ///
+ public static Vector512 ShiftLeftLogical(Vector512 value, Vector128 count) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_sll_epi16 (__m512i a, __m128i count)
+ /// VPSLLW zmm1 {k1}{z}, zmm2, xmm3/m128
+ ///
+ public static Vector512 ShiftLeftLogical(Vector512 value, Vector128 count) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_slli_epi16 (__m512i a, int imm8)
+ /// VPSLLW zmm1 {k1}{z}, zmm2, imm8
+ ///
+ public static Vector512 ShiftLeftLogical(Vector512 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_slli_epi16 (__m512i a, int imm8)
+ /// VPSLLW zmm1 {k1}{z}, zmm2, imm8
+ ///
+ public static Vector512 ShiftLeftLogical(Vector512 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_bslli_epi128 (__m512i a, const int imm8)
+ /// VPSLLDQ zmm1, zmm2/m512, imm8
+ ///
+ public static Vector512 ShiftLeftLogical128BitLane(Vector512 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_bslli_epi128 (__m512i a, const int imm8)
+ /// VPSLLDQ zmm1, zmm2/m512, imm8
+ ///
+ public static Vector512 ShiftLeftLogical128BitLane(Vector512 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_sra_epi16 (__m512i a, __m128i count)
+ /// VPSRAW zmm1 {k1}{z}, zmm2, xmm3/m128
+ ///
+ public static Vector512 ShiftRightArithmetic(Vector512 value, Vector128 count) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_srai_epi16 (__m512i a, int imm8)
+ /// VPSRAW zmm1 {k1}{z}, zmm2, imm8
+ ///
+ public static Vector512 ShiftRightArithmetic(Vector512 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_srl_epi16 (__m512i a, __m128i count)
+ /// VPSRLW zmm1 {k1}{z}, zmm2, xmm3/m128
+ ///
+ public static Vector512 ShiftRightLogical(Vector512 value, Vector128 count) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_srl_epi16 (__m512i a, __m128i count)
+ /// VPSRLW zmm1 {k1}{z}, zmm2, xmm3/m128
+ ///
+ public static Vector512 ShiftRightLogical(Vector512 value, Vector128 count) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_srli_epi16 (__m512i a, int imm8)
+ /// VPSRLW zmm1 {k1}{z}, zmm2, imm8
+ ///
+ public static Vector512 ShiftRightLogical(Vector512 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_srli_epi16 (__m512i a, int imm8)
+ /// VPSRLW zmm1 {k1}{z}, zmm2, imm8
+ ///
+ public static Vector512 ShiftRightLogical(Vector512 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_bsrli_epi128 (__m512i a, const int imm8)
+ /// VPSRLDQ zmm1, zmm2/m512, imm8
+ ///
+ public static Vector512 ShiftRightLogical128BitLane(Vector512 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_bsrli_epi128 (__m512i a, const int imm8)
+ /// VPSRLDQ zmm1, zmm2/m512, imm8
+ ///
+ public static Vector512 ShiftRightLogical128BitLane(Vector512 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_shuffle_epi8 (__m512i a, __m512i b)
+ /// VPSHUFB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Shuffle(Vector512 value, Vector512 mask) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_shuffle_epi8 (__m512i a, __m512i b)
+ /// VPSHUFB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Shuffle(Vector512 value, Vector512 mask) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_shufflehi_epi16 (__m512i a, const int imm8)
+ /// VPSHUFHW zmm1 {k1}{z}, zmm2/m512, imm8
+ ///
+ public static Vector512 ShuffleHigh(Vector512 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_shufflehi_epi16 (__m512i a, const int imm8)
+ /// VPSHUFHW zmm1 {k1}{z}, zmm2/m512, imm8
+ ///
+ public static Vector512 ShuffleHigh(Vector512 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_shufflelo_epi16 (__m512i a, const int imm8)
+ /// VPSHUFLW zmm1 {k1}{z}, zmm2/m512, imm8
+ ///
+ public static Vector512 ShuffleLow(Vector512 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_shufflelo_epi16 (__m512i a, const int imm8)
+ /// VPSHUFLW zmm1 {k1}{z}, zmm2/m512, imm8
+ ///
+ public static Vector512 ShuffleLow(Vector512 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// void _mm512_storeu_epi8 (__m512i * mem_addr, __m512i a)
+ /// VMOVDQU8 m512 {k1}{z}, zmm1
+ ///
+ public static new unsafe void Store(sbyte* address, Vector512 source) { throw new PlatformNotSupportedException(); }
+ ///
+ /// void _mm512_storeu_epi8 (__m512i * mem_addr, __m512i a)
+ /// VMOVDQU8 m512 {k1}{z}, zmm1
+ ///
+ public static new unsafe void Store(byte* address, Vector512 source) { throw new PlatformNotSupportedException(); }
+ ///
+ /// void _mm512_storeu_epi16 (__m512i * mem_addr, __m512i a)
+ /// VMOVDQU16 m512 {k1}{z}, zmm1
+ ///
+ public static new unsafe void Store(short* address, Vector512 source) { throw new PlatformNotSupportedException(); }
+ ///
+ /// void _mm512_storeu_epi16 (__m512i * mem_addr, __m512i a)
+ /// VMOVDQU16 m512 {k1}{z}, zmm1
+ ///
+ public static new unsafe void Store(ushort* address, Vector512 source) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_sub_epi8 (__m512i a, __m512i b)
+ /// VPSUBB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Subtract(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_sub_epi8 (__m512i a, __m512i b)
+ /// VPSUBB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Subtract(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_sub_epi16 (__m512i a, __m512i b)
+ /// VPSUBW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Subtract(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_sub_epi16 (__m512i a, __m512i b)
+ /// VPSUBW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 Subtract(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_subs_epi8 (__m512i a, __m512i b)
+ /// VPSUBSB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 SubtractSaturate(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_subs_epi16 (__m512i a, __m512i b)
+ /// VPSUBSW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 SubtractSaturate(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_subs_epu8 (__m512i a, __m512i b)
+ /// VPSUBUSB zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 SubtractSaturate(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_subs_epu16 (__m512i a, __m512i b)
+ /// VPSUBUSW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 SubtractSaturate(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_sad_epu8 (__m512i a, __m512i b)
+ /// VPSADBW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 SumAbsoluteDifferences(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_unpackhi_epi8 (__m512i a, __m512i b)
+ /// VPUNPCKHBW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 UnpackHigh(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_unpackhi_epi8 (__m512i a, __m512i b)
+ /// VPUNPCKHBW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 UnpackHigh(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_unpackhi_epi16 (__m512i a, __m512i b)
+ /// VPUNPCKHWD zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 UnpackHigh(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_unpackhi_epi16 (__m512i a, __m512i b)
+ /// VPUNPCKHWD zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 UnpackHigh(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+
+ ///
+ /// __m512i _mm512_unpacklo_epi8 (__m512i a, __m512i b)
+ /// VPUNPCKLBW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 UnpackLow(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_unpacklo_epi8 (__m512i a, __m512i b)
+ /// VPUNPCKLBW zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 UnpackLow(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_unpacklo_epi16 (__m512i a, __m512i b)
+ /// VPUNPCKLWD zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 UnpackLow(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
+ ///
+ /// __m512i _mm512_unpacklo_epi16 (__m512i a, __m512i b)
+ /// VPUNPCKLWD zmm1 {k1}{z}, zmm2, zmm3/m512
+ ///
+ public static Vector512 UnpackLow(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); }
}
}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.cs
index b0fe1c60f8b33..01811c5a108db 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.cs
@@ -30,5 +30,447 @@ internal X64() { }
public static new bool IsSupported { get => IsSupported; }
}
+
+ /// <summary>
+ /// __m512i _mm512_abs_epi8 (__m512i a)
+ /// VPABSB zmm1 {k1}{z}, zmm2/m512
+ /// </summary>
+ public static Vector512<byte> Abs(Vector512<sbyte> value) => Abs(value);
+ /// <summary>
+ /// __m512i _mm512_abs_epi16 (__m512i a)
+ /// VPABSW zmm1 {k1}{z}, zmm2/m512
+ /// </summary>
+ public static Vector512<ushort> Abs(Vector512<short> value) => Abs(value);
+
+ /// <summary>
+ /// __m512i _mm512_add_epi8 (__m512i a, __m512i b)
+ /// VPADDB zmm1 {k1}{z}, zmm2, zmm3/m512
+ /// </summary>
+ public static Vector512<byte> Add(Vector512<byte> left, Vector512<byte> right) => Add(left, right);
+ /// <summary>
+ /// __m512i _mm512_add_epi8 (__m512i a, __m512i b)
+ /// VPADDB zmm1 {k1}{z}, zmm2, zmm3/m512
+ /// </summary>
+ public static Vector512<sbyte> Add(Vector512<sbyte> left, Vector512<sbyte> right) => Add(left, right);
+ /// <summary>
+ /// __m512i _mm512_add_epi16 (__m512i a, __m512i b)
+ /// VPADDW zmm1 {k1}{z}, zmm2, zmm3/m512
+ /// </summary>
+ public static Vector512<short> Add(Vector512<short> left, Vector512<short> right) => Add(left, right);
+ /// <summary>
+ /// __m512i _mm512_add_epi16 (__m512i a, __m512i b)
+ /// VPADDW zmm1 {k1}{z}, zmm2, zmm3/m512
+ /// </summary>
+ public static Vector512<ushort> Add(Vector512<ushort> left, Vector512<ushort> right) => Add(left, right);
+
+ /// <summary>
+ /// __m512i _mm512_adds_epi8 (__m512i a, __m512i b)
+ /// VPADDSB zmm1 {k1}{z}, zmm2, zmm3/m512
+ /// </summary>
+ public static Vector512<sbyte> AddSaturate(Vector512<sbyte> left, Vector512<sbyte> right) => AddSaturate(left, right);
+ /// <summary>
+ /// __m512i _mm512_adds_epu8 (__m512i a, __m512i b)
+ /// VPADDUSB zmm1 {k1}{z}, zmm2, zmm3/m512
+ /// </summary>
+ public static Vector512<byte> AddSaturate(Vector512<byte> left, Vector512<byte> right) => AddSaturate(left, right);
+ /// <summary>
+ /// __m512i _mm512_adds_epi16 (__m512i a, __m512i b)
+ /// VPADDSW zmm1 {k1}{z}, zmm2, zmm3/m512
+ /// </summary>
+ public static Vector512<short> AddSaturate(Vector512<short> left, Vector512<short> right) => AddSaturate(left, right);
+ /// <summary>
+ /// __m512i _mm512_adds_epu16 (__m512i a, __m512i b)
+ /// VPADDUSW zmm1 {k1}{z}, zmm2, zmm3/m512
+ /// </summary>
+ public static Vector512<ushort> AddSaturate(Vector512<ushort> left, Vector512<ushort> right) => AddSaturate(left, right);
+
+ /// <summary>
+ /// __m512i _mm512_alignr_epi8 (__m512i a, __m512i b, const int count)
+ /// VPALIGNR zmm1 {k1}{z}, zmm2, zmm3/m512, imm8
+ /// </summary>
+ public static Vector512<sbyte> AlignRight(Vector512<sbyte> left, Vector512<sbyte> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
+ /// <summary>
+ /// __m512i _mm512_alignr_epi8 (__m512i a, __m512i b, const int count)
+ /// VPALIGNR zmm1 {k1}{z}, zmm2, zmm3/m512, imm8
+ /// </summary>
+ public static Vector512<byte> AlignRight(Vector512<byte> left, Vector512<byte> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
+
+ /// <summary>
+ /// __m512i _mm512_avg_epu8 (__m512i a, __m512i b)
+ /// VPAVGB zmm1 {k1}{z}, zmm2, zmm3/m512
+ /// </summary>
+ public static Vector512<byte> Average(Vector512<byte> left, Vector512<byte> right) => Average(left, right);
+ /// <summary>
+ /// __m512i _mm512_avg_epu16 (__m512i a, __m512i b)
+ /// VPAVGW zmm1 {k1}{z}, zmm2, zmm3/m512
+ /// </summary>
+ public static Vector512<ushort> Average(Vector512<ushort> left, Vector512<ushort> right) => Average(left, right);
+
+ /// <summary>
+ /// __m512i _mm512_cvtepi8_epi16 (__m128i a)
+ /// VPMOVSXBW zmm1 {k1}{z}, ymm2/m256
+ /// </summary>
+ public static Vector512<short> ConvertToVector512Int16(Vector256<sbyte> value) => ConvertToVector512Int16(value);
+ /// <summary>
+ /// __m512i _mm512_cvtepu8_epi16 (__m128i a)
+ /// VPMOVZXBW zmm1 {k1}{z}, ymm2/m256
+ /// </summary>
+ public static Vector512 ConvertToVector512Int16(Vector256