From d25304a73bd1ab6ef65863ca96eb9c5f6f062cdf Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 13 Mar 2023 12:55:33 -0700 Subject: [PATCH 1/7] Adding support for Vector512 bitwise operations: And, AndNot, Or, OnesComplement, and Xor --- src/coreclr/jit/emitxarch.cpp | 74 ++++++++++++++++++++++++-- src/coreclr/jit/emitxarch.h | 18 +++++++ src/coreclr/jit/gentree.cpp | 38 +++++++++++-- src/coreclr/jit/hwintrinsiclistxarch.h | 21 ++++++-- src/coreclr/jit/hwintrinsicxarch.cpp | 9 ++++ src/coreclr/jit/instr.h | 6 +++ src/coreclr/jit/instrsxarch.h | 25 +++++---- 7 files changed, 169 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 757f5eb204558..e090277a88baa 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -289,10 +289,10 @@ bool emitter::IsEvexEncodedInstruction(instruction ins) const // k registers are used (as that is the point of the "break out operand type" of these instructions) // case INS_movdqa: // INS_movdqa32, INS_movdqa64. // case INS_movdqu: // INS_movdqu8, INS_movdqu16, INS_movdqu32, INS_movdqu64. - // case INS_pand: // INS_pandd, INS_pandq. - // case INS_pandn: // INS_pandnd, INS_pandnq. - // case INS_por: // INS_pord, INS_porq. - // case INS_pxor: // INS_pxord, INS_pxorq + // case INS_pand: // INS_vpandd, INS_vpandq. + // case INS_pandn: // INS_vpandnd, INS_vpandnq. + // case INS_por: // INS_vpord, INS_vporq. + // case INS_pxor: // INS_vpxord, INS_vpxorq // case INS_vextractf128: // INS_vextractf32x4, INS_vextractf64x2. // case INS_vextracti128: // INS_vextracti32x4, INS_vextracti64x2. // case INS_vinsertf128: // INS_vinsertf32x4, INS_vinsertf64x2. @@ -492,6 +492,72 @@ bool emitter::IsFlagsAlwaysModified(instrDesc* id) return true; } +//------------------------------------------------------------------------ +// IsRexW0Instruction: check if the instruction always encodes REX.W as 0 +// +// Arguments: +// id - instruction to test +// +// Return Value: +// true if the instruction always encodes REX.W as 0; othwerwise, false +// +bool emitter::IsRexW0Instruction(instruction ins) +{ + insFlags flags = CodeGenInterface::instInfo[ins]; + + if ((flags & REX_W0) != 0) + { + assert((flags & (REX_W1 | REX_WX)) == 0); + return true; + } + + return false; +} + +//------------------------------------------------------------------------ +// IsRexW1Instruction: check if the instruction always encodes REX.W as 1 +// +// Arguments: +// id - instruction to test +// +// Return Value: +// true if the instruction always encodes REX.W as 1; othwerwise, false +// +bool emitter::IsRexW1Instruction(instruction ins) +{ + insFlags flags = CodeGenInterface::instInfo[ins]; + + if ((flags & REX_W1) != 0) + { + assert((flags & (REX_W0 | REX_WX)) == 0); + return true; + } + + return false; +} + +//------------------------------------------------------------------------ +// IsRexWXInstruction: check if the instruction requires special REX.W encoding +// +// Arguments: +// id - instruction to test +// +// Return Value: +// true if the instruction requires special REX.W encoding; othwerwise, false +// +bool emitter::IsRexWXInstruction(instruction ins) +{ + insFlags flags = CodeGenInterface::instInfo[ins]; + + if ((flags & REX_WX) != 0) + { + assert((flags & (REX_W0 | REX_W1)) == 0); + return true; + } + + return false; +} + #ifdef TARGET_64BIT //------------------------------------------------------------------------ // AreUpper32BitsZero: check if some previously emitted diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index eb6ebf375bd76..fc3eef7bab533 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -202,6 +202,21 @@ bool IsWEvexOpcodeExtension(const instrDesc* id) instruction ins = id->idIns(); + if (IsRexW0Instruction(ins)) + { + return false; + } + else if (IsRexW1Instruction(ins)) + { + return true; + } + + if (IsRexWXInstruction(ins)) + { + // TODO: Make this a simple assert once all instructions are annotated + unreached(); + } + switch (ins) { case INS_movq: @@ -648,6 +663,9 @@ static bool DoesWriteZeroFlag(instruction ins); bool DoesWriteSignFlag(instruction ins); bool DoesResetOverflowAndCarryFlags(instruction ins); bool IsFlagsAlwaysModified(instrDesc* id); +static bool IsRexW0Instruction(instruction ins); +static bool IsRexW1Instruction(instruction ins); +static bool IsRexWXInstruction(instruction ins); bool IsThreeOperandAVXInstruction(instruction ins) { diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 5fec340b53e51..baef29856f3d6 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -19593,7 +19593,12 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op, case GT_AND: { - if (simdSize == 32) + if (simdSize == 64) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + intrinsic = NI_AVX512F_And; + } + else if (simdSize == 32) { assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); @@ -19627,7 +19632,12 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op, case GT_AND_NOT: { - if (simdSize == 32) + if (simdSize == 64) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + intrinsic = NI_AVX512F_AndNot; + } + else if (simdSize == 32) { assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); @@ -19892,7 +19902,12 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op, case GT_OR: { - if (simdSize == 32) + if (simdSize == 64) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + intrinsic = NI_AVX512F_Or; + } + else if (simdSize == 32) { assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); @@ -19953,7 +19968,12 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op, case GT_XOR: { - if (simdSize == 32) + if (simdSize == 64) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + intrinsic = NI_AVX512F_Xor; + } + else if (simdSize == 32) { assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); @@ -23446,7 +23466,15 @@ GenTree* Compiler::gtNewSimdUnOpNode(genTreeOps op, case GT_NOT: { - assert((simdSize != 32) || compIsaSupportedDebugOnly(InstructionSet_AVX)); + if (simdSize == 64) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + } + else if (simdSize == 32) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); + } + op2 = gtNewAllBitsSetConNode(type); return gtNewSimdBinOpNode(GT_XOR, type, op1, op2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); } diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 054273c147d73..b93ae97a709a1 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -236,9 +236,13 @@ HARDWARE_INTRINSIC(Vector256, Xor, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector512 Intrinsics +HARDWARE_INTRINSIC(Vector512, AndNot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, BitwiseAnd, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, BitwiseOr, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, Create, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, CreateScalar, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, CreateScalarUnsafe, 64, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector512, ExtractMostSignificantBits, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, get_AllBitsSet, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Vector512, get_One, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, get_Zero, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) @@ -246,12 +250,16 @@ HARDWARE_INTRINSIC(Vector512, Load, HARDWARE_INTRINSIC(Vector512, LoadAligned, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, LoadAlignedNonTemporal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, LoadUnsafe, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, OnesComplement, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, op_BitwiseAnd, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector512, op_BitwiseOr, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector512, op_ExclusiveOr, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, op_OnesComplement, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, Store, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, StoreUnsafe, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) - -HARDWARE_INTRINSIC(Vector512, ExtractMostSignificantBits, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, Xor, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags @@ -673,7 +681,7 @@ HARDWARE_INTRINSIC(AVX, Xor, HARDWARE_INTRINSIC(AVX2, Abs, 32, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2, Add, 32, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX2, AddSaturate, 32, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, AlignRight, 32, 3, {INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, AlignRight, 32, 3, {INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX2, And, 32, 2, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX2, AndNot, 32, 2, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2, Average, 32, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) @@ -736,18 +744,22 @@ HARDWARE_INTRINSIC(AVX2, SubtractSaturate, HARDWARE_INTRINSIC(AVX2, UnpackHigh, 32, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2, UnpackLow, 32, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2, Xor, 32, 2, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) + // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512F Intrinsics +HARDWARE_INTRINSIC(AVX512F, And, 64, 2, {INS_vpandd, INS_vpandd, INS_vpandd, INS_vpandd, INS_vpandd, INS_vpandd, INS_vpandq, INS_vpandq, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512F, AndNot, 64, 2, {INS_vpandnd, INS_vpandnd, INS_vpandnd, INS_vpandnd, INS_vpandnd, INS_vpandnd, INS_vpandnq, INS_vpandnq, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX512F, BroadcastScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad) HARDWARE_INTRINSIC(AVX512F, InsertVector256, 64, 3, {INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinsertf64x4, INS_vinsertf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512, 64, 1, {INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa64, INS_movdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512NonTemporal, 64, 1, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512F, Or, 64, 2, {INS_vpord, INS_vpord, INS_vpord, INS_vpord, INS_vpord, INS_vpord, INS_vporq, INS_vporq, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX512F, StoreAligned, 64, 2, {INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa64, INS_movdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVX512F, StoreAlignedNonTemporal, 64, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) - +HARDWARE_INTRINSIC(AVX512F, Xor, 64, 2, {INS_vpxord, INS_vpxord, INS_vpxord, INS_vpxord, INS_vpxord, INS_vpxord, INS_vpxorq, INS_vpxorq, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags @@ -763,6 +775,7 @@ HARDWARE_INTRINSIC(AVX512BW, BroadcastScalarToVector512, // AVXVNNI Intrinsics HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_vpdpbusd, INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg) + // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index a0ddeb10460ac..43a88c95952eb 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -553,6 +553,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_AndNot: case NI_Vector256_AndNot: + case NI_Vector512_AndNot: { assert(sig->numArgs == 2); @@ -780,8 +781,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_BitwiseAnd: case NI_Vector256_BitwiseAnd: + case NI_Vector512_BitwiseAnd: case NI_Vector128_op_BitwiseAnd: case NI_Vector256_op_BitwiseAnd: + case NI_Vector512_op_BitwiseAnd: { assert(sig->numArgs == 2); @@ -795,8 +798,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_BitwiseOr: case NI_Vector256_BitwiseOr: + case NI_Vector512_BitwiseOr: case NI_Vector128_op_BitwiseOr: case NI_Vector256_op_BitwiseOr: + case NI_Vector512_op_BitwiseOr: { assert(sig->numArgs == 2); @@ -1860,8 +1865,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_OnesComplement: case NI_Vector256_OnesComplement: + case NI_Vector512_OnesComplement: case NI_Vector128_op_OnesComplement: case NI_Vector256_op_OnesComplement: + case NI_Vector512_op_OnesComplement: { assert(sig->numArgs == 1); op1 = impSIMDPopStack(retType); @@ -2368,8 +2375,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_Xor: case NI_Vector256_Xor: + case NI_Vector512_Xor: case NI_Vector128_op_ExclusiveOr: case NI_Vector256_op_ExclusiveOr: + case NI_Vector512_op_ExclusiveOr: { assert(sig->numArgs == 2); diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 72295fa640af6..67f55f3057a63 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -154,6 +154,12 @@ enum insFlags : uint64_t Input_64Bit = 1ULL << 32, Input_Mask = (0xFULL) << 29, + // encoding of the REX.W-bit + REX_W0 = 1ULL << 33, + REX_W1 = 1ULL << 34, + REX_WX = 1ULL << 35, + REX_WIG = REX_W0, + // TODO-Cleanup: Remove this flag and its usage from TARGET_XARCH INS_FLAGS_DONT_CARE = 0x00ULL, }; diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 69bb4e243909c..dd444bb5a015c 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -340,10 +340,9 @@ INST3(pmulhw, "pmulhw", IUM_WR, BAD_CODE, BAD_CODE, INST3(pmulhuw, "pmulhuw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE4), INS_TT_FULL_MEM, Input_16Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit unsigned integers INST3(pmuludq, "pmuludq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF4), INS_TT_FULL_MEM, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit unsigned integers and store 64-bit result INST3(pmullw, "pmullw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD5), INS_TT_FULL_MEM, Input_16Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 16 bit unsigned integers and store lower 16 bits of each result -// TODO-XArch-AVX512: pand, pandn, por, and pxor have AVX512 instructions under different names, pandd, pandq etc -INST3(pand, "pand", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs // TODO-XARCH-AVX512 TT and IP encoded is pand32 -INST3(pandn, "pandn", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs // TODO-XARCH-AVX512 TT and IP encoded is pand32 -INST3(por, "por", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs // TODO-XARCH-AVX512 TT and IP encoded is pand32 +INST3(pand, "pand", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs +INST3(pandn, "pandn", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs +INST3(por, "por", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs INST3(pxor, "pxor", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise XOR of two xmm regs INST3(psadbw, "psadbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF6), INS_TT_FULL_MEM, Input_8Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Compute the sum of absolute differences of packed unsigned 8-bit integers INST3(psubsb, "psubsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE8), INS_TT_FULL_MEM, Input_8Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 8-bit integers in b from packed 8-bit integers in a using saturation @@ -648,13 +647,21 @@ INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, INST3(FIRST_AVX512_INSTRUCTION, "FIRST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) INST3(FIRST_AVX512F_INSTRUCTION, "FIRST_AVX512F_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) +INST3(vinsertf64x4, "insertf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE4, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values +INST3(vinserti64x4, "inserti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE4, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values INST3(movdqa32, "movdqa32", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, Input_32Bit | INS_FLAGS_None) INST3(movdqa64, "movdqa64", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, Input_64Bit | INS_FLAGS_None) INST3(movdqu32, "movdqu32", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_32Bit | INS_FLAGS_None) INST3(movdqu64, "movdqu64", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_64Bit | INS_FLAGS_None) -INST3(vinsertf64x4, "insertf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE4, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values -INST3(vinserti64x4, "inserti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE4, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values -INST3(vpternlogd, "pternlogd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x25), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(vpandd, "pandd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction|REX_W0) // Packed bit-wise AND of two xmm regs +INST3(vpandq, "pandq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_TT_FULL, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction|REX_W1) // Packed bit-wise AND of two xmm regs +INST3(vpandnd, "pandnd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction|REX_W0) // Packed bit-wise AND NOT of two xmm regs +INST3(vpandnq, "pandnq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_TT_FULL, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction|REX_W1) // Packed bit-wise AND NOT of two xmm regs +INST3(vpord, "pord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction|REX_W0) // Packed bit-wise OR of two xmm regs +INST3(vporq, "porq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction|REX_W1) // Packed bit-wise OR of two xmm regs +INST3(vpternlogd, "pternlogd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x25), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(vpxord, "pxord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction|REX_W0) // Packed bit-wise XOR of two xmm regs +INST3(vpxorq, "pxorq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_TT_FULL, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction|REX_W1) // Packed bit-wise XOR of two xmm regs INST3(LAST_AVX512F_INSTRUCTION, "LAST_AVX512F_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) INST3(FIRST_AVX512BW_INSTRUCTION, "FIRST_AVX512BW_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) @@ -703,9 +710,9 @@ INST2(ror_N, "ror", IUM_RW, 0x0008C0, 0x0008C0, INST2(rcl, "rcl", IUM_RW, 0x0010D2, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit ) INST2(rcl_1, "rcl", IUM_RW, 0x0010D0, 0x0010D0, INS_TT_NONE, Writes_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit ) -INST2(rcl_N, "rcl", IUM_RW, 0x0010C0, 0x0010C0, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit ) +INST2(rcl_N, "rcl", IUM_RW, 0x0010C0, 0x0010C0, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit ) INST2(rcr, "rcr", IUM_RW, 0x0018D2, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit ) -INST2(rcr_1, "rcr", IUM_RW, 0x0018D0, 0x0018D0, INS_TT_NONE, Writes_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit ) +INST2(rcr_1, "rcr", IUM_RW, 0x0018D0, 0x0018D0, INS_TT_NONE, Writes_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit ) INST2(rcr_N, "rcr", IUM_RW, 0x0018C0, 0x0018C0, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit ) INST2(shl, "shl", IUM_RW, 0x0020D2, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit ) INST2(shl_1, "shl", IUM_RW, 0x0020D0, 0x0020D0, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit ) From 4b82857b9bb762d266beae005404ed745404ecc3 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 13 Mar 2023 13:50:28 -0700 Subject: [PATCH 2/7] Adding AVX512F APIs for And, AndNot, Load, Or, Store, and Xor --- src/coreclr/jit/emitxarch.cpp | 36 +- src/coreclr/jit/emitxarch.h | 4 - src/coreclr/jit/hwintrinsiclistxarch.h | 8 +- src/coreclr/jit/hwintrinsicxarch.cpp | 2 + src/coreclr/jit/instrsxarch.h | 24 +- .../X86/Avx512F.PlatformNotSupported.cs | 459 ++++++++++++++++++ .../System/Runtime/Intrinsics/X86/Avx512F.cs | 459 ++++++++++++++++++ .../ref/System.Runtime.Intrinsics.cs | 90 ++++ .../GenerateHWIntrinsicTests_X86.cs | 56 +++ .../X86/Avx512F/Avx512F_handwritten_r.csproj | 20 + .../X86/Avx512F/Avx512F_handwritten_ro.csproj | 20 + .../X86/Avx512F/Avx512F_r.csproj | 15 + .../X86/Avx512F/Avx512F_ro.csproj | 15 + .../X86/Avx512F/LoadAlignedVector512.cs | 267 ++++++++++ .../X86/Avx512F/Program.Avx512F.cs | 16 + .../HardwareIntrinsics/X86/Avx512F/Store.cs | 198 ++++++++ .../X86/Avx512F/StoreAligned.cs | 277 +++++++++++ .../X86/Avx512F/StoreAlignedNonTemporal.cs | 277 +++++++++++ 18 files changed, 2206 insertions(+), 37 deletions(-) create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Avx512F_handwritten_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Avx512F_handwritten_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Avx512F_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Avx512F_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Avx512F/LoadAlignedVector512.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Program.Avx512F.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Store.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Avx512F/StoreAligned.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Avx512F/StoreAlignedNonTemporal.cs diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index e090277a88baa..a2704f301a7c9 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -287,8 +287,8 @@ bool emitter::IsEvexEncodedInstruction(instruction ins) const // Since we are not using k registers yet, this will have no impact on correctness but will affect things // once // k registers are used (as that is the point of the "break out operand type" of these instructions) - // case INS_movdqa: // INS_movdqa32, INS_movdqa64. - // case INS_movdqu: // INS_movdqu8, INS_movdqu16, INS_movdqu32, INS_movdqu64. + // case INS_movdqa: // INS_vmovdqa32, INS_vmovdqa64. + // case INS_movdqu: // INS_movdqu8, INS_movdqu16, INS_vmovdqu32, INS_vmovdqu64. // case INS_pand: // INS_vpandd, INS_vpandq. // case INS_pandn: // INS_vpandnd, INS_vpandnq. // case INS_por: // INS_vpord, INS_vporq. @@ -5934,13 +5934,13 @@ bool emitter::IsMovInstruction(instruction ins) case INS_movaps: case INS_movd: case INS_movdqa: - case INS_movdqa32: - case INS_movdqa64: + case INS_vmovdqa32: + case INS_vmovdqa64: case INS_movdqu: case INS_movdqu8: case INS_movdqu16: - case INS_movdqu32: - case INS_movdqu64: + case INS_vmovdqu32: + case INS_vmovdqu64: case INS_movsdsse2: case INS_movss: case INS_movsx: @@ -6083,12 +6083,12 @@ bool emitter::HasSideEffect(instruction ins, emitAttr size) break; } - case INS_movdqa32: - case INS_movdqa64: + case INS_vmovdqa32: + case INS_vmovdqa64: case INS_movdqu8: case INS_movdqu16: - case INS_movdqu32: - case INS_movdqu64: + case INS_vmovdqu32: + case INS_vmovdqu64: { // These EVEX instructions merges/masks based on k-register // TODO-XArch-AVX512 : Handle merge/masks scenarios once k-mask support is added for these. @@ -6299,13 +6299,13 @@ void emitter::emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regN case INS_movapd: case INS_movaps: case INS_movdqa: - case INS_movdqa32: - case INS_movdqa64: + case INS_vmovdqa32: + case INS_vmovdqa64: case INS_movdqu: case INS_movdqu8: case INS_movdqu16: - case INS_movdqu32: - case INS_movdqu64: + case INS_vmovdqu32: + case INS_vmovdqu64: case INS_movsdsse2: case INS_movss: case INS_movupd: @@ -17538,13 +17538,13 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; case INS_movdqa: - case INS_movdqa32: - case INS_movdqa64: + case INS_vmovdqa32: + case INS_vmovdqa64: case INS_movdqu: case INS_movdqu8: case INS_movdqu16: - case INS_movdqu32: - case INS_movdqu64: + case INS_vmovdqu32: + case INS_vmovdqu64: case INS_movaps: case INS_movups: case INS_movapd: diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index fc3eef7bab533..76197ee1ca825 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -306,9 +306,7 @@ bool IsWEvexOpcodeExtension(const instrDesc* id) case INS_vfnmsub231sd: case INS_unpcklpd: case INS_vpermilpdvar: - case INS_movdqa64: case INS_movdqu16: - case INS_movdqu64: case INS_vinsertf64x4: case INS_vinserti64x4: { @@ -424,9 +422,7 @@ bool IsWEvexOpcodeExtension(const instrDesc* id) case INS_vpdpbusds: case INS_vpdpwssds: case INS_vpermilpsvar: - case INS_movdqa32: case INS_movdqu8: - case INS_movdqu32: case INS_vinsertf32x8: case INS_vinserti32x8: { diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index b93ae97a709a1..72603878059de 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -225,7 +225,7 @@ HARDWARE_INTRINSIC(Vector256, StoreUnsafe, HARDWARE_INTRINSIC(Vector256, Subtract, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, Sum, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, ToScalar, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsdsse2}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ToVector512Unsafe, 32, 1, {INS_movdqu8, INS_movdqu8, INS_movdqu16, INS_movdqu16, INS_movdqu32, INS_movdqu32, INS_movdqu64, INS_movdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector256, ToVector512Unsafe, 32, 1, {INS_movdqu8, INS_movdqu8, INS_movdqu16, INS_movdqu16, INS_vmovdqu32, INS_vmovdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256, WidenLower, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, WidenUpper, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, WithElement, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) @@ -754,10 +754,12 @@ HARDWARE_INTRINSIC(AVX512F, And, HARDWARE_INTRINSIC(AVX512F, AndNot, 64, 2, {INS_vpandnd, INS_vpandnd, INS_vpandnd, INS_vpandnd, INS_vpandnd, INS_vpandnd, INS_vpandnq, INS_vpandnq, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX512F, BroadcastScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad) HARDWARE_INTRINSIC(AVX512F, InsertVector256, 64, 3, {INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinsertf64x4, INS_vinsertf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512, 64, 1, {INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa64, INS_movdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512, 64, 1, {INS_vmovdqa32, INS_vmovdqa32, INS_vmovdqa32, INS_vmovdqa32, INS_vmovdqa32, INS_vmovdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512NonTemporal, 64, 1, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512F, LoadVector512, 64, 1, {INS_vmovdqu32, INS_vmovdqu32, INS_vmovdqu32, INS_vmovdqu32, INS_vmovdqu32, INS_vmovdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(AVX512F, Or, 64, 2, {INS_vpord, INS_vpord, INS_vpord, INS_vpord, INS_vpord, INS_vpord, INS_vporq, INS_vporq, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512F, StoreAligned, 64, 2, {INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa64, INS_movdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512F, Store, 64, 2, {INS_vmovdqu32, INS_vmovdqu32, INS_vmovdqu32, INS_vmovdqu32, INS_vmovdqu32, INS_vmovdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(AVX512F, StoreAligned, 64, 2, {INS_vmovdqa32, INS_vmovdqa32, INS_vmovdqa32, INS_vmovdqa32, INS_vmovdqa32, INS_vmovdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVX512F, StoreAlignedNonTemporal, 64, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVX512F, Xor, 64, 2, {INS_vpxord, INS_vpxord, INS_vpxord, INS_vpxord, INS_vpxord, INS_vpxord, INS_vpxorq, INS_vpxorq, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 43a88c95952eb..f3eb33c52c13e 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1685,6 +1685,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_SSE_LoadVector128: case NI_SSE2_LoadVector128: case NI_AVX_LoadVector256: + case NI_AVX512F_LoadVector512: case NI_Vector128_Load: case NI_Vector256_Load: case NI_Vector512_Load: @@ -2093,6 +2094,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_SSE_Store: case NI_SSE2_Store: case NI_AVX_Store: + case NI_AVX512F_Store: { assert(retType == TYP_VOID); assert(sig->numArgs == 2); diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index dd444bb5a015c..e01cd4bfd303f 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -649,19 +649,19 @@ INST3(FIRST_AVX512_INSTRUCTION, "FIRST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BA INST3(FIRST_AVX512F_INSTRUCTION, "FIRST_AVX512F_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) INST3(vinsertf64x4, "insertf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE4, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values INST3(vinserti64x4, "inserti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE4, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values -INST3(movdqa32, "movdqa32", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, Input_32Bit | INS_FLAGS_None) -INST3(movdqa64, "movdqa64", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, Input_64Bit | INS_FLAGS_None) -INST3(movdqu32, "movdqu32", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_32Bit | INS_FLAGS_None) -INST3(movdqu64, "movdqu64", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_64Bit | INS_FLAGS_None) -INST3(vpandd, "pandd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction|REX_W0) // Packed bit-wise AND of two xmm regs -INST3(vpandq, "pandq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_TT_FULL, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction|REX_W1) // Packed bit-wise AND of two xmm regs -INST3(vpandnd, "pandnd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction|REX_W0) // Packed bit-wise AND NOT of two xmm regs -INST3(vpandnq, "pandnq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_TT_FULL, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction|REX_W1) // Packed bit-wise AND NOT of two xmm regs -INST3(vpord, "pord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction|REX_W0) // Packed bit-wise OR of two xmm regs -INST3(vporq, "porq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction|REX_W1) // Packed bit-wise OR of two xmm regs +INST3(vmovdqa32, "movdqa32", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, Input_32Bit | REX_W0) +INST3(vmovdqa64, "movdqa64", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, Input_64Bit | REX_W1) +INST3(vmovdqu32, "movdqu32", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_32Bit | REX_W0) +INST3(vmovdqu64, "movdqu64", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_64Bit | REX_W1) +INST3(vpandd, "pandd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_TT_FULL, Input_32Bit | REX_W0 | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs +INST3(vpandq, "pandq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_TT_FULL, Input_64Bit | REX_W1 | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs +INST3(vpandnd, "pandnd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_TT_FULL, Input_32Bit | REX_W0 | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs +INST3(vpandnq, "pandnq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_TT_FULL, Input_64Bit | REX_W1 | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs +INST3(vpord, "pord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_32Bit | REX_W0 | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs +INST3(vporq, "porq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_64Bit | REX_W1 | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs INST3(vpternlogd, "pternlogd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x25), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(vpxord, "pxord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction|REX_W0) // Packed bit-wise XOR of two xmm regs -INST3(vpxorq, "pxorq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_TT_FULL, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction|REX_W1) // Packed bit-wise XOR of two xmm regs +INST3(vpxord, "pxord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_TT_FULL, Input_32Bit | REX_W0 | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise XOR of two xmm regs +INST3(vpxorq, "pxorq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_TT_FULL, Input_64Bit | REX_W1 | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise XOR of two xmm regs INST3(LAST_AVX512F_INSTRUCTION, "LAST_AVX512F_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) INST3(FIRST_AVX512BW_INSTRUCTION, "FIRST_AVX512BW_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs index 4d23668759a6a..c74496886ddfa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs @@ -29,5 +29,464 @@ internal X64() { } public static new bool IsSupported { [Intrinsic] get { return false; } } } + + /// + /// __m512i _mm512_and_si512 (__m512i a, __m512i b) + /// VPAND zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_and_si512 (__m512i a, __m512i b) + /// VPAND zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_and_si512 (__m512i a, __m512i b) + /// VPAND zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_and_si512 (__m512i a, __m512i b) + /// VPAND zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_and_si512 (__m512i a, __m512i b) + /// VPAND zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_and_si512 (__m512i a, __m512i b) + /// VPAND zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_and_si512 (__m512i a, __m512i b) + /// VPAND zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_and_si512 (__m512i a, __m512i b) + /// VPAND zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512 _mm512_and_ps (__m512 a, __m512 b) + /// VANDPS zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512d _mm512_and_pd (__m512d a, __m512d b) + /// VANDPD zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + + /// + /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) + /// VPANDN zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) + /// VPANDN zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) + /// VPANDN zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) + /// VPANDN zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) + /// VPANDN zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) + /// VPANDN zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) + /// VPANDN zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) + /// VPANDN zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512 _mm512_andnot_ps (__m512 a, __m512 b) + /// VANDNPS zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512d _mm512_andnot_pd (__m512d a, __m512d b) + /// VANDNPD zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU32 zmm, m512 + /// + public static unsafe Vector512 LoadVector512(sbyte* address) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU32 zmm, m512 + /// + public static unsafe Vector512 LoadVector512(byte* address) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU32 zmm, m512 + /// + public static unsafe Vector512 LoadVector512(short* address) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU32 zmm, m512 + /// + public static unsafe Vector512 LoadVector512(ushort* address) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU32 zmm, m512 + /// + public static unsafe Vector512 LoadVector512(int* address) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU32 zmm, m512 + /// + public static unsafe Vector512 LoadVector512(uint* address) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU64 zmm, m512 + /// + public static unsafe Vector512 LoadVector512(long* address) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU64 zmm, m512 + /// + public static unsafe Vector512 LoadVector512(ulong* address) { throw new PlatformNotSupportedException; } + /// + /// __m512 _mm512_loadu_ps (float const * mem_addr) + /// VMOVUPS zmm, zmm/m512 + /// + public static unsafe Vector512 LoadVector512(float* address) { throw new PlatformNotSupportedException; } + /// + /// __m512d _mm512_loadu_pd (double const * mem_addr) + /// VMOVUPD zmm, zmm/m512 + /// + public static unsafe Vector512 LoadVector512(double* address) { throw new PlatformNotSupportedException; } + + /// + /// __m512i _mm512_load_si512 (__m512i const * mem_addr) + /// VMOVDQA32 zmm, m512 + /// + public static unsafe Vector512 LoadAlignedVector512(sbyte* address) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_load_si512 (__m512i const * mem_addr) + /// VMOVDQA32 zmm, m512 + /// + public static unsafe Vector512 LoadAlignedVector512(byte* address) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_load_si512 (__m512i const * mem_addr) + /// VMOVDQA32 zmm, m512 + /// + public static unsafe Vector512 LoadAlignedVector512(short* address) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_load_si512 (__m512i const * mem_addr) + /// VMOVDQA32 zmm, m512 + /// + public static unsafe Vector512 LoadAlignedVector512(ushort* address) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_load_si512 (__m512i const * mem_addr) + /// VMOVDQA32 zmm, m512 + /// + public static unsafe Vector512 LoadAlignedVector512(int* address) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_load_si512 (__m512i const * mem_addr) + /// VMOVDQA32 zmm, m512 + /// + public static unsafe Vector512 LoadAlignedVector512(uint* address) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_load_si512 (__m512i const * mem_addr) + /// VMOVDQA64 zmm, m512 + /// + public static unsafe Vector512 LoadAlignedVector512(long* address) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_load_si512 (__m512i const * mem_addr) + /// VMOVDQA64 zmm, m512 + /// + public static unsafe Vector512 LoadAlignedVector512(ulong* address) { throw new PlatformNotSupportedException; } + /// + /// __m512 _mm512_load_ps (float const * mem_addr) + /// VMOVAPS zmm, zmm/m512 + /// + public static unsafe Vector512 LoadAlignedVector512(float* address) { throw new PlatformNotSupportedException; } + /// + /// __m512d _mm512_load_pd (double const * mem_addr) + /// VMOVAPD zmm, zmm/m512 + /// + public static unsafe Vector512 LoadAlignedVector512(double* address) { throw new PlatformNotSupportedException; } + + /// + /// __m512 _mm512_or_ps (__m512 a, __m512 b) + /// VORPS zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512d _mm512_or_pd (__m512d a, __m512d b) + /// VORPD zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPOR zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPOR zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPOR zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPOR zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPOR zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPOR zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPOR zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPOR zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + + /// + /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQU32 m512, zmm + /// + public static unsafe void Store(sbyte* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQU32 m512, zmm + /// + public static unsafe void Store(byte* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQU32 m512, zmm + /// + public static unsafe void Store(short* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQU32 m512, zmm + /// + public static unsafe void Store(ushort* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQU32 m512, zmm + /// + public static unsafe void Store(int* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQU32 m512, zmm + /// + public static unsafe void Store(uint* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQU64 m512, zmm + /// + public static unsafe void Store(long* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQU64 m512, zmm + /// + public static unsafe void Store(ulong* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_storeu_ps (float * mem_addr, __m512 a) + /// VMOVUPS m512, zmm + /// + public static unsafe void Store(float* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_storeu_pd (double * mem_addr, __m512d a) + /// VMOVUPD m512, zmm + /// + public static unsafe void Store(double* address, Vector512 source) { throw new PlatformNotSupportedException; } + + /// + /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQA32 m512, zmm + /// + public static unsafe void StoreAligned(sbyte* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQA32 m512, zmm + /// + public static unsafe void StoreAligned(byte* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQA32 m512, zmm + /// + public static unsafe void StoreAligned(short* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQA32 m512, zmm + /// + public static unsafe void StoreAligned(ushort* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQA32 m512, zmm + /// + public static unsafe void StoreAligned(int* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQA32 m512, zmm + /// + public static unsafe void StoreAligned(uint* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQA64 m512, zmm + /// + public static unsafe void StoreAligned(long* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQA64 m512, zmm + /// + public static unsafe void StoreAligned(ulong* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_store_ps (float * mem_addr, __m512 a) + /// VMOVAPS m512, zmm + /// + public static unsafe void StoreAligned(float* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_store_pd (double * mem_addr, __m512d a) + /// VMOVAPD m512, zmm + /// + public static unsafe void StoreAligned(double* address, Vector512 source) { throw new PlatformNotSupportedException; } + + /// + /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) + /// VMOVNTDQ m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(sbyte* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) + /// VMOVNTDQ m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(byte* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) + /// VMOVNTDQ m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(short* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) + /// VMOVNTDQ m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(ushort* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) + /// VMOVNTDQ m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(int* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) + /// VMOVNTDQ m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(uint* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) + /// VMOVNTDQ m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(long* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) + /// VMOVNTDQ m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(ulong* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_stream_ps (float * mem_addr, __m512 a) + /// MOVNTPS m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(float* address, Vector512 source) { throw new PlatformNotSupportedException; } + /// + /// void _mm512_stream_pd (double * mem_addr, __m512d a) + /// MOVNTPD m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(double* address, Vector512 source) { throw new PlatformNotSupportedException; } + + /// + /// __m512 _mm512_xor_ps (__m512 a, __m512 b) + /// VXORPS zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512d _mm512_xor_pd (__m512d a, __m512d b) + /// VXORPS zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) + /// VPXOR zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) + /// VPXOR zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) + /// VPXOR zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) + /// VPXOR zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) + /// VPXOR zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) + /// VPXOR zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) + /// VPXOR zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + /// + /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) + /// VPXOR zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.cs index afd7d48a622c1..e8d64a3373aaa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.cs @@ -30,5 +30,464 @@ internal X64() { } public static new bool IsSupported { get => IsSupported; } } + + /// + /// __m512i _mm512_and_si512 (__m512i a, __m512i b) + /// VPAND zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); + /// + /// __m512i _mm512_and_si512 (__m512i a, __m512i b) + /// VPAND zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); + /// + /// __m512i _mm512_and_si512 (__m512i a, __m512i b) + /// VPAND zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); + /// + /// __m512i _mm512_and_si512 (__m512i a, __m512i b) + /// VPAND zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); + /// + /// __m512i _mm512_and_si512 (__m512i a, __m512i b) + /// VPAND zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); + /// + /// __m512i _mm512_and_si512 (__m512i a, __m512i b) + /// VPAND zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); + /// + /// __m512i _mm512_and_si512 (__m512i a, __m512i b) + /// VPAND zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); + /// + /// __m512i _mm512_and_si512 (__m512i a, __m512i b) + /// VPAND zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); + /// + /// __m512 _mm512_and_ps (__m512 a, __m512 b) + /// VANDPS zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); + /// + /// __m512d _mm512_and_pd (__m512d a, __m512d b) + /// VANDPD zmm, zmm, zmm/m512 + /// + public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); + + /// + /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) + /// VPANDN zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); + /// + /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) + /// VPANDN zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); + /// + /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) + /// VPANDN zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); + /// + /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) + /// VPANDN zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); + /// + /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) + /// VPANDN zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); + /// + /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) + /// VPANDN zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); + /// + /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) + /// VPANDN zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); + /// + /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) + /// VPANDN zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); + /// + /// __m512 _mm512_andnot_ps (__m512 a, __m512 b) + /// VANDNPS zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); + /// + /// __m512d _mm512_andnot_pd (__m512d a, __m512d b) + /// VANDNPD zmm, zmm, zmm/m512 + /// + public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); + + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU32 zmm, m512 + /// + public static unsafe Vector512 LoadVector512(sbyte* address) => LoadVector512(address); + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU32 zmm, m512 + /// + public static unsafe Vector512 LoadVector512(byte* address) => LoadVector512(address); + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU32 zmm, m512 + /// + public static unsafe Vector512 LoadVector512(short* address) => LoadVector512(address); + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU32 zmm, m512 + /// + public static unsafe Vector512 LoadVector512(ushort* address) => LoadVector512(address); + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU32 zmm, m512 + /// + public static unsafe Vector512 LoadVector512(int* address) => LoadVector512(address); + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU32 zmm, m512 + /// + public static unsafe Vector512 LoadVector512(uint* address) => LoadVector512(address); + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU64 zmm, m512 + /// + public static unsafe Vector512 LoadVector512(long* address) => LoadVector512(address); + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU64 zmm, m512 + /// + public static unsafe Vector512 LoadVector512(ulong* address) => LoadVector512(address); + /// + /// __m512 _mm512_loadu_ps (float const * mem_addr) + /// VMOVUPS zmm, zmm/m512 + /// + public static unsafe Vector512 LoadVector512(float* address) => LoadVector512(address); + /// + /// __m512d _mm512_loadu_pd (double const * mem_addr) + /// VMOVUPD zmm, zmm/m512 + /// + public static unsafe Vector512 LoadVector512(double* address) => LoadVector512(address); + + /// + /// __m512i _mm512_load_si512 (__m512i const * mem_addr) + /// VMOVDQA32 zmm, m512 + /// + public static unsafe Vector512 LoadAlignedVector512(sbyte* address) => LoadAlignedVector512(address); + /// + /// __m512i _mm512_load_si512 (__m512i const * mem_addr) + /// VMOVDQA32 zmm, m512 + /// + public static unsafe Vector512 LoadAlignedVector512(byte* address) => LoadAlignedVector512(address); + /// + /// __m512i _mm512_load_si512 (__m512i const * mem_addr) + /// VMOVDQA32 zmm, m512 + /// + public static unsafe Vector512 LoadAlignedVector512(short* address) => LoadAlignedVector512(address); + /// + /// __m512i _mm512_load_si512 (__m512i const * mem_addr) + /// VMOVDQA32 zmm, m512 + /// + public static unsafe Vector512 LoadAlignedVector512(ushort* address) => LoadAlignedVector512(address); + /// + /// __m512i _mm512_load_si512 (__m512i const * mem_addr) + /// VMOVDQA32 zmm, m512 + /// + public static unsafe Vector512 LoadAlignedVector512(int* address) => LoadAlignedVector512(address); + /// + /// __m512i _mm512_load_si512 (__m512i const * mem_addr) + /// VMOVDQA32 zmm, m512 + /// + public static unsafe Vector512 LoadAlignedVector512(uint* address) => LoadAlignedVector512(address); + /// + /// __m512i _mm512_load_si512 (__m512i const * mem_addr) + /// VMOVDQA64 zmm, m512 + /// + public static unsafe Vector512 LoadAlignedVector512(long* address) => LoadAlignedVector512(address); + /// + /// __m512i _mm512_load_si512 (__m512i const * mem_addr) + /// VMOVDQA64 zmm, m512 + /// + public static unsafe Vector512 LoadAlignedVector512(ulong* address) => LoadAlignedVector512(address); + /// + /// __m512 _mm512_load_ps (float const * mem_addr) + /// VMOVAPS zmm, zmm/m512 + /// + public static unsafe Vector512 LoadAlignedVector512(float* address) => LoadAlignedVector512(address); + /// + /// __m512d _mm512_load_pd (double const * mem_addr) + /// VMOVAPD zmm, zmm/m512 + /// + public static unsafe Vector512 LoadAlignedVector512(double* address) => LoadAlignedVector512(address); + + /// + /// __m512 _mm512_or_ps (__m512 a, __m512 b) + /// VORPS zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); + /// + /// __m512d _mm512_or_pd (__m512d a, __m512d b) + /// VORPD zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPOR zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPOR zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPOR zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPOR zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPOR zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPOR zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPOR zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPOR zmm, zmm, zmm/m512 + /// + public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); + + /// + /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQU32 m512, zmm + /// + public static unsafe void Store(sbyte* address, Vector512 source) => Store(address, source); + /// + /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQU32 m512, zmm + /// + public static unsafe void Store(byte* address, Vector512 source) => Store(address, source); + /// + /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQU32 m512, zmm + /// + public static unsafe void Store(short* address, Vector512 source) => Store(address, source); + /// + /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQU32 m512, zmm + /// + public static unsafe void Store(ushort* address, Vector512 source) => Store(address, source); + /// + /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQU32 m512, zmm + /// + public static unsafe void Store(int* address, Vector512 source) => Store(address, source); + /// + /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQU32 m512, zmm + /// + public static unsafe void Store(uint* address, Vector512 source) => Store(address, source); + /// + /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQU64 m512, zmm + /// + public static unsafe void Store(long* address, Vector512 source) => Store(address, source); + /// + /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQU64 m512, zmm + /// + public static unsafe void Store(ulong* address, Vector512 source) => Store(address, source); + /// + /// void _mm512_storeu_ps (float * mem_addr, __m512 a) + /// VMOVUPS m512, zmm + /// + public static unsafe void Store(float* address, Vector512 source) => Store(address, source); + /// + /// void _mm512_storeu_pd (double * mem_addr, __m512d a) + /// VMOVUPD m512, zmm + /// + public static unsafe void Store(double* address, Vector512 source) => Store(address, source); + + /// + /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQA32 m512, zmm + /// + public static unsafe void StoreAligned(sbyte* address, Vector512 source) => StoreAligned(address, source); + /// + /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQA32 m512, zmm + /// + public static unsafe void StoreAligned(byte* address, Vector512 source) => StoreAligned(address, source); + /// + /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQA32 m512, zmm + /// + public static unsafe void StoreAligned(short* address, Vector512 source) => StoreAligned(address, source); + /// + /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQA32 m512, zmm + /// + public static unsafe void StoreAligned(ushort* address, Vector512 source) => StoreAligned(address, source); + /// + /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQA32 m512, zmm + /// + public static unsafe void StoreAligned(int* address, Vector512 source) => StoreAligned(address, source); + /// + /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQA32 m512, zmm + /// + public static unsafe void StoreAligned(uint* address, Vector512 source) => StoreAligned(address, source); + /// + /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQA64 m512, zmm + /// + public static unsafe void StoreAligned(long* address, Vector512 source) => StoreAligned(address, source); + /// + /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) + /// VMOVDQA64 m512, zmm + /// + public static unsafe void StoreAligned(ulong* address, Vector512 source) => StoreAligned(address, source); + /// + /// void _mm512_store_ps (float * mem_addr, __m512 a) + /// VMOVAPS m512, zmm + /// + public static unsafe void StoreAligned(float* address, Vector512 source) => StoreAligned(address, source); + /// + /// void _mm512_store_pd (double * mem_addr, __m512d a) + /// VMOVAPD m512, zmm + /// + public static unsafe void StoreAligned(double* address, Vector512 source) => StoreAligned(address, source); + + /// + /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) + /// VMOVNTDQ m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(sbyte* address, Vector512 source) => StoreAlignedNonTemporal(address, source); + /// + /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) + /// VMOVNTDQ m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(byte* address, Vector512 source) => StoreAlignedNonTemporal(address, source); + /// + /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) + /// VMOVNTDQ m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(short* address, Vector512 source) => StoreAlignedNonTemporal(address, source); + /// + /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) + /// VMOVNTDQ m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(ushort* address, Vector512 source) => StoreAlignedNonTemporal(address, source); + /// + /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) + /// VMOVNTDQ m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(int* address, Vector512 source) => StoreAlignedNonTemporal(address, source); + /// + /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) + /// VMOVNTDQ m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(uint* address, Vector512 source) => StoreAlignedNonTemporal(address, source); + /// + /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) + /// VMOVNTDQ m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(long* address, Vector512 source) => StoreAlignedNonTemporal(address, source); + /// + /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) + /// VMOVNTDQ m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(ulong* address, Vector512 source) => StoreAlignedNonTemporal(address, source); + /// + /// void _mm512_stream_ps (float * mem_addr, __m512 a) + /// MOVNTPS m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(float* address, Vector512 source) => StoreAlignedNonTemporal(address, source); + /// + /// void _mm512_stream_pd (double * mem_addr, __m512d a) + /// MOVNTPD m512, zmm + /// + public static unsafe void StoreAlignedNonTemporal(double* address, Vector512 source) => StoreAlignedNonTemporal(address, source); + + /// + /// __m512 _mm512_xor_ps (__m512 a, __m512 b) + /// VXORPS zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); + /// + /// __m512d _mm512_xor_pd (__m512d a, __m512d b) + /// VXORPS zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); + /// + /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) + /// VPXOR zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); + /// + /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) + /// VPXOR zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); + /// + /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) + /// VPXOR zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); + /// + /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) + /// VPXOR zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); + /// + /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) + /// VPXOR zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); + /// + /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) + /// VPXOR zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); + /// + /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) + /// VPXOR zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); + /// + /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) + /// VPXOR zmm, zmm, zmm/m512 + /// + public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); } } diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index be37529d8ec9b..43000122b4dad 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4426,6 +4426,96 @@ public abstract partial class Avx512F : System.Runtime.Intrinsics.X86.Avx2 { internal Avx512F() { } public static new bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector512 And(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 And(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 And(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 And(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 And(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 And(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 And(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 And(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 And(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 And(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 AndNot(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 AndNot(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 AndNot(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 AndNot(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 AndNot(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 AndNot(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 AndNot(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 AndNot(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 AndNot(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 AndNot(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadAlignedVector512(byte* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadAlignedVector512(double* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadAlignedVector512(short* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadAlignedVector512(int* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadAlignedVector512(long* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadAlignedVector512(sbyte* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadAlignedVector512(float* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadAlignedVector512(ushort* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadAlignedVector512(uint* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadAlignedVector512(ulong* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadVector512(byte* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadVector512(double* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadVector512(short* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadVector512(int* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadVector512(long* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadVector512(sbyte* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadVector512(float* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadVector512(ushort* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadVector512(uint* address) { throw null; } + public unsafe static System.Runtime.Intrinsics.Vector512 LoadVector512(ulong* address) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Or(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Or(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Or(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Or(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Or(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Or(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Or(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Or(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Or(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Or(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public unsafe static void Store(byte* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void Store(double* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void Store(short* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void Store(int* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void Store(long* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void Store(sbyte* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void Store(float* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void Store(ushort* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void Store(uint* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void Store(ulong* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAligned(byte* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAligned(double* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAligned(short* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAligned(int* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAligned(long* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAligned(sbyte* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAligned(float* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAligned(ushort* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAligned(uint* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAligned(ulong* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAlignedNonTemporal(byte* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAlignedNonTemporal(double* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAlignedNonTemporal(short* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAlignedNonTemporal(int* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAlignedNonTemporal(long* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAlignedNonTemporal(sbyte* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAlignedNonTemporal(float* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAlignedNonTemporal(ushort* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAlignedNonTemporal(uint* address, System.Runtime.Intrinsics.Vector512 source) { } + public unsafe static void StoreAlignedNonTemporal(ulong* address, System.Runtime.Intrinsics.Vector512 source) { } + public static System.Runtime.Intrinsics.Vector512 Xor(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Xor(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Xor(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Xor(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Xor(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Xor(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Xor(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Xor(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Xor(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Xor(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } public abstract partial class VL { internal VL() { } diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs index 52f7d523ea61d..968a2ede9ab9d 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs @@ -1077,6 +1077,61 @@ ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), }; +(string templateFileName, Dictionary templateData)[] Avx512FInputs = new [] +{ + + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "And", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "(byte)(left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(byte)(left[i] & right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "And", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "(BitConverter.DoubleToInt64Bits(left[0]) & BitConverter.DoubleToInt64Bits(right[0])) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.DoubleToInt64Bits(left[i]) & BitConverter.DoubleToInt64Bits(right[i])) != BitConverter.DoubleToInt64Bits(result[i])"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "And", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "(short)(left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(short)(left[i] & right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "And", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "(int)(left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(int)(left[i] & right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "And", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "(long)(left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(long)(left[i] & right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "And", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "(sbyte)(left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(sbyte)(left[i] & right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "And", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "(BitConverter.SingleToInt32Bits(left[0]) & BitConverter.SingleToInt32Bits(right[0])) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.SingleToInt32Bits(left[i]) & BitConverter.SingleToInt32Bits(right[i])) != BitConverter.SingleToInt32Bits(result[i])"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "And", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "(ushort)(left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(ushort)(left[i] & right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "And", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "(uint)(left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(uint)(left[i] & right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "And", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "(ulong)(left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(ulong)(left[i] & right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "AndNot", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "(byte)(~left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(byte)(~left[i] & right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "AndNot", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "((~BitConverter.DoubleToInt64Bits(left[0])) & BitConverter.DoubleToInt64Bits(right[0])) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "((~BitConverter.DoubleToInt64Bits(left[i])) & BitConverter.DoubleToInt64Bits(right[i])) != BitConverter.DoubleToInt64Bits(result[i])"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "AndNot", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "(short)(~left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(short)(~left[i] & right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "AndNot", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "(int)(~left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(int)(~left[i] & right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "AndNot", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "(long)(~left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(long)(~left[i] & right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "AndNot", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "(sbyte)(~left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(sbyte)(~left[i] & right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "AndNot", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "((~BitConverter.SingleToInt32Bits(left[0])) & BitConverter.SingleToInt32Bits(right[0])) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "((~BitConverter.SingleToInt32Bits(left[i])) & BitConverter.SingleToInt32Bits(right[i])) != BitConverter.SingleToInt32Bits(result[i])"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "AndNot", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "(ushort)(~left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(ushort)(~left[i] & right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "AndNot", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "(uint)(~left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(uint)(~left[i] & right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "AndNot", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "(ulong)(~left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(ulong)(~left[i] & right[i]) != result[i]"}), + ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["Method"] = "LoadVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(firstOp[i]) != BitConverter.SingleToInt32Bits(result[i])"}), + ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["Method"] = "LoadVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(firstOp[i]) != BitConverter.DoubleToInt64Bits(result[i])"}), + ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["Method"] = "LoadVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "firstOp[i] != result[i]"}), + ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["Method"] = "LoadVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "firstOp[i] != result[i]"}), + ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["Method"] = "LoadVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "firstOp[i] != result[i]"}), + ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["Method"] = "LoadVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "firstOp[i] != result[i]"}), + ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["Method"] = "LoadVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "firstOp[i] != result[i]"}), + ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["Method"] = "LoadVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "firstOp[i] != result[i]"}), + ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["Method"] = "LoadVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "firstOp[i] != result[i]"}), + ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["Method"] = "LoadVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "firstOp[i] != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Or", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "(byte)(left[0] | right[0]) != result[0]", ["ValidateRemainingResults"] = "(byte)(left[i] | right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Or", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "(BitConverter.DoubleToInt64Bits(left[0]) | BitConverter.DoubleToInt64Bits(right[0])) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.DoubleToInt64Bits(left[i]) | BitConverter.DoubleToInt64Bits(right[i])) != BitConverter.DoubleToInt64Bits(result[i])"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Or", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "(short)(left[0] | right[0]) != result[0]", ["ValidateRemainingResults"] = "(short)(left[i] | right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Or", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "(int)(left[0] | right[0]) != result[0]", ["ValidateRemainingResults"] = "(int)(left[i] | right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Or", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "(long)(left[0] | right[0]) != result[0]", ["ValidateRemainingResults"] = "(long)(left[i] | right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Or", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "(sbyte)(left[0] | right[0]) != result[0]", ["ValidateRemainingResults"] = "(sbyte)(left[i] | right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Or", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "(BitConverter.SingleToInt32Bits(left[0]) | BitConverter.SingleToInt32Bits(right[0])) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.SingleToInt32Bits(left[i]) | BitConverter.SingleToInt32Bits(right[i])) != BitConverter.SingleToInt32Bits(result[i])"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Or", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "(ushort)(left[0] | right[0]) != result[0]", ["ValidateRemainingResults"] = "(ushort)(left[i] | right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Or", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "(uint)(left[0] | right[0]) != result[0]", ["ValidateRemainingResults"] = "(uint)(left[i] | right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Or", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "(ulong)(left[0] | right[0]) != result[0]", ["ValidateRemainingResults"] = "(ulong)(left[i] | right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Xor", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "(byte)(left[0] ^ right[0]) != result[0]", ["ValidateRemainingResults"] = "(byte)(left[i] ^ right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Xor", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "(BitConverter.SingleToInt32Bits(left[0]) ^ BitConverter.SingleToInt32Bits(right[0])) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.SingleToInt32Bits(left[i]) ^ BitConverter.SingleToInt32Bits(right[i])) != BitConverter.SingleToInt32Bits(result[i])"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Xor", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "(short)(left[0] ^ right[0]) != result[0]", ["ValidateRemainingResults"] = "(short)(left[i] ^ right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Xor", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "(int)(left[0] ^ right[0]) != result[0]", ["ValidateRemainingResults"] = "(int)(left[i] ^ right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Xor", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "(long)(left[0] ^ right[0]) != result[0]", ["ValidateRemainingResults"] = "(long)(left[i] ^ right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Xor", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "(sbyte)(left[0] ^ right[0]) != result[0]", ["ValidateRemainingResults"] = "(sbyte)(left[i] ^ right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Xor", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "(BitConverter.DoubleToInt64Bits(left[0]) ^ BitConverter.DoubleToInt64Bits(right[0])) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.DoubleToInt64Bits(left[i]) ^ BitConverter.DoubleToInt64Bits(right[i])) != BitConverter.DoubleToInt64Bits(result[i])"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Xor", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "(ushort)(left[0] ^ right[0]) != result[0]", ["ValidateRemainingResults"] = "(ushort)(left[i] ^ right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Xor", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "(uint)(left[0] ^ right[0]) != result[0]", ["ValidateRemainingResults"] = "(uint)(left[i] ^ right[i]) != result[i]"}), + ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Xor", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "(ulong)(left[0] ^ right[0]) != result[0]", ["ValidateRemainingResults"] = "(ulong)(left[i] ^ right[i]) != result[i]"}), +}; + (string templateFileName, Dictionary templateData)[] Fma_Vector128Inputs = new [] { ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Fma", ["LoadIsa"] = "Sse2", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(Math.Round(double.FusedMultiplyAdd(firstOp[0], secondOp[0], thirdOp[0]), 9)) != BitConverter.DoubleToInt64Bits(Math.Round(result[0], 9))", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(Math.Round(double.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]), 9)) != BitConverter.DoubleToInt64Bits(Math.Round(result[i], 9))"}), @@ -1316,6 +1371,7 @@ bool isImmTemplate(string name) ProcessInputs("Avx1_Vector128", Avx1_Vector128Inputs); ProcessInputs("Avx2", Avx2Inputs); ProcessInputs("Avx2_Vector128", Avx2_Vector128Inputs); +ProcessInputs("Avx512F", Avx512FInputs); ProcessInputs("Fma_Vector128", Fma_Vector128Inputs); ProcessInputs("Fma_Vector256", Fma_Vector256Inputs); ProcessInputs("Bmi1", Bmi1Inputs); diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Avx512F_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Avx512F_handwritten_r.csproj new file mode 100644 index 0000000000000..3409daf6a63e7 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Avx512F_handwritten_r.csproj @@ -0,0 +1,20 @@ + + + X86_Avx512F_handwritten_r + false + true + + + Embedded + + + + + + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Avx512F_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Avx512F_handwritten_ro.csproj new file mode 100644 index 0000000000000..21cf81645b373 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Avx512F_handwritten_ro.csproj @@ -0,0 +1,20 @@ + + + X86_Avx512F_handwritten_ro + false + true + + + Embedded + True + + + + + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Avx512F_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Avx512F_r.csproj new file mode 100644 index 0000000000000..92f1c39cf18e3 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Avx512F_r.csproj @@ -0,0 +1,15 @@ + + + X86_Avx512F_r + false + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Avx512F_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Avx512F_ro.csproj new file mode 100644 index 0000000000000..bc5fee89f90fa --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Avx512F_ro.csproj @@ -0,0 +1,15 @@ + + + X86_Avx512F_ro + false + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/LoadAlignedVector512.cs b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/LoadAlignedVector512.cs new file mode 100644 index 0000000000000..7fdf127fbacaa --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/LoadAlignedVector512.cs @@ -0,0 +1,267 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; +using Xunit; + +namespace IntelHardwareIntrinsicTest.Avx512F +{ + public partial class Program + { + [Fact] + public static unsafe void LoadAlignedVector512() + { + int testResult = Pass; + + if (Avx512F.IsSupported) + { + { + byte* inBuffer = stackalloc byte[128]; + float* inArray = (float*)Align(inBuffer, 64); + float* outArray = stackalloc float[16]; + var vf = Avx512F.LoadAlignedVector512(inArray); + Unsafe.Write(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (BitConverter.SingleToInt32Bits(inArray[i]) != BitConverter.SingleToInt32Bits(outArray[i])) + { + Console.WriteLine("AVX512F LoadAlignedVector512 failed on float:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + byte* inBuffer = stackalloc byte[128]; + double* inArray = (double*)Align(inBuffer, 64); + double* outArray = stackalloc double[8]; + var vf = Avx512F.LoadAlignedVector512(inArray); + Unsafe.Write(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (BitConverter.DoubleToInt64Bits(inArray[i]) != BitConverter.DoubleToInt64Bits(outArray[i])) + { + Console.WriteLine("AVX512F LoadAlignedVector512 failed on double:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + byte* inBuffer = stackalloc byte[128]; + int* inArray = (int*)Align(inBuffer, 64); + int* outArray = stackalloc int[16]; + var vf = Avx512F.LoadAlignedVector512(inArray); + Unsafe.Write(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F LoadAlignedVector512 failed on int:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + byte* inBuffer = stackalloc byte[128]; + long* inArray = (long*)Align(inBuffer, 64); + long* outArray = stackalloc long[8]; + var vf = Avx512F.LoadAlignedVector512(inArray); + Unsafe.Write(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F LoadAlignedVector512 failed on long:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + byte* inBuffer = stackalloc byte[128]; + uint* inArray = (uint*)Align(inBuffer, 64); + uint* outArray = stackalloc uint[16]; + var vf = Avx512F.LoadAlignedVector512(inArray); + Unsafe.Write(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F LoadAlignedVector512 failed on uint:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + byte* inBuffer = stackalloc byte[128]; + ulong* inArray = (ulong*)Align(inBuffer, 64); + ulong* outArray = stackalloc ulong[8]; + var vf = Avx512F.LoadAlignedVector512(inArray); + Unsafe.Write(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F LoadAlignedVector512 failed on ulong:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + byte* inBuffer = stackalloc byte[128]; + short* inArray = (short*)Align(inBuffer, 64); + short* outArray = stackalloc short[32]; + var vf = Avx512F.LoadAlignedVector512(inArray); + Unsafe.Write(outArray, vf); + + for (var i = 0; i < 32; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F LoadAlignedVector512 failed on short:"); + for (var n = 0; n < 32; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + byte* inBuffer = stackalloc byte[128]; + ushort* inArray = (ushort*)Align(inBuffer, 64); + ushort* outArray = stackalloc ushort[32]; + var vf = Avx512F.LoadAlignedVector512(inArray); + Unsafe.Write(outArray, vf); + + for (var i = 0; i < 32; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F LoadAlignedVector512 failed on ushort:"); + for (var n = 0; n < 32; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + byte* inBuffer = stackalloc byte[128]; + sbyte* inArray = (sbyte*)Align(inBuffer, 64); + sbyte* outArray = stackalloc sbyte[64]; + var vf = Avx512F.LoadAlignedVector512(inArray); + Unsafe.Write(outArray, vf); + + for (var i = 0; i < 64; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F LoadAlignedVector512 failed on sbyte:"); + for (var n = 0; n < 64; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + byte* inBuffer = stackalloc byte[128]; + byte* inArray = (byte*)Align(inBuffer, 64); + byte* outArray = stackalloc byte[64]; + var vf = Avx512F.LoadAlignedVector512(inArray); + Unsafe.Write(outArray, vf); + + for (var i = 0; i < 64; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F LoadAlignedVector512 failed on byte:"); + for (var n = 0; n < 64; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + } + + Assert.Equal(Pass, testResult); + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Program.Avx512F.cs b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Program.Avx512F.cs new file mode 100644 index 0000000000000..b0a8b60c9b15b --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Program.Avx512F.cs @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +namespace JIT.HardwareIntrinsics.X86._Avx512F +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Store.cs b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Store.cs new file mode 100644 index 0000000000000..f5ac48b6bf7ea --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Store.cs @@ -0,0 +1,198 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; +using Xunit; + +namespace IntelHardwareIntrinsicTest.Avx512F +{ + public partial class Program + { + [Fact] + public static unsafe void Store() + { + int testResult = Pass; + + if (Avx512F.IsSupported) + { + using (TestTable doubleTable = new TestTable(new double[8] { 1, -5, 100, 0, 1, 2, 3, 4 }, new double[8])) + { + var vf = Unsafe.Read>(doubleTable.inArrayPtr); + Avx512F.Store((double*)(doubleTable.outArrayPtr), vf); + + if (!doubleTable.CheckResult((x, y) => BitConverter.DoubleToInt64Bits(x) == BitConverter.DoubleToInt64Bits(y))) + { + Console.WriteLine("AVX512F Store failed on double:"); + foreach (var item in doubleTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable floatTable = new TestTable(new float[16] { 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4 }, new float[16])) + { + var vf = Unsafe.Read>(floatTable.inArrayPtr); + Avx512F.Store((float*)(floatTable.outArrayPtr), vf); + + if (!floatTable.CheckResult((x, y) => BitConverter.SingleToInt32Bits(x) == BitConverter.SingleToInt32Bits(y))) + { + Console.WriteLine("AVX512F Store failed on float:"); + foreach (var item in floatTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new long[8] { 1, -5, 100, 0, 1, 2, 3, 4 }, new long[8])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Avx512F.Store((long*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((long x, long y) => x == y)) + { + Console.WriteLine("AVX512F Store failed on long:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new ulong[8] { 1, 5, 100, 0, 1, 2, 3, 4 }, new ulong[8])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Avx512F.Store((ulong*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((ulong x, ulong y) => x == y)) + { + Console.WriteLine("AVX512F Store failed on ulong:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new int[16] { 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4 }, new int[16])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Avx512F.Store((int*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((int x, int y) => x == y)) + { + Console.WriteLine("AVX512F Store failed on int:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new uint[16] { 1, 5, 100, 0, 1, 2, 3, 4, 1, 5, 100, 0, 1, 2, 3, 4 }, new uint[16])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Avx512F.Store((uint*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((uint x, uint y) => x == y)) + { + Console.WriteLine("AVX512F Store failed on uint:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new short[32] { 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4. 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4 }, new short[32])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Avx512F.Store((short*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((short x, short y) => x == y)) + { + Console.WriteLine("AVX512F Store failed on short:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new ushort[32] { 1, 5, 100, 0, 1, 2, 3, 4, 1, 5, 100, 0, 1, 2, 3, 4, 1, 5, 100, 0, 1, 2, 3, 4, 1, 5, 100, 0, 1, 2, 3, 4 }, new ushort[32])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Avx512F.Store((ushort*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((ushort x, ushort y) => x == y)) + { + Console.WriteLine("AVX512F Store failed on ushort:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new sbyte[64] { 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4 }, new sbyte[64])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Avx512F.Store((sbyte*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((sbyte x, sbyte y) => x == y)) + { + Console.WriteLine("AVX512F Store failed on sbyte:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + using (TestTable intTable = new TestTable(new byte[64] { 1, 5, 100, 0, 1, 2, 3, 4, 1, 5, 100, 0, 1, 2, 3, 4, 1, 5, 100, 0, 1, 2, 3, 4, 1, 5, 100, 0, 1, 2, 3, 4, 1, 5, 100, 0, 1, 2, 3, 4, 1, 5, 100, 0, 1, 2, 3, 4, 1, 5, 100, 0, 1, 2, 3, 4, 1, 5, 100, 0, 1, 2, 3, 4 }, new byte[64])) + { + var vf = Unsafe.Read>(intTable.inArrayPtr); + Avx512F.Store((byte*)(intTable.outArrayPtr), vf); + + if (!intTable.CheckResult((byte x, byte y) => x == y)) + { + Console.WriteLine("AVX512F Store failed on byte:"); + foreach (var item in intTable.outArray) + { + Console.Write(item + ", "); + } + Console.WriteLine(); + testResult = Fail; + } + } + + } + + Assert.Equal(Pass, testResult); + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/StoreAligned.cs b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/StoreAligned.cs new file mode 100644 index 0000000000000..c36a603487f84 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/StoreAligned.cs @@ -0,0 +1,277 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; +using Xunit; + +namespace IntelHardwareIntrinsicTest.Avx512F +{ + public partial class Program + { + [Fact] + public static unsafe void StoreAligned() + { + int testResult = Pass; + + if (Avx512F.IsSupported) + { + { + double* inArray = stackalloc double[8]; + byte* outBuffer = stackalloc byte[128]; + double* outArray = (double*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAligned(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (BitConverter.DoubleToInt64Bits(inArray[i]) != BitConverter.DoubleToInt64Bits(outArray[i])) + { + Console.WriteLine("AVX512F StoreAligned failed on double:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + float* inArray = stackalloc float[16]; + byte* outBuffer = stackalloc byte[128]; + float* outArray = (float*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAligned(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (BitConverter.SingleToInt32Bits(inArray[i]) != BitConverter.SingleToInt32Bits(outArray[i])) + { + Console.WriteLine("AVX512F StoreAligned failed on float:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + long* inArray = stackalloc long[8]; + byte* outBuffer = stackalloc byte[128]; + long* outArray = (long*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAligned(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F StoreAligned failed on long:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + ulong* inArray = stackalloc ulong[8]; + byte* outBuffer = stackalloc byte[128]; + ulong* outArray = (ulong*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAligned(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F StoreAligned failed on ulong:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + int* inArray = stackalloc int[16]; + byte* outBuffer = stackalloc byte[128]; + int* outArray = (int*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAligned(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F StoreAligned failed on int:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + uint* inArray = stackalloc uint[16]; + byte* outBuffer = stackalloc byte[128]; + uint* outArray = (uint*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAligned(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F StoreAligned failed on uint:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + short* inArray = stackalloc short[32]; + byte* outBuffer = stackalloc byte[128]; + short* outArray = (short*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAligned(outArray, vf); + + for (var i = 0; i < 32; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F StoreAligned failed on short:"); + for (var n = 0; n < 32; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + ushort* inArray = stackalloc ushort[32]; + byte* outBuffer = stackalloc byte[128]; + ushort* outArray = (ushort*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAligned(outArray, vf); + + for (var i = 0; i < 32; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F StoreAligned failed on ushort:"); + for (var n = 0; n < 32; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + byte* inArray = stackalloc byte[64]; + byte* outBuffer = stackalloc byte[128]; + byte* outArray = (byte*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAligned(outArray, vf); + + for (var i = 0; i < 64; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F StoreAligned failed on byte:"); + for (var n = 0; n < 64; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + sbyte* inArray = stackalloc sbyte[64]; + byte* outBuffer = stackalloc byte[128]; + sbyte* outArray = (sbyte*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAligned(outArray, vf); + + for (var i = 0; i < 64; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F StoreAligned failed on byte:"); + for (var n = 0; n < 64; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + } + + Assert.Equal(Pass, testResult); + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/StoreAlignedNonTemporal.cs b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/StoreAlignedNonTemporal.cs new file mode 100644 index 0000000000000..b0805c717eba7 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/StoreAlignedNonTemporal.cs @@ -0,0 +1,277 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; +using Xunit; + +namespace IntelHardwareIntrinsicTest.Avx512F +{ + public partial class Program + { + [Fact] + public static unsafe void StoreAlignedNonTemporal() + { + int testResult = Pass; + + if (Avx512F.IsSupported) + { + { + double* inArray = stackalloc double[8]; + byte* outBuffer = stackalloc byte[128]; + double* outArray = (double*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (BitConverter.DoubleToInt64Bits(inArray[i]) != BitConverter.DoubleToInt64Bits(outArray[i])) + { + Console.WriteLine("AVX512F StoreAlignedNonTemporal failed on double:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + float* inArray = stackalloc float[16]; + byte* outBuffer = stackalloc byte[128]; + float* outArray = (float*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (BitConverter.SingleToInt32Bits(inArray[i]) != BitConverter.SingleToInt32Bits(outArray[i])) + { + Console.WriteLine("AVX512F StoreAlignedNonTemporal failed on float:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + long* inArray = stackalloc long[8]; + byte* outBuffer = stackalloc byte[128]; + long* outArray = (long*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F StoreAlignedNonTemporal failed on long:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + ulong* inArray = stackalloc ulong[8]; + byte* outBuffer = stackalloc byte[128]; + ulong* outArray = (ulong*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 8; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F StoreAlignedNonTemporal failed on ulong:"); + for (var n = 0; n < 8; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + int* inArray = stackalloc int[16]; + byte* outBuffer = stackalloc byte[128]; + int* outArray = (int*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F StoreAlignedNonTemporal failed on int:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + uint* inArray = stackalloc uint[16]; + byte* outBuffer = stackalloc byte[128]; + uint* outArray = (uint*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 16; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F StoreAlignedNonTemporal failed on uint:"); + for (var n = 0; n < 16; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + short* inArray = stackalloc short[32]; + byte* outBuffer = stackalloc byte[128]; + short* outArray = (short*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 32; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F StoreAlignedNonTemporal failed on short:"); + for (var n = 0; n < 32; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + ushort* inArray = stackalloc ushort[32]; + byte* outBuffer = stackalloc byte[128]; + ushort* outArray = (ushort*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 32; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F StoreAlignedNonTemporal failed on ushort:"); + for (var n = 0; n < 32; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + byte* inArray = stackalloc byte[64]; + byte* outBuffer = stackalloc byte[128]; + byte* outArray = (byte*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 64; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F StoreAlignedNonTemporal failed on byte:"); + for (var n = 0; n < 64; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + sbyte* inArray = stackalloc sbyte[64]; + byte* outBuffer = stackalloc byte[128]; + sbyte* outArray = (sbyte*)Align(outBuffer, 64); + + var vf = Unsafe.Read>(inArray); + Avx512F.StoreAlignedNonTemporal(outArray, vf); + + for (var i = 0; i < 64; i++) + { + if (inArray[i] != outArray[i]) + { + Console.WriteLine("AVX512F StoreAlignedNonTemporal failed on byte:"); + for (var n = 0; n < 64; n++) + { + Console.Write(outArray[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + } + + Assert.Equal(Pass, testResult); + } + } +} From 45fc6537e78306b446f9591159a8765e27d6e1c3 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 13 Mar 2023 14:28:19 -0700 Subject: [PATCH 3/7] Fix the "throw new PlatformNotSupported" expressions for Avx512F --- .../X86/Avx512F.PlatformNotSupported.cs | 180 +++++++++--------- 1 file changed, 90 insertions(+), 90 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs index c74496886ddfa..63f62aff615aa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs @@ -34,459 +34,459 @@ internal X64() { } /// __m512i _mm512_and_si512 (__m512i a, __m512i b) /// VPAND zmm, zmm, zmm/m512 /// - public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) /// VPAND zmm, zmm, zmm/m512 /// - public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) /// VPAND zmm, zmm, zmm/m512 /// - public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) /// VPAND zmm, zmm, zmm/m512 /// - public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) /// VPAND zmm, zmm, zmm/m512 /// - public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) /// VPAND zmm, zmm, zmm/m512 /// - public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) /// VPAND zmm, zmm, zmm/m512 /// - public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) /// VPAND zmm, zmm, zmm/m512 /// - public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512 _mm512_and_ps (__m512 a, __m512 b) /// VANDPS zmm, zmm, zmm/m512 /// - public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512d _mm512_and_pd (__m512d a, __m512d b) /// VANDPD zmm, zmm, zmm/m512 /// - public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) /// VPANDN zmm, zmm, zmm/m512 /// - public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) /// VPANDN zmm, zmm, zmm/m512 /// - public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) /// VPANDN zmm, zmm, zmm/m512 /// - public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) /// VPANDN zmm, zmm, zmm/m512 /// - public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) /// VPANDN zmm, zmm, zmm/m512 /// - public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) /// VPANDN zmm, zmm, zmm/m512 /// - public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) /// VPANDN zmm, zmm, zmm/m512 /// - public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) /// VPANDN zmm, zmm, zmm/m512 /// - public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512 _mm512_andnot_ps (__m512 a, __m512 b) /// VANDNPS zmm, zmm, zmm/m512 /// - public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512d _mm512_andnot_pd (__m512d a, __m512d b) /// VANDNPD zmm, zmm, zmm/m512 /// - public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) /// VMOVDQU32 zmm, m512 /// - public static unsafe Vector512 LoadVector512(sbyte* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadVector512(sbyte* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) /// VMOVDQU32 zmm, m512 /// - public static unsafe Vector512 LoadVector512(byte* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadVector512(byte* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) /// VMOVDQU32 zmm, m512 /// - public static unsafe Vector512 LoadVector512(short* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadVector512(short* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) /// VMOVDQU32 zmm, m512 /// - public static unsafe Vector512 LoadVector512(ushort* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadVector512(ushort* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) /// VMOVDQU32 zmm, m512 /// - public static unsafe Vector512 LoadVector512(int* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadVector512(int* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) /// VMOVDQU32 zmm, m512 /// - public static unsafe Vector512 LoadVector512(uint* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadVector512(uint* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) /// VMOVDQU64 zmm, m512 /// - public static unsafe Vector512 LoadVector512(long* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadVector512(long* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) /// VMOVDQU64 zmm, m512 /// - public static unsafe Vector512 LoadVector512(ulong* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadVector512(ulong* address) { throw new PlatformNotSupportedException(); } /// /// __m512 _mm512_loadu_ps (float const * mem_addr) /// VMOVUPS zmm, zmm/m512 /// - public static unsafe Vector512 LoadVector512(float* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadVector512(float* address) { throw new PlatformNotSupportedException(); } /// /// __m512d _mm512_loadu_pd (double const * mem_addr) /// VMOVUPD zmm, zmm/m512 /// - public static unsafe Vector512 LoadVector512(double* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadVector512(double* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) /// VMOVDQA32 zmm, m512 /// - public static unsafe Vector512 LoadAlignedVector512(sbyte* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadAlignedVector512(sbyte* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) /// VMOVDQA32 zmm, m512 /// - public static unsafe Vector512 LoadAlignedVector512(byte* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadAlignedVector512(byte* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) /// VMOVDQA32 zmm, m512 /// - public static unsafe Vector512 LoadAlignedVector512(short* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadAlignedVector512(short* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) /// VMOVDQA32 zmm, m512 /// - public static unsafe Vector512 LoadAlignedVector512(ushort* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadAlignedVector512(ushort* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) /// VMOVDQA32 zmm, m512 /// - public static unsafe Vector512 LoadAlignedVector512(int* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadAlignedVector512(int* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) /// VMOVDQA32 zmm, m512 /// - public static unsafe Vector512 LoadAlignedVector512(uint* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadAlignedVector512(uint* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) /// VMOVDQA64 zmm, m512 /// - public static unsafe Vector512 LoadAlignedVector512(long* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadAlignedVector512(long* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) /// VMOVDQA64 zmm, m512 /// - public static unsafe Vector512 LoadAlignedVector512(ulong* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadAlignedVector512(ulong* address) { throw new PlatformNotSupportedException(); } /// /// __m512 _mm512_load_ps (float const * mem_addr) /// VMOVAPS zmm, zmm/m512 /// - public static unsafe Vector512 LoadAlignedVector512(float* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadAlignedVector512(float* address) { throw new PlatformNotSupportedException(); } /// /// __m512d _mm512_load_pd (double const * mem_addr) /// VMOVAPD zmm, zmm/m512 /// - public static unsafe Vector512 LoadAlignedVector512(double* address) { throw new PlatformNotSupportedException; } + public static unsafe Vector512 LoadAlignedVector512(double* address) { throw new PlatformNotSupportedException(); } /// /// __m512 _mm512_or_ps (__m512 a, __m512 b) /// VORPS zmm, zmm, zmm/m512 /// - public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512d _mm512_or_pd (__m512d a, __m512d b) /// VORPD zmm, zmm, zmm/m512 /// - public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) /// VPOR zmm, zmm, zmm/m512 /// - public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) /// VPOR zmm, zmm, zmm/m512 /// - public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) /// VPOR zmm, zmm, zmm/m512 /// - public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) /// VPOR zmm, zmm, zmm/m512 /// - public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) /// VPOR zmm, zmm, zmm/m512 /// - public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) /// VPOR zmm, zmm, zmm/m512 /// - public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) /// VPOR zmm, zmm, zmm/m512 /// - public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) /// VPOR zmm, zmm, zmm/m512 /// - public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) /// VMOVDQU32 m512, zmm /// - public static unsafe void Store(sbyte* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void Store(sbyte* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) /// VMOVDQU32 m512, zmm /// - public static unsafe void Store(byte* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void Store(byte* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) /// VMOVDQU32 m512, zmm /// - public static unsafe void Store(short* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void Store(short* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) /// VMOVDQU32 m512, zmm /// - public static unsafe void Store(ushort* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void Store(ushort* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) /// VMOVDQU32 m512, zmm /// - public static unsafe void Store(int* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void Store(int* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) /// VMOVDQU32 m512, zmm /// - public static unsafe void Store(uint* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void Store(uint* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) /// VMOVDQU64 m512, zmm /// - public static unsafe void Store(long* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void Store(long* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) /// VMOVDQU64 m512, zmm /// - public static unsafe void Store(ulong* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void Store(ulong* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_ps (float * mem_addr, __m512 a) /// VMOVUPS m512, zmm /// - public static unsafe void Store(float* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void Store(float* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_pd (double * mem_addr, __m512d a) /// VMOVUPD m512, zmm /// - public static unsafe void Store(double* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void Store(double* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) /// VMOVDQA32 m512, zmm /// - public static unsafe void StoreAligned(sbyte* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAligned(sbyte* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) /// VMOVDQA32 m512, zmm /// - public static unsafe void StoreAligned(byte* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAligned(byte* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) /// VMOVDQA32 m512, zmm /// - public static unsafe void StoreAligned(short* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAligned(short* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) /// VMOVDQA32 m512, zmm /// - public static unsafe void StoreAligned(ushort* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAligned(ushort* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) /// VMOVDQA32 m512, zmm /// - public static unsafe void StoreAligned(int* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAligned(int* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) /// VMOVDQA32 m512, zmm /// - public static unsafe void StoreAligned(uint* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAligned(uint* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) /// VMOVDQA64 m512, zmm /// - public static unsafe void StoreAligned(long* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAligned(long* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) /// VMOVDQA64 m512, zmm /// - public static unsafe void StoreAligned(ulong* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAligned(ulong* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_ps (float * mem_addr, __m512 a) /// VMOVAPS m512, zmm /// - public static unsafe void StoreAligned(float* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAligned(float* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_pd (double * mem_addr, __m512d a) /// VMOVAPD m512, zmm /// - public static unsafe void StoreAligned(double* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAligned(double* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) /// VMOVNTDQ m512, zmm /// - public static unsafe void StoreAlignedNonTemporal(sbyte* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAlignedNonTemporal(sbyte* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) /// VMOVNTDQ m512, zmm /// - public static unsafe void StoreAlignedNonTemporal(byte* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAlignedNonTemporal(byte* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) /// VMOVNTDQ m512, zmm /// - public static unsafe void StoreAlignedNonTemporal(short* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAlignedNonTemporal(short* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) /// VMOVNTDQ m512, zmm /// - public static unsafe void StoreAlignedNonTemporal(ushort* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAlignedNonTemporal(ushort* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) /// VMOVNTDQ m512, zmm /// - public static unsafe void StoreAlignedNonTemporal(int* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAlignedNonTemporal(int* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) /// VMOVNTDQ m512, zmm /// - public static unsafe void StoreAlignedNonTemporal(uint* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAlignedNonTemporal(uint* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) /// VMOVNTDQ m512, zmm /// - public static unsafe void StoreAlignedNonTemporal(long* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAlignedNonTemporal(long* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) /// VMOVNTDQ m512, zmm /// - public static unsafe void StoreAlignedNonTemporal(ulong* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAlignedNonTemporal(ulong* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_ps (float * mem_addr, __m512 a) /// MOVNTPS m512, zmm /// - public static unsafe void StoreAlignedNonTemporal(float* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAlignedNonTemporal(float* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_pd (double * mem_addr, __m512d a) /// MOVNTPD m512, zmm /// - public static unsafe void StoreAlignedNonTemporal(double* address, Vector512 source) { throw new PlatformNotSupportedException; } + public static unsafe void StoreAlignedNonTemporal(double* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// __m512 _mm512_xor_ps (__m512 a, __m512 b) /// VXORPS zmm, zmm, zmm/m512 /// - public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512d _mm512_xor_pd (__m512d a, __m512d b) /// VXORPS zmm, zmm, zmm/m512 /// - public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) /// VPXOR zmm, zmm, zmm/m512 /// - public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) /// VPXOR zmm, zmm, zmm/m512 /// - public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) /// VPXOR zmm, zmm, zmm/m512 /// - public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) /// VPXOR zmm, zmm, zmm/m512 /// - public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) /// VPXOR zmm, zmm, zmm/m512 /// - public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) /// VPXOR zmm, zmm, zmm/m512 /// - public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) /// VPXOR zmm, zmm, zmm/m512 /// - public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) /// VPXOR zmm, zmm, zmm/m512 /// - public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException; } + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } } } From f57b16295eb4c99ba5ca368798f5e47c073a1165 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 13 Mar 2023 14:50:34 -0700 Subject: [PATCH 4/7] Fixing some test build failures --- .../X86/Avx512F/HandwrittenProgram.cs | 171 ++++++++++++++++++ .../X86/Avx512F/LoadAlignedVector512.cs | 2 +- .../HardwareIntrinsics/X86/Avx512F/Store.cs | 4 +- .../X86/Avx512F/StoreAligned.cs | 2 +- .../X86/Avx512F/StoreAlignedNonTemporal.cs | 2 +- 5 files changed, 176 insertions(+), 5 deletions(-) create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Avx512F/HandwrittenProgram.cs diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/HandwrittenProgram.cs b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/HandwrittenProgram.cs new file mode 100644 index 0000000000000..da191c1c431f7 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/HandwrittenProgram.cs @@ -0,0 +1,171 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; +using Xunit; + +namespace IntelHardwareIntrinsicTest._Avx512F +{ + public partial class Program + { + const int Pass = 100; + const int Fail = 0; + + static unsafe void* Align(byte* buffer, byte expectedAlignment) + { + // Compute how bad the misalignment is, which is at most (expectedAlignment - 1). + // Then subtract that from the expectedAlignment and add it to the original address + // to compute the aligned address. + + var misalignment = expectedAlignment - ((ulong)(buffer) % expectedAlignment); + return (void*)(buffer + misalignment); + } + + public unsafe struct TestTable : IDisposable where T : struct + { + public T[] inArray; + public T[] outArray; + + public void* inArrayPtr => inHandle.AddrOfPinnedObject().ToPointer(); + public void* outArrayPtr => outHandle.AddrOfPinnedObject().ToPointer(); + + GCHandle inHandle; + GCHandle outHandle; + public TestTable(T[] a, T[] b) + { + this.inArray = a; + this.outArray = b; + + inHandle = GCHandle.Alloc(inArray, GCHandleType.Pinned); + outHandle = GCHandle.Alloc(outArray, GCHandleType.Pinned); + } + public bool CheckResult(Func check) + { + return check(inArray, outArray); + } + public bool CheckResult(Func check) + { + for (int i = 0; i < inArray.Length; i++) + { + if (!check(inArray[i], outArray[i])) + { + return false; + } + } + return true; + } + public void Dispose() + { + inHandle.Free(); + outHandle.Free(); + } + } + + public unsafe struct AlignedTestTable : IDisposable where T : struct + { + private byte[] inArray; + public T[] outArray; + + private GCHandle inHandle; + private GCHandle outHandle; + + private byte simdSize; + + public AlignedTestTable(T[] a, T[] b) + { + this.inArray = new byte[64]; + this.outArray = b; + + this.inHandle = GCHandle.Alloc(this.inArray, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.simdSize = 64; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArrayPtr), ref Unsafe.As(ref a[0]), this.simdSize); + } + + public void* inArrayPtr => Align((byte*)(inHandle.AddrOfPinnedObject().ToPointer()), simdSize); + public void* outArrayPtr => outHandle.AddrOfPinnedObject().ToPointer(); + + public bool CheckResult(Func check) + { + for (int i = 0; i < outArray.Length; i++) + { + if (!check(Unsafe.Add(ref Unsafe.AsRef(inArrayPtr), i), outArray[i])) + { + return false; + } + } + return true; + } + + public void Dispose() + { + inHandle.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, byte expectedAlignment) + { + // Compute how bad the misalignment is, which is at most (expectedAlignment - 1). + // Then subtract that from the expectedAlignment and add it to the original address + // to compute the aligned address. + + var misalignment = expectedAlignment - ((ulong)(buffer) % expectedAlignment); + return (void*)(buffer + misalignment); + } + } + + public unsafe struct TestTable_2Input : IDisposable where T : struct + { + public T[] inArray1; + public T[] inArray2; + public T[] outArray; + + public void* inArray1Ptr => inHandle1.AddrOfPinnedObject().ToPointer(); + public void* inArray2Ptr => inHandle2.AddrOfPinnedObject().ToPointer(); + public void* outArrayPtr => outHandle.AddrOfPinnedObject().ToPointer(); + + GCHandle inHandle1; + GCHandle inHandle2; + GCHandle outHandle; + public TestTable_2Input(T[] a, T[] b, T[] c) + { + this.inArray1 = a; + this.inArray2 = b; + this.outArray = c; + + inHandle1 = GCHandle.Alloc(inArray1, GCHandleType.Pinned); + inHandle2 = GCHandle.Alloc(inArray2, GCHandleType.Pinned); + outHandle = GCHandle.Alloc(outArray, GCHandleType.Pinned); + } + public bool CheckResult(Func check) + { + return check(inArray1, inArray2, outArray); + } + public bool CheckResult(Func check) + { + for (int i = 0; i < inArray1.Length; i++) + { + if (!check(inArray1[i], inArray2[i], outArray[i])) + { + return false; + } + } + return true; + } + + public void Dispose() + { + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/LoadAlignedVector512.cs b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/LoadAlignedVector512.cs index 7fdf127fbacaa..71083c3c1461f 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/LoadAlignedVector512.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/LoadAlignedVector512.cs @@ -9,7 +9,7 @@ using System.Runtime.Intrinsics; using Xunit; -namespace IntelHardwareIntrinsicTest.Avx512F +namespace IntelHardwareIntrinsicTest._Avx512F { public partial class Program { diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Store.cs b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Store.cs index f5ac48b6bf7ea..1c767abfa540a 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Store.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/Store.cs @@ -9,7 +9,7 @@ using System.Runtime.Intrinsics; using Xunit; -namespace IntelHardwareIntrinsicTest.Avx512F +namespace IntelHardwareIntrinsicTest._Avx512F { public partial class Program { @@ -122,7 +122,7 @@ public static unsafe void Store() } } - using (TestTable intTable = new TestTable(new short[32] { 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4. 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4 }, new short[32])) + using (TestTable intTable = new TestTable(new short[32] { 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4, 1, -5, 100, 0, 1, 2, 3, 4 }, new short[32])) { var vf = Unsafe.Read>(intTable.inArrayPtr); Avx512F.Store((short*)(intTable.outArrayPtr), vf); diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/StoreAligned.cs b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/StoreAligned.cs index c36a603487f84..2328c84a1e86f 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/StoreAligned.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/StoreAligned.cs @@ -9,7 +9,7 @@ using System.Runtime.Intrinsics; using Xunit; -namespace IntelHardwareIntrinsicTest.Avx512F +namespace IntelHardwareIntrinsicTest._Avx512F { public partial class Program { diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/StoreAlignedNonTemporal.cs b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/StoreAlignedNonTemporal.cs index b0805c717eba7..350940d814384 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/StoreAlignedNonTemporal.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/Avx512F/StoreAlignedNonTemporal.cs @@ -9,7 +9,7 @@ using System.Runtime.Intrinsics; using Xunit; -namespace IntelHardwareIntrinsicTest.Avx512F +namespace IntelHardwareIntrinsicTest._Avx512F { public partial class Program { From 7cb6fe3fa3f4bb2e73ee55a69ea20b0ddea42e6e Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 13 Mar 2023 15:28:02 -0700 Subject: [PATCH 5/7] Ensure the Avx512F and related classes can lightup in import --- src/coreclr/jit/compiler.cpp | 11 ++++- src/coreclr/jit/emitxarch.cpp | 8 ++++ src/coreclr/jit/hwintrinsic.cpp | 22 ++++++--- src/coreclr/jit/hwintrinsicxarch.cpp | 67 ++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index fc77d2d24487e..17df103713957 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2287,7 +2287,16 @@ void Compiler::compSetProcessor() instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW) && instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ)) { - if (!DoJitStressEvexEncoding()) + // Using JitStressEVEXEncoding flag will force instructions which would + // otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding + // This requires AVX512VL support. JitForceEVEXEncoding forces this encoding, thus + // causing failure if not running on compatible hardware. + + // We can't use !DoJitStressEvexEncoding() yet because opts.compSupportsISA hasn't + // been set yet as that's what we're trying to set here + + if (!JitConfig.JitForceEVEXEncoding() && !JitConfig.JitStressEvexEncoding() && + !instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL)) { instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F); instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL); diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index a2704f301a7c9..e7f4b5d1cd5b3 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -17757,9 +17757,17 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_paddusw: case INS_psubusw: case INS_pand: + case INS_vpandd: + case INS_vpandq: case INS_pandn: + case INS_vpandnd: + case INS_vpandnq: case INS_por: + case INS_vpord: + case INS_vporq: case INS_pxor: + case INS_vpxord: + case INS_vpxorq: case INS_andpd: case INS_andps: case INS_andnpd: diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 17fd44b446338..46dbe9c0e1885 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -632,7 +632,7 @@ static bool isSupportedBaseType(NamedIntrinsic intrinsic, CorInfoType baseJitTyp #ifdef DEBUG CORINFO_InstructionSet isa = HWIntrinsicInfo::lookupIsa(intrinsic); #ifdef TARGET_XARCH - assert((isa == InstructionSet_Vector256) || (isa == InstructionSet_Vector128)); + assert((isa == InstructionSet_Vector512) || (isa == InstructionSet_Vector256) || (isa == InstructionSet_Vector128)); #endif // TARGET_XARCH #ifdef TARGET_ARM64 assert((isa == InstructionSet_Vector64) || (isa == InstructionSet_Vector128)); @@ -976,11 +976,23 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, assert(numArgs >= 0); - if (!isScalar && ((HWIntrinsicInfo::lookupIns(intrinsic, simdBaseType) == INS_invalid) || - ((simdSize != 8) && (simdSize != 16) && (simdSize != 32)))) + if (!isScalar) { - assert(!"Unexpected HW Intrinsic"); - return nullptr; + if (HWIntrinsicInfo::lookupIns(intrinsic, simdBaseType) == INS_invalid) + { + assert(!"Unexpected HW intrinsic"); + return nullptr; + } + +#if defined(TARGET_ARM64) + if ((simdSize != 8) && (simdSize != 16)) +#elif defined(TARGET_XARCH) + if ((simdSize != 16) && (simdSize != 32) && (simdSize != 64)) +#endif // TARGET_* + { + assert(!"Unexpected SIMD size"); + return nullptr; + } } GenTree* op1 = nullptr; diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index f3eb33c52c13e..977bb1b320a87 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -36,6 +36,22 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_AVX_X64; case InstructionSet_AVX2: return InstructionSet_AVX2_X64; + case InstructionSet_AVX512BW: + return InstructionSet_AVX512BW_X64; + case InstructionSet_AVX512BW_VL: + return InstructionSet_AVX512BW_VL_X64; + case InstructionSet_AVX512CD: + return InstructionSet_AVX512CD_X64; + case InstructionSet_AVX512CD_VL: + return InstructionSet_AVX512CD_VL_X64; + case InstructionSet_AVX512DQ: + return InstructionSet_AVX512DQ_X64; + case InstructionSet_AVX512DQ_VL: + return InstructionSet_AVX512DQ_VL_X64; + case InstructionSet_AVX512F: + return InstructionSet_AVX512F_X64; + case InstructionSet_AVX512F_VL: + return InstructionSet_AVX512F_VL_X64; case InstructionSet_AVXVNNI: return InstructionSet_AVXVNNI_X64; case InstructionSet_AES: @@ -59,6 +75,31 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) } } +//------------------------------------------------------------------------ +// VLVersionOfIsa: Gets the corresponding AVX512VL only InstructionSet for a given InstructionSet +// +// Arguments: +// isa -- The InstructionSet ID +// +// Return Value: +// The AVX512VL only InstructionSet associated with isa +static CORINFO_InstructionSet VLVersionOfIsa(CORINFO_InstructionSet isa) +{ + switch (isa) + { + case InstructionSet_AVX512BW: + return InstructionSet_AVX512BW_VL; + case InstructionSet_AVX512CD: + return InstructionSet_AVX512CD_VL; + case InstructionSet_AVX512DQ: + return InstructionSet_AVX512DQ_VL; + case InstructionSet_AVX512F: + return InstructionSet_AVX512F_VL; + default: + return InstructionSet_NONE; + } +} + //------------------------------------------------------------------------ // lookupInstructionSet: Gets the InstructionSet for a given class name // @@ -84,6 +125,22 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) { return InstructionSet_AVX2; } + if (strcmp(className, "Avx512BW") == 0) + { + return InstructionSet_AVX512BW; + } + if (strcmp(className, "Avx512CD") == 0) + { + return InstructionSet_AVX512CD; + } + if (strcmp(className, "Avx512DQ") == 0) + { + return InstructionSet_AVX512DQ; + } + if (strcmp(className, "Avx512F") == 0) + { + return InstructionSet_AVX512F; + } if (strcmp(className, "AvxVnni") == 0) { return InstructionSet_AVXVNNI; @@ -152,6 +209,11 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) { return InstructionSet_Vector512; } + else if (strcmp(className, "VL") == 0) + { + assert(!"VL.X64 support doesn't exist in the managed libraries and so is not yet implemented"); + return InstructionSet_ILLEGAL; + } } else if (strcmp(className, "Fma") == 0) { @@ -191,6 +253,11 @@ CORINFO_InstructionSet HWIntrinsicInfo::lookupIsa(const char* className, const c assert(enclosingClassName != nullptr); return X64VersionOfIsa(lookupInstructionSet(enclosingClassName)); } + else if (strcmp(className, "VL") == 0) + { + assert(enclosingClassName != nullptr); + return VLVersionOfIsa(lookupInstructionSet(enclosingClassName)); + } else { return lookupInstructionSet(className); From f5091575fed0a8159ddb438d8f2863445e88ca36 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 13 Mar 2023 16:32:36 -0700 Subject: [PATCH 6/7] Ensure that JitStressEvexEncoding is only checked in debug --- src/coreclr/jit/compiler.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 17df103713957..91ce978c2325d 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2287,6 +2287,7 @@ void Compiler::compSetProcessor() instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW) && instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ)) { +#if defined(DEBUG) // Using JitStressEVEXEncoding flag will force instructions which would // otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding // This requires AVX512VL support. JitForceEVEXEncoding forces this encoding, thus @@ -2295,8 +2296,14 @@ void Compiler::compSetProcessor() // We can't use !DoJitStressEvexEncoding() yet because opts.compSupportsISA hasn't // been set yet as that's what we're trying to set here - if (!JitConfig.JitForceEVEXEncoding() && !JitConfig.JitStressEvexEncoding() && - !instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL)) + bool disableAvx512 = !JitConfig.JitForceEVEXEncoding() && !JitConfig.JitStressEvexEncoding() && + !instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL); +#else + // TODO-XARCH-AVX512: Allow AVX512 support by default + bool disableAvx512 = true; +#endif // DEBUG + + if (disableAvx512) { instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F); instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL); From 0fbec219971531cf81d3dc1bd35a101bfa0c0f2f Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 13 Mar 2023 20:27:12 -0700 Subject: [PATCH 7/7] Allow 64-bit alignment in the test data table types and fix the AVX512 enablement check --- src/coreclr/jit/compiler.cpp | 19 ++++++++++++------- .../Shared/ScalarSimdUnOpTest_DataTable.cs | 2 +- .../Shared/SimdScalarUnOpTest_DataTable.cs | 2 +- .../Shared/SimpleBinOpConvTest_DataTable.cs | 2 +- .../X86/Shared/SimpleBinOpTest_DataTable.cs | 2 +- .../X86/Shared/SimpleUnOpTest_DataTable.cs | 2 +- .../X86/Shared/_BinaryOpTestTemplate.template | 4 ++-- .../_BooleanBinaryOpTestTemplate.template | 2 +- .../_BooleanUnaryOpTestTemplate.template | 2 +- .../Shared/_TernaryOpTestTemplate.template | 2 +- .../X86/Shared/_UnaryOpTestTemplate.template | 2 +- 11 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 91ce978c2325d..896075a57aae8 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2287,7 +2287,6 @@ void Compiler::compSetProcessor() instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW) && instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ)) { -#if defined(DEBUG) // Using JitStressEVEXEncoding flag will force instructions which would // otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding // This requires AVX512VL support. JitForceEVEXEncoding forces this encoding, thus @@ -2296,14 +2295,20 @@ void Compiler::compSetProcessor() // We can't use !DoJitStressEvexEncoding() yet because opts.compSupportsISA hasn't // been set yet as that's what we're trying to set here - bool disableAvx512 = !JitConfig.JitForceEVEXEncoding() && !JitConfig.JitStressEvexEncoding() && - !instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL); -#else - // TODO-XARCH-AVX512: Allow AVX512 support by default - bool disableAvx512 = true; + bool enableAvx512 = false; + +#if defined(DEBUG) + if (JitConfig.JitForceEVEXEncoding()) + { + enableAvx512 = true; + } + else if (JitConfig.JitStressEvexEncoding() && instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL)) + { + enableAvx512 = true; + } #endif // DEBUG - if (disableAvx512) + if (!enableAvx512) { instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F); instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL); diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/ScalarSimdUnOpTest_DataTable.cs b/src/tests/JIT/HardwareIntrinsics/X86/Shared/ScalarSimdUnOpTest_DataTable.cs index e6710bf2acdbf..4f11954a6b7e7 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/ScalarSimdUnOpTest_DataTable.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/ScalarSimdUnOpTest_DataTable.cs @@ -22,7 +22,7 @@ public unsafe struct ScalarSimdUnaryOpTest__DataTable : IDisposable public ScalarSimdUnaryOpTest__DataTable(TResult[] outArray, int alignment) { int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); - if ((alignment != 32 && alignment != 16) || (alignment * 2) < sizeOfoutArray) + if (((alignment != 64) && (alignment != 32) && (alignment != 16)) || (alignment * 2) < sizeOfoutArray) { throw new ArgumentException("Invalid value of alignment"); } diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimdScalarUnOpTest_DataTable.cs b/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimdScalarUnOpTest_DataTable.cs index 45ebbbd611fbb..a8a4299dbdc89 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimdScalarUnOpTest_DataTable.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimdScalarUnOpTest_DataTable.cs @@ -22,7 +22,7 @@ public unsafe struct SimdScalarUnaryOpTest__DataTable : IDisposable public SimdScalarUnaryOpTest__DataTable(TOp1[] inArray, int alignment) { int sizeOfinArray = inArray.Length * Unsafe.SizeOf(); - if ((alignment != 32 && alignment != 16) || (alignment * 2) < sizeOfinArray) + if (((alignment != 64) && (alignment != 32) && (alignment != 16)) || (alignment * 2) < sizeOfinArray) { throw new ArgumentException("Invalid value of alignment"); } diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleBinOpConvTest_DataTable.cs b/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleBinOpConvTest_DataTable.cs index b742c86651d00..b597715be2747 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleBinOpConvTest_DataTable.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleBinOpConvTest_DataTable.cs @@ -28,7 +28,7 @@ public SimpleBinaryOpConvTest__DataTable(TOp1[] inArray1, TOp2 inData2, TResult[ { int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); - if ((alignment != 32 && alignment != 16) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfoutArray) + if (((alignment != 64) && (alignment != 32) && (alignment != 16)) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfoutArray) { throw new ArgumentException("Invalid value of alignment"); } diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleBinOpTest_DataTable.cs b/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleBinOpTest_DataTable.cs index 7e1d770979990..2dbb29f05ecdd 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleBinOpTest_DataTable.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleBinOpTest_DataTable.cs @@ -30,7 +30,7 @@ public SimpleBinaryOpTest__DataTable(TOp1[] inArray1, TOp2[] inArray2, TResult[] int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); - if ((alignment != 32 && alignment != 16) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + if (((alignment != 64) && (alignment != 32) && (alignment != 16)) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) { throw new ArgumentException("Invalid value of alignment"); } diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleUnOpTest_DataTable.cs b/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleUnOpTest_DataTable.cs index afce1ceb95338..57e94d8442bf9 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleUnOpTest_DataTable.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleUnOpTest_DataTable.cs @@ -26,7 +26,7 @@ public SimpleUnaryOpTest__DataTable(TOp1[] inArray, TResult[] outArray, int alig { int sizeOfinArray = inArray.Length * Unsafe.SizeOf(); int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); - if ((alignment != 32 && alignment != 16) || (alignment * 2) < sizeOfinArray || (alignment * 2) < sizeOfoutArray) + if (((alignment != 64) && (alignment != 32) && (alignment != 16)) || (alignment * 2) < sizeOfinArray || (alignment * 2) < sizeOfoutArray) { throw new ArgumentException("Invalid value of alignment"); } diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_BinaryOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_BinaryOpTestTemplate.template index af493b4578e20..8f8ca99e58f16 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_BinaryOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_BinaryOpTestTemplate.template @@ -139,7 +139,7 @@ namespace JIT.HardwareIntrinsics.X86 int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op2BaseType}>(); int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); - if ((alignment != 32 && alignment != 16) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + if (((alignment != 64) && (alignment != 32) && (alignment != 16)) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) { throw new ArgumentException("Invalid value of alignment"); } @@ -487,7 +487,7 @@ namespace JIT.HardwareIntrinsics.X86 Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr); } - + public void RunStructFldScenario() { TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_BooleanBinaryOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_BooleanBinaryOpTestTemplate.template index 658ffb20089c0..8d708cd1640f6 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_BooleanBinaryOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_BooleanBinaryOpTestTemplate.template @@ -136,7 +136,7 @@ namespace JIT.HardwareIntrinsics.X86 { int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op2BaseType}>(); - if ((alignment != 32 && alignment != 16) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2) + if (((alignment != 64) && (alignment != 32) && (alignment != 16)) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2) { throw new ArgumentException("Invalid value of alignment"); } diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_BooleanUnaryOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_BooleanUnaryOpTestTemplate.template index 4d38f8440a7a7..fa8e8dc699258 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_BooleanUnaryOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_BooleanUnaryOpTestTemplate.template @@ -133,7 +133,7 @@ namespace JIT.HardwareIntrinsics.X86 public DataTable({Op1BaseType}[] inArray1, int alignment) { int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); - if ((alignment != 32 && alignment != 16) || (alignment * 2) < sizeOfinArray1) + if (((alignment != 64) && (alignment != 32) && (alignment != 16)) || (alignment * 2) < sizeOfinArray1) { throw new ArgumentException("Invalid value of alignment"); } diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template index dca1c5746d302..b4f1b20c62f68 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template @@ -142,7 +142,7 @@ namespace JIT.HardwareIntrinsics.X86 int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op2BaseType}>(); int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op3BaseType}>(); int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); - if ((alignment != 32 && alignment != 16) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfoutArray) + if (((alignment != 64) && (alignment != 32) && (alignment != 16)) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfoutArray) { throw new ArgumentException("Invalid value of alignment"); } diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_UnaryOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_UnaryOpTestTemplate.template index dc0256d59cb28..8ad5297280990 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_UnaryOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_UnaryOpTestTemplate.template @@ -136,7 +136,7 @@ namespace JIT.HardwareIntrinsics.X86 { int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); - if ((alignment != 32 && alignment != 16) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfoutArray) + if (((alignment != 64) && (alignment != 32) && (alignment != 16)) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfoutArray) { throw new ArgumentException("Invalid value of alignment"); }