From df00b7270f1804a1ef80e7bd70d344649bb9e11a Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Tue, 22 Oct 2024 19:10:37 -0700 Subject: [PATCH 01/15] add vpclmulqdq intrinsics --- src/coreclr/inc/clrconfigvalues.h | 1 + src/coreclr/inc/corinfoinstructionset.h | 250 +++++++++------- src/coreclr/inc/jiteeversionguid.h | 10 +- src/coreclr/inc/readytoruninstructionset.h | 1 + src/coreclr/jit/compiler.cpp | 10 + src/coreclr/jit/hwintrinsic.cpp | 2 + src/coreclr/jit/hwintrinsiclistxarch.h | 18 ++ src/coreclr/jit/hwintrinsicxarch.cpp | 33 ++- src/coreclr/jit/instrsxarch.h | 2 +- src/coreclr/jit/jitconfigvalues.h | 1 + src/coreclr/jit/lowerxarch.cpp | 14 + .../Compiler/HardwareIntrinsicHelpers.cs | 5 + .../Runtime/ReadyToRunInstructionSet.cs | 1 + .../Runtime/ReadyToRunInstructionSetHelper.cs | 4 + .../JitInterface/CorInfoInstructionSet.cs | 270 +++++++++++------- .../ThunkGenerator/InstructionSetDesc.txt | 6 + src/coreclr/vm/codeman.cpp | 5 + .../ILLink.Substitutions.NoX86Intrinsics.xml | 6 + .../X86/Pclmulqdq.PlatformNotSupported.cs | 42 +++ .../Runtime/Intrinsics/X86/Pclmulqdq.cs | 44 +++ .../ref/System.Runtime.Intrinsics.cs | 14 + src/native/minipal/cpufeatures.c | 5 + src/native/minipal/cpufeatures.h | 1 + .../GenerateHWIntrinsicTests_X86.cs | 50 +++- .../Pclmulqdq.V256/Pclmulqdq.V256_r.csproj | 14 + .../Pclmulqdq.V256/Pclmulqdq.V256_ro.csproj | 14 + .../Pclmulqdq.V256/Program.Pclmulqdq.V256.cs | 17 ++ .../Pclmulqdq.V512/Pclmulqdq.V512_r.csproj | 14 + .../Pclmulqdq.V512/Pclmulqdq.V512_ro.csproj | 14 + .../Pclmulqdq.V512/Program.Pclmulqdq.V512.cs | 17 ++ src/tests/issues.targets | 6 + 31 files changed, 652 insertions(+), 239 deletions(-) create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V256/Pclmulqdq.V256_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V256/Pclmulqdq.V256_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V256/Program.Pclmulqdq.V256.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V512/Pclmulqdq.V512_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V512/Pclmulqdq.V512_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V512/Program.Pclmulqdq.V512.cs diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 00d9745e177e7..f1291394a28db 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -780,6 +780,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI2, W("EnableBMI2"), RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableFMA, W("EnableFMA"), 1, "Allows FMA+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableLZCNT, W("EnableLZCNT"), 1, "Allows LZCNT+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePCLMULQDQ, W("EnablePCLMULQDQ"), 1, "Allows PCLMULQDQ+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableVPCLMULQDQ, W("EnableVPCLMULQDQ"), 1, "Allows VPCLMULQDQ+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableMOVBE, W("EnableMOVBE"), 1, "Allows MOVBE+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePOPCNT, W("EnablePOPCNT"), 1, "Allows POPCNT+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE, W("EnableSSE"), 1, "Allows SSE+ hardware intrinsics to be disabled") diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 43f2be795314f..fcb4b87bceaca 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -58,61 +58,63 @@ enum CORINFO_InstructionSet InstructionSet_FMA=13, InstructionSet_LZCNT=14, InstructionSet_PCLMULQDQ=15, - InstructionSet_POPCNT=16, - InstructionSet_Vector128=17, - InstructionSet_Vector256=18, - InstructionSet_Vector512=19, - InstructionSet_AVXVNNI=20, - InstructionSet_MOVBE=21, - InstructionSet_X86Serialize=22, - InstructionSet_EVEX=23, - InstructionSet_AVX512F=24, - InstructionSet_AVX512F_VL=25, - InstructionSet_AVX512BW=26, - InstructionSet_AVX512BW_VL=27, - InstructionSet_AVX512CD=28, - InstructionSet_AVX512CD_VL=29, - InstructionSet_AVX512DQ=30, - InstructionSet_AVX512DQ_VL=31, - InstructionSet_AVX512VBMI=32, - InstructionSet_AVX512VBMI_VL=33, - InstructionSet_AVX10v1=34, - InstructionSet_AVX10v1_V512=35, - InstructionSet_VectorT128=36, - InstructionSet_VectorT256=37, - InstructionSet_VectorT512=38, - InstructionSet_X86Base_X64=39, - InstructionSet_SSE_X64=40, - InstructionSet_SSE2_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_AVX_X64=46, - InstructionSet_AVX2_X64=47, - InstructionSet_AES_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_PCLMULQDQ_X64=53, - InstructionSet_POPCNT_X64=54, - InstructionSet_AVXVNNI_X64=55, - InstructionSet_MOVBE_X64=56, - InstructionSet_X86Serialize_X64=57, - InstructionSet_EVEX_X64=58, - InstructionSet_AVX512F_X64=59, - InstructionSet_AVX512F_VL_X64=60, - InstructionSet_AVX512BW_X64=61, - InstructionSet_AVX512BW_VL_X64=62, - InstructionSet_AVX512CD_X64=63, - InstructionSet_AVX512CD_VL_X64=64, - InstructionSet_AVX512DQ_X64=65, - InstructionSet_AVX512DQ_VL_X64=66, - InstructionSet_AVX512VBMI_X64=67, - InstructionSet_AVX512VBMI_VL_X64=68, - InstructionSet_AVX10v1_X64=69, - InstructionSet_AVX10v1_V512_X64=70, + InstructionSet_VPCLMULQDQ=16, + InstructionSet_VPCLMULQDQ_V512=17, + InstructionSet_POPCNT=18, + InstructionSet_Vector128=19, + InstructionSet_Vector256=20, + InstructionSet_Vector512=21, + InstructionSet_AVXVNNI=22, + InstructionSet_MOVBE=23, + InstructionSet_X86Serialize=24, + InstructionSet_EVEX=25, + InstructionSet_AVX512F=26, + InstructionSet_AVX512F_VL=27, + InstructionSet_AVX512BW=28, + InstructionSet_AVX512BW_VL=29, + InstructionSet_AVX512CD=30, + InstructionSet_AVX512CD_VL=31, + InstructionSet_AVX512DQ=32, + InstructionSet_AVX512DQ_VL=33, + InstructionSet_AVX512VBMI=34, + InstructionSet_AVX512VBMI_VL=35, + InstructionSet_AVX10v1=36, + InstructionSet_AVX10v1_V512=37, + InstructionSet_VectorT128=38, + InstructionSet_VectorT256=39, + InstructionSet_VectorT512=40, + InstructionSet_X86Base_X64=41, + InstructionSet_SSE_X64=42, + InstructionSet_SSE2_X64=43, + InstructionSet_SSE3_X64=44, + InstructionSet_SSSE3_X64=45, + InstructionSet_SSE41_X64=46, + InstructionSet_SSE42_X64=47, + InstructionSet_AVX_X64=48, + InstructionSet_AVX2_X64=49, + InstructionSet_AES_X64=50, + InstructionSet_BMI1_X64=51, + InstructionSet_BMI2_X64=52, + InstructionSet_FMA_X64=53, + InstructionSet_LZCNT_X64=54, + InstructionSet_PCLMULQDQ_X64=55, + InstructionSet_POPCNT_X64=56, + InstructionSet_AVXVNNI_X64=57, + InstructionSet_MOVBE_X64=58, + InstructionSet_X86Serialize_X64=59, + InstructionSet_EVEX_X64=60, + InstructionSet_AVX512F_X64=61, + InstructionSet_AVX512F_VL_X64=62, + InstructionSet_AVX512BW_X64=63, + InstructionSet_AVX512BW_VL_X64=64, + InstructionSet_AVX512CD_X64=65, + InstructionSet_AVX512CD_VL_X64=66, + InstructionSet_AVX512DQ_X64=67, + InstructionSet_AVX512DQ_VL_X64=68, + InstructionSet_AVX512VBMI_X64=69, + InstructionSet_AVX512VBMI_VL_X64=70, + InstructionSet_AVX10v1_X64=71, + InstructionSet_AVX10v1_V512_X64=72, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -130,61 +132,63 @@ enum CORINFO_InstructionSet InstructionSet_FMA=13, InstructionSet_LZCNT=14, InstructionSet_PCLMULQDQ=15, - InstructionSet_POPCNT=16, - InstructionSet_Vector128=17, - InstructionSet_Vector256=18, - InstructionSet_Vector512=19, - InstructionSet_AVXVNNI=20, - InstructionSet_MOVBE=21, - InstructionSet_X86Serialize=22, - InstructionSet_EVEX=23, - InstructionSet_AVX512F=24, - InstructionSet_AVX512F_VL=25, - InstructionSet_AVX512BW=26, - InstructionSet_AVX512BW_VL=27, - InstructionSet_AVX512CD=28, - InstructionSet_AVX512CD_VL=29, - InstructionSet_AVX512DQ=30, - InstructionSet_AVX512DQ_VL=31, - InstructionSet_AVX512VBMI=32, - InstructionSet_AVX512VBMI_VL=33, - InstructionSet_AVX10v1=34, - InstructionSet_AVX10v1_V512=35, - InstructionSet_VectorT128=36, - InstructionSet_VectorT256=37, - InstructionSet_VectorT512=38, - InstructionSet_X86Base_X64=39, - InstructionSet_SSE_X64=40, - InstructionSet_SSE2_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_AVX_X64=46, - InstructionSet_AVX2_X64=47, - InstructionSet_AES_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_PCLMULQDQ_X64=53, - InstructionSet_POPCNT_X64=54, - InstructionSet_AVXVNNI_X64=55, - InstructionSet_MOVBE_X64=56, - InstructionSet_X86Serialize_X64=57, - InstructionSet_EVEX_X64=58, - InstructionSet_AVX512F_X64=59, - InstructionSet_AVX512F_VL_X64=60, - InstructionSet_AVX512BW_X64=61, - InstructionSet_AVX512BW_VL_X64=62, - InstructionSet_AVX512CD_X64=63, - InstructionSet_AVX512CD_VL_X64=64, - InstructionSet_AVX512DQ_X64=65, - InstructionSet_AVX512DQ_VL_X64=66, - InstructionSet_AVX512VBMI_X64=67, - InstructionSet_AVX512VBMI_VL_X64=68, - InstructionSet_AVX10v1_X64=69, - InstructionSet_AVX10v1_V512_X64=70, + InstructionSet_VPCLMULQDQ=16, + InstructionSet_VPCLMULQDQ_V512=17, + InstructionSet_POPCNT=18, + InstructionSet_Vector128=19, + InstructionSet_Vector256=20, + InstructionSet_Vector512=21, + InstructionSet_AVXVNNI=22, + InstructionSet_MOVBE=23, + InstructionSet_X86Serialize=24, + InstructionSet_EVEX=25, + InstructionSet_AVX512F=26, + InstructionSet_AVX512F_VL=27, + InstructionSet_AVX512BW=28, + InstructionSet_AVX512BW_VL=29, + InstructionSet_AVX512CD=30, + InstructionSet_AVX512CD_VL=31, + InstructionSet_AVX512DQ=32, + InstructionSet_AVX512DQ_VL=33, + InstructionSet_AVX512VBMI=34, + InstructionSet_AVX512VBMI_VL=35, + InstructionSet_AVX10v1=36, + InstructionSet_AVX10v1_V512=37, + InstructionSet_VectorT128=38, + InstructionSet_VectorT256=39, + InstructionSet_VectorT512=40, + InstructionSet_X86Base_X64=41, + InstructionSet_SSE_X64=42, + InstructionSet_SSE2_X64=43, + InstructionSet_SSE3_X64=44, + InstructionSet_SSSE3_X64=45, + InstructionSet_SSE41_X64=46, + InstructionSet_SSE42_X64=47, + InstructionSet_AVX_X64=48, + InstructionSet_AVX2_X64=49, + InstructionSet_AES_X64=50, + InstructionSet_BMI1_X64=51, + InstructionSet_BMI2_X64=52, + InstructionSet_FMA_X64=53, + InstructionSet_LZCNT_X64=54, + InstructionSet_PCLMULQDQ_X64=55, + InstructionSet_POPCNT_X64=56, + InstructionSet_AVXVNNI_X64=57, + InstructionSet_MOVBE_X64=58, + InstructionSet_X86Serialize_X64=59, + InstructionSet_EVEX_X64=60, + InstructionSet_AVX512F_X64=61, + InstructionSet_AVX512F_VL_X64=62, + InstructionSet_AVX512BW_X64=63, + InstructionSet_AVX512BW_VL_X64=64, + InstructionSet_AVX512CD_X64=65, + InstructionSet_AVX512CD_VL_X64=66, + InstructionSet_AVX512DQ_X64=67, + InstructionSet_AVX512DQ_VL_X64=68, + InstructionSet_AVX512VBMI_X64=69, + InstructionSet_AVX512VBMI_VL_X64=70, + InstructionSet_AVX10v1_X64=71, + InstructionSet_AVX10v1_V512_X64=72, #endif // TARGET_X86 }; @@ -636,6 +640,14 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AES); if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ)) + resultflags.RemoveInstructionSet(InstructionSet_VPCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_AVX)) + resultflags.RemoveInstructionSet(InstructionSet_VPCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ)) + resultflags.RemoveInstructionSet(InstructionSet_VPCLMULQDQ_V512); + if (resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_VPCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) @@ -742,6 +754,14 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AES); if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ)) + resultflags.RemoveInstructionSet(InstructionSet_VPCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_AVX)) + resultflags.RemoveInstructionSet(InstructionSet_VPCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ)) + resultflags.RemoveInstructionSet(InstructionSet_VPCLMULQDQ_V512); + if (resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_VPCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) @@ -910,6 +930,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "PCLMULQDQ"; case InstructionSet_PCLMULQDQ_X64 : return "PCLMULQDQ_X64"; + case InstructionSet_VPCLMULQDQ : + return "VPCLMULQDQ"; + case InstructionSet_VPCLMULQDQ_V512 : + return "VPCLMULQDQ_V512"; case InstructionSet_POPCNT : return "POPCNT"; case InstructionSet_POPCNT_X64 : @@ -1022,6 +1046,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "LZCNT"; case InstructionSet_PCLMULQDQ : return "PCLMULQDQ"; + case InstructionSet_VPCLMULQDQ : + return "VPCLMULQDQ"; + case InstructionSet_VPCLMULQDQ_V512 : + return "VPCLMULQDQ_V512"; case InstructionSet_POPCNT : return "POPCNT"; case InstructionSet_Vector128 : @@ -1118,6 +1146,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Fma: return InstructionSet_FMA; case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT; case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ; + case READYTORUN_INSTRUCTION_Pclmulqdq_V256: return InstructionSet_VPCLMULQDQ; case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT; case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE; @@ -1155,6 +1184,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Fma: return InstructionSet_FMA; case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT; case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ; + case READYTORUN_INSTRUCTION_Pclmulqdq_V256: return InstructionSet_VPCLMULQDQ; case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT; case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE; diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 44087c266f74f..b6651d7a13864 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* ac04f79d-8d06-4a15-9692-1b4f59265825 */ - 0xac04f79d, - 0x8d06, - 0x4a15, - {0x96, 0x92, 0x1b, 0x4f, 0x59, 0x26, 0x58, 0x25} +constexpr GUID JITEEVersionIdentifier = { /* b2a94b56-4259-41ef-93ae-5c757667ba32 */ + 0xb2a94b56, + 0x4259, + 0x41ef, + {0x93, 0xae, 0x5c, 0x75, 0x76, 0x67, 0xba, 0x32} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 4ad8c6b4e5912..1a57e69094d1c 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -55,6 +55,7 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Avx10v1=44, READYTORUN_INSTRUCTION_Avx10v1_V512=46, READYTORUN_INSTRUCTION_EVEX=47, + READYTORUN_INSTRUCTION_Pclmulqdq_V256=48, }; diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index cf40734b29300..d728be7f74427 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2262,6 +2262,11 @@ void Compiler::compSetProcessor() preferredVectorByteLength = 256 / 8; } + + if (instructionSetFlags.HasInstructionSet(InstructionSet_VPCLMULQDQ)) + { + instructionSetFlags.AddInstructionSet(InstructionSet_VPCLMULQDQ_V512); + } } else { @@ -6264,6 +6269,11 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ); } + if (JitConfig.EnableVPCLMULQDQ() != 0) + { + instructionSetFlags.AddInstructionSet(InstructionSet_VPCLMULQDQ); + } + if (JitConfig.EnablePOPCNT() != 0) { instructionSetFlags.AddInstructionSet(InstructionSet_POPCNT); diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 72b0550b7d82d..1631f0fcdd9e6 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -781,6 +781,8 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { FIRST_NI_FMA, LAST_NI_FMA }, { FIRST_NI_LZCNT, LAST_NI_LZCNT }, { FIRST_NI_PCLMULQDQ, LAST_NI_PCLMULQDQ }, + { FIRST_NI_VPCLMULQDQ, LAST_NI_VPCLMULQDQ }, + { FIRST_NI_VPCLMULQDQ_V512, LAST_NI_VPCLMULQDQ_V512 }, { FIRST_NI_POPCNT, LAST_NI_POPCNT }, { FIRST_NI_Vector128, LAST_NI_Vector128 }, { FIRST_NI_Vector256, LAST_NI_Vector256 }, diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index d44457c727c6f..4d592f80b2316 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -1527,6 +1527,24 @@ HARDWARE_INTRINSIC(LZCNT_X64, LeadingZeroCount, HARDWARE_INTRINSIC(PCLMULQDQ, CarrylessMultiply, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) #define LAST_NI_PCLMULQDQ NI_PCLMULQDQ_CarrylessMultiply +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// VPCLMULQDQ Intrinsics +#define FIRST_NI_VPCLMULQDQ NI_VPCLMULQDQ_CarrylessMultiply +HARDWARE_INTRINSIC(VPCLMULQDQ, CarrylessMultiply, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +#define LAST_NI_VPCLMULQDQ NI_VPCLMULQDQ_CarrylessMultiply + +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// VPCLMULQDQ_V512 Intrinsics +#define FIRST_NI_VPCLMULQDQ_V512 NI_VPCLMULQDQ_V512_CarrylessMultiply +HARDWARE_INTRINSIC(VPCLMULQDQ_V512, CarrylessMultiply, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +#define LAST_NI_VPCLMULQDQ_V512 NI_VPCLMULQDQ_V512_CarrylessMultiply + // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index a833b5733d52c..d4b0cd4f5a660 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -113,13 +113,32 @@ static CORINFO_InstructionSet VLVersionOfIsa(CORINFO_InstructionSet isa) } //------------------------------------------------------------------------ -// V512VersionOfIsa: Gets the corresponding AVX10V512 only InstructionSet for a given InstructionSet +// V256VersionOfIsa: Gets the corresponding V256 only InstructionSet for a given InstructionSet // // Arguments: // isa -- The InstructionSet ID // // Return Value: -// The AVX10V512 only InstructionSet associated with isa +// The V256 only InstructionSet associated with isa +static CORINFO_InstructionSet V256VersionOfIsa(CORINFO_InstructionSet isa) +{ + switch (isa) + { + case InstructionSet_PCLMULQDQ: + return InstructionSet_VPCLMULQDQ; + default: + return InstructionSet_NONE; + } +} + +//------------------------------------------------------------------------ +// V512VersionOfIsa: Gets the corresponding V512 only InstructionSet for a given InstructionSet +// +// Arguments: +// isa -- The InstructionSet ID +// +// Return Value: +// The V512 only InstructionSet associated with isa static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) { switch (isa) @@ -128,6 +147,8 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_AVX10v1_V512; case InstructionSet_AVX10v1_X64: return InstructionSet_AVX10v1_V512_X64; + case InstructionSet_PCLMULQDQ: + return InstructionSet_VPCLMULQDQ_V512; default: return InstructionSet_NONE; } @@ -340,7 +361,11 @@ CORINFO_InstructionSet HWIntrinsicInfo::lookupIsa(const char* className, if (className[0] == 'V') { - if (strcmp(className, "V512") == 0) + if (strcmp(className, "V256") == 0) + { + return V256VersionOfIsa(enclosingIsa); + } + else if (strcmp(className, "V512") == 0) { return V512VersionOfIsa(enclosingIsa); } @@ -862,6 +887,8 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) case InstructionSet_LZCNT_X64: case InstructionSet_PCLMULQDQ: case InstructionSet_PCLMULQDQ_X64: + case InstructionSet_VPCLMULQDQ: + case InstructionSet_VPCLMULQDQ_V512: case InstructionSet_POPCNT: case InstructionSet_POPCNT_X64: case InstructionSet_SSE: diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index f365880bcbf05..822d4e21809e2 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -400,7 +400,7 @@ INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE, INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES encryption flow INST3(aesimc, "aesimc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), INS_TT_NONE, REX_WIG | Encoding_VEX) // Perform the AES InvMixColumn Transformation INST3(aeskeygenassist, "aeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_TT_NONE, REX_WIG | Encoding_VEX) // AES Round Key Generation Assist -INST3(pclmulqdq, "pclmulqdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords +INST3(pclmulqdq, "pclmulqdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_FULL_MEM, Input_64Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords // SSE4.1 INST3(blendpd, "blendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 23158d49342de..c84fe6e1bf8f5 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -406,6 +406,7 @@ RELEASE_CONFIG_INTEGER(EnableBMI2, W("EnableBMI2"), RELEASE_CONFIG_INTEGER(EnableFMA, W("EnableFMA"), 1) // Allows FMA+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableLZCNT, W("EnableLZCNT"), 1) // Allows LZCNT+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnablePCLMULQDQ, W("EnablePCLMULQDQ"), 1) // Allows PCLMULQDQ+ hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableVPCLMULQDQ, W("EnableVPCLMULQDQ"), 1) // Allows VPCLMULQDQ+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnablePOPCNT, W("EnablePOPCNT"), 1) // Allows POPCNT+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableSSE, W("EnableSSE"), 1) // Allows SSE+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableSSE2, W("EnableSSE2"), 1) // Allows SSE2+ hardware intrinsics to be disabled diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 1da40d1dc453b..7f4f0f237cc4b 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -2592,6 +2592,16 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) return LowerHWIntrinsicTernaryLogic(node); } + case NI_PCLMULQDQ_CarrylessMultiply: + { + // The EVEX form of 128-bit pclmulqdq requires VPCLMULQDQ in addition to AVX512VL + if (comp->compOpportunisticallyDependsOn(InstructionSet_VPCLMULQDQ)) + { + intrinsicId = NI_VPCLMULQDQ_CarrylessMultiply; + node->ChangeHWIntrinsicId(intrinsicId); + } + } + default: break; } @@ -9341,6 +9351,8 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre case NI_SSE41_MultipleSumAbsoluteDifferences: case NI_AES_KeygenAssist: case NI_PCLMULQDQ_CarrylessMultiply: + case NI_VPCLMULQDQ_CarrylessMultiply: + case NI_VPCLMULQDQ_V512_CarrylessMultiply: case NI_AVX_Blend: case NI_AVX_Compare: case NI_AVX_DotProduct: @@ -11377,6 +11389,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX512DQ_VL_Range: case NI_AVX512DQ_ReduceScalar: case NI_PCLMULQDQ_CarrylessMultiply: + case NI_VPCLMULQDQ_CarrylessMultiply: + case NI_VPCLMULQDQ_V512_CarrylessMultiply: case NI_AVX10v1_AlignRight32: case NI_AVX10v1_AlignRight64: case NI_AVX10v1_GetMantissaScalar: diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index d00bca1109b20..1c11b1cb0b480 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -78,6 +78,7 @@ private static class XArchIntrinsicConstants public const int Serialize = 0x20000; public const int Avx10v1 = 0x40000; public const int Evex = 0x80000; + public const int Vpclmulqdq = 0x100000; public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { @@ -135,6 +136,8 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) builder.AddSupportedInstructionSet("avx10v1_v512"); if ((flags & Evex) != 0) builder.AddSupportedInstructionSet("evex"); + if ((flags & Vpclmulqdq) != 0) + builder.AddSupportedInstructionSet("vpclmulqdq"); } public static int FromInstructionSet(InstructionSet instructionSet) @@ -204,6 +207,8 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_AVX10v1_V512_X64 => (Avx10v1 | Avx512), InstructionSet.X64_EVEX => Evex, InstructionSet.X64_EVEX_X64 => Evex, + InstructionSet.X64_VPCLMULQDQ => Vpclmulqdq, + InstructionSet.X64_VPCLMULQDQ_V512 => Vpclmulqdq, // Baseline ISAs - they're always available InstructionSet.X64_SSE => 0, diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index dd6a57731444e..94a51e957b12a 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -58,6 +58,7 @@ public enum ReadyToRunInstructionSet Avx10v1=44, Avx10v1_V512=46, EVEX=47, + Pclmulqdq_V256=48, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 361aa92bea955..7cea565134a44 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -87,6 +87,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_LZCNT_X64: return ReadyToRunInstructionSet.Lzcnt; case InstructionSet.X64_PCLMULQDQ: return ReadyToRunInstructionSet.Pclmulqdq; case InstructionSet.X64_PCLMULQDQ_X64: return ReadyToRunInstructionSet.Pclmulqdq; + case InstructionSet.X64_VPCLMULQDQ: return ReadyToRunInstructionSet.Pclmulqdq_V256; + case InstructionSet.X64_VPCLMULQDQ_V512: return null; case InstructionSet.X64_POPCNT: return ReadyToRunInstructionSet.Popcnt; case InstructionSet.X64_POPCNT_X64: return ReadyToRunInstructionSet.Popcnt; case InstructionSet.X64_Vector128: return null; @@ -166,6 +168,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_LZCNT_X64: return null; case InstructionSet.X86_PCLMULQDQ: return ReadyToRunInstructionSet.Pclmulqdq; case InstructionSet.X86_PCLMULQDQ_X64: return null; + case InstructionSet.X86_VPCLMULQDQ: return ReadyToRunInstructionSet.Pclmulqdq_V256; + case InstructionSet.X86_VPCLMULQDQ_V512: return null; case InstructionSet.X86_POPCNT: return ReadyToRunInstructionSet.Popcnt; case InstructionSet.X86_POPCNT_X64: return null; case InstructionSet.X86_Vector128: return null; diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 42807bfcec1d9..9184452691626 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -56,6 +56,8 @@ public enum InstructionSet X64_FMA = InstructionSet_X64.FMA, X64_LZCNT = InstructionSet_X64.LZCNT, X64_PCLMULQDQ = InstructionSet_X64.PCLMULQDQ, + X64_VPCLMULQDQ = InstructionSet_X64.VPCLMULQDQ, + X64_VPCLMULQDQ_V512 = InstructionSet_X64.VPCLMULQDQ_V512, X64_POPCNT = InstructionSet_X64.POPCNT, X64_Vector128 = InstructionSet_X64.Vector128, X64_Vector256 = InstructionSet_X64.Vector256, @@ -126,6 +128,8 @@ public enum InstructionSet X86_FMA = InstructionSet_X86.FMA, X86_LZCNT = InstructionSet_X86.LZCNT, X86_PCLMULQDQ = InstructionSet_X86.PCLMULQDQ, + X86_VPCLMULQDQ = InstructionSet_X86.VPCLMULQDQ, + X86_VPCLMULQDQ_V512 = InstructionSet_X86.VPCLMULQDQ_V512, X86_POPCNT = InstructionSet_X86.POPCNT, X86_Vector128 = InstructionSet_X86.Vector128, X86_Vector256 = InstructionSet_X86.Vector256, @@ -232,61 +236,63 @@ public enum InstructionSet_X64 FMA = 13, LZCNT = 14, PCLMULQDQ = 15, - POPCNT = 16, - Vector128 = 17, - Vector256 = 18, - Vector512 = 19, - AVXVNNI = 20, - MOVBE = 21, - X86Serialize = 22, - EVEX = 23, - AVX512F = 24, - AVX512F_VL = 25, - AVX512BW = 26, - AVX512BW_VL = 27, - AVX512CD = 28, - AVX512CD_VL = 29, - AVX512DQ = 30, - AVX512DQ_VL = 31, - AVX512VBMI = 32, - AVX512VBMI_VL = 33, - AVX10v1 = 34, - AVX10v1_V512 = 35, - VectorT128 = 36, - VectorT256 = 37, - VectorT512 = 38, - X86Base_X64 = 39, - SSE_X64 = 40, - SSE2_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - AVX_X64 = 46, - AVX2_X64 = 47, - AES_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - PCLMULQDQ_X64 = 53, - POPCNT_X64 = 54, - AVXVNNI_X64 = 55, - MOVBE_X64 = 56, - X86Serialize_X64 = 57, - EVEX_X64 = 58, - AVX512F_X64 = 59, - AVX512F_VL_X64 = 60, - AVX512BW_X64 = 61, - AVX512BW_VL_X64 = 62, - AVX512CD_X64 = 63, - AVX512CD_VL_X64 = 64, - AVX512DQ_X64 = 65, - AVX512DQ_VL_X64 = 66, - AVX512VBMI_X64 = 67, - AVX512VBMI_VL_X64 = 68, - AVX10v1_X64 = 69, - AVX10v1_V512_X64 = 70, + VPCLMULQDQ = 16, + VPCLMULQDQ_V512 = 17, + POPCNT = 18, + Vector128 = 19, + Vector256 = 20, + Vector512 = 21, + AVXVNNI = 22, + MOVBE = 23, + X86Serialize = 24, + EVEX = 25, + AVX512F = 26, + AVX512F_VL = 27, + AVX512BW = 28, + AVX512BW_VL = 29, + AVX512CD = 30, + AVX512CD_VL = 31, + AVX512DQ = 32, + AVX512DQ_VL = 33, + AVX512VBMI = 34, + AVX512VBMI_VL = 35, + AVX10v1 = 36, + AVX10v1_V512 = 37, + VectorT128 = 38, + VectorT256 = 39, + VectorT512 = 40, + X86Base_X64 = 41, + SSE_X64 = 42, + SSE2_X64 = 43, + SSE3_X64 = 44, + SSSE3_X64 = 45, + SSE41_X64 = 46, + SSE42_X64 = 47, + AVX_X64 = 48, + AVX2_X64 = 49, + AES_X64 = 50, + BMI1_X64 = 51, + BMI2_X64 = 52, + FMA_X64 = 53, + LZCNT_X64 = 54, + PCLMULQDQ_X64 = 55, + POPCNT_X64 = 56, + AVXVNNI_X64 = 57, + MOVBE_X64 = 58, + X86Serialize_X64 = 59, + EVEX_X64 = 60, + AVX512F_X64 = 61, + AVX512F_VL_X64 = 62, + AVX512BW_X64 = 63, + AVX512BW_VL_X64 = 64, + AVX512CD_X64 = 65, + AVX512CD_VL_X64 = 66, + AVX512DQ_X64 = 67, + AVX512DQ_VL_X64 = 68, + AVX512VBMI_X64 = 69, + AVX512VBMI_VL_X64 = 70, + AVX10v1_X64 = 71, + AVX10v1_V512_X64 = 72, } public enum InstructionSet_X86 @@ -308,61 +314,63 @@ public enum InstructionSet_X86 FMA = 13, LZCNT = 14, PCLMULQDQ = 15, - POPCNT = 16, - Vector128 = 17, - Vector256 = 18, - Vector512 = 19, - AVXVNNI = 20, - MOVBE = 21, - X86Serialize = 22, - EVEX = 23, - AVX512F = 24, - AVX512F_VL = 25, - AVX512BW = 26, - AVX512BW_VL = 27, - AVX512CD = 28, - AVX512CD_VL = 29, - AVX512DQ = 30, - AVX512DQ_VL = 31, - AVX512VBMI = 32, - AVX512VBMI_VL = 33, - AVX10v1 = 34, - AVX10v1_V512 = 35, - VectorT128 = 36, - VectorT256 = 37, - VectorT512 = 38, - X86Base_X64 = 39, - SSE_X64 = 40, - SSE2_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - AVX_X64 = 46, - AVX2_X64 = 47, - AES_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - PCLMULQDQ_X64 = 53, - POPCNT_X64 = 54, - AVXVNNI_X64 = 55, - MOVBE_X64 = 56, - X86Serialize_X64 = 57, - EVEX_X64 = 58, - AVX512F_X64 = 59, - AVX512F_VL_X64 = 60, - AVX512BW_X64 = 61, - AVX512BW_VL_X64 = 62, - AVX512CD_X64 = 63, - AVX512CD_VL_X64 = 64, - AVX512DQ_X64 = 65, - AVX512DQ_VL_X64 = 66, - AVX512VBMI_X64 = 67, - AVX512VBMI_VL_X64 = 68, - AVX10v1_X64 = 69, - AVX10v1_V512_X64 = 70, + VPCLMULQDQ = 16, + VPCLMULQDQ_V512 = 17, + POPCNT = 18, + Vector128 = 19, + Vector256 = 20, + Vector512 = 21, + AVXVNNI = 22, + MOVBE = 23, + X86Serialize = 24, + EVEX = 25, + AVX512F = 26, + AVX512F_VL = 27, + AVX512BW = 28, + AVX512BW_VL = 29, + AVX512CD = 30, + AVX512CD_VL = 31, + AVX512DQ = 32, + AVX512DQ_VL = 33, + AVX512VBMI = 34, + AVX512VBMI_VL = 35, + AVX10v1 = 36, + AVX10v1_V512 = 37, + VectorT128 = 38, + VectorT256 = 39, + VectorT512 = 40, + X86Base_X64 = 41, + SSE_X64 = 42, + SSE2_X64 = 43, + SSE3_X64 = 44, + SSSE3_X64 = 45, + SSE41_X64 = 46, + SSE42_X64 = 47, + AVX_X64 = 48, + AVX2_X64 = 49, + AES_X64 = 50, + BMI1_X64 = 51, + BMI2_X64 = 52, + FMA_X64 = 53, + LZCNT_X64 = 54, + PCLMULQDQ_X64 = 55, + POPCNT_X64 = 56, + AVXVNNI_X64 = 57, + MOVBE_X64 = 58, + X86Serialize_X64 = 59, + EVEX_X64 = 60, + AVX512F_X64 = 61, + AVX512F_VL_X64 = 62, + AVX512BW_X64 = 63, + AVX512BW_VL_X64 = 64, + AVX512CD_X64 = 65, + AVX512CD_VL_X64 = 66, + AVX512DQ_X64 = 67, + AVX512DQ_VL_X64 = 68, + AVX512VBMI_X64 = 69, + AVX512VBMI_VL_X64 = 70, + AVX10v1_X64 = 71, + AVX10v1_V512_X64 = 72, } public unsafe struct InstructionSetFlags : IEnumerable @@ -774,6 +782,14 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_SSE2); if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ)) resultflags.AddInstructionSet(InstructionSet.X64_SSE2); + if (resultflags.HasInstructionSet(InstructionSet.X64_VPCLMULQDQ)) + resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet.X64_VPCLMULQDQ)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX); + if (resultflags.HasInstructionSet(InstructionSet.X64_VPCLMULQDQ_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_VPCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet.X64_VPCLMULQDQ_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI)) resultflags.AddInstructionSet(InstructionSet.X64_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize)) @@ -881,6 +897,14 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_SSE2); if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ)) resultflags.AddInstructionSet(InstructionSet.X86_SSE2); + if (resultflags.HasInstructionSet(InstructionSet.X86_VPCLMULQDQ)) + resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet.X86_VPCLMULQDQ)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX); + if (resultflags.HasInstructionSet(InstructionSet.X86_VPCLMULQDQ_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_VPCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet.X86_VPCLMULQDQ_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNI)) resultflags.AddInstructionSet(InstructionSet.X86_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Serialize)) @@ -1115,6 +1139,14 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AES); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ)) + resultflags.AddInstructionSet(InstructionSet.X64_VPCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX)) + resultflags.AddInstructionSet(InstructionSet.X64_VPCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet.X64_VPCLMULQDQ)) + resultflags.AddInstructionSet(InstructionSet.X64_VPCLMULQDQ_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X64_VPCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) @@ -1222,6 +1254,14 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_AES); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2)) resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ)) + resultflags.AddInstructionSet(InstructionSet.X86_VPCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX)) + resultflags.AddInstructionSet(InstructionSet.X86_VPCLMULQDQ); + if (resultflags.HasInstructionSet(InstructionSet.X86_VPCLMULQDQ)) + resultflags.AddInstructionSet(InstructionSet.X86_VPCLMULQDQ_V512); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X86_VPCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) @@ -1353,6 +1393,8 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("fma", "Fma", InstructionSet.X64_FMA, true); yield return new InstructionSetInfo("lzcnt", "Lzcnt", InstructionSet.X64_LZCNT, true); yield return new InstructionSetInfo("pclmul", "Pclmulqdq", InstructionSet.X64_PCLMULQDQ, true); + yield return new InstructionSetInfo("vpclmulqdq", "Pclmulqdq_V256", InstructionSet.X64_VPCLMULQDQ, true); + yield return new InstructionSetInfo("VPCLMULQDQ_V512", "", InstructionSet.X64_VPCLMULQDQ_V512, false); yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X64_POPCNT, true); yield return new InstructionSetInfo("Vector128", "", InstructionSet.X64_Vector128, false); yield return new InstructionSetInfo("Vector256", "", InstructionSet.X64_Vector256, false); @@ -1394,6 +1436,8 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("fma", "Fma", InstructionSet.X86_FMA, true); yield return new InstructionSetInfo("lzcnt", "Lzcnt", InstructionSet.X86_LZCNT, true); yield return new InstructionSetInfo("pclmul", "Pclmulqdq", InstructionSet.X86_PCLMULQDQ, true); + yield return new InstructionSetInfo("vpclmulqdq", "Pclmulqdq_V256", InstructionSet.X86_VPCLMULQDQ, true); + yield return new InstructionSetInfo("VPCLMULQDQ_V512", "", InstructionSet.X86_VPCLMULQDQ_V512, false); yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X86_POPCNT, true); yield return new InstructionSetInfo("Vector128", "", InstructionSet.X86_Vector128, false); yield return new InstructionSetInfo("Vector256", "", InstructionSet.X86_Vector256, false); @@ -1812,6 +1856,9 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.X64_PCLMULQDQ; } + case "Pclmulqdq_V256": + { return InstructionSet.X64_VPCLMULQDQ; } + case "Popcnt": if (nestedTypeName == "X64") { return InstructionSet.X64_POPCNT_X64; } @@ -1960,6 +2007,9 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite case "Pclmulqdq": { return InstructionSet.X86_PCLMULQDQ; } + case "Pclmulqdq_V256": + { return InstructionSet.X86_VPCLMULQDQ; } + case "Popcnt": { return InstructionSet.X86_POPCNT; } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index dbb8e6efd20ad..a89c9b96c8dd2 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -40,6 +40,8 @@ instructionset ,X86 ,Bmi2 , ,11 ,BMI2 instructionset ,X86 ,Fma , ,12 ,FMA ,fma instructionset ,X86 ,Lzcnt , ,13 ,LZCNT ,lzcnt instructionset ,X86 ,Pclmulqdq , ,14 ,PCLMULQDQ ,pclmul +instructionset ,X86 ,Pclmulqdq_V256 , ,48 ,VPCLMULQDQ ,vpclmulqdq +instructionset ,X86 , , , ,VPCLMULQDQ_V512 , instructionset ,X86 ,Popcnt , ,15 ,POPCNT ,popcnt instructionset ,X86 , , , ,Vector128 , instructionset ,X86 , , , ,Vector256 , @@ -147,6 +149,10 @@ implication ,X86 ,AVX512VBMI_VL ,AVX512BW_VL implication ,X86 ,AES ,SSE2 implication ,X86 ,PCLMULQDQ ,SSE2 +implication ,X86 ,VPCLMULQDQ ,PCLMULQDQ +implication ,X86 ,VPCLMULQDQ ,AVX +implication ,X86 ,VPCLMULQDQ_V512 ,VPCLMULQDQ +implication ,X86 ,VPCLMULQDQ_V512 ,AVX512F implication ,X86 ,AVXVNNI ,AVX2 implication ,X86 ,X86Serialize ,X86Base diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index fa89ace071698..3f1252edac8dd 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1392,6 +1392,11 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_PCLMULQDQ); } + if (((cpuFeatures & XArchIntrinsicConstants_Vpclmulqdq) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableVPCLMULQDQ)) + { + CPUCompileFlags.Set(InstructionSet_VPCLMULQDQ); + } + if (((cpuFeatures & XArchIntrinsicConstants_AvxVnni) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNI)) { CPUCompileFlags.Set(InstructionSet_AVXVNNI); diff --git a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml index 60020ec1e24df..c3b4fb52768d3 100644 --- a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml +++ b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml @@ -111,6 +111,12 @@ + + + + + + diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs index c9111f801ecaf..9c034adaaf5d8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs @@ -30,6 +30,48 @@ internal X64() { } public static new bool IsSupported { [Intrinsic] get { return false; } } } + public abstract class V256 + { + internal V256() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { [Intrinsic] get { return false; } } + + /// + /// __m256i _mm256_clmulepi64_epi128 (__m256i a, __m256i b, const int imm8) + /// VPCLMULQDQ ymm1, ymm2, ymm3/m256, imm8 + /// + public static Vector256 CarrylessMultiply(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_clmulepi64_epi128 (__m256i a, __m256i b, const int imm8) + /// VPCLMULQDQ ymm1, ymm2, ymm3/m256, imm8 + /// + public static Vector256 CarrylessMultiply(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } + } + + public abstract class V512 + { + internal V512() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { [Intrinsic] get { return false; } } + + /// + /// __m512i _mm512_clmulepi64_epi128 (__m512i a, __m512i b, const int imm8) + /// VPCLMULQDQ zmm1, zmm2, zmm3/m512, imm8 + /// + public static Vector512 CarrylessMultiply(Vector512 left, Vector512 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } + /// + /// __m512i _mm512_clmulepi64_epi128 (__m512i a, __m512i b, const int imm8) + /// VPCLMULQDQ zmm1, zmm2, zmm3/m512, imm8 + /// + public static Vector512 CarrylessMultiply(Vector512 left, Vector512 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } + } + /// /// __m128i _mm_clmulepi64_si128 (__m128i a, __m128i b, const int imm8) /// PCLMULQDQ xmm1, xmm2/m128, imm8 diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs index 9cc213f0f98aa..f32f65859a7af 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs @@ -30,6 +30,50 @@ internal X64() { } public static new bool IsSupported { get => IsSupported; } } + [Intrinsic] + public abstract class V256 + { + internal V256() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { get => IsSupported; } + + /// + /// __m256i _mm256_clmulepi64_epi128 (__m256i a, __m256i b, const int imm8) + /// VPCLMULQDQ ymm1, ymm2, ymm3/m256, imm8 + /// + public static Vector256 CarrylessMultiply(Vector256 left, Vector256 right, [ConstantExpected] byte control) => CarrylessMultiply(left, right, control); + /// + /// __m256i _mm256_clmulepi64_epi128 (__m256i a, __m256i b, const int imm8) + /// VPCLMULQDQ ymm1, ymm2, ymm3/m256, imm8 + /// + public static Vector256 CarrylessMultiply(Vector256 left, Vector256 right, [ConstantExpected] byte control) => CarrylessMultiply(left, right, control); + } + + [Intrinsic] + public abstract class V512 + { + internal V512() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { get => IsSupported; } + + /// + /// __m512i _mm512_clmulepi64_epi128 (__m512i a, __m512i b, const int imm8) + /// VPCLMULQDQ zmm1, zmm2, zmm3/m512, imm8 + /// + public static Vector512 CarrylessMultiply(Vector512 left, Vector512 right, [ConstantExpected] byte control) => CarrylessMultiply(left, right, control); + /// + /// __m512i _mm512_clmulepi64_epi128 (__m512i a, __m512i b, const int imm8) + /// VPCLMULQDQ zmm1, zmm2, zmm3/m512, imm8 + /// + public static Vector512 CarrylessMultiply(Vector512 left, Vector512 right, [ConstantExpected] byte control) => CarrylessMultiply(left, right, control); + } + /// /// __m128i _mm_clmulepi64_si128 (__m128i a, __m128i b, const int imm8) /// PCLMULQDQ xmm1, xmm2/m128, imm8 diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 352820d85007c..5cc8110d47bbc 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -8454,6 +8454,20 @@ internal Pclmulqdq() { } internal X64() { } public static new bool IsSupported { get { throw null; } } } + public abstract partial class V256 + { + internal V256() { } + public static bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector256 CarrylessMultiply(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte control) { throw null; } + public static System.Runtime.Intrinsics.Vector256 CarrylessMultiply(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte control) { throw null; } + } + public abstract partial class V512 + { + internal V512() { } + public static bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector512 CarrylessMultiply(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte control) { throw null; } + public static System.Runtime.Intrinsics.Vector512 CarrylessMultiply(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte control) { throw null; } + } } [System.CLSCompliantAttribute(false)] public abstract partial class Popcnt : System.Runtime.Intrinsics.X86.Sse42 diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index 8d6a063ce4d2f..9a8f73c6e4740 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -216,6 +216,11 @@ int minipal_getcpufeatures(void) { __cpuidex(cpuidInfo, 0x00000007, 0x00000000); + if ((cpuidInfo[CPUID_ECX] & (1 << 10)) != 0) // VPCLMULQDQ + { + result |= XArchIntrinsicConstants_Vpclmulqdq; + } + if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2 { result |= XArchIntrinsicConstants_Avx2; diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index 6422fe33f9787..7c61accc1c7fa 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -31,6 +31,7 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Serialize = 0x20000, XArchIntrinsicConstants_Avx10v1 = 0x40000, XArchIntrinsicConstants_Evex = 0x80000, + XArchIntrinsicConstants_Vpclmulqdq = 0x100000, }; #endif // HOST_X86 || HOST_AMD64 diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs index 42ee1aecd6576..ab266063f1040 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs @@ -3356,16 +3356,44 @@ (string templateFileName, Dictionary templateData)[] PclmulqdqInputs = new [] { - ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Pclmulqdq", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["Imm"] = "0", ["LargestVectorSize"] = "16", ["Input1"] = "{2, 20}", ["Input1Size"] = "2" ,["Input2"] = "{25, 95}", ["Input2Size"] = "2" ,["ExpectedRet"] = "{50, 0}", ["ExpectedRetSize"] = "2"}), - ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Pclmulqdq", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["Imm"] = "1", ["LargestVectorSize"] = "16", ["Input1"] = "{2, 20}", ["Input1Size"] = "2" ,["Input2"] = "{25, 95}", ["Input2Size"] = "2" ,["ExpectedRet"] = "{500, 0}" , ["ExpectedRetSize"] = "2"}), - ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Pclmulqdq", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["Imm"] = "16", ["LargestVectorSize"] = "16", ["Input1"] = "{2, 20}", ["Input1Size"] = "2" ,["Input2"] = "{25, 95}", ["Input2Size"] = "2" ,["ExpectedRet"] = "{190, 0}" , ["ExpectedRetSize"] = "2"}), - ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Pclmulqdq", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["Imm"] = "17", ["LargestVectorSize"] = "16", ["Input1"] = "{2, 20}", ["Input1Size"] = "2" ,["Input2"] = "{25, 95}", ["Input2Size"] = "2" ,["ExpectedRet"] = "{1164, 0}" , ["ExpectedRetSize"] = "2"}), - ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Pclmulqdq", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["Imm"] = "129", ["LargestVectorSize"] = "16",["Input1"] = "{2, 20}", ["Input1Size"] = "2" ,["Input2"] = "{25, 95}", ["Input2Size"] = "2" ,["ExpectedRet"] = "{500, 0}" , ["ExpectedRetSize"] = "2"}), - ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Pclmulqdq", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["Imm"] = "0", ["LargestVectorSize"] = "16", ["Input1"] = "{-2, -20}", ["Input1Size"] = "2" ,["Input2"] = "{25, 65535}", ["Input2Size"] = "2" ,["ExpectedRet"] = "{-18, 8}" , ["ExpectedRetSize"] = "2"}), - ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Pclmulqdq", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["Imm"] = "1", ["LargestVectorSize"] = "16", ["Input1"] = "{-2, -20}", ["Input1Size"] = "2" ,["Input2"] = "{25, 65535}", ["Input2Size"] = "2" ,["ExpectedRet"] = "{-436, 8}" , ["ExpectedRetSize"] = "2"}), - ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Pclmulqdq", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["Imm"] = "16", ["LargestVectorSize"] = "16", ["Input1"] = "{-2, -20}", ["Input1Size"] = "2" ,["Input2"] = "{25, 65535}", ["Input2Size"] = "2" ,["ExpectedRet"] = "{43690, 21845}" , ["ExpectedRetSize"] = "2"}), - ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Pclmulqdq", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["Imm"] = "17", ["LargestVectorSize"] = "16", ["Input1"] = "{-2, -20}", ["Input1Size"] = "2" ,["Input2"] = "{25, 65535}", ["Input2Size"] = "2" ,["ExpectedRet"] = "{961188, 21845}" , ["ExpectedRetSize"] = "2"}), - ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Pclmulqdq", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["Imm"] = "129", ["LargestVectorSize"] = "16", ["Input1"] = "{-2, -20}", ["Input1Size"] = "2" ,["Input2"] ="{25, 65535}", ["Input2Size"] = "2" ,["ExpectedRet"] = "{-436, 8}" , ["ExpectedRetSize"] = "2"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Sse2", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["Imm"] = "0", ["LargestVectorSize"] = "16", ["Input1Size"] = "2", ["Input1"] = "{2, 20}", ["Input2Size"] = "2", ["Input2"] = "{25, 95}", ["ExpectedRetSize"] = "2", ["ExpectedRet"] = "{50, 0}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Sse2", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["Imm"] = "1", ["LargestVectorSize"] = "16", ["Input1Size"] = "2", ["Input1"] = "{2, 20}", ["Input2Size"] = "2", ["Input2"] = "{25, 95}", ["ExpectedRetSize"] = "2", ["ExpectedRet"] = "{500, 0}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Sse2", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["Imm"] = "16", ["LargestVectorSize"] = "16", ["Input1Size"] = "2", ["Input1"] = "{2, 20}", ["Input2Size"] = "2", ["Input2"] = "{25, 95}", ["ExpectedRetSize"] = "2", ["ExpectedRet"] = "{190, 0}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Sse2", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["Imm"] = "17", ["LargestVectorSize"] = "16", ["Input1Size"] = "2", ["Input1"] = "{2, 20}", ["Input2Size"] = "2", ["Input2"] = "{25, 95}", ["ExpectedRetSize"] = "2", ["ExpectedRet"] = "{1164, 0}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Sse2", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["Imm"] = "129", ["LargestVectorSize"] = "16", ["Input1Size"] = "2", ["Input1"] = "{2, 20}", ["Input2Size"] = "2", ["Input2"] = "{25, 95}", ["ExpectedRetSize"] = "2", ["ExpectedRet"] = "{500, 0}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Sse2", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["Imm"] = "0", ["LargestVectorSize"] = "16", ["Input1Size"] = "2", ["Input1"] = "{-2, -20}", ["Input2Size"] = "2", ["Input2"] = "{25, 65535}", ["ExpectedRetSize"] = "2", ["ExpectedRet"] = "{-18, 8}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Sse2", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["Imm"] = "1", ["LargestVectorSize"] = "16", ["Input1Size"] = "2", ["Input1"] = "{-2, -20}", ["Input2Size"] = "2", ["Input2"] = "{25, 65535}", ["ExpectedRetSize"] = "2", ["ExpectedRet"] = "{-436, 8}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Sse2", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["Imm"] = "16", ["LargestVectorSize"] = "16", ["Input1Size"] = "2", ["Input1"] = "{-2, -20}", ["Input2Size"] = "2", ["Input2"] = "{25, 65535}", ["ExpectedRetSize"] = "2", ["ExpectedRet"] = "{43690, 21845}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Sse2", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["Imm"] = "17", ["LargestVectorSize"] = "16", ["Input1Size"] = "2", ["Input1"] = "{-2, -20}", ["Input2Size"] = "2", ["Input2"] = "{25, 65535}", ["ExpectedRetSize"] = "2", ["ExpectedRet"] = "{961188, 21845}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq", ["LoadIsa"] = "Sse2", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["Imm"] = "129", ["LargestVectorSize"] = "16", ["Input1Size"] = "2", ["Input1"] = "{-2, -20}", ["Input2Size"] = "2", ["Input2"] = "{25, 65535}", ["ExpectedRetSize"] = "2", ["ExpectedRet"] = "{-436, 8}"}), +}; + +(string templateFileName, Dictionary templateData)[] PclmulqdqV256Inputs = new[] +{ + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V256", ["LoadIsa"] = "Avx", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt64", ["Imm"] = "0", ["LargestVectorSize"] = "32", ["Input1Size"] = "4", ["Input1"] = "{2, 20, 25, 95}", ["Input2Size"] = "4", ["Input2"] = "{25, 95, 2, 20}", ["ExpectedRetSize"] = "4", ["ExpectedRet"] = "{50, 0, 50, 0}" }), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V256", ["LoadIsa"] = "Avx", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt64", ["Imm"] = "1", ["LargestVectorSize"] = "32", ["Input1Size"] = "4", ["Input1"] = "{2, 20, 25, 95}", ["Input2Size"] = "4", ["Input2"] = "{25, 95, 2, 20}", ["ExpectedRetSize"] = "4", ["ExpectedRet"] = "{500, 0, 190, 0}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V256", ["LoadIsa"] = "Avx", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt64", ["Imm"] = "16", ["LargestVectorSize"] = "32", ["Input1Size"] = "4", ["Input1"] = "{2, 20, 25, 95}", ["Input2Size"] = "4", ["Input2"] = "{25, 95, 2, 20}", ["ExpectedRetSize"] = "4", ["ExpectedRet"] = "{190, 0, 500, 0}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V256", ["LoadIsa"] = "Avx", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt64", ["Imm"] = "17", ["LargestVectorSize"] = "32", ["Input1Size"] = "4", ["Input1"] = "{2, 20, 25, 95}", ["Input2Size"] = "4", ["Input2"] = "{25, 95, 2, 20}", ["ExpectedRetSize"] = "4", ["ExpectedRet"] = "{1164, 0, 1164, 0}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V256", ["LoadIsa"] = "Avx", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt64", ["Imm"] = "129", ["LargestVectorSize"] = "32", ["Input1Size"] = "4", ["Input1"] = "{2, 20, 25, 95}", ["Input2Size"] = "4", ["Input2"] = "{25, 95, 2, 20}", ["ExpectedRetSize"] = "4", ["ExpectedRet"] = "{500, 0, 190, 0}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V256", ["LoadIsa"] = "Avx", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int64", ["Imm"] = "0", ["LargestVectorSize"] = "32", ["Input1Size"] = "4", ["Input1"] = "{-2, -20, 25, 65535}", ["Input2Size"] = "4", ["Input2"] = "{25, 65535, -2, -20}", ["ExpectedRetSize"] = "4", ["ExpectedRet"] = "{-18, 8, -18, 8}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V256", ["LoadIsa"] = "Avx", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int64", ["Imm"] = "1", ["LargestVectorSize"] = "32", ["Input1Size"] = "4", ["Input1"] = "{-2, -20, 25, 65535}", ["Input2Size"] = "4", ["Input2"] = "{25, 65535, -2, -20}", ["ExpectedRetSize"] = "4", ["ExpectedRet"] = "{-436, 8, 43690, 21845}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V256", ["LoadIsa"] = "Avx", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int64", ["Imm"] = "16", ["LargestVectorSize"] = "32", ["Input1Size"] = "4", ["Input1"] = "{-2, -20, 25, 65535}", ["Input2Size"] = "4", ["Input2"] = "{25, 65535, -2, -20}", ["ExpectedRetSize"] = "4", ["ExpectedRet"] = "{43690, 21845, -436, 8}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V256", ["LoadIsa"] = "Avx", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int64", ["Imm"] = "17", ["LargestVectorSize"] = "32", ["Input1Size"] = "4", ["Input1"] = "{-2, -20, 25, 65535}", ["Input2Size"] = "4", ["Input2"] = "{25, 65535, -2, -20}", ["ExpectedRetSize"] = "4", ["ExpectedRet"] = "{961188, 21845, 961188, 21845}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V256", ["LoadIsa"] = "Avx", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int64", ["Imm"] = "129", ["LargestVectorSize"] = "32", ["Input1Size"] = "4", ["Input1"] = "{-2, -20, 25, 65535}", ["Input2Size"] = "4", ["Input2"] = "{25, 65535, -2, -20}", ["ExpectedRetSize"] = "4", ["ExpectedRet"] = "{-436, 8, 43690, 21845}"}), +}; + +(string templateFileName, Dictionary templateData)[] PclmulqdqV512Inputs = new[] +{ + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt64", ["Imm"] = "0", ["LargestVectorSize"] = "64", ["Input1Size"] = "8", ["Input1"] = "{2, 20, 25, 95, 25, 95, 2, 20}", ["Input2Size"] = "8", ["Input2"] = "{25, 95, 2, 20, 2, 20, 25, 95}", ["ExpectedRetSize"] = "8", ["ExpectedRet"] = "{50, 0, 50, 0, 50, 0, 50, 0}" }), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt64", ["Imm"] = "1", ["LargestVectorSize"] = "64", ["Input1Size"] = "8", ["Input1"] = "{2, 20, 25, 95, 25, 95, 2, 20}", ["Input2Size"] = "8", ["Input2"] = "{25, 95, 2, 20, 2, 20, 25, 95}", ["ExpectedRetSize"] = "8", ["ExpectedRet"] = "{500, 0, 190, 0, 190, 0, 500, 0}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt64", ["Imm"] = "16", ["LargestVectorSize"] = "64", ["Input1Size"] = "8", ["Input1"] = "{2, 20, 25, 95, 25, 95, 2, 20}", ["Input2Size"] = "8", ["Input2"] = "{25, 95, 2, 20, 2, 20, 25, 95}", ["ExpectedRetSize"] = "8", ["ExpectedRet"] = "{190, 0, 500, 0, 500, 0, 190, 0}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt64", ["Imm"] = "17", ["LargestVectorSize"] = "64", ["Input1Size"] = "8", ["Input1"] = "{2, 20, 25, 95, 25, 95, 2, 20}", ["Input2Size"] = "8", ["Input2"] = "{25, 95, 2, 20, 2, 20, 25, 95}", ["ExpectedRetSize"] = "8", ["ExpectedRet"] = "{1164, 0, 1164, 0, 1164, 0, 1164, 0}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64",["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt64", ["Imm"] = "129", ["LargestVectorSize"] = "64", ["Input1Size"] = "8", ["Input1"] = "{2, 20, 25, 95, 25, 95, 2, 20}", ["Input2Size"] = "8", ["Input2"] = "{25, 95, 2, 20, 2, 20, 25, 95}", ["ExpectedRetSize"] = "8", ["ExpectedRet"] = "{500, 0, 190, 0, 190, 0, 500, 0}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int64", ["Imm"] = "0", ["LargestVectorSize"] = "64", ["Input1Size"] = "8", ["Input1"] = "{-2, -20, 25, 65535, 25, 65535, -2, -20}", ["Input2Size"] = "8", ["Input2"] = "{25, 65535, -2, -20, -2, -20, 25, 65535}", ["ExpectedRetSize"] = "8", ["ExpectedRet"] = "{-18, 8, -18, 8, -18, 8, -18, 8}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int64", ["Imm"] = "1", ["LargestVectorSize"] = "64", ["Input1Size"] = "8", ["Input1"] = "{-2, -20, 25, 65535, 25, 65535, -2, -20}", ["Input2Size"] = "8", ["Input2"] = "{25, 65535, -2, -20, -2, -20, 25, 65535}", ["ExpectedRetSize"] = "8", ["ExpectedRet"] = "{-436, 8, 43690, 21845, 43690, 21845, -436, 8}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int64", ["Imm"] = "16", ["LargestVectorSize"] = "64", ["Input1Size"] = "8", ["Input1"] = "{-2, -20, 25, 65535, 25, 65535, -2, -20}", ["Input2Size"] = "8", ["Input2"] = "{25, 65535, -2, -20, -2, -20, 25, 65535}", ["ExpectedRetSize"] = "8", ["ExpectedRet"] = "{43690, 21845, -436, 8, -436, 8, 43690, 21845}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int64", ["Imm"] = "17", ["LargestVectorSize"] = "64", ["Input1Size"] = "8", ["Input1"] = "{-2, -20, 25, 65535, 25, 65535, -2, -20}", ["Input2Size"] = "8", ["Input2"] = "{25, 65535, -2, -20, -2, -20, 25, 65535}", ["ExpectedRetSize"] = "8", ["ExpectedRet"] = "{961188, 21845, 961188, 21845, 961188, 21845, 961188, 21845}"}), + ("PclmulqdqOpTest.template", new Dictionary { ["Isa"] = "Pclmulqdq.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "CarrylessMultiply", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int64", ["Imm"] = "129", ["LargestVectorSize"] = "64", ["Input1Size"] = "8", ["Input1"] = "{-2, -20, 25, 65535, 25, 65535, -2, -20}", ["Input2Size"] = "8", ["Input2"] = "{25, 65535, -2, -20, -2, -20, 25, 65535}", ["ExpectedRetSize"] = "8", ["ExpectedRet"] = "{-436, 8, 43690, 21845, 43690, 21845, -436, 8}"}), }; const string ValidateBmi2ParallelBitComment = @" @@ -3558,6 +3586,8 @@ bool isImmTemplate(string name) ProcessInputs("Bmi1.X64", Bmi1X64Inputs); ProcessInputs("Aes", AesInputs); ProcessInputs("Pclmulqdq", PclmulqdqInputs); +ProcessInputs("Pclmulqdq.V256", PclmulqdqV256Inputs); +ProcessInputs("Pclmulqdq.V512", PclmulqdqV512Inputs); ProcessInputs("Bmi2", Bmi2Inputs); ProcessInputs("Bmi2.X64", Bmi2X64Inputs); ProcessInputs("X86Base", X86BaseInputs); diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V256/Pclmulqdq.V256_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V256/Pclmulqdq.V256_r.csproj new file mode 100644 index 0000000000000..cf8ceb516059a --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V256/Pclmulqdq.V256_r.csproj @@ -0,0 +1,14 @@ + + + X86_Pclmulqdq.V256_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V256/Pclmulqdq.V256_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V256/Pclmulqdq.V256_ro.csproj new file mode 100644 index 0000000000000..9f87c35b8cbd6 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V256/Pclmulqdq.V256_ro.csproj @@ -0,0 +1,14 @@ + + + X86_Pclmulqdq.V256_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V256/Program.Pclmulqdq.V256.cs b/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V256/Program.Pclmulqdq.V256.cs new file mode 100644 index 0000000000000..87adf412f22b3 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V256/Program.Pclmulqdq.V256.cs @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +[assembly:Xunit.ActiveIssue("https://github.com/dotnet/runtime/issues/91392", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsMonoLLVMAOT))] +namespace JIT.HardwareIntrinsics.X86._Pclmulqdq.V256 +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V512/Pclmulqdq.V512_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V512/Pclmulqdq.V512_r.csproj new file mode 100644 index 0000000000000..42aa14c07bf9e --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V512/Pclmulqdq.V512_r.csproj @@ -0,0 +1,14 @@ + + + X86_Pclmulqdq.V512_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V512/Pclmulqdq.V512_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V512/Pclmulqdq.V512_ro.csproj new file mode 100644 index 0000000000000..6d8765f41c965 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V512/Pclmulqdq.V512_ro.csproj @@ -0,0 +1,14 @@ + + + X86_Pclmulqdq.V512_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V512/Program.Pclmulqdq.V512.cs b/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V512/Program.Pclmulqdq.V512.cs new file mode 100644 index 0000000000000..8bf92ce490d55 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Pclmulqdq.V512/Program.Pclmulqdq.V512.cs @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +[assembly:Xunit.ActiveIssue("https://github.com/dotnet/runtime/issues/91392", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsMonoLLVMAOT))] +namespace JIT.HardwareIntrinsics.X86._Pclmulqdq.V512 +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/issues.targets b/src/tests/issues.targets index 1724254d8ad89..cab8281973dfa 100644 --- a/src/tests/issues.targets +++ b/src/tests/issues.targets @@ -2206,6 +2206,12 @@ https://github.com/dotnet/runtime/issues/75767 + + https://github.com/dotnet/runtime/issues/91392 + + + https://github.com/dotnet/runtime/issues/91392 + https://github.com/dotnet/runtime/issues/75767 From 092a0ae836bf05232a7cd29cffcdd4d4ed93d1ec Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Tue, 22 Oct 2024 19:50:19 -0700 Subject: [PATCH 02/15] add missing break --- src/coreclr/jit/lowerxarch.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 7f4f0f237cc4b..66a1a7a05ff3f 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -2600,6 +2600,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) intrinsicId = NI_VPCLMULQDQ_CarrylessMultiply; node->ChangeHWIntrinsicId(intrinsicId); } + break; } default: From c4aaa76bc1ece33e0c7b90baf4346bc8d76b7853 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Wed, 23 Oct 2024 13:46:08 -0700 Subject: [PATCH 03/15] add alternate instruction def for evex encoding --- src/coreclr/jit/emitxarch.cpp | 1 + src/coreclr/jit/hwintrinsiclistxarch.h | 4 ++-- src/coreclr/jit/instrsxarch.h | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 500d827ab29ca..98a882ea6593c 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -19870,6 +19870,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; case INS_pclmulqdq: + case INS_pclmulqdq_evex: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += PERFSCORE_LATENCY_7C; break; diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 4d592f80b2316..c989a42ecac02 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -1533,7 +1533,7 @@ HARDWARE_INTRINSIC(PCLMULQDQ, CarrylessMultiply, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // VPCLMULQDQ Intrinsics #define FIRST_NI_VPCLMULQDQ NI_VPCLMULQDQ_CarrylessMultiply -HARDWARE_INTRINSIC(VPCLMULQDQ, CarrylessMultiply, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(VPCLMULQDQ, CarrylessMultiply, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq_evex, INS_pclmulqdq_evex, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) #define LAST_NI_VPCLMULQDQ NI_VPCLMULQDQ_CarrylessMultiply // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** @@ -1542,7 +1542,7 @@ HARDWARE_INTRINSIC(VPCLMULQDQ, CarrylessMultiply, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // VPCLMULQDQ_V512 Intrinsics #define FIRST_NI_VPCLMULQDQ_V512 NI_VPCLMULQDQ_V512_CarrylessMultiply -HARDWARE_INTRINSIC(VPCLMULQDQ_V512, CarrylessMultiply, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(VPCLMULQDQ_V512, CarrylessMultiply, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq_evex, INS_pclmulqdq_evex, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) #define LAST_NI_VPCLMULQDQ_V512 NI_VPCLMULQDQ_V512_CarrylessMultiply // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 822d4e21809e2..179ea140b7599 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -400,7 +400,8 @@ INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE, INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES encryption flow INST3(aesimc, "aesimc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), INS_TT_NONE, REX_WIG | Encoding_VEX) // Perform the AES InvMixColumn Transformation INST3(aeskeygenassist, "aeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_TT_NONE, REX_WIG | Encoding_VEX) // AES Round Key Generation Assist -INST3(pclmulqdq, "pclmulqdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_FULL_MEM, Input_64Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords +INST3(pclmulqdq, "pclmulqdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords +INST3(pclmulqdq_evex, "pclmulqdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_FULL_MEM, Input_64Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords // SSE4.1 INST3(blendpd, "blendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values From c309baa90bc26922670d0f9ab078125feb772e33 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Sat, 26 Oct 2024 15:17:15 -0700 Subject: [PATCH 04/15] rename instruction --- src/coreclr/jit/emitxarch.cpp | 2 +- src/coreclr/jit/hwintrinsiclistxarch.h | 4 ++-- src/coreclr/jit/instrsxarch.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 98a882ea6593c..a633aca2a8850 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -19870,7 +19870,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; case INS_pclmulqdq: - case INS_pclmulqdq_evex: + case INS_vpclmulqdq: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += PERFSCORE_LATENCY_7C; break; diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index c989a42ecac02..9670813217b85 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -1533,7 +1533,7 @@ HARDWARE_INTRINSIC(PCLMULQDQ, CarrylessMultiply, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // VPCLMULQDQ Intrinsics #define FIRST_NI_VPCLMULQDQ NI_VPCLMULQDQ_CarrylessMultiply -HARDWARE_INTRINSIC(VPCLMULQDQ, CarrylessMultiply, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq_evex, INS_pclmulqdq_evex, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(VPCLMULQDQ, CarrylessMultiply, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpclmulqdq, INS_vpclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) #define LAST_NI_VPCLMULQDQ NI_VPCLMULQDQ_CarrylessMultiply // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** @@ -1542,7 +1542,7 @@ HARDWARE_INTRINSIC(VPCLMULQDQ, CarrylessMultiply, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // VPCLMULQDQ_V512 Intrinsics #define FIRST_NI_VPCLMULQDQ_V512 NI_VPCLMULQDQ_V512_CarrylessMultiply -HARDWARE_INTRINSIC(VPCLMULQDQ_V512, CarrylessMultiply, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq_evex, INS_pclmulqdq_evex, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(VPCLMULQDQ_V512, CarrylessMultiply, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpclmulqdq, INS_vpclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) #define LAST_NI_VPCLMULQDQ_V512 NI_VPCLMULQDQ_V512_CarrylessMultiply // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 179ea140b7599..a7f9f6636340e 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -400,8 +400,8 @@ INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE, INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES encryption flow INST3(aesimc, "aesimc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), INS_TT_NONE, REX_WIG | Encoding_VEX) // Perform the AES InvMixColumn Transformation INST3(aeskeygenassist, "aeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_TT_NONE, REX_WIG | Encoding_VEX) // AES Round Key Generation Assist -INST3(pclmulqdq, "pclmulqdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords -INST3(pclmulqdq_evex, "pclmulqdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_FULL_MEM, Input_64Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords +INST3(pclmulqdq, "pclmulqdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords +INST3(vpclmulqdq, "pclmulqdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_FULL_MEM, Input_64Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords // SSE4.1 INST3(blendpd, "blendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values From 3275606cf0a2549a6e76e118d7e58ef42645348c Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Wed, 30 Oct 2024 20:04:20 -0700 Subject: [PATCH 05/15] whitespace --- .../JIT/HardwareIntrinsics/X86/Shared/PclmulqdqOpTest.template | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/PclmulqdqOpTest.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/PclmulqdqOpTest.template index 487941a088089..c54667a16d244 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/PclmulqdqOpTest.template +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/PclmulqdqOpTest.template @@ -271,6 +271,5 @@ namespace JIT.HardwareIntrinsics.X86 Succeeded = false; } } - } } From 5ada8e16d8f3d9b781243fb97bc277a627ec3fbb Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Mon, 4 Nov 2024 16:55:49 -0800 Subject: [PATCH 06/15] re-run thunk generator --- src/coreclr/inc/corinfoinstructionset.h | 206 ++++++++-------- src/coreclr/inc/jiteeversionguid.h | 10 +- src/coreclr/inc/readytoruninstructionset.h | 1 + .../Runtime/ReadyToRunInstructionSet.cs | 1 + .../Runtime/ReadyToRunInstructionSetHelper.cs | 4 +- .../JitInterface/CorInfoInstructionSet.cs | 230 +++++++++--------- .../ThunkGenerator/InstructionSetDesc.txt | 4 +- 7 files changed, 237 insertions(+), 219 deletions(-) diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 0ccf09413b031..d42edba8430d8 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -58,56 +58,58 @@ enum CORINFO_InstructionSet InstructionSet_FMA=13, InstructionSet_LZCNT=14, InstructionSet_PCLMULQDQ=15, - InstructionSet_POPCNT=16, - InstructionSet_Vector128=17, - InstructionSet_Vector256=18, - InstructionSet_Vector512=19, - InstructionSet_AVXVNNI=20, - InstructionSet_MOVBE=21, - InstructionSet_X86Serialize=22, - InstructionSet_EVEX=23, - InstructionSet_AVX512F=24, - InstructionSet_AVX512F_VL=25, - InstructionSet_AVX512BW=26, - InstructionSet_AVX512BW_VL=27, - InstructionSet_AVX512CD=28, - InstructionSet_AVX512CD_VL=29, - InstructionSet_AVX512DQ=30, - InstructionSet_AVX512DQ_VL=31, - InstructionSet_AVX512VBMI=32, - InstructionSet_AVX512VBMI_VL=33, - InstructionSet_AVX10v1=34, - InstructionSet_AVX10v1_V512=35, - InstructionSet_VectorT128=36, - InstructionSet_VectorT256=37, - InstructionSet_VectorT512=38, - InstructionSet_X86Base_X64=39, - InstructionSet_SSE_X64=40, - InstructionSet_SSE2_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_AVX_X64=46, - InstructionSet_AVX2_X64=47, - InstructionSet_AES_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_PCLMULQDQ_X64=53, - InstructionSet_POPCNT_X64=54, - InstructionSet_AVXVNNI_X64=55, - InstructionSet_MOVBE_X64=56, - InstructionSet_X86Serialize_X64=57, - InstructionSet_EVEX_X64=58, - InstructionSet_AVX512F_X64=59, - InstructionSet_AVX512BW_X64=60, - InstructionSet_AVX512CD_X64=61, - InstructionSet_AVX512DQ_X64=62, - InstructionSet_AVX512VBMI_X64=63, - InstructionSet_AVX10v1_X64=64, - InstructionSet_AVX10v1_V512_X64=65, + InstructionSet_VPCLMULQDQ=16, + InstructionSet_VPCLMULQDQ_V512=17, + InstructionSet_POPCNT=18, + InstructionSet_Vector128=19, + InstructionSet_Vector256=20, + InstructionSet_Vector512=21, + InstructionSet_AVXVNNI=22, + InstructionSet_MOVBE=23, + InstructionSet_X86Serialize=24, + InstructionSet_EVEX=25, + InstructionSet_AVX512F=26, + InstructionSet_AVX512F_VL=27, + InstructionSet_AVX512BW=28, + InstructionSet_AVX512BW_VL=29, + InstructionSet_AVX512CD=30, + InstructionSet_AVX512CD_VL=31, + InstructionSet_AVX512DQ=32, + InstructionSet_AVX512DQ_VL=33, + InstructionSet_AVX512VBMI=34, + InstructionSet_AVX512VBMI_VL=35, + InstructionSet_AVX10v1=36, + InstructionSet_AVX10v1_V512=37, + InstructionSet_VectorT128=38, + InstructionSet_VectorT256=39, + InstructionSet_VectorT512=40, + InstructionSet_X86Base_X64=41, + InstructionSet_SSE_X64=42, + InstructionSet_SSE2_X64=43, + InstructionSet_SSE3_X64=44, + InstructionSet_SSSE3_X64=45, + InstructionSet_SSE41_X64=46, + InstructionSet_SSE42_X64=47, + InstructionSet_AVX_X64=48, + InstructionSet_AVX2_X64=49, + InstructionSet_AES_X64=50, + InstructionSet_BMI1_X64=51, + InstructionSet_BMI2_X64=52, + InstructionSet_FMA_X64=53, + InstructionSet_LZCNT_X64=54, + InstructionSet_PCLMULQDQ_X64=55, + InstructionSet_POPCNT_X64=56, + InstructionSet_AVXVNNI_X64=57, + InstructionSet_MOVBE_X64=58, + InstructionSet_X86Serialize_X64=59, + InstructionSet_EVEX_X64=60, + InstructionSet_AVX512F_X64=61, + InstructionSet_AVX512BW_X64=62, + InstructionSet_AVX512CD_X64=63, + InstructionSet_AVX512DQ_X64=64, + InstructionSet_AVX512VBMI_X64=65, + InstructionSet_AVX10v1_X64=66, + InstructionSet_AVX10v1_V512_X64=67, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -125,56 +127,58 @@ enum CORINFO_InstructionSet InstructionSet_FMA=13, InstructionSet_LZCNT=14, InstructionSet_PCLMULQDQ=15, - InstructionSet_POPCNT=16, - InstructionSet_Vector128=17, - InstructionSet_Vector256=18, - InstructionSet_Vector512=19, - InstructionSet_AVXVNNI=20, - InstructionSet_MOVBE=21, - InstructionSet_X86Serialize=22, - InstructionSet_EVEX=23, - InstructionSet_AVX512F=24, - InstructionSet_AVX512F_VL=25, - InstructionSet_AVX512BW=26, - InstructionSet_AVX512BW_VL=27, - InstructionSet_AVX512CD=28, - InstructionSet_AVX512CD_VL=29, - InstructionSet_AVX512DQ=30, - InstructionSet_AVX512DQ_VL=31, - InstructionSet_AVX512VBMI=32, - InstructionSet_AVX512VBMI_VL=33, - InstructionSet_AVX10v1=34, - InstructionSet_AVX10v1_V512=35, - InstructionSet_VectorT128=36, - InstructionSet_VectorT256=37, - InstructionSet_VectorT512=38, - InstructionSet_X86Base_X64=39, - InstructionSet_SSE_X64=40, - InstructionSet_SSE2_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_AVX_X64=46, - InstructionSet_AVX2_X64=47, - InstructionSet_AES_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_PCLMULQDQ_X64=53, - InstructionSet_POPCNT_X64=54, - InstructionSet_AVXVNNI_X64=55, - InstructionSet_MOVBE_X64=56, - InstructionSet_X86Serialize_X64=57, - InstructionSet_EVEX_X64=58, - InstructionSet_AVX512F_X64=59, - InstructionSet_AVX512BW_X64=60, - InstructionSet_AVX512CD_X64=61, - InstructionSet_AVX512DQ_X64=62, - InstructionSet_AVX512VBMI_X64=63, - InstructionSet_AVX10v1_X64=64, - InstructionSet_AVX10v1_V512_X64=65, + InstructionSet_VPCLMULQDQ=16, + InstructionSet_VPCLMULQDQ_V512=17, + InstructionSet_POPCNT=18, + InstructionSet_Vector128=19, + InstructionSet_Vector256=20, + InstructionSet_Vector512=21, + InstructionSet_AVXVNNI=22, + InstructionSet_MOVBE=23, + InstructionSet_X86Serialize=24, + InstructionSet_EVEX=25, + InstructionSet_AVX512F=26, + InstructionSet_AVX512F_VL=27, + InstructionSet_AVX512BW=28, + InstructionSet_AVX512BW_VL=29, + InstructionSet_AVX512CD=30, + InstructionSet_AVX512CD_VL=31, + InstructionSet_AVX512DQ=32, + InstructionSet_AVX512DQ_VL=33, + InstructionSet_AVX512VBMI=34, + InstructionSet_AVX512VBMI_VL=35, + InstructionSet_AVX10v1=36, + InstructionSet_AVX10v1_V512=37, + InstructionSet_VectorT128=38, + InstructionSet_VectorT256=39, + InstructionSet_VectorT512=40, + InstructionSet_X86Base_X64=41, + InstructionSet_SSE_X64=42, + InstructionSet_SSE2_X64=43, + InstructionSet_SSE3_X64=44, + InstructionSet_SSSE3_X64=45, + InstructionSet_SSE41_X64=46, + InstructionSet_SSE42_X64=47, + InstructionSet_AVX_X64=48, + InstructionSet_AVX2_X64=49, + InstructionSet_AES_X64=50, + InstructionSet_BMI1_X64=51, + InstructionSet_BMI2_X64=52, + InstructionSet_FMA_X64=53, + InstructionSet_LZCNT_X64=54, + InstructionSet_PCLMULQDQ_X64=55, + InstructionSet_POPCNT_X64=56, + InstructionSet_AVXVNNI_X64=57, + InstructionSet_MOVBE_X64=58, + InstructionSet_X86Serialize_X64=59, + InstructionSet_EVEX_X64=60, + InstructionSet_AVX512F_X64=61, + InstructionSet_AVX512BW_X64=62, + InstructionSet_AVX512CD_X64=63, + InstructionSet_AVX512DQ_X64=64, + InstructionSet_AVX512VBMI_X64=65, + InstructionSet_AVX10v1_X64=66, + InstructionSet_AVX10v1_V512_X64=67, #endif // TARGET_X86 }; @@ -1093,6 +1097,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT; case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ; case READYTORUN_INSTRUCTION_Pclmulqdq_V256: return InstructionSet_VPCLMULQDQ; + case READYTORUN_INSTRUCTION_Pclmulqdq_V512: return InstructionSet_VPCLMULQDQ_V512; case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT; case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE; @@ -1131,6 +1136,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT; case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ; case READYTORUN_INSTRUCTION_Pclmulqdq_V256: return InstructionSet_VPCLMULQDQ; + case READYTORUN_INSTRUCTION_Pclmulqdq_V512: return InstructionSet_VPCLMULQDQ_V512; case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT; case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE; diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 07e246cd3fa13..7f4ed543df2f5 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 04021b93-e969-41ed-96cd-4c583673b9ab */ - 0x04021b93, - 0xe969, - 0x41ed, - {0x96, 0xcd, 0x4c, 0x58, 0x36, 0x73, 0xb9, 0xab} +constexpr GUID JITEEVersionIdentifier = { /* 9014d652-5dc7-4edf-9285-6644d0898fb5 */ + 0x9014d652, + 0x5dc7, + 0x4edf, + {0x92, 0x85, 0x66, 0x44, 0xd0, 0x89, 0x8f, 0xb5} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 1a57e69094d1c..2200898d9ef0a 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -56,6 +56,7 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Avx10v1_V512=46, READYTORUN_INSTRUCTION_EVEX=47, READYTORUN_INSTRUCTION_Pclmulqdq_V256=48, + READYTORUN_INSTRUCTION_Pclmulqdq_V512=49, }; diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index 94a51e957b12a..13e6ad5607bd6 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -59,6 +59,7 @@ public enum ReadyToRunInstructionSet Avx10v1_V512=46, EVEX=47, Pclmulqdq_V256=48, + Pclmulqdq_V512=49, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index dcd9f64111558..1bc00e79163cc 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -88,7 +88,7 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_PCLMULQDQ: return ReadyToRunInstructionSet.Pclmulqdq; case InstructionSet.X64_PCLMULQDQ_X64: return ReadyToRunInstructionSet.Pclmulqdq; case InstructionSet.X64_VPCLMULQDQ: return ReadyToRunInstructionSet.Pclmulqdq_V256; - case InstructionSet.X64_VPCLMULQDQ_V512: return null; + case InstructionSet.X64_VPCLMULQDQ_V512: return ReadyToRunInstructionSet.Pclmulqdq_V512; case InstructionSet.X64_POPCNT: return ReadyToRunInstructionSet.Popcnt; case InstructionSet.X64_POPCNT_X64: return ReadyToRunInstructionSet.Popcnt; case InstructionSet.X64_Vector128: return null; @@ -164,7 +164,7 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_PCLMULQDQ: return ReadyToRunInstructionSet.Pclmulqdq; case InstructionSet.X86_PCLMULQDQ_X64: return null; case InstructionSet.X86_VPCLMULQDQ: return ReadyToRunInstructionSet.Pclmulqdq_V256; - case InstructionSet.X86_VPCLMULQDQ_V512: return null; + case InstructionSet.X86_VPCLMULQDQ_V512: return ReadyToRunInstructionSet.Pclmulqdq_V512; case InstructionSet.X86_POPCNT: return ReadyToRunInstructionSet.Popcnt; case InstructionSet.X86_POPCNT_X64: return null; case InstructionSet.X86_Vector128: return null; diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 94984b26b533e..108a0fb5644bf 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -226,56 +226,58 @@ public enum InstructionSet_X64 FMA = 13, LZCNT = 14, PCLMULQDQ = 15, - POPCNT = 16, - Vector128 = 17, - Vector256 = 18, - Vector512 = 19, - AVXVNNI = 20, - MOVBE = 21, - X86Serialize = 22, - EVEX = 23, - AVX512F = 24, - AVX512F_VL = 25, - AVX512BW = 26, - AVX512BW_VL = 27, - AVX512CD = 28, - AVX512CD_VL = 29, - AVX512DQ = 30, - AVX512DQ_VL = 31, - AVX512VBMI = 32, - AVX512VBMI_VL = 33, - AVX10v1 = 34, - AVX10v1_V512 = 35, - VectorT128 = 36, - VectorT256 = 37, - VectorT512 = 38, - X86Base_X64 = 39, - SSE_X64 = 40, - SSE2_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - AVX_X64 = 46, - AVX2_X64 = 47, - AES_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - PCLMULQDQ_X64 = 53, - POPCNT_X64 = 54, - AVXVNNI_X64 = 55, - MOVBE_X64 = 56, - X86Serialize_X64 = 57, - EVEX_X64 = 58, - AVX512F_X64 = 59, - AVX512BW_X64 = 60, - AVX512CD_X64 = 61, - AVX512DQ_X64 = 62, - AVX512VBMI_X64 = 63, - AVX10v1_X64 = 64, - AVX10v1_V512_X64 = 65, + VPCLMULQDQ = 16, + VPCLMULQDQ_V512 = 17, + POPCNT = 18, + Vector128 = 19, + Vector256 = 20, + Vector512 = 21, + AVXVNNI = 22, + MOVBE = 23, + X86Serialize = 24, + EVEX = 25, + AVX512F = 26, + AVX512F_VL = 27, + AVX512BW = 28, + AVX512BW_VL = 29, + AVX512CD = 30, + AVX512CD_VL = 31, + AVX512DQ = 32, + AVX512DQ_VL = 33, + AVX512VBMI = 34, + AVX512VBMI_VL = 35, + AVX10v1 = 36, + AVX10v1_V512 = 37, + VectorT128 = 38, + VectorT256 = 39, + VectorT512 = 40, + X86Base_X64 = 41, + SSE_X64 = 42, + SSE2_X64 = 43, + SSE3_X64 = 44, + SSSE3_X64 = 45, + SSE41_X64 = 46, + SSE42_X64 = 47, + AVX_X64 = 48, + AVX2_X64 = 49, + AES_X64 = 50, + BMI1_X64 = 51, + BMI2_X64 = 52, + FMA_X64 = 53, + LZCNT_X64 = 54, + PCLMULQDQ_X64 = 55, + POPCNT_X64 = 56, + AVXVNNI_X64 = 57, + MOVBE_X64 = 58, + X86Serialize_X64 = 59, + EVEX_X64 = 60, + AVX512F_X64 = 61, + AVX512BW_X64 = 62, + AVX512CD_X64 = 63, + AVX512DQ_X64 = 64, + AVX512VBMI_X64 = 65, + AVX10v1_X64 = 66, + AVX10v1_V512_X64 = 67, } public enum InstructionSet_X86 @@ -297,56 +299,58 @@ public enum InstructionSet_X86 FMA = 13, LZCNT = 14, PCLMULQDQ = 15, - POPCNT = 16, - Vector128 = 17, - Vector256 = 18, - Vector512 = 19, - AVXVNNI = 20, - MOVBE = 21, - X86Serialize = 22, - EVEX = 23, - AVX512F = 24, - AVX512F_VL = 25, - AVX512BW = 26, - AVX512BW_VL = 27, - AVX512CD = 28, - AVX512CD_VL = 29, - AVX512DQ = 30, - AVX512DQ_VL = 31, - AVX512VBMI = 32, - AVX512VBMI_VL = 33, - AVX10v1 = 34, - AVX10v1_V512 = 35, - VectorT128 = 36, - VectorT256 = 37, - VectorT512 = 38, - X86Base_X64 = 39, - SSE_X64 = 40, - SSE2_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - AVX_X64 = 46, - AVX2_X64 = 47, - AES_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - PCLMULQDQ_X64 = 53, - POPCNT_X64 = 54, - AVXVNNI_X64 = 55, - MOVBE_X64 = 56, - X86Serialize_X64 = 57, - EVEX_X64 = 58, - AVX512F_X64 = 59, - AVX512BW_X64 = 60, - AVX512CD_X64 = 61, - AVX512DQ_X64 = 62, - AVX512VBMI_X64 = 63, - AVX10v1_X64 = 64, - AVX10v1_V512_X64 = 65, + VPCLMULQDQ = 16, + VPCLMULQDQ_V512 = 17, + POPCNT = 18, + Vector128 = 19, + Vector256 = 20, + Vector512 = 21, + AVXVNNI = 22, + MOVBE = 23, + X86Serialize = 24, + EVEX = 25, + AVX512F = 26, + AVX512F_VL = 27, + AVX512BW = 28, + AVX512BW_VL = 29, + AVX512CD = 30, + AVX512CD_VL = 31, + AVX512DQ = 32, + AVX512DQ_VL = 33, + AVX512VBMI = 34, + AVX512VBMI_VL = 35, + AVX10v1 = 36, + AVX10v1_V512 = 37, + VectorT128 = 38, + VectorT256 = 39, + VectorT512 = 40, + X86Base_X64 = 41, + SSE_X64 = 42, + SSE2_X64 = 43, + SSE3_X64 = 44, + SSSE3_X64 = 45, + SSE41_X64 = 46, + SSE42_X64 = 47, + AVX_X64 = 48, + AVX2_X64 = 49, + AES_X64 = 50, + BMI1_X64 = 51, + BMI2_X64 = 52, + FMA_X64 = 53, + LZCNT_X64 = 54, + PCLMULQDQ_X64 = 55, + POPCNT_X64 = 56, + AVXVNNI_X64 = 57, + MOVBE_X64 = 58, + X86Serialize_X64 = 59, + EVEX_X64 = 60, + AVX512F_X64 = 61, + AVX512BW_X64 = 62, + AVX512CD_X64 = 63, + AVX512DQ_X64 = 64, + AVX512VBMI_X64 = 65, + AVX10v1_X64 = 66, + AVX10v1_V512_X64 = 67, } public unsafe struct InstructionSetFlags : IEnumerable @@ -1339,8 +1343,8 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("fma", "Fma", InstructionSet.X64_FMA, true); yield return new InstructionSetInfo("lzcnt", "Lzcnt", InstructionSet.X64_LZCNT, true); yield return new InstructionSetInfo("pclmul", "Pclmulqdq", InstructionSet.X64_PCLMULQDQ, true); - yield return new InstructionSetInfo("vpclmulqdq", "Pclmulqdq_V256", InstructionSet.X64_VPCLMULQDQ, true); - yield return new InstructionSetInfo("VPCLMULQDQ_V512", "", InstructionSet.X64_VPCLMULQDQ_V512, false); + yield return new InstructionSetInfo("vpclmul", "Pclmulqdq_V256", InstructionSet.X64_VPCLMULQDQ, true); + yield return new InstructionSetInfo("vpclmul_v512", "Pclmulqdq_V512", InstructionSet.X64_VPCLMULQDQ_V512, true); yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X64_POPCNT, true); yield return new InstructionSetInfo("Vector128", "", InstructionSet.X64_Vector128, false); yield return new InstructionSetInfo("Vector256", "", InstructionSet.X64_Vector256, false); @@ -1382,8 +1386,8 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("fma", "Fma", InstructionSet.X86_FMA, true); yield return new InstructionSetInfo("lzcnt", "Lzcnt", InstructionSet.X86_LZCNT, true); yield return new InstructionSetInfo("pclmul", "Pclmulqdq", InstructionSet.X86_PCLMULQDQ, true); - yield return new InstructionSetInfo("vpclmulqdq", "Pclmulqdq_V256", InstructionSet.X86_VPCLMULQDQ, true); - yield return new InstructionSetInfo("VPCLMULQDQ_V512", "", InstructionSet.X86_VPCLMULQDQ_V512, false); + yield return new InstructionSetInfo("vpclmul", "Pclmulqdq_V256", InstructionSet.X86_VPCLMULQDQ, true); + yield return new InstructionSetInfo("vpclmul_v512", "Pclmulqdq_V512", InstructionSet.X86_VPCLMULQDQ_V512, true); yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X86_POPCNT, true); yield return new InstructionSetInfo("Vector128", "", InstructionSet.X86_Vector128, false); yield return new InstructionSetInfo("Vector256", "", InstructionSet.X86_Vector256, false); @@ -1777,10 +1781,13 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite if (nestedTypeName == "X64") { return InstructionSet.X64_PCLMULQDQ_X64; } else - { return InstructionSet.X64_PCLMULQDQ; } - - case "Pclmulqdq_V256": + if (nestedTypeName == "V256") { return InstructionSet.X64_VPCLMULQDQ; } + else + if (nestedTypeName == "V512") + { return InstructionSet.X64_VPCLMULQDQ_V512; } + else + { return InstructionSet.X64_PCLMULQDQ; } case "Popcnt": if (nestedTypeName == "X64") @@ -1928,10 +1935,13 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite { return InstructionSet.X86_LZCNT; } case "Pclmulqdq": - { return InstructionSet.X86_PCLMULQDQ; } - - case "Pclmulqdq_V256": + if (nestedTypeName == "V256") { return InstructionSet.X86_VPCLMULQDQ; } + else + if (nestedTypeName == "V512") + { return InstructionSet.X86_VPCLMULQDQ_V512; } + else + { return InstructionSet.X86_PCLMULQDQ; } case "Popcnt": { return InstructionSet.X86_POPCNT; } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 15150700f6eff..d56a86f03598d 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -40,8 +40,8 @@ instructionset ,X86 ,Bmi2 , ,11 ,BMI2 instructionset ,X86 ,Fma , ,12 ,FMA ,fma instructionset ,X86 ,Lzcnt , ,13 ,LZCNT ,lzcnt instructionset ,X86 ,Pclmulqdq , ,14 ,PCLMULQDQ ,pclmul -instructionset ,X86 ,Pclmulqdq_V256 , ,48 ,VPCLMULQDQ ,vpclmulqdq -instructionset ,X86 , , , ,VPCLMULQDQ_V512 , +instructionset ,X86 ,Pclmulqdq_V256 , ,48 ,VPCLMULQDQ ,vpclmul +instructionset ,X86 ,Pclmulqdq_V512 , ,49 ,VPCLMULQDQ_V512 ,vpclmul_v512 instructionset ,X86 ,Popcnt , ,15 ,POPCNT ,popcnt instructionset ,X86 , , , ,Vector128 , instructionset ,X86 , , , ,Vector256 , From 05bea1c86d5cdda5bfdc8c00e214acba68511ce6 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Mon, 4 Nov 2024 23:28:46 -0800 Subject: [PATCH 07/15] fix AOT instruction sets --- src/coreclr/jit/compiler.cpp | 6 +----- .../tools/Common/Compiler/HardwareIntrinsicHelpers.cs | 8 ++++++-- src/coreclr/vm/codeman.cpp | 1 + 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 4312b2dbb85c4..fb372ab0d07ce 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2262,11 +2262,6 @@ void Compiler::compSetProcessor() preferredVectorByteLength = 256 / 8; } - - if (instructionSetFlags.HasInstructionSet(InstructionSet_VPCLMULQDQ)) - { - instructionSetFlags.AddInstructionSet(InstructionSet_VPCLMULQDQ_V512); - } } else { @@ -6179,6 +6174,7 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, if (JitConfig.EnableVPCLMULQDQ() != 0) { instructionSetFlags.AddInstructionSet(InstructionSet_VPCLMULQDQ); + instructionSetFlags.AddInstructionSet(InstructionSet_VPCLMULQDQ_V512); } if (JitConfig.EnablePOPCNT() != 0) diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index f759ec532dbdc..471d8401efac5 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -137,7 +137,11 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) if ((flags & Evex) != 0) builder.AddSupportedInstructionSet("evex"); if ((flags & Vpclmulqdq) != 0) - builder.AddSupportedInstructionSet("vpclmulqdq"); + { + builder.AddSupportedInstructionSet("vpclmul"); + if ((flags & Avx512) != 0) + builder.AddSupportedInstructionSet("vpclmul_v512"); + } } public static int FromInstructionSet(InstructionSet instructionSet) @@ -203,7 +207,7 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_EVEX => Evex, InstructionSet.X64_EVEX_X64 => Evex, InstructionSet.X64_VPCLMULQDQ => Vpclmulqdq, - InstructionSet.X64_VPCLMULQDQ_V512 => Vpclmulqdq, + InstructionSet.X64_VPCLMULQDQ_V512 => (Vpclmulqdq | Avx512), // Baseline ISAs - they're always available InstructionSet.X64_SSE => 0, diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 3f1252edac8dd..bd8099274c1b3 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1395,6 +1395,7 @@ void EEJitManager::SetCpuInfo() if (((cpuFeatures & XArchIntrinsicConstants_Vpclmulqdq) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableVPCLMULQDQ)) { CPUCompileFlags.Set(InstructionSet_VPCLMULQDQ); + CPUCompileFlags.Set(InstructionSet_VPCLMULQDQ_V512); } if (((cpuFeatures & XArchIntrinsicConstants_AvxVnni) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNI)) From 8d26b2430916ca98464261a301213216346011e4 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Tue, 5 Nov 2024 12:17:56 -0800 Subject: [PATCH 08/15] address feedback --- src/coreclr/inc/corinfoinstructionset.h | 64 ++++++++--------- src/coreclr/jit/compiler.cpp | 4 +- src/coreclr/jit/emitxarch.cpp | 15 +++- src/coreclr/jit/gentree.cpp | 28 ++++++-- src/coreclr/jit/gentree.h | 2 +- src/coreclr/jit/hwintrinsic.cpp | 4 +- src/coreclr/jit/hwintrinsiclistxarch.h | 18 ++--- src/coreclr/jit/hwintrinsicxarch.cpp | 8 +-- src/coreclr/jit/instrsxarch.h | 3 +- src/coreclr/jit/lowerxarch.cpp | 19 ++--- src/coreclr/jit/lsraxarch.cpp | 4 +- .../Compiler/HardwareIntrinsicHelpers.cs | 4 +- .../Runtime/ReadyToRunInstructionSetHelper.cs | 8 +-- .../JitInterface/CorInfoInstructionSet.cs | 72 +++++++++---------- .../ThunkGenerator/InstructionSetDesc.txt | 12 ++-- src/coreclr/vm/codeman.cpp | 4 +- 16 files changed, 144 insertions(+), 125 deletions(-) diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index d42edba8430d8..698778bfd1e47 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -58,8 +58,8 @@ enum CORINFO_InstructionSet InstructionSet_FMA=13, InstructionSet_LZCNT=14, InstructionSet_PCLMULQDQ=15, - InstructionSet_VPCLMULQDQ=16, - InstructionSet_VPCLMULQDQ_V512=17, + InstructionSet_PCLMULQDQ_V256=16, + InstructionSet_PCLMULQDQ_V512=17, InstructionSet_POPCNT=18, InstructionSet_Vector128=19, InstructionSet_Vector256=20, @@ -127,8 +127,8 @@ enum CORINFO_InstructionSet InstructionSet_FMA=13, InstructionSet_LZCNT=14, InstructionSet_PCLMULQDQ=15, - InstructionSet_VPCLMULQDQ=16, - InstructionSet_VPCLMULQDQ_V512=17, + InstructionSet_PCLMULQDQ_V256=16, + InstructionSet_PCLMULQDQ_V512=17, InstructionSet_POPCNT=18, InstructionSet_Vector128=19, InstructionSet_Vector256=20, @@ -600,14 +600,14 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AES); if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ); - if (resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ)) - resultflags.RemoveInstructionSet(InstructionSet_VPCLMULQDQ); - if (resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_AVX)) - resultflags.RemoveInstructionSet(InstructionSet_VPCLMULQDQ); - if (resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ)) - resultflags.RemoveInstructionSet(InstructionSet_VPCLMULQDQ_V512); - if (resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_VPCLMULQDQ_V512); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V256); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V256); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) @@ -714,14 +714,14 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AES); if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ); - if (resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ)) - resultflags.RemoveInstructionSet(InstructionSet_VPCLMULQDQ); - if (resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_AVX)) - resultflags.RemoveInstructionSet(InstructionSet_VPCLMULQDQ); - if (resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ)) - resultflags.RemoveInstructionSet(InstructionSet_VPCLMULQDQ_V512); - if (resultflags.HasInstructionSet(InstructionSet_VPCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_VPCLMULQDQ_V512); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V256); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V256); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) @@ -890,10 +890,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "PCLMULQDQ"; case InstructionSet_PCLMULQDQ_X64 : return "PCLMULQDQ_X64"; - case InstructionSet_VPCLMULQDQ : - return "VPCLMULQDQ"; - case InstructionSet_VPCLMULQDQ_V512 : - return "VPCLMULQDQ_V512"; + case InstructionSet_PCLMULQDQ_V256 : + return "PCLMULQDQ_V256"; + case InstructionSet_PCLMULQDQ_V512 : + return "PCLMULQDQ_V512"; case InstructionSet_POPCNT : return "POPCNT"; case InstructionSet_POPCNT_X64 : @@ -996,10 +996,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "LZCNT"; case InstructionSet_PCLMULQDQ : return "PCLMULQDQ"; - case InstructionSet_VPCLMULQDQ : - return "VPCLMULQDQ"; - case InstructionSet_VPCLMULQDQ_V512 : - return "VPCLMULQDQ_V512"; + case InstructionSet_PCLMULQDQ_V256 : + return "PCLMULQDQ_V256"; + case InstructionSet_PCLMULQDQ_V512 : + return "PCLMULQDQ_V512"; case InstructionSet_POPCNT : return "POPCNT"; case InstructionSet_Vector128 : @@ -1096,8 +1096,8 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Fma: return InstructionSet_FMA; case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT; case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ; - case READYTORUN_INSTRUCTION_Pclmulqdq_V256: return InstructionSet_VPCLMULQDQ; - case READYTORUN_INSTRUCTION_Pclmulqdq_V512: return InstructionSet_VPCLMULQDQ_V512; + case READYTORUN_INSTRUCTION_Pclmulqdq_V256: return InstructionSet_PCLMULQDQ_V256; + case READYTORUN_INSTRUCTION_Pclmulqdq_V512: return InstructionSet_PCLMULQDQ_V512; case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT; case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE; @@ -1135,8 +1135,8 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Fma: return InstructionSet_FMA; case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT; case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ; - case READYTORUN_INSTRUCTION_Pclmulqdq_V256: return InstructionSet_VPCLMULQDQ; - case READYTORUN_INSTRUCTION_Pclmulqdq_V512: return InstructionSet_VPCLMULQDQ_V512; + case READYTORUN_INSTRUCTION_Pclmulqdq_V256: return InstructionSet_PCLMULQDQ_V256; + case READYTORUN_INSTRUCTION_Pclmulqdq_V512: return InstructionSet_PCLMULQDQ_V512; case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT; case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE; diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index fb372ab0d07ce..99958b8ab4363 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -6173,8 +6173,8 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, if (JitConfig.EnableVPCLMULQDQ() != 0) { - instructionSetFlags.AddInstructionSet(InstructionSet_VPCLMULQDQ); - instructionSetFlags.AddInstructionSet(InstructionSet_VPCLMULQDQ_V512); + instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ_V256); + instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ_V512); } if (JitConfig.EnablePOPCNT() != 0) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 847ec5e7d580d..5656ac3e68bc4 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -254,7 +254,19 @@ bool emitter::IsEvexEncodableInstruction(instruction ins) const { return false; } - return HasEvexEncoding(ins); + + switch (ins) + { + case INS_pclmulqdq: + { + return emitComp->compOpportunisticallyDependsOn(InstructionSet_PCLMULQDQ_V256); + } + + default: + { + return HasEvexEncoding(ins); + } + } } //------------------------------------------------------------------------ @@ -19860,7 +19872,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; case INS_pclmulqdq: - case INS_vpclmulqdq: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += PERFSCORE_LATENCY_7C; break; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 62cbaedf55aaa..33491fc367e9d 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20539,11 +20539,31 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp) // EVEX form for its intended lowering instruction. // // Return Value: -// true if the intrisic node lowering instruction has an EVEX form +// true if the intrinsic node lowering instruction has an EVEX form // -bool GenTree::isEvexCompatibleHWIntrinsic() const +bool GenTree::isEvexCompatibleHWIntrinsic(Compiler* comp) const { - return OperIsHWIntrinsic() && HWIntrinsicInfo::HasEvexSemantics(AsHWIntrinsic()->GetHWIntrinsicId()); +#if defined(TARGET_XARCH) + if (OperIsHWIntrinsic()) + { + NamedIntrinsic intrinsicId = AsHWIntrinsic()->GetHWIntrinsicId(); + + switch (intrinsicId) + { + case NI_PCLMULQDQ_CarrylessMultiply: + { + return comp->compOpportunisticallyDependsOn(InstructionSet_PCLMULQDQ_V256); + } + + default: + { + return HWIntrinsicInfo::HasEvexSemantics(intrinsicId); + } + } + + } +#endif + return false; } //------------------------------------------------------------------------ @@ -20551,7 +20571,7 @@ bool GenTree::isEvexCompatibleHWIntrinsic() const // with the EVEX embedded masking form for its intended lowering instruction. // // Return Value: -// true if the intrisic node lowering instruction has an EVEX embedded masking +// true if the intrinsic node lowering instruction has an EVEX embedded masking // bool GenTree::isEmbeddedMaskingCompatibleHWIntrinsic() const { diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 6b46eafcf8422..32ef8929f893d 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1446,7 +1446,7 @@ struct GenTree bool isCommutativeHWIntrinsic() const; bool isContainableHWIntrinsic() const; bool isRMWHWIntrinsic(Compiler* comp); - bool isEvexCompatibleHWIntrinsic() const; + bool isEvexCompatibleHWIntrinsic(Compiler* comp) const; bool isEmbeddedMaskingCompatibleHWIntrinsic() const; #else bool isCommutativeHWIntrinsic() const diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 1b24037f8e06f..3eee5703ed446 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -781,8 +781,8 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { FIRST_NI_FMA, LAST_NI_FMA }, { FIRST_NI_LZCNT, LAST_NI_LZCNT }, { FIRST_NI_PCLMULQDQ, LAST_NI_PCLMULQDQ }, - { FIRST_NI_VPCLMULQDQ, LAST_NI_VPCLMULQDQ }, - { FIRST_NI_VPCLMULQDQ_V512, LAST_NI_VPCLMULQDQ_V512 }, + { FIRST_NI_PCLMULQDQ_V256, LAST_NI_PCLMULQDQ_V256 }, + { FIRST_NI_PCLMULQDQ_V512, LAST_NI_PCLMULQDQ_V512 }, { FIRST_NI_POPCNT, LAST_NI_POPCNT }, { FIRST_NI_Vector128, LAST_NI_Vector128 }, { FIRST_NI_Vector256, LAST_NI_Vector256 }, diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index dc0ef835d5f8d..80a7093a284d4 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -1524,26 +1524,26 @@ HARDWARE_INTRINSIC(LZCNT_X64, LeadingZeroCount, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // PCLMULQDQ Intrinsics #define FIRST_NI_PCLMULQDQ NI_PCLMULQDQ_CarrylessMultiply -HARDWARE_INTRINSIC(PCLMULQDQ, CarrylessMultiply, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(PCLMULQDQ, CarrylessMultiply, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) #define LAST_NI_PCLMULQDQ NI_PCLMULQDQ_CarrylessMultiply // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// VPCLMULQDQ Intrinsics -#define FIRST_NI_VPCLMULQDQ NI_VPCLMULQDQ_CarrylessMultiply -HARDWARE_INTRINSIC(VPCLMULQDQ, CarrylessMultiply, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpclmulqdq, INS_vpclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -#define LAST_NI_VPCLMULQDQ NI_VPCLMULQDQ_CarrylessMultiply +// PCLMULQDQ_V256 Intrinsics +#define FIRST_NI_PCLMULQDQ_V256 NI_PCLMULQDQ_V256_CarrylessMultiply +HARDWARE_INTRINSIC(PCLMULQDQ_V256, CarrylessMultiply, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +#define LAST_NI_PCLMULQDQ_V256 NI_PCLMULQDQ_V256_CarrylessMultiply // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// VPCLMULQDQ_V512 Intrinsics -#define FIRST_NI_VPCLMULQDQ_V512 NI_VPCLMULQDQ_V512_CarrylessMultiply -HARDWARE_INTRINSIC(VPCLMULQDQ_V512, CarrylessMultiply, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpclmulqdq, INS_vpclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -#define LAST_NI_VPCLMULQDQ_V512 NI_VPCLMULQDQ_V512_CarrylessMultiply +// PCLMULQDQ_V512 Intrinsics +#define FIRST_NI_PCLMULQDQ_V512 NI_PCLMULQDQ_V512_CarrylessMultiply +HARDWARE_INTRINSIC(PCLMULQDQ_V512, CarrylessMultiply, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +#define LAST_NI_PCLMULQDQ_V512 NI_PCLMULQDQ_V512_CarrylessMultiply // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 8576c9fce3a55..28d415fcd9d93 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -115,7 +115,7 @@ static CORINFO_InstructionSet V256VersionOfIsa(CORINFO_InstructionSet isa) switch (isa) { case InstructionSet_PCLMULQDQ: - return InstructionSet_VPCLMULQDQ; + return InstructionSet_PCLMULQDQ_V256; default: return InstructionSet_NONE; } @@ -138,7 +138,7 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) case InstructionSet_AVX10v1_X64: return InstructionSet_AVX10v1_V512_X64; case InstructionSet_PCLMULQDQ: - return InstructionSet_VPCLMULQDQ_V512; + return InstructionSet_PCLMULQDQ_V512; default: return InstructionSet_NONE; } @@ -872,8 +872,8 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) case InstructionSet_LZCNT_X64: case InstructionSet_PCLMULQDQ: case InstructionSet_PCLMULQDQ_X64: - case InstructionSet_VPCLMULQDQ: - case InstructionSet_VPCLMULQDQ_V512: + case InstructionSet_PCLMULQDQ_V256: + case InstructionSet_PCLMULQDQ_V512: case InstructionSet_POPCNT: case InstructionSet_POPCNT_X64: case InstructionSet_SSE: diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index bb44fb79a8ad8..4442e39b7a885 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -400,8 +400,7 @@ INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE, INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES encryption flow INST3(aesimc, "aesimc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), INS_TT_NONE, REX_WIG | Encoding_VEX) // Perform the AES InvMixColumn Transformation INST3(aeskeygenassist, "aeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_TT_NONE, REX_WIG | Encoding_VEX) // AES Round Key Generation Assist -INST3(pclmulqdq, "pclmulqdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords -INST3(vpclmulqdq, "pclmulqdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_FULL_MEM, Input_64Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords +INST3(pclmulqdq, "pclmulqdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_FULL_MEM, Input_64Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords // SSE4.1 INST3(blendpd, "blendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 7182cf60a0303..8552fc85702f0 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -2592,17 +2592,6 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) return LowerHWIntrinsicTernaryLogic(node); } - case NI_PCLMULQDQ_CarrylessMultiply: - { - // The EVEX form of 128-bit pclmulqdq requires VPCLMULQDQ in addition to AVX512VL - if (comp->compOpportunisticallyDependsOn(InstructionSet_VPCLMULQDQ)) - { - intrinsicId = NI_VPCLMULQDQ_CarrylessMultiply; - node->ChangeHWIntrinsicId(intrinsicId); - } - break; - } - default: break; } @@ -9352,8 +9341,8 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre case NI_SSE41_MultipleSumAbsoluteDifferences: case NI_AES_KeygenAssist: case NI_PCLMULQDQ_CarrylessMultiply: - case NI_VPCLMULQDQ_CarrylessMultiply: - case NI_VPCLMULQDQ_V512_CarrylessMultiply: + case NI_PCLMULQDQ_V256_CarrylessMultiply: + case NI_PCLMULQDQ_V512_CarrylessMultiply: case NI_AVX_Blend: case NI_AVX_Compare: case NI_AVX_DotProduct: @@ -11329,8 +11318,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX512DQ_VL_Range: case NI_AVX512DQ_ReduceScalar: case NI_PCLMULQDQ_CarrylessMultiply: - case NI_VPCLMULQDQ_CarrylessMultiply: - case NI_VPCLMULQDQ_V512_CarrylessMultiply: + case NI_PCLMULQDQ_V256_CarrylessMultiply: + case NI_PCLMULQDQ_V512_CarrylessMultiply: case NI_AVX10v1_AlignRight32: case NI_AVX10v1_AlignRight64: case NI_AVX10v1_GetMantissaScalar: diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index cf3f3efacd9fa..bc62b2812ba66 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2140,7 +2140,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // is not allocated the same register as the target. bool isRMW = intrinsicTree->isRMWHWIntrinsic(compiler); #if defined(TARGET_AMD64) - bool isEvexCompatible = intrinsicTree->isEvexCompatibleHWIntrinsic(); + bool isEvexCompatible = intrinsicTree->isEvexCompatibleHWIntrinsic(compiler); #endif // TARGET_AMD64 // Create internal temps, and handle any other special requirements. @@ -2789,7 +2789,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (dstCount == 1) { #if defined(TARGET_AMD64) - bool isEvexCompatible = intrinsicTree->isEvexCompatibleHWIntrinsic(); + bool isEvexCompatible = intrinsicTree->isEvexCompatibleHWIntrinsic(compiler); if (!isEvexCompatible) { diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index 471d8401efac5..7ebfec4097f6d 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -206,8 +206,8 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_AVX10v1_V512_X64 => (Avx10v1 | Avx512), InstructionSet.X64_EVEX => Evex, InstructionSet.X64_EVEX_X64 => Evex, - InstructionSet.X64_VPCLMULQDQ => Vpclmulqdq, - InstructionSet.X64_VPCLMULQDQ_V512 => (Vpclmulqdq | Avx512), + InstructionSet.X64_PCLMULQDQ_V256 => Vpclmulqdq, + InstructionSet.X64_PCLMULQDQ_V512 => (Vpclmulqdq | Avx512), // Baseline ISAs - they're always available InstructionSet.X64_SSE => 0, diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 1bc00e79163cc..27cbcc5a210ef 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -87,8 +87,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_LZCNT_X64: return ReadyToRunInstructionSet.Lzcnt; case InstructionSet.X64_PCLMULQDQ: return ReadyToRunInstructionSet.Pclmulqdq; case InstructionSet.X64_PCLMULQDQ_X64: return ReadyToRunInstructionSet.Pclmulqdq; - case InstructionSet.X64_VPCLMULQDQ: return ReadyToRunInstructionSet.Pclmulqdq_V256; - case InstructionSet.X64_VPCLMULQDQ_V512: return ReadyToRunInstructionSet.Pclmulqdq_V512; + case InstructionSet.X64_PCLMULQDQ_V256: return ReadyToRunInstructionSet.Pclmulqdq_V256; + case InstructionSet.X64_PCLMULQDQ_V512: return ReadyToRunInstructionSet.Pclmulqdq_V512; case InstructionSet.X64_POPCNT: return ReadyToRunInstructionSet.Popcnt; case InstructionSet.X64_POPCNT_X64: return ReadyToRunInstructionSet.Popcnt; case InstructionSet.X64_Vector128: return null; @@ -163,8 +163,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_LZCNT_X64: return null; case InstructionSet.X86_PCLMULQDQ: return ReadyToRunInstructionSet.Pclmulqdq; case InstructionSet.X86_PCLMULQDQ_X64: return null; - case InstructionSet.X86_VPCLMULQDQ: return ReadyToRunInstructionSet.Pclmulqdq_V256; - case InstructionSet.X86_VPCLMULQDQ_V512: return ReadyToRunInstructionSet.Pclmulqdq_V512; + case InstructionSet.X86_PCLMULQDQ_V256: return ReadyToRunInstructionSet.Pclmulqdq_V256; + case InstructionSet.X86_PCLMULQDQ_V512: return ReadyToRunInstructionSet.Pclmulqdq_V512; case InstructionSet.X86_POPCNT: return ReadyToRunInstructionSet.Popcnt; case InstructionSet.X86_POPCNT_X64: return null; case InstructionSet.X86_Vector128: return null; diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 108a0fb5644bf..6ed64fc23dd5e 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -56,8 +56,8 @@ public enum InstructionSet X64_FMA = InstructionSet_X64.FMA, X64_LZCNT = InstructionSet_X64.LZCNT, X64_PCLMULQDQ = InstructionSet_X64.PCLMULQDQ, - X64_VPCLMULQDQ = InstructionSet_X64.VPCLMULQDQ, - X64_VPCLMULQDQ_V512 = InstructionSet_X64.VPCLMULQDQ_V512, + X64_PCLMULQDQ_V256 = InstructionSet_X64.PCLMULQDQ_V256, + X64_PCLMULQDQ_V512 = InstructionSet_X64.PCLMULQDQ_V512, X64_POPCNT = InstructionSet_X64.POPCNT, X64_Vector128 = InstructionSet_X64.Vector128, X64_Vector256 = InstructionSet_X64.Vector256, @@ -123,8 +123,8 @@ public enum InstructionSet X86_FMA = InstructionSet_X86.FMA, X86_LZCNT = InstructionSet_X86.LZCNT, X86_PCLMULQDQ = InstructionSet_X86.PCLMULQDQ, - X86_VPCLMULQDQ = InstructionSet_X86.VPCLMULQDQ, - X86_VPCLMULQDQ_V512 = InstructionSet_X86.VPCLMULQDQ_V512, + X86_PCLMULQDQ_V256 = InstructionSet_X86.PCLMULQDQ_V256, + X86_PCLMULQDQ_V512 = InstructionSet_X86.PCLMULQDQ_V512, X86_POPCNT = InstructionSet_X86.POPCNT, X86_Vector128 = InstructionSet_X86.Vector128, X86_Vector256 = InstructionSet_X86.Vector256, @@ -226,8 +226,8 @@ public enum InstructionSet_X64 FMA = 13, LZCNT = 14, PCLMULQDQ = 15, - VPCLMULQDQ = 16, - VPCLMULQDQ_V512 = 17, + PCLMULQDQ_V256 = 16, + PCLMULQDQ_V512 = 17, POPCNT = 18, Vector128 = 19, Vector256 = 20, @@ -299,8 +299,8 @@ public enum InstructionSet_X86 FMA = 13, LZCNT = 14, PCLMULQDQ = 15, - VPCLMULQDQ = 16, - VPCLMULQDQ_V512 = 17, + PCLMULQDQ_V256 = 16, + PCLMULQDQ_V512 = 17, POPCNT = 18, Vector128 = 19, Vector256 = 20, @@ -742,13 +742,13 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_SSE2); if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ)) resultflags.AddInstructionSet(InstructionSet.X64_SSE2); - if (resultflags.HasInstructionSet(InstructionSet.X64_VPCLMULQDQ)) + if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V256)) resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ); - if (resultflags.HasInstructionSet(InstructionSet.X64_VPCLMULQDQ)) + if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V256)) resultflags.AddInstructionSet(InstructionSet.X64_AVX); - if (resultflags.HasInstructionSet(InstructionSet.X64_VPCLMULQDQ_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_VPCLMULQDQ); - if (resultflags.HasInstructionSet(InstructionSet.X64_VPCLMULQDQ_V512)) + if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V256); + if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V512)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI)) resultflags.AddInstructionSet(InstructionSet.X64_AVX2); @@ -857,13 +857,13 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_SSE2); if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ)) resultflags.AddInstructionSet(InstructionSet.X86_SSE2); - if (resultflags.HasInstructionSet(InstructionSet.X86_VPCLMULQDQ)) + if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V256)) resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ); - if (resultflags.HasInstructionSet(InstructionSet.X86_VPCLMULQDQ)) + if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V256)) resultflags.AddInstructionSet(InstructionSet.X86_AVX); - if (resultflags.HasInstructionSet(InstructionSet.X86_VPCLMULQDQ_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_VPCLMULQDQ); - if (resultflags.HasInstructionSet(InstructionSet.X86_VPCLMULQDQ_V512)) + if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V256); + if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V512)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNI)) resultflags.AddInstructionSet(InstructionSet.X86_AVX2); @@ -1090,13 +1090,13 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ); if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ)) - resultflags.AddInstructionSet(InstructionSet.X64_VPCLMULQDQ); + resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V256); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX)) - resultflags.AddInstructionSet(InstructionSet.X64_VPCLMULQDQ); - if (resultflags.HasInstructionSet(InstructionSet.X64_VPCLMULQDQ)) - resultflags.AddInstructionSet(InstructionSet.X64_VPCLMULQDQ_V512); + resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V256); + if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V256)) + resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X64_VPCLMULQDQ_V512); + resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) @@ -1205,13 +1205,13 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2)) resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ); if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ)) - resultflags.AddInstructionSet(InstructionSet.X86_VPCLMULQDQ); + resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V256); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX)) - resultflags.AddInstructionSet(InstructionSet.X86_VPCLMULQDQ); - if (resultflags.HasInstructionSet(InstructionSet.X86_VPCLMULQDQ)) - resultflags.AddInstructionSet(InstructionSet.X86_VPCLMULQDQ_V512); + resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V256); + if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V256)) + resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X86_VPCLMULQDQ_V512); + resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) @@ -1343,8 +1343,8 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("fma", "Fma", InstructionSet.X64_FMA, true); yield return new InstructionSetInfo("lzcnt", "Lzcnt", InstructionSet.X64_LZCNT, true); yield return new InstructionSetInfo("pclmul", "Pclmulqdq", InstructionSet.X64_PCLMULQDQ, true); - yield return new InstructionSetInfo("vpclmul", "Pclmulqdq_V256", InstructionSet.X64_VPCLMULQDQ, true); - yield return new InstructionSetInfo("vpclmul_v512", "Pclmulqdq_V512", InstructionSet.X64_VPCLMULQDQ_V512, true); + yield return new InstructionSetInfo("vpclmul", "Pclmulqdq_V256", InstructionSet.X64_PCLMULQDQ_V256, true); + yield return new InstructionSetInfo("vpclmul_v512", "Pclmulqdq_V512", InstructionSet.X64_PCLMULQDQ_V512, true); yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X64_POPCNT, true); yield return new InstructionSetInfo("Vector128", "", InstructionSet.X64_Vector128, false); yield return new InstructionSetInfo("Vector256", "", InstructionSet.X64_Vector256, false); @@ -1386,8 +1386,8 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("fma", "Fma", InstructionSet.X86_FMA, true); yield return new InstructionSetInfo("lzcnt", "Lzcnt", InstructionSet.X86_LZCNT, true); yield return new InstructionSetInfo("pclmul", "Pclmulqdq", InstructionSet.X86_PCLMULQDQ, true); - yield return new InstructionSetInfo("vpclmul", "Pclmulqdq_V256", InstructionSet.X86_VPCLMULQDQ, true); - yield return new InstructionSetInfo("vpclmul_v512", "Pclmulqdq_V512", InstructionSet.X86_VPCLMULQDQ_V512, true); + yield return new InstructionSetInfo("vpclmul", "Pclmulqdq_V256", InstructionSet.X86_PCLMULQDQ_V256, true); + yield return new InstructionSetInfo("vpclmul_v512", "Pclmulqdq_V512", InstructionSet.X86_PCLMULQDQ_V512, true); yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X86_POPCNT, true); yield return new InstructionSetInfo("Vector128", "", InstructionSet.X86_Vector128, false); yield return new InstructionSetInfo("Vector256", "", InstructionSet.X86_Vector256, false); @@ -1782,10 +1782,10 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite { return InstructionSet.X64_PCLMULQDQ_X64; } else if (nestedTypeName == "V256") - { return InstructionSet.X64_VPCLMULQDQ; } + { return InstructionSet.X64_PCLMULQDQ_V256; } else if (nestedTypeName == "V512") - { return InstructionSet.X64_VPCLMULQDQ_V512; } + { return InstructionSet.X64_PCLMULQDQ_V512; } else { return InstructionSet.X64_PCLMULQDQ; } @@ -1936,10 +1936,10 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite case "Pclmulqdq": if (nestedTypeName == "V256") - { return InstructionSet.X86_VPCLMULQDQ; } + { return InstructionSet.X86_PCLMULQDQ_V256; } else if (nestedTypeName == "V512") - { return InstructionSet.X86_VPCLMULQDQ_V512; } + { return InstructionSet.X86_PCLMULQDQ_V512; } else { return InstructionSet.X86_PCLMULQDQ; } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index d56a86f03598d..193e0aa8d6b0c 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -40,8 +40,8 @@ instructionset ,X86 ,Bmi2 , ,11 ,BMI2 instructionset ,X86 ,Fma , ,12 ,FMA ,fma instructionset ,X86 ,Lzcnt , ,13 ,LZCNT ,lzcnt instructionset ,X86 ,Pclmulqdq , ,14 ,PCLMULQDQ ,pclmul -instructionset ,X86 ,Pclmulqdq_V256 , ,48 ,VPCLMULQDQ ,vpclmul -instructionset ,X86 ,Pclmulqdq_V512 , ,49 ,VPCLMULQDQ_V512 ,vpclmul_v512 +instructionset ,X86 ,Pclmulqdq_V256 , ,48 ,PCLMULQDQ_V256 ,vpclmul +instructionset ,X86 ,Pclmulqdq_V512 , ,49 ,PCLMULQDQ_V512 ,vpclmul_v512 instructionset ,X86 ,Popcnt , ,15 ,POPCNT ,popcnt instructionset ,X86 , , , ,Vector128 , instructionset ,X86 , , , ,Vector256 , @@ -144,10 +144,10 @@ implication ,X86 ,AVX512VBMI_VL ,AVX512BW_VL implication ,X86 ,AES ,SSE2 implication ,X86 ,PCLMULQDQ ,SSE2 -implication ,X86 ,VPCLMULQDQ ,PCLMULQDQ -implication ,X86 ,VPCLMULQDQ ,AVX -implication ,X86 ,VPCLMULQDQ_V512 ,VPCLMULQDQ -implication ,X86 ,VPCLMULQDQ_V512 ,AVX512F +implication ,X86 ,PCLMULQDQ_V256 ,PCLMULQDQ +implication ,X86 ,PCLMULQDQ_V256 ,AVX +implication ,X86 ,PCLMULQDQ_V512 ,PCLMULQDQ_V256 +implication ,X86 ,PCLMULQDQ_V512 ,AVX512F implication ,X86 ,AVXVNNI ,AVX2 implication ,X86 ,X86Serialize ,X86Base diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index bd8099274c1b3..2395ad50bf564 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1394,8 +1394,8 @@ void EEJitManager::SetCpuInfo() if (((cpuFeatures & XArchIntrinsicConstants_Vpclmulqdq) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableVPCLMULQDQ)) { - CPUCompileFlags.Set(InstructionSet_VPCLMULQDQ); - CPUCompileFlags.Set(InstructionSet_VPCLMULQDQ_V512); + CPUCompileFlags.Set(InstructionSet_PCLMULQDQ_V256); + CPUCompileFlags.Set(InstructionSet_PCLMULQDQ_V512); } if (((cpuFeatures & XArchIntrinsicConstants_AvxVnni) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNI)) From f177aee08aeb1bde9d11373bd729f6c4d6169420 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Tue, 5 Nov 2024 12:32:38 -0800 Subject: [PATCH 09/15] apply formatting patch --- src/coreclr/jit/gentree.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 33491fc367e9d..5d6e10d481a56 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20560,7 +20560,6 @@ bool GenTree::isEvexCompatibleHWIntrinsic(Compiler* comp) const return HWIntrinsicInfo::HasEvexSemantics(intrinsicId); } } - } #endif return false; From 23f2116c76622bc8f4072e0acfc23bda4193bd7a Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Fri, 8 Nov 2024 16:14:15 -0800 Subject: [PATCH 10/15] address feedback round 2 --- .../Common/Compiler/InstructionSetSupport.cs | 44 +++++---------- .../tools/Common/InstructionSetHelpers.cs | 2 + .../Compiler/HardwareIntrinsicHelpers.Aot.cs | 5 +- .../System.Private.CoreLib.Shared.projitems | 38 +++++-------- .../X86/Pclmulqdq.PlatformNotSupported.cs | 44 +-------------- .../Pclmulqdq.Wide.PlatformNotSupported.cs | 54 ++++++++++++++++++ .../Runtime/Intrinsics/X86/Pclmulqdq.Wide.cs | 55 +++++++++++++++++++ .../Runtime/Intrinsics/X86/Pclmulqdq.cs | 46 +--------------- .../ILLink.Substitutions.Intrinsics.x86.xml | 6 ++ .../X86/General/IsSupported.cs | 36 +++++++++++- .../HardwareIntrinsics/X86/X86Base/CpuId.cs | 18 +++++- .../SmokeTests/HardwareIntrinsics/Program.cs | 14 +++++ 12 files changed, 214 insertions(+), 148 deletions(-) create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.Wide.PlatformNotSupported.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.Wide.cs diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index 68ad176e63b19..34ffde36997cf 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -70,51 +70,33 @@ public static string GetHardwareIntrinsicId(TargetArchitecture architecture, Typ if (!potentialTypeDesc.IsIntrinsic || !(potentialTypeDesc is MetadataType potentialType)) return ""; + // 64-bit ISA variants are not included in the mapping dictionary, so we use the containing type instead + if ((architecture, potentialType.Name) is (TargetArchitecture.X64, "X64") or (TargetArchitecture.ARM64, "Arm64")) + potentialType = (MetadataType)potentialType.ContainingType; + + // We assume that managed names in InstructionSetDesc.txt use an underscore separator for nested classes string suffix = ""; - if (architecture == TargetArchitecture.X64) + while (potentialType.ContainingType is MetadataType containingType) { - if (potentialType.Name == "X64") - potentialType = (MetadataType)potentialType.ContainingType; - if (potentialType.Name == "VL") - potentialType = (MetadataType)potentialType.ContainingType; - if (potentialType.Name == "V512") - { - suffix = "_V512"; - potentialType = (MetadataType)potentialType.ContainingType; - } - - if (potentialType.Namespace != "System.Runtime.Intrinsics.X86") - return ""; + suffix += $"_{potentialType.Name}"; + potentialType = containingType; } - else if (architecture == TargetArchitecture.X86) + + if (architecture is TargetArchitecture.X64 or TargetArchitecture.X86) { - if (potentialType.Name == "VL") - potentialType = (MetadataType)potentialType.ContainingType; - if (potentialType.Name == "V512") - { - suffix = "_V512"; - potentialType = (MetadataType)potentialType.ContainingType; - } if (potentialType.Namespace != "System.Runtime.Intrinsics.X86") return ""; } - else if (architecture == TargetArchitecture.ARM64) - { - if (potentialType.Name == "Arm64") - potentialType = (MetadataType)potentialType.ContainingType; - if (potentialType.Namespace != "System.Runtime.Intrinsics.Arm") - return ""; - } - else if (architecture == TargetArchitecture.ARM) + else if (architecture is TargetArchitecture.ARM64 or TargetArchitecture.ARM) { if (potentialType.Namespace != "System.Runtime.Intrinsics.Arm") return ""; } - else if (architecture == TargetArchitecture.LoongArch64) + else if (architecture is TargetArchitecture.LoongArch64) { return ""; } - else if (architecture == TargetArchitecture.RiscV64) + else if (architecture is TargetArchitecture.RiscV64) { return ""; } diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index a9c4b35ed8de8..95811afd7fbcd 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -209,6 +209,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("fma"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi2"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("vpclmul"); } Debug.Assert(InstructionSet.X64_AVX512F == InstructionSet.X86_AVX512F); @@ -226,6 +227,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vbmi_vl"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v1"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v1_v512"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("vpclmul_v512"); } } else if (targetArchitecture == TargetArchitecture.ARM64) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs index 85e7a943dba4a..f235483b67f8f 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs @@ -49,11 +49,10 @@ public static MethodIL EmitIsSupportedIL(MethodDesc method, FieldDesc isSupporte var emit = new ILEmitter(); ILCodeStream codeStream = emit.NewCodeStream(); - if(!uint.IsPow2((uint)flag)) + if (!uint.IsPow2((uint)flag)) { // These are the ISAs managed by multiple-bit flags. - // we need to emit different IL to handle the checks. - // For now just Avx10v1_V512 = (Avx10v1 | Avx512) + // We need to emit different IL to handle the checks. // (isSupportedField & flag) == flag codeStream.Emit(ILOpcode.ldsfld, emit.NewToken(isSupportedField)); codeStream.EmitLdc(flag); diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 1b6323b940e86..b8cb658322f89 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -18,6 +18,7 @@ true true false + .PlatformNotSupported $(MSBuildThisFileDirectory)ILLink\ true true @@ -2614,30 +2615,21 @@ - - - - - - - - - - - - - - - - - - + + + + + + + + + - - + + @@ -2646,8 +2638,7 @@ - - + @@ -2665,6 +2656,7 @@ + @@ -2803,4 +2795,4 @@ - \ No newline at end of file + diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs index 9c034adaaf5d8..f57f1d1c9fde1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs @@ -10,7 +10,7 @@ namespace System.Runtime.Intrinsics.X86 { /// Provides access to X86 CLMUL hardware instructions via intrinsics. [CLSCompliant(false)] - public abstract class Pclmulqdq : Sse2 + public abstract partial class Pclmulqdq : Sse2 { internal Pclmulqdq() { } @@ -30,48 +30,6 @@ internal X64() { } public static new bool IsSupported { [Intrinsic] get { return false; } } } - public abstract class V256 - { - internal V256() { } - - /// Gets a value that indicates whether the APIs in this class are supported. - /// if the APIs are supported; otherwise, . - /// A value of indicates that the APIs will throw . - public static bool IsSupported { [Intrinsic] get { return false; } } - - /// - /// __m256i _mm256_clmulepi64_epi128 (__m256i a, __m256i b, const int imm8) - /// VPCLMULQDQ ymm1, ymm2, ymm3/m256, imm8 - /// - public static Vector256 CarrylessMultiply(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_clmulepi64_epi128 (__m256i a, __m256i b, const int imm8) - /// VPCLMULQDQ ymm1, ymm2, ymm3/m256, imm8 - /// - public static Vector256 CarrylessMultiply(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - } - - public abstract class V512 - { - internal V512() { } - - /// Gets a value that indicates whether the APIs in this class are supported. - /// if the APIs are supported; otherwise, . - /// A value of indicates that the APIs will throw . - public static bool IsSupported { [Intrinsic] get { return false; } } - - /// - /// __m512i _mm512_clmulepi64_epi128 (__m512i a, __m512i b, const int imm8) - /// VPCLMULQDQ zmm1, zmm2, zmm3/m512, imm8 - /// - public static Vector512 CarrylessMultiply(Vector512 left, Vector512 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// - /// __m512i _mm512_clmulepi64_epi128 (__m512i a, __m512i b, const int imm8) - /// VPCLMULQDQ zmm1, zmm2, zmm3/m512, imm8 - /// - public static Vector512 CarrylessMultiply(Vector512 left, Vector512 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - } - /// /// __m128i _mm_clmulepi64_si128 (__m128i a, __m128i b, const int imm8) /// PCLMULQDQ xmm1, xmm2/m128, imm8 diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.Wide.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.Wide.PlatformNotSupported.cs new file mode 100644 index 0000000000000..795323e6c626f --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.Wide.PlatformNotSupported.cs @@ -0,0 +1,54 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; + +namespace System.Runtime.Intrinsics.X86 +{ + public abstract partial class Pclmulqdq : Sse2 + { + public abstract class V256 + { + internal V256() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { [Intrinsic] get { return false; } } + + /// + /// __m256i _mm256_clmulepi64_epi128 (__m256i a, __m256i b, const int imm8) + /// VPCLMULQDQ ymm1, ymm2, ymm3/m256, imm8 + /// + public static Vector256 CarrylessMultiply(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_clmulepi64_epi128 (__m256i a, __m256i b, const int imm8) + /// VPCLMULQDQ ymm1, ymm2, ymm3/m256, imm8 + /// + public static Vector256 CarrylessMultiply(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } + } + + public abstract class V512 + { + internal V512() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { [Intrinsic] get { return false; } } + + /// + /// __m512i _mm512_clmulepi64_epi128 (__m512i a, __m512i b, const int imm8) + /// VPCLMULQDQ zmm1, zmm2, zmm3/m512, imm8 + /// + public static Vector512 CarrylessMultiply(Vector512 left, Vector512 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } + /// + /// __m512i _mm512_clmulepi64_epi128 (__m512i a, __m512i b, const int imm8) + /// VPCLMULQDQ zmm1, zmm2, zmm3/m512, imm8 + /// + public static Vector512 CarrylessMultiply(Vector512 left, Vector512 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.Wide.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.Wide.cs new file mode 100644 index 0000000000000..b68d020707a3a --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.Wide.cs @@ -0,0 +1,55 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; + +namespace System.Runtime.Intrinsics.X86 +{ + public abstract partial class Pclmulqdq : Sse2 + { + [Intrinsic] + public abstract class V256 + { + internal V256() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { get => IsSupported; } + + /// + /// __m256i _mm256_clmulepi64_epi128 (__m256i a, __m256i b, const int imm8) + /// VPCLMULQDQ ymm1, ymm2, ymm3/m256, imm8 + /// + public static Vector256 CarrylessMultiply(Vector256 left, Vector256 right, [ConstantExpected] byte control) => CarrylessMultiply(left, right, control); + /// + /// __m256i _mm256_clmulepi64_epi128 (__m256i a, __m256i b, const int imm8) + /// VPCLMULQDQ ymm1, ymm2, ymm3/m256, imm8 + /// + public static Vector256 CarrylessMultiply(Vector256 left, Vector256 right, [ConstantExpected] byte control) => CarrylessMultiply(left, right, control); + } + + [Intrinsic] + public abstract class V512 + { + internal V512() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { get => IsSupported; } + + /// + /// __m512i _mm512_clmulepi64_epi128 (__m512i a, __m512i b, const int imm8) + /// VPCLMULQDQ zmm1, zmm2, zmm3/m512, imm8 + /// + public static Vector512 CarrylessMultiply(Vector512 left, Vector512 right, [ConstantExpected] byte control) => CarrylessMultiply(left, right, control); + /// + /// __m512i _mm512_clmulepi64_epi128 (__m512i a, __m512i b, const int imm8) + /// VPCLMULQDQ zmm1, zmm2, zmm3/m512, imm8 + /// + public static Vector512 CarrylessMultiply(Vector512 left, Vector512 right, [ConstantExpected] byte control) => CarrylessMultiply(left, right, control); + } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs index f32f65859a7af..9daed97ffea23 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs @@ -9,7 +9,7 @@ namespace System.Runtime.Intrinsics.X86 /// Provides access to X86 CLMUL hardware instructions via intrinsics. [Intrinsic] [CLSCompliant(false)] - public abstract class Pclmulqdq : Sse2 + public abstract partial class Pclmulqdq : Sse2 { internal Pclmulqdq() { } @@ -30,50 +30,6 @@ internal X64() { } public static new bool IsSupported { get => IsSupported; } } - [Intrinsic] - public abstract class V256 - { - internal V256() { } - - /// Gets a value that indicates whether the APIs in this class are supported. - /// if the APIs are supported; otherwise, . - /// A value of indicates that the APIs will throw . - public static bool IsSupported { get => IsSupported; } - - /// - /// __m256i _mm256_clmulepi64_epi128 (__m256i a, __m256i b, const int imm8) - /// VPCLMULQDQ ymm1, ymm2, ymm3/m256, imm8 - /// - public static Vector256 CarrylessMultiply(Vector256 left, Vector256 right, [ConstantExpected] byte control) => CarrylessMultiply(left, right, control); - /// - /// __m256i _mm256_clmulepi64_epi128 (__m256i a, __m256i b, const int imm8) - /// VPCLMULQDQ ymm1, ymm2, ymm3/m256, imm8 - /// - public static Vector256 CarrylessMultiply(Vector256 left, Vector256 right, [ConstantExpected] byte control) => CarrylessMultiply(left, right, control); - } - - [Intrinsic] - public abstract class V512 - { - internal V512() { } - - /// Gets a value that indicates whether the APIs in this class are supported. - /// if the APIs are supported; otherwise, . - /// A value of indicates that the APIs will throw . - public static bool IsSupported { get => IsSupported; } - - /// - /// __m512i _mm512_clmulepi64_epi128 (__m512i a, __m512i b, const int imm8) - /// VPCLMULQDQ zmm1, zmm2, zmm3/m512, imm8 - /// - public static Vector512 CarrylessMultiply(Vector512 left, Vector512 right, [ConstantExpected] byte control) => CarrylessMultiply(left, right, control); - /// - /// __m512i _mm512_clmulepi64_epi128 (__m512i a, __m512i b, const int imm8) - /// VPCLMULQDQ zmm1, zmm2, zmm3/m512, imm8 - /// - public static Vector512 CarrylessMultiply(Vector512 left, Vector512 right, [ConstantExpected] byte control) => CarrylessMultiply(left, right, control); - } - /// /// __m128i _mm_clmulepi64_si128 (__m128i a, __m128i b, const int imm8) /// PCLMULQDQ xmm1, xmm2/m128, imm8 diff --git a/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml index 37307c726c5cf..7050d50dc71c5 100644 --- a/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml +++ b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml @@ -81,6 +81,12 @@ + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs b/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs index 922c6392f7e5d..f03c8671ae9ae 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs @@ -10,7 +10,7 @@ namespace IntelHardwareIntrinsicTest.General { public partial class Program { - [Xunit.ActiveIssue("https://github.com/dotnet/runtime/issues/75767", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsMonoLLVMAOT))] + [Xunit.ActiveIssue("https://github.com/dotnet/runtime/issues/91392", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsMonoLLVMAOT))] [Fact] public static void IsSupported() { @@ -46,13 +46,47 @@ public static void IsSupported() Convert.ToBoolean(typeof(Bmi2).GetMethod(issupported).Invoke(null, null)) != Bmi2.IsSupported || Convert.ToBoolean(typeof(Sse.X64).GetMethod(issupported).Invoke(null, null)) != Sse.X64.IsSupported || Convert.ToBoolean(typeof(Sse2.X64).GetMethod(issupported).Invoke(null, null)) != Sse2.X64.IsSupported || + Convert.ToBoolean(typeof(Sse3.X64).GetMethod(issupported).Invoke(null, null)) != Sse3.X64.IsSupported || + Convert.ToBoolean(typeof(Ssse3.X64).GetMethod(issupported).Invoke(null, null)) != Ssse3.X64.IsSupported || Convert.ToBoolean(typeof(Sse41.X64).GetMethod(issupported).Invoke(null, null)) != Sse41.X64.IsSupported || Convert.ToBoolean(typeof(Sse42.X64).GetMethod(issupported).Invoke(null, null)) != Sse42.X64.IsSupported || + Convert.ToBoolean(typeof(Avx.X64).GetMethod(issupported).Invoke(null, null)) != Avx.X64.IsSupported || + Convert.ToBoolean(typeof(Avx2.X64).GetMethod(issupported).Invoke(null, null)) != Avx2.X64.IsSupported || Convert.ToBoolean(typeof(Lzcnt.X64).GetMethod(issupported).Invoke(null, null)) != Lzcnt.X64.IsSupported || Convert.ToBoolean(typeof(Popcnt.X64).GetMethod(issupported).Invoke(null, null)) != Popcnt.X64.IsSupported || Convert.ToBoolean(typeof(Bmi1.X64).GetMethod(issupported).Invoke(null, null)) != Bmi1.X64.IsSupported || Convert.ToBoolean(typeof(Bmi2.X64).GetMethod(issupported).Invoke(null, null)) != Bmi2.X64.IsSupported || + Convert.ToBoolean(typeof(Aes).GetMethod(issupported).Invoke(null, null)) != Aes.IsSupported || + Convert.ToBoolean(typeof(Aes.X64).GetMethod(issupported).Invoke(null, null)) != Aes.X64.IsSupported || + Convert.ToBoolean(typeof(Avx512BW).GetMethod(issupported).Invoke(null, null)) != Avx512BW.IsSupported || + Convert.ToBoolean(typeof(Avx512BW.VL).GetMethod(issupported).Invoke(null, null)) != Avx512BW.VL.IsSupported || + Convert.ToBoolean(typeof(Avx512BW.X64).GetMethod(issupported).Invoke(null, null)) != Avx512BW.X64.IsSupported || + Convert.ToBoolean(typeof(Avx512CD).GetMethod(issupported).Invoke(null, null)) != Avx512CD.IsSupported || + Convert.ToBoolean(typeof(Avx512CD.VL).GetMethod(issupported).Invoke(null, null)) != Avx512CD.VL.IsSupported || + Convert.ToBoolean(typeof(Avx512CD.X64).GetMethod(issupported).Invoke(null, null)) != Avx512CD.X64.IsSupported || + Convert.ToBoolean(typeof(Avx512DQ).GetMethod(issupported).Invoke(null, null)) != Avx512DQ.IsSupported || + Convert.ToBoolean(typeof(Avx512DQ.VL).GetMethod(issupported).Invoke(null, null)) != Avx512DQ.VL.IsSupported || + Convert.ToBoolean(typeof(Avx512DQ.X64).GetMethod(issupported).Invoke(null, null)) != Avx512DQ.X64.IsSupported || + Convert.ToBoolean(typeof(Avx512F).GetMethod(issupported).Invoke(null, null)) != Avx512F.IsSupported || + Convert.ToBoolean(typeof(Avx512F.VL).GetMethod(issupported).Invoke(null, null)) != Avx512F.VL.IsSupported || + Convert.ToBoolean(typeof(Avx512F.X64).GetMethod(issupported).Invoke(null, null)) != Avx512F.X64.IsSupported || + Convert.ToBoolean(typeof(Avx512Vbmi).GetMethod(issupported).Invoke(null, null)) != Avx512Vbmi.IsSupported || + Convert.ToBoolean(typeof(Avx512Vbmi.VL).GetMethod(issupported).Invoke(null, null)) != Avx512Vbmi.VL.IsSupported || + Convert.ToBoolean(typeof(Avx512Vbmi.X64).GetMethod(issupported).Invoke(null, null)) != Avx512Vbmi.X64.IsSupported || + Convert.ToBoolean(typeof(AvxVnni).GetMethod(issupported).Invoke(null, null)) != AvxVnni.IsSupported || + Convert.ToBoolean(typeof(AvxVnni.X64).GetMethod(issupported).Invoke(null, null)) != AvxVnni.X64.IsSupported || + Convert.ToBoolean(typeof(Fma).GetMethod(issupported).Invoke(null, null)) != Fma.IsSupported || + Convert.ToBoolean(typeof(Fma.X64).GetMethod(issupported).Invoke(null, null)) != Fma.X64.IsSupported || + Convert.ToBoolean(typeof(Pclmulqdq).GetMethod(issupported).Invoke(null, null)) != Pclmulqdq.IsSupported || + Convert.ToBoolean(typeof(Pclmulqdq.V256).GetMethod(issupported).Invoke(null, null)) != Pclmulqdq.V256.IsSupported || + Convert.ToBoolean(typeof(Pclmulqdq.V512).GetMethod(issupported).Invoke(null, null)) != Pclmulqdq.V512.IsSupported || + Convert.ToBoolean(typeof(Pclmulqdq.X64).GetMethod(issupported).Invoke(null, null)) != Pclmulqdq.X64.IsSupported || + Convert.ToBoolean(typeof(X86Base).GetMethod(issupported).Invoke(null, null)) != X86Base.IsSupported || + Convert.ToBoolean(typeof(X86Base.X64).GetMethod(issupported).Invoke(null, null)) != X86Base.X64.IsSupported || + Convert.ToBoolean(typeof(X86Serialize).GetMethod(issupported).Invoke(null, null)) != X86Serialize.IsSupported || + Convert.ToBoolean(typeof(X86Serialize.X64).GetMethod(issupported).Invoke(null, null)) != X86Serialize.X64.IsSupported || Convert.ToBoolean(typeof(Avx10v1).GetMethod(issupported).Invoke(null, null)) != Avx10v1.IsSupported || + Convert.ToBoolean(typeof(Avx10v1.X64).GetMethod(issupported).Invoke(null, null)) != Avx10v1.X64.IsSupported || Convert.ToBoolean(typeof(Avx10v1.V512).GetMethod(issupported).Invoke(null, null)) != Avx10v1.V512.IsSupported || Convert.ToBoolean(typeof(Avx10v1.V512.X64).GetMethod(issupported).Invoke(null, null)) != Avx10v1.V512.X64.IsSupported) { diff --git a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs index 800085e5eb247..48c3421ac83e6 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs @@ -284,16 +284,30 @@ public unsafe static void CpuId() testResult = Fail; } + isHierarchyDisabled = isAvxHierarchyDisabled; + + if (IsBitIncorrect(ecx, 10, typeof(Pclmulqdq.V256), Pclmulqdq.V256.IsSupported, "VPCLMULQDQ", ref isHierarchyDisabled)) + { + testResult = Fail; + } + + isHierarchyDisabled = isAvx512HierarchyDisabled; + + if (IsBitIncorrect(ecx, 10, typeof(Pclmulqdq.V512), Pclmulqdq.V512.IsSupported, "VPCLMULQDQ", ref isHierarchyDisabled)) + { + testResult = Fail; + } + (eax, ebx, ecx, edx) = X86Base.CpuId(0x00000007, 0x00000001); isHierarchyDisabled = isAvx2HierarchyDisabled; -#pragma warning disable CA2252 // No need to opt into preview feature for an internal test if (IsBitIncorrect(eax, 4, typeof(AvxVnni), AvxVnni.IsSupported, "AVXVNNI", ref isHierarchyDisabled)) { testResult = Fail; } -#pragma warning restore CA2252 + + isHierarchyDisabled = isAvxHierarchyDisabled | isFmaHierarchyDisabled; if (IsBitIncorrect(edx, 19, typeof(Avx10v1), Avx10v1.IsSupported, "AVX10V1", ref isHierarchyDisabled)) { diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index a7709ab5db7fb..4323a9a3b589f 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -52,6 +52,8 @@ static int Main() bool? ExpectedAes = null; bool? ExpectedLzcnt = null; bool? ExpectedPclmulqdq = null; + bool? ExpectedPclmulqdqV256 = null; + bool? ExpectedPclmulqdqV512 = null; bool? ExpectedSse41 = null; bool? ExpectedSse42 = null; bool? ExpectedPopcnt = null; @@ -75,6 +77,8 @@ static int Main() bool? ExpectedAes = null; bool? ExpectedLzcnt = null; bool? ExpectedPclmulqdq = null; + bool? ExpectedPclmulqdqV256 = null; + bool? ExpectedPclmulqdqV512 = null; bool? ExpectedSse41 = true; bool? ExpectedSse42 = true; bool? ExpectedPopcnt = null; @@ -98,6 +102,8 @@ static int Main() bool? ExpectedAes = null; bool? ExpectedLzcnt = null; bool? ExpectedPclmulqdq = null; + bool? ExpectedPclmulqdqV256 = null; + bool? ExpectedPclmulqdqV512 = null; bool? ExpectedSse41 = true; bool? ExpectedSse42 = true; bool? ExpectedPopcnt = null; @@ -121,6 +127,8 @@ static int Main() bool? ExpectedAes = null; bool? ExpectedLzcnt = null; bool? ExpectedPclmulqdq = null; + bool? ExpectedPclmulqdqV256 = null; + bool? ExpectedPclmulqdqV512 = null; bool? ExpectedSse41 = true; bool? ExpectedSse42 = true; bool? ExpectedPopcnt = null; @@ -144,6 +152,8 @@ static int Main() bool? ExpectedAes = null; bool? ExpectedLzcnt = null; bool? ExpectedPclmulqdq = null; + bool? ExpectedPclmulqdqV256 = null; + bool? ExpectedPclmulqdqV512 = null; bool? ExpectedSse41 = true; bool? ExpectedSse42 = true; bool? ExpectedPopcnt = null; @@ -223,6 +233,8 @@ static int Main() Check("Lzcnt.X64", ExpectedLzcnt, &LzcntX64IsSupported, Lzcnt.X64.IsSupported, () => Lzcnt.X64.LeadingZeroCount(0) == 64); Check("Pclmulqdq", ExpectedPclmulqdq, &PclmulqdqIsSupported, Pclmulqdq.IsSupported, () => Pclmulqdq.CarrylessMultiply(Vector128.Zero, Vector128.Zero, 0).Equals(Vector128.Zero)); + Check("Pclmulqdq.V256", ExpectedPclmulqdqV256, &PclmulqdqV256IsSupported, Pclmulqdq.V256.IsSupported, () => Pclmulqdq.V256.CarrylessMultiply(Vector256.Zero, Vector256.Zero, 0).Equals(Vector256.Zero)); + Check("Pclmulqdq.V512", ExpectedPclmulqdqV512, &PclmulqdqV512IsSupported, Pclmulqdq.V512.IsSupported, () => Pclmulqdq.V512.CarrylessMultiply(Vector512.Zero, Vector512.Zero, 0).Equals(Vector512.Zero)); Check("Pclmulqdq.X64", ExpectedPclmulqdq, &PclmulqdqX64IsSupported, Pclmulqdq.X64.IsSupported, null); Check("Popcnt", ExpectedPopcnt, &PopcntIsSupported, Popcnt.IsSupported, () => Popcnt.PopCount(0) == 0); @@ -293,6 +305,8 @@ static int Main() static bool LzcntIsSupported() => Lzcnt.IsSupported; static bool LzcntX64IsSupported() => Lzcnt.X64.IsSupported; static bool PclmulqdqIsSupported() => Pclmulqdq.IsSupported; + static bool PclmulqdqV256IsSupported() => Pclmulqdq.V256.IsSupported; + static bool PclmulqdqV512IsSupported() => Pclmulqdq.V512.IsSupported; static bool PclmulqdqX64IsSupported() => Pclmulqdq.X64.IsSupported; static bool PopcntIsSupported() => Popcnt.IsSupported; static bool PopcntX64IsSupported() => Popcnt.X64.IsSupported; From db0a2714831c2b442d0d1078446499774d38547d Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Fri, 8 Nov 2024 16:29:47 -0800 Subject: [PATCH 11/15] add missing brace --- .../Intrinsics/X86/Pclmulqdq.Wide.PlatformNotSupported.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.Wide.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.Wide.PlatformNotSupported.cs index 795323e6c626f..1555870acefbc 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.Wide.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.Wide.PlatformNotSupported.cs @@ -51,4 +51,5 @@ internal V512() { } /// public static Vector512 CarrylessMultiply(Vector512 left, Vector512 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } } + } } From cc11a01b26bcb87e832d10aee4b09c784c02e74e Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Fri, 8 Nov 2024 17:16:04 -0800 Subject: [PATCH 12/15] fix smoketest expected results --- .../SmokeTests/HardwareIntrinsics/Program.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index 4323a9a3b589f..7d891dbac1a41 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -52,8 +52,8 @@ static int Main() bool? ExpectedAes = null; bool? ExpectedLzcnt = null; bool? ExpectedPclmulqdq = null; - bool? ExpectedPclmulqdqV256 = null; - bool? ExpectedPclmulqdqV512 = null; + bool? ExpectedPclmulqdqV256 = false; + bool? ExpectedPclmulqdqV512 = false; bool? ExpectedSse41 = null; bool? ExpectedSse42 = null; bool? ExpectedPopcnt = null; @@ -77,8 +77,8 @@ static int Main() bool? ExpectedAes = null; bool? ExpectedLzcnt = null; bool? ExpectedPclmulqdq = null; - bool? ExpectedPclmulqdqV256 = null; - bool? ExpectedPclmulqdqV512 = null; + bool? ExpectedPclmulqdqV256 = false; + bool? ExpectedPclmulqdqV512 = false; bool? ExpectedSse41 = true; bool? ExpectedSse42 = true; bool? ExpectedPopcnt = null; @@ -103,7 +103,7 @@ static int Main() bool? ExpectedLzcnt = null; bool? ExpectedPclmulqdq = null; bool? ExpectedPclmulqdqV256 = null; - bool? ExpectedPclmulqdqV512 = null; + bool? ExpectedPclmulqdqV512 = false; bool? ExpectedSse41 = true; bool? ExpectedSse42 = true; bool? ExpectedPopcnt = null; @@ -128,7 +128,7 @@ static int Main() bool? ExpectedLzcnt = null; bool? ExpectedPclmulqdq = null; bool? ExpectedPclmulqdqV256 = null; - bool? ExpectedPclmulqdqV512 = null; + bool? ExpectedPclmulqdqV512 = false; bool? ExpectedSse41 = true; bool? ExpectedSse42 = true; bool? ExpectedPopcnt = null; From 4d19213b420e1ecfaf82e70552643d598ae1528d Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Fri, 8 Nov 2024 19:18:25 -0800 Subject: [PATCH 13/15] fix suffix order --- src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index 34ffde36997cf..1717368480c72 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -78,7 +78,7 @@ public static string GetHardwareIntrinsicId(TargetArchitecture architecture, Typ string suffix = ""; while (potentialType.ContainingType is MetadataType containingType) { - suffix += $"_{potentialType.Name}"; + suffix = $"_{potentialType.Name}{suffix}"; potentialType = containingType; } From ff959adafed7b7596bc741190f83facd3f2ab64c Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Sun, 10 Nov 2024 15:19:37 -0800 Subject: [PATCH 14/15] handle implied V512 support in AOT --- src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index 1717368480c72..6afe17d2c1cdc 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -352,11 +352,14 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, if (_supportedInstructionSets.Contains("avx512vbmi")) _supportedInstructionSets.Add("avx512vbmi_vl"); - // Having AVX10V1 and any AVX-512 instruction sets enabled, - // automatically implies AVX10V1-V512 as well. + // These ISAs should automatically extend to 512-bit if + // AVX-512 is enabled. if (_supportedInstructionSets.Contains("avx10v1")) _supportedInstructionSets.Add("avx10v1_v512"); + + if (_supportedInstructionSets.Contains("vpclmul")) + _supportedInstructionSets.Add("vpclmul_v512"); } foreach (string supported in _supportedInstructionSets) From 2b466f7ca7426e3bc7142f61d8f8fe12e7c21f32 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Sun, 10 Nov 2024 19:50:34 -0800 Subject: [PATCH 15/15] remove more unnecessary X64 ISA variants --- src/coreclr/inc/corinfoinstructionset.h | 52 +++++---------- src/coreclr/jit/hwintrinsic.cpp | 2 - src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 1 - src/coreclr/jit/hwintrinsicxarch.cpp | 2 - .../Compiler/HardwareIntrinsicHelpers.cs | 2 - .../Runtime/ReadyToRunInstructionSetHelper.cs | 4 -- .../JitInterface/CorInfoInstructionSet.cs | 66 +++++-------------- .../ThunkGenerator/InstructionSetDesc.txt | 2 - 8 files changed, 32 insertions(+), 99 deletions(-) diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 698778bfd1e47..f0ce61ebfc4cb 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -100,16 +100,14 @@ enum CORINFO_InstructionSet InstructionSet_PCLMULQDQ_X64=55, InstructionSet_POPCNT_X64=56, InstructionSet_AVXVNNI_X64=57, - InstructionSet_MOVBE_X64=58, - InstructionSet_X86Serialize_X64=59, - InstructionSet_EVEX_X64=60, - InstructionSet_AVX512F_X64=61, - InstructionSet_AVX512BW_X64=62, - InstructionSet_AVX512CD_X64=63, - InstructionSet_AVX512DQ_X64=64, - InstructionSet_AVX512VBMI_X64=65, - InstructionSet_AVX10v1_X64=66, - InstructionSet_AVX10v1_V512_X64=67, + InstructionSet_X86Serialize_X64=58, + InstructionSet_AVX512F_X64=59, + InstructionSet_AVX512BW_X64=60, + InstructionSet_AVX512CD_X64=61, + InstructionSet_AVX512DQ_X64=62, + InstructionSet_AVX512VBMI_X64=63, + InstructionSet_AVX10v1_X64=64, + InstructionSet_AVX10v1_V512_X64=65, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -169,16 +167,14 @@ enum CORINFO_InstructionSet InstructionSet_PCLMULQDQ_X64=55, InstructionSet_POPCNT_X64=56, InstructionSet_AVXVNNI_X64=57, - InstructionSet_MOVBE_X64=58, - InstructionSet_X86Serialize_X64=59, - InstructionSet_EVEX_X64=60, - InstructionSet_AVX512F_X64=61, - InstructionSet_AVX512BW_X64=62, - InstructionSet_AVX512CD_X64=63, - InstructionSet_AVX512DQ_X64=64, - InstructionSet_AVX512VBMI_X64=65, - InstructionSet_AVX10v1_X64=66, - InstructionSet_AVX10v1_V512_X64=67, + InstructionSet_X86Serialize_X64=58, + InstructionSet_AVX512F_X64=59, + InstructionSet_AVX512BW_X64=60, + InstructionSet_AVX512CD_X64=61, + InstructionSet_AVX512DQ_X64=62, + InstructionSet_AVX512VBMI_X64=63, + InstructionSet_AVX10v1_X64=64, + InstructionSet_AVX10v1_V512_X64=65, #endif // TARGET_X86 }; @@ -328,12 +324,8 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_POPCNT_X64); if (HasInstructionSet(InstructionSet_AVXVNNI)) AddInstructionSet(InstructionSet_AVXVNNI_X64); - if (HasInstructionSet(InstructionSet_MOVBE)) - AddInstructionSet(InstructionSet_MOVBE_X64); if (HasInstructionSet(InstructionSet_X86Serialize)) AddInstructionSet(InstructionSet_X86Serialize_X64); - if (HasInstructionSet(InstructionSet_EVEX)) - AddInstructionSet(InstructionSet_EVEX_X64); if (HasInstructionSet(InstructionSet_AVX512F)) AddInstructionSet(InstructionSet_AVX512F_X64); if (HasInstructionSet(InstructionSet_AVX512BW)) @@ -496,18 +488,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI_X64); - if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_MOVBE_X64)) - resultflags.RemoveInstructionSet(InstructionSet_MOVBE); - if (resultflags.HasInstructionSet(InstructionSet_MOVBE_X64) && !resultflags.HasInstructionSet(InstructionSet_MOVBE)) - resultflags.RemoveInstructionSet(InstructionSet_MOVBE_X64); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Serialize_X64)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize_X64) && !resultflags.HasInstructionSet(InstructionSet_X86Serialize)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize_X64); - if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_EVEX_X64)) - resultflags.RemoveInstructionSet(InstructionSet_EVEX); - if (resultflags.HasInstructionSet(InstructionSet_EVEX_X64) && !resultflags.HasInstructionSet(InstructionSet_EVEX)) - resultflags.RemoveInstructionSet(InstructionSet_EVEX_X64); if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_X64)) resultflags.RemoveInstructionSet(InstructionSet_AVX512F); if (resultflags.HasInstructionSet(InstructionSet_AVX512F_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) @@ -910,16 +894,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "AVXVNNI_X64"; case InstructionSet_MOVBE : return "MOVBE"; - case InstructionSet_MOVBE_X64 : - return "MOVBE_X64"; case InstructionSet_X86Serialize : return "X86Serialize"; case InstructionSet_X86Serialize_X64 : return "X86Serialize_X64"; case InstructionSet_EVEX : return "EVEX"; - case InstructionSet_EVEX_X64 : - return "EVEX_X64"; case InstructionSet_AVX512F : return "AVX512F"; case InstructionSet_AVX512F_X64 : diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 3eee5703ed446..a99b02991af79 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -823,9 +823,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // PCLMULQDQ_X64 { FIRST_NI_POPCNT_X64, LAST_NI_POPCNT_X64 }, { NI_Illegal, NI_Illegal }, // AVXVNNI_X64 - { NI_Illegal, NI_Illegal }, // MOVBE_X64 { NI_Illegal, NI_Illegal }, // X86Serialize_X64 - { NI_Illegal, NI_Illegal }, // EVEX_X64 { FIRST_NI_AVX512F_X64, LAST_NI_AVX512F_X64 }, { NI_Illegal, NI_Illegal }, // AVX512BW_X64 { NI_Illegal, NI_Illegal }, // AVX512CD_X64 diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index c5b875a9630c2..db24d76f40f21 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -782,7 +782,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case InstructionSet_AVX10v1_V512: case InstructionSet_AVX10v1_V512_X64: case InstructionSet_EVEX: - case InstructionSet_EVEX_X64: { genAvxFamilyIntrinsic(node, instOptions); break; diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 28d415fcd9d93..8828c4e77322d 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -68,8 +68,6 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_POPCNT_X64; case InstructionSet_X86Serialize: return InstructionSet_X86Serialize_X64; - case InstructionSet_EVEX: - return InstructionSet_EVEX_X64; default: return InstructionSet_NONE; } diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index 7ebfec4097f6d..677c99a32264e 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -182,7 +182,6 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_AVXVNNI => AvxVnni, InstructionSet.X64_AVXVNNI_X64 => AvxVnni, InstructionSet.X64_MOVBE => Movbe, - InstructionSet.X64_MOVBE_X64 => Movbe, InstructionSet.X64_AVX512F => Avx512, InstructionSet.X64_AVX512F_X64 => Avx512, InstructionSet.X64_AVX512F_VL => Avx512, @@ -205,7 +204,6 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_AVX10v1_V512 => (Avx10v1 | Avx512), InstructionSet.X64_AVX10v1_V512_X64 => (Avx10v1 | Avx512), InstructionSet.X64_EVEX => Evex, - InstructionSet.X64_EVEX_X64 => Evex, InstructionSet.X64_PCLMULQDQ_V256 => Vpclmulqdq, InstructionSet.X64_PCLMULQDQ_V512 => (Vpclmulqdq | Avx512), diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 27cbcc5a210ef..09c4c039b8795 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -97,11 +97,9 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni; case InstructionSet.X64_AVXVNNI_X64: return ReadyToRunInstructionSet.AvxVnni; case InstructionSet.X64_MOVBE: return ReadyToRunInstructionSet.Movbe; - case InstructionSet.X64_MOVBE_X64: return ReadyToRunInstructionSet.Movbe; case InstructionSet.X64_X86Serialize: return ReadyToRunInstructionSet.X86Serialize; case InstructionSet.X64_X86Serialize_X64: return ReadyToRunInstructionSet.X86Serialize; case InstructionSet.X64_EVEX: return ReadyToRunInstructionSet.EVEX; - case InstructionSet.X64_EVEX_X64: return ReadyToRunInstructionSet.EVEX; case InstructionSet.X64_AVX512F: return ReadyToRunInstructionSet.Avx512F; case InstructionSet.X64_AVX512F_X64: return ReadyToRunInstructionSet.Avx512F; case InstructionSet.X64_AVX512F_VL: return ReadyToRunInstructionSet.Avx512F_VL; @@ -173,11 +171,9 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni; case InstructionSet.X86_AVXVNNI_X64: return null; case InstructionSet.X86_MOVBE: return ReadyToRunInstructionSet.Movbe; - case InstructionSet.X86_MOVBE_X64: return null; case InstructionSet.X86_X86Serialize: return ReadyToRunInstructionSet.X86Serialize; case InstructionSet.X86_X86Serialize_X64: return null; case InstructionSet.X86_EVEX: return ReadyToRunInstructionSet.EVEX; - case InstructionSet.X86_EVEX_X64: return null; case InstructionSet.X86_AVX512F: return ReadyToRunInstructionSet.Avx512F; case InstructionSet.X86_AVX512F_X64: return null; case InstructionSet.X86_AVX512F_VL: return ReadyToRunInstructionSet.Avx512F_VL; diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 6ed64fc23dd5e..94b5f7d857e73 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -98,9 +98,7 @@ public enum InstructionSet X64_PCLMULQDQ_X64 = InstructionSet_X64.PCLMULQDQ_X64, X64_POPCNT_X64 = InstructionSet_X64.POPCNT_X64, X64_AVXVNNI_X64 = InstructionSet_X64.AVXVNNI_X64, - X64_MOVBE_X64 = InstructionSet_X64.MOVBE_X64, X64_X86Serialize_X64 = InstructionSet_X64.X86Serialize_X64, - X64_EVEX_X64 = InstructionSet_X64.EVEX_X64, X64_AVX512F_X64 = InstructionSet_X64.AVX512F_X64, X64_AVX512BW_X64 = InstructionSet_X64.AVX512BW_X64, X64_AVX512CD_X64 = InstructionSet_X64.AVX512CD_X64, @@ -165,9 +163,7 @@ public enum InstructionSet X86_PCLMULQDQ_X64 = InstructionSet_X86.PCLMULQDQ_X64, X86_POPCNT_X64 = InstructionSet_X86.POPCNT_X64, X86_AVXVNNI_X64 = InstructionSet_X86.AVXVNNI_X64, - X86_MOVBE_X64 = InstructionSet_X86.MOVBE_X64, X86_X86Serialize_X64 = InstructionSet_X86.X86Serialize_X64, - X86_EVEX_X64 = InstructionSet_X86.EVEX_X64, X86_AVX512F_X64 = InstructionSet_X86.AVX512F_X64, X86_AVX512BW_X64 = InstructionSet_X86.AVX512BW_X64, X86_AVX512CD_X64 = InstructionSet_X86.AVX512CD_X64, @@ -268,16 +264,14 @@ public enum InstructionSet_X64 PCLMULQDQ_X64 = 55, POPCNT_X64 = 56, AVXVNNI_X64 = 57, - MOVBE_X64 = 58, - X86Serialize_X64 = 59, - EVEX_X64 = 60, - AVX512F_X64 = 61, - AVX512BW_X64 = 62, - AVX512CD_X64 = 63, - AVX512DQ_X64 = 64, - AVX512VBMI_X64 = 65, - AVX10v1_X64 = 66, - AVX10v1_V512_X64 = 67, + X86Serialize_X64 = 58, + AVX512F_X64 = 59, + AVX512BW_X64 = 60, + AVX512CD_X64 = 61, + AVX512DQ_X64 = 62, + AVX512VBMI_X64 = 63, + AVX10v1_X64 = 64, + AVX10v1_V512_X64 = 65, } public enum InstructionSet_X86 @@ -341,16 +335,14 @@ public enum InstructionSet_X86 PCLMULQDQ_X64 = 55, POPCNT_X64 = 56, AVXVNNI_X64 = 57, - MOVBE_X64 = 58, - X86Serialize_X64 = 59, - EVEX_X64 = 60, - AVX512F_X64 = 61, - AVX512BW_X64 = 62, - AVX512CD_X64 = 63, - AVX512DQ_X64 = 64, - AVX512VBMI_X64 = 65, - AVX10v1_X64 = 66, - AVX10v1_V512_X64 = 67, + X86Serialize_X64 = 58, + AVX512F_X64 = 59, + AVX512BW_X64 = 60, + AVX512CD_X64 = 61, + AVX512DQ_X64 = 62, + AVX512VBMI_X64 = 63, + AVX10v1_X64 = 64, + AVX10v1_V512_X64 = 65, } public unsafe struct InstructionSetFlags : IEnumerable @@ -638,18 +630,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); - if (resultflags.HasInstructionSet(InstructionSet.X64_MOVBE)) - resultflags.AddInstructionSet(InstructionSet.X64_MOVBE_X64); - if (resultflags.HasInstructionSet(InstructionSet.X64_MOVBE_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_MOVBE); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); - if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX)) - resultflags.AddInstructionSet(InstructionSet.X64_EVEX_X64); - if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_EVEX); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_X64)) @@ -1005,12 +989,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_POPCNT); if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); - if (resultflags.HasInstructionSet(InstructionSet.X64_MOVBE_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_MOVBE); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); - if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_EVEX); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_X64)) @@ -1476,12 +1456,8 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) AddInstructionSet(InstructionSet.X64_POPCNT_X64); if (HasInstructionSet(InstructionSet.X64_AVXVNNI)) AddInstructionSet(InstructionSet.X64_AVXVNNI_X64); - if (HasInstructionSet(InstructionSet.X64_MOVBE)) - AddInstructionSet(InstructionSet.X64_MOVBE_X64); if (HasInstructionSet(InstructionSet.X64_X86Serialize)) AddInstructionSet(InstructionSet.X64_X86Serialize_X64); - if (HasInstructionSet(InstructionSet.X64_EVEX)) - AddInstructionSet(InstructionSet.X64_EVEX_X64); if (HasInstructionSet(InstructionSet.X64_AVX512F)) AddInstructionSet(InstructionSet.X64_AVX512F_X64); if (HasInstructionSet(InstructionSet.X64_AVX512BW)) @@ -1538,9 +1514,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X64_PCLMULQDQ_X64); AddInstructionSet(InstructionSet.X64_POPCNT_X64); AddInstructionSet(InstructionSet.X64_AVXVNNI_X64); - AddInstructionSet(InstructionSet.X64_MOVBE_X64); AddInstructionSet(InstructionSet.X64_X86Serialize_X64); - AddInstructionSet(InstructionSet.X64_EVEX_X64); AddInstructionSet(InstructionSet.X64_AVX512F_X64); AddInstructionSet(InstructionSet.X64_AVX512BW_X64); AddInstructionSet(InstructionSet.X64_AVX512CD_X64); @@ -1568,9 +1542,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X86_PCLMULQDQ_X64); AddInstructionSet(InstructionSet.X86_POPCNT_X64); AddInstructionSet(InstructionSet.X86_AVXVNNI_X64); - AddInstructionSet(InstructionSet.X86_MOVBE_X64); AddInstructionSet(InstructionSet.X86_X86Serialize_X64); - AddInstructionSet(InstructionSet.X86_EVEX_X64); AddInstructionSet(InstructionSet.X86_AVX512F_X64); AddInstructionSet(InstructionSet.X86_AVX512BW_X64); AddInstructionSet(InstructionSet.X86_AVX512CD_X64); @@ -1802,9 +1774,6 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite { return InstructionSet.X64_AVXVNNI; } case "Movbe": - if (nestedTypeName == "X64") - { return InstructionSet.X64_MOVBE_X64; } - else { return InstructionSet.X64_MOVBE; } case "X86Serialize": @@ -1814,9 +1783,6 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite { return InstructionSet.X64_X86Serialize; } case "EVEX": - if (nestedTypeName == "X64") - { return InstructionSet.X64_EVEX_X64; } - else { return InstructionSet.X64_EVEX; } case "Avx512F": diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 193e0aa8d6b0c..4535f90fc64ef 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -83,9 +83,7 @@ instructionset64bit,X86 ,LZCNT instructionset64bit,X86 ,PCLMULQDQ instructionset64bit,X86 ,POPCNT instructionset64bit,X86 ,AVXVNNI -instructionset64bit,X86 ,MOVBE instructionset64bit,X86 ,X86Serialize -instructionset64bit,X86 ,EVEX instructionset64bit,X86 ,AVX512F instructionset64bit,X86 ,AVX512BW instructionset64bit,X86 ,AVX512CD